2026-06-15 17:25:28 +08:00
|
|
|
|
"""标注数据湖:批次目录、任务列表、标注 JSON、媒体文件(CVAT 为唯一标注引擎)。"""
|
2026-06-03 11:40:21 +08:00
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
|
|
import hashlib
|
|
|
|
|
|
import json
|
|
|
|
|
|
from datetime import datetime, timezone
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
from typing import Any
|
|
|
|
|
|
from urllib.parse import quote
|
|
|
|
|
|
|
|
|
|
|
|
from as_platform.data.batch import IMG_EXTS
|
|
|
|
|
|
from as_platform.data.core import load_wf, proj_root, resolve_pack_dir
|
|
|
|
|
|
from as_platform.db.engine import session_scope
|
|
|
|
|
|
from as_platform.db.models import LabelingCampaign, User
|
2026-06-15 17:25:28 +08:00
|
|
|
|
from as_platform.labeling.scope import enrich_batch_labels, load_dms_registry
|
2026-06-03 11:40:21 +08:00
|
|
|
|
|
2026-06-15 17:25:28 +08:00
|
|
|
|
# 历史目录名保留,导出脚本仍读取 labels/ls_annotations/
|
2026-06-03 11:40:21 +08:00
|
|
|
|
ANNOTATIONS_DIRNAME = "ls_annotations"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _load_campaign(campaign_id: str) -> LabelingCampaign | None:
|
|
|
|
|
|
with session_scope() as db:
|
|
|
|
|
|
return db.get(LabelingCampaign, campaign_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def resolve_campaign_batch_dir(camp: LabelingCampaign) -> Path:
|
|
|
|
|
|
wf = load_wf()
|
|
|
|
|
|
root = proj_root(wf, camp.project)
|
|
|
|
|
|
if camp.project == "dms":
|
2026-06-15 17:25:28 +08:00
|
|
|
|
import yaml
|
|
|
|
|
|
|
2026-06-03 11:40:21 +08:00
|
|
|
|
reg = yaml.safe_load((root / wf["projects"]["dms"]["registry"]).read_text(encoding="utf-8"))
|
|
|
|
|
|
tcfg = reg["tasks"][camp.task]
|
|
|
|
|
|
if camp.location == "sources":
|
|
|
|
|
|
if not camp.pack:
|
|
|
|
|
|
raise ValueError("sources 批次需要 pack")
|
|
|
|
|
|
pack_dir = resolve_pack_dir("dms", root, wf, camp.pack)
|
|
|
|
|
|
src_sub = (reg.get("ingest") or {}).get("sources_subdir", "sources")
|
|
|
|
|
|
return (pack_dir / tcfg["task_dir"] / src_sub / camp.batch).resolve()
|
|
|
|
|
|
if tcfg.get("type") == "multi" and camp.mode:
|
|
|
|
|
|
from as_platform.labeling.scope import _dms_registry_api
|
|
|
|
|
|
|
|
|
|
|
|
get_mode_config, resolve_task_id, _ = _dms_registry_api()
|
|
|
|
|
|
task_r, mode_r = resolve_task_id(camp.task, camp.mode)
|
|
|
|
|
|
mcfg = get_mode_config(task_r, mode_r, reg)
|
|
|
|
|
|
inbox_rel = mcfg.get("inbox")
|
|
|
|
|
|
if inbox_rel:
|
|
|
|
|
|
return (root / inbox_rel).resolve()
|
|
|
|
|
|
mode = camp.mode
|
|
|
|
|
|
if mode:
|
|
|
|
|
|
return (root / "inbox" / camp.task / mode / camp.batch).resolve()
|
|
|
|
|
|
return (root / "inbox" / camp.task / camp.batch).resolve()
|
2026-06-15 17:25:28 +08:00
|
|
|
|
if camp.project == "adas":
|
|
|
|
|
|
if not camp.task:
|
|
|
|
|
|
raise ValueError("adas campaign 需要 task")
|
|
|
|
|
|
return (root / "inbox" / camp.task / camp.batch).resolve()
|
2026-06-03 11:40:21 +08:00
|
|
|
|
if camp.location == "pack" and camp.pack:
|
|
|
|
|
|
try:
|
|
|
|
|
|
from as_platform.data.core import resolve_pack
|
|
|
|
|
|
|
|
|
|
|
|
rel = resolve_pack("lane", root, wf, camp.pack)
|
|
|
|
|
|
return (root / rel).resolve()
|
|
|
|
|
|
except ValueError:
|
|
|
|
|
|
return (root / camp.pack).resolve()
|
|
|
|
|
|
return (root / "inbox" / camp.batch).resolve()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _iter_batch_images(batch_dir: Path) -> list[Path]:
|
|
|
|
|
|
if not batch_dir.is_dir():
|
|
|
|
|
|
return []
|
|
|
|
|
|
candidates: list[Path] = []
|
|
|
|
|
|
search_roots = [
|
|
|
|
|
|
batch_dir / "images",
|
|
|
|
|
|
batch_dir / "images" / "train",
|
|
|
|
|
|
batch_dir,
|
|
|
|
|
|
]
|
|
|
|
|
|
seen: set[str] = set()
|
|
|
|
|
|
for root in search_roots:
|
|
|
|
|
|
if not root.is_dir():
|
|
|
|
|
|
continue
|
|
|
|
|
|
for p in sorted(root.rglob("*")):
|
|
|
|
|
|
if not p.is_file() or p.suffix not in IMG_EXTS:
|
|
|
|
|
|
continue
|
|
|
|
|
|
key = str(p.resolve())
|
|
|
|
|
|
if key in seen:
|
|
|
|
|
|
continue
|
|
|
|
|
|
seen.add(key)
|
|
|
|
|
|
candidates.append(p.resolve())
|
|
|
|
|
|
return candidates
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _task_id_for_image(image_path: Path, batch_dir: Path) -> str:
|
|
|
|
|
|
try:
|
|
|
|
|
|
rel = image_path.relative_to(batch_dir)
|
|
|
|
|
|
stem = rel.as_posix()
|
|
|
|
|
|
except ValueError:
|
|
|
|
|
|
stem = image_path.stem
|
|
|
|
|
|
return hashlib.sha256(stem.encode()).hexdigest()[:16]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _annotations_dir(batch_dir: Path) -> Path:
|
|
|
|
|
|
d = batch_dir / "labels" / ANNOTATIONS_DIRNAME
|
|
|
|
|
|
d.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
return d
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def campaign_bootstrap(campaign_id: str) -> dict[str, Any]:
|
|
|
|
|
|
with session_scope() as db:
|
|
|
|
|
|
camp = db.get(LabelingCampaign, campaign_id)
|
|
|
|
|
|
if not camp:
|
|
|
|
|
|
raise FileNotFoundError("campaign not found")
|
|
|
|
|
|
reg = load_dms_registry() if camp.project == "dms" else None
|
|
|
|
|
|
row = enrich_batch_labels(camp.to_dict(), reg)
|
|
|
|
|
|
try:
|
|
|
|
|
|
batch_dir = resolve_campaign_batch_dir(camp)
|
|
|
|
|
|
row["batch_path"] = str(batch_dir)
|
|
|
|
|
|
row["image_count"] = len(_iter_batch_images(batch_dir))
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
row["batch_path"] = None
|
|
|
|
|
|
row["image_count"] = 0
|
|
|
|
|
|
row["batch_error"] = str(e)
|
2026-06-15 17:25:28 +08:00
|
|
|
|
row["editor"] = "cvat"
|
|
|
|
|
|
row["cvat_task_id"] = camp.cvat_task_id
|
|
|
|
|
|
row["cvat_job_url"] = camp.cvat_job_url
|
2026-06-03 11:40:21 +08:00
|
|
|
|
return row
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def campaign_tasks(
|
|
|
|
|
|
campaign_id: str,
|
|
|
|
|
|
*,
|
|
|
|
|
|
offset: int = 0,
|
|
|
|
|
|
limit: int = 50,
|
|
|
|
|
|
user: User | None = None,
|
|
|
|
|
|
assignee: str | None = None,
|
|
|
|
|
|
) -> dict[str, Any]:
|
|
|
|
|
|
with session_scope() as db:
|
|
|
|
|
|
camp = db.get(LabelingCampaign, campaign_id)
|
|
|
|
|
|
if not camp:
|
|
|
|
|
|
raise FileNotFoundError("campaign not found")
|
|
|
|
|
|
batch_dir = resolve_campaign_batch_dir(camp)
|
|
|
|
|
|
images = _iter_batch_images(batch_dir)
|
|
|
|
|
|
from as_platform.labeling.progress import get_assigned_task_ids, user_is_coordinator
|
|
|
|
|
|
|
|
|
|
|
|
filter_ids: set[str] | None = None
|
|
|
|
|
|
if assignee == "me" and user:
|
|
|
|
|
|
filter_ids = get_assigned_task_ids(campaign_id, user.id)
|
|
|
|
|
|
if not filter_ids and not user_is_coordinator(user):
|
|
|
|
|
|
return {
|
|
|
|
|
|
"tasks": [],
|
|
|
|
|
|
"total": 0,
|
|
|
|
|
|
"offset": offset,
|
|
|
|
|
|
"limit": limit,
|
|
|
|
|
|
"hint": "暂无分配给您的任务,请联系协调员在送标工作台均分任务",
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if filter_ids is not None:
|
|
|
|
|
|
filtered = [img for img in images if _task_id_for_image(img, batch_dir) in filter_ids]
|
|
|
|
|
|
images = filtered
|
|
|
|
|
|
|
|
|
|
|
|
total = len(images)
|
|
|
|
|
|
slice_imgs = images[offset : offset + limit]
|
|
|
|
|
|
tasks: list[dict[str, Any]] = []
|
|
|
|
|
|
for img in slice_imgs:
|
|
|
|
|
|
tid = _task_id_for_image(img, batch_dir)
|
|
|
|
|
|
try:
|
|
|
|
|
|
rel = img.relative_to(batch_dir).as_posix()
|
|
|
|
|
|
except ValueError:
|
|
|
|
|
|
rel = img.name
|
|
|
|
|
|
media_path = quote(rel, safe="/")
|
|
|
|
|
|
tasks.append(
|
|
|
|
|
|
{
|
|
|
|
|
|
"id": tid,
|
|
|
|
|
|
"data": {
|
|
|
|
|
|
"image": f"/api/v1/labeling/media/{campaign_id}/{media_path}",
|
|
|
|
|
|
},
|
|
|
|
|
|
"meta": {"filename": img.name, "relative_path": rel},
|
|
|
|
|
|
}
|
|
|
|
|
|
)
|
|
|
|
|
|
out: dict[str, Any] = {"tasks": tasks, "total": total, "offset": offset, "limit": limit}
|
|
|
|
|
|
if filter_ids is not None and user and assignee == "me":
|
|
|
|
|
|
out["my_assigned"] = len(filter_ids)
|
|
|
|
|
|
return out
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def resolve_media_file(campaign_id: str, rel_path: str) -> Path:
|
|
|
|
|
|
with session_scope() as db:
|
|
|
|
|
|
camp = db.get(LabelingCampaign, campaign_id)
|
|
|
|
|
|
if not camp:
|
|
|
|
|
|
raise FileNotFoundError("campaign not found")
|
|
|
|
|
|
batch_dir = resolve_campaign_batch_dir(camp)
|
|
|
|
|
|
clean = Path(rel_path)
|
|
|
|
|
|
if clean.is_absolute() or ".." in clean.parts:
|
|
|
|
|
|
raise PermissionError("invalid path")
|
|
|
|
|
|
target = (batch_dir / clean).resolve()
|
|
|
|
|
|
if not target.is_file() or not target.is_relative_to(batch_dir.resolve()):
|
|
|
|
|
|
raise FileNotFoundError("media not found")
|
|
|
|
|
|
return target
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_annotation(campaign_id: str, task_id: str) -> dict[str, Any]:
|
|
|
|
|
|
with session_scope() as db:
|
|
|
|
|
|
camp = db.get(LabelingCampaign, campaign_id)
|
|
|
|
|
|
if not camp:
|
|
|
|
|
|
raise FileNotFoundError("campaign not found")
|
|
|
|
|
|
batch_dir = resolve_campaign_batch_dir(camp)
|
|
|
|
|
|
path = _annotations_dir(batch_dir) / f"{task_id}.json"
|
|
|
|
|
|
if not path.is_file():
|
|
|
|
|
|
return {"task_id": task_id, "result": None, "annotations": []}
|
|
|
|
|
|
data = json.loads(path.read_text(encoding="utf-8"))
|
|
|
|
|
|
return data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def save_annotation(
|
|
|
|
|
|
campaign_id: str,
|
|
|
|
|
|
task_id: str,
|
|
|
|
|
|
payload: dict[str, Any],
|
|
|
|
|
|
*,
|
|
|
|
|
|
user: User | None = None,
|
|
|
|
|
|
) -> dict[str, Any]:
|
|
|
|
|
|
from as_platform.labeling.progress import assert_can_save_task, mark_task_completed
|
|
|
|
|
|
|
|
|
|
|
|
if user:
|
|
|
|
|
|
assert_can_save_task(campaign_id, task_id, user)
|
|
|
|
|
|
with session_scope() as db:
|
|
|
|
|
|
camp = db.get(LabelingCampaign, campaign_id)
|
|
|
|
|
|
if not camp:
|
|
|
|
|
|
raise FileNotFoundError("campaign not found")
|
|
|
|
|
|
batch_dir = resolve_campaign_batch_dir(camp)
|
|
|
|
|
|
path = _annotations_dir(batch_dir) / f"{task_id}.json"
|
|
|
|
|
|
now = datetime.now(timezone.utc).isoformat()
|
2026-06-15 17:25:28 +08:00
|
|
|
|
extra: dict[str, Any] = {"source": "hsap", "saved_at": now}
|
2026-06-03 11:40:21 +08:00
|
|
|
|
if user:
|
|
|
|
|
|
extra["completed_by_user_id"] = user.id
|
|
|
|
|
|
extra["completed_at"] = now
|
|
|
|
|
|
out = {"task_id": task_id, **payload, **extra}
|
|
|
|
|
|
path.write_text(json.dumps(out, ensure_ascii=False, indent=2), encoding="utf-8")
|
|
|
|
|
|
if user and _annotation_has_result(path):
|
|
|
|
|
|
mark_task_completed(campaign_id, task_id, user.id)
|
|
|
|
|
|
return {"ok": True, "path": str(path)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _annotation_has_result(path: Path) -> bool:
|
|
|
|
|
|
if not path.is_file():
|
|
|
|
|
|
return False
|
|
|
|
|
|
try:
|
|
|
|
|
|
data = json.loads(path.read_text(encoding="utf-8"))
|
|
|
|
|
|
except (json.JSONDecodeError, OSError):
|
|
|
|
|
|
return False
|
|
|
|
|
|
result = data.get("result")
|
|
|
|
|
|
if result is None:
|
|
|
|
|
|
return False
|
|
|
|
|
|
if isinstance(result, list):
|
|
|
|
|
|
return len(result) > 0
|
|
|
|
|
|
if isinstance(result, dict):
|
|
|
|
|
|
return len(result) > 0
|
|
|
|
|
|
return bool(result)
|