Files
Chengfang Lu 672ef61e17 feat: CVAT 标注引擎、我的标注收件箱与 ADAS Cuboid 送标
- 统一标注引擎为 CVAT:客户端/配置/格式转换、iframe 标注页、docker-compose.cvat.yml 与 no_auth 补丁
- 移除 Label Studio 相关配置与构建脚本,清理 embedded.bak 备份与误提交的 node_modules
- 新增「我的标注」:跨 Campaign 收件箱、逐张清单、CVAT frame 跳转
- 飞书任务分配:通讯录同步选人、按量分配、分配后 DM 通知(含 my-tasks 链接)
- ADAS cuboid_7cls 数据湖接入:workflow 路径、register-batch、开标上传与标注同步
- 数据湖挂载 AS_DATA_LAKE_ROOT、datasets/adas 符号链接、reset_labeling 运维脚本
- 补充 docs/HANDOVER.md 项目交接文档

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-15 17:25:28 +08:00

257 lines
9.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""标注数据湖:批次目录、任务列表、标注 JSON、媒体文件CVAT 为唯一标注引擎)。"""
from __future__ import annotations
import hashlib
import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from urllib.parse import quote
from as_platform.data.batch import IMG_EXTS
from as_platform.data.core import load_wf, proj_root, resolve_pack_dir
from as_platform.db.engine import session_scope
from as_platform.db.models import LabelingCampaign, User
from as_platform.labeling.scope import enrich_batch_labels, load_dms_registry
# Legacy directory name kept on purpose: the export scripts still read
# labels/ls_annotations/.
ANNOTATIONS_DIRNAME = "ls_annotations"
def _load_campaign(campaign_id: str) -> LabelingCampaign | None:
    """Look up a labeling campaign by its id.

    Opens a database session through ``session_scope`` and returns whatever
    ``get(LabelingCampaign, campaign_id)`` yields on that session (``None`` is
    a possible result per the return annotation).
    """
    with session_scope() as session:
        campaign = session.get(LabelingCampaign, campaign_id)
        return campaign
def resolve_campaign_batch_dir(camp: LabelingCampaign) -> Path:
    """Resolve the batch directory a campaign points at.

    The layout depends on ``camp.project``:

    * ``"dms"`` -- the project's YAML registry is loaded and consulted:
        - ``camp.location == "sources"``:
          ``<pack_dir>/<task_dir>/<sources_subdir>/<batch>`` (``camp.pack`` is
          required; ``sources_subdir`` defaults to ``"sources"``).
        - task of type ``"multi"`` with a mode whose mode config has an
          ``inbox`` entry: ``<root>/<inbox>``.
        - otherwise ``<root>/inbox/<task>/<mode>/<batch>`` when a mode is set,
          else ``<root>/inbox/<task>/<batch>``.
    * ``"adas"`` -- ``<root>/inbox/<task>/<batch>`` (``camp.task`` is required).
    * any other project -- when ``camp.location == "pack"`` and a pack is set,
      the path returned by ``resolve_pack`` (falling back to ``<root>/<pack>``
      if that raises ``ValueError``); otherwise ``<root>/inbox/<batch>``.

    Returns:
        The path after ``Path.resolve()``; existence is not checked here.

    Raises:
        ValueError: a ``sources`` dms campaign has no pack, or an adas
            campaign has no task.
    """
    workflow = load_wf()
    project_root = proj_root(workflow, camp.project)

    if camp.project == "dms":
        import yaml

        registry_file = project_root / workflow["projects"]["dms"]["registry"]
        registry = yaml.safe_load(registry_file.read_text(encoding="utf-8"))
        task_cfg = registry["tasks"][camp.task]

        if camp.location == "sources":
            if not camp.pack:
                raise ValueError("sources 批次需要 pack")
            pack_root = resolve_pack_dir("dms", project_root, workflow, camp.pack)
            ingest_cfg = registry.get("ingest") or {}
            sources_subdir = ingest_cfg.get("sources_subdir", "sources")
            return pack_root.joinpath(task_cfg["task_dir"], sources_subdir, camp.batch).resolve()

        if task_cfg.get("type") == "multi" and camp.mode:
            from as_platform.labeling.scope import _dms_registry_api

            mode_config_of, resolve_ids, _ = _dms_registry_api()
            resolved_task, resolved_mode = resolve_ids(camp.task, camp.mode)
            mode_cfg = mode_config_of(resolved_task, resolved_mode, registry)
            mode_inbox = mode_cfg.get("inbox")
            if mode_inbox:
                # A mode-specific inbox is used as the batch directory itself.
                return (project_root / mode_inbox).resolve()

        current_mode = camp.mode
        if current_mode:
            return project_root.joinpath("inbox", camp.task, current_mode, camp.batch).resolve()
        return project_root.joinpath("inbox", camp.task, camp.batch).resolve()

    if camp.project == "adas":
        if not camp.task:
            raise ValueError("adas campaign 需要 task")
        return project_root.joinpath("inbox", camp.task, camp.batch).resolve()

    if camp.location == "pack" and camp.pack:
        try:
            from as_platform.data.core import resolve_pack

            pack_rel = resolve_pack("lane", project_root, workflow, camp.pack)
            return (project_root / pack_rel).resolve()
        except ValueError:
            # Fall back to treating the pack value as a path under the root.
            return (project_root / camp.pack).resolve()

    return project_root.joinpath("inbox", camp.batch).resolve()
def _iter_batch_images(batch_dir: Path) -> list[Path]:
if not batch_dir.is_dir():
return []
candidates: list[Path] = []
search_roots = [
batch_dir / "images",
batch_dir / "images" / "train",
batch_dir,
]
seen: set[str] = set()
for root in search_roots:
if not root.is_dir():
continue
for p in sorted(root.rglob("*")):
if not p.is_file() or p.suffix not in IMG_EXTS:
continue
key = str(p.resolve())
if key in seen:
continue
seen.add(key)
candidates.append(p.resolve())
return candidates
def _task_id_for_image(image_path: Path, batch_dir: Path) -> str:
try:
rel = image_path.relative_to(batch_dir)
stem = rel.as_posix()
except ValueError:
stem = image_path.stem
return hashlib.sha256(stem.encode()).hexdigest()[:16]
def _annotations_dir(batch_dir: Path) -> Path:
    """Return ``<batch_dir>/labels/<ANNOTATIONS_DIRNAME>``, creating it if needed.

    Missing parent directories are created as well; an already existing
    directory is not an error.
    """
    target = batch_dir.joinpath("labels", ANNOTATIONS_DIRNAME)
    target.mkdir(parents=True, exist_ok=True)
    return target
def campaign_bootstrap(campaign_id: str) -> dict[str, Any]:
    """Build the bootstrap payload describing a campaign.

    Starts from ``camp.to_dict()`` passed through ``enrich_batch_labels`` (the
    DMS registry is supplied only for ``dms`` campaigns) and adds:

    * ``batch_path`` / ``image_count`` -- the resolved batch directory and the
      number of images found in it; if resolving or scanning fails they become
      ``None`` / ``0`` and the error text is stored under ``batch_error``.
    * ``editor`` (always ``"cvat"``), ``cvat_task_id`` and ``cvat_job_url``.

    Raises:
        FileNotFoundError: no campaign with this id exists.
    """
    with session_scope() as session:
        campaign = session.get(LabelingCampaign, campaign_id)
        if not campaign:
            raise FileNotFoundError("campaign not found")

        registry = None
        if campaign.project == "dms":
            registry = load_dms_registry()
        info = enrich_batch_labels(campaign.to_dict(), registry)

        try:
            batch_dir = resolve_campaign_batch_dir(campaign)
            info["batch_path"] = str(batch_dir)
            info["image_count"] = len(_iter_batch_images(batch_dir))
        except Exception as exc:
            # Best effort: a broken batch location is reported, not raised.
            info["batch_path"] = None
            info["image_count"] = 0
            info["batch_error"] = str(exc)

        info["editor"] = "cvat"
        info["cvat_task_id"] = campaign.cvat_task_id
        info["cvat_job_url"] = campaign.cvat_job_url
        return info
def campaign_tasks(
    campaign_id: str,
    *,
    offset: int = 0,
    limit: int = 50,
    user: User | None = None,
    assignee: str | None = None,
) -> dict[str, Any]:
    """Return one page of image tasks for a campaign.

    Args:
        campaign_id: Id of the labeling campaign.
        offset: Index of the first task of the page; negative values are
            treated as ``0``.
        limit: Maximum number of tasks in the page; negative values are
            treated as ``0``.
        user: The requesting user, if any.
        assignee: When ``"me"`` (and ``user`` is given) only tasks assigned to
            that user are listed.

    Returns:
        A dict with ``tasks`` (each holding ``id``, a ``data.image`` media URL
        and ``meta`` with the file name and relative path), ``total`` (number
        of tasks after filtering), ``offset`` and ``limit`` (as applied).
        With ``assignee == "me"`` it also carries ``my_assigned``; a user
        without assignments who is not a coordinator instead gets an empty
        page plus a ``hint`` message.

    Raises:
        FileNotFoundError: no campaign with this id exists.
    """
    # A negative offset or limit would make the slice below count from the end
    # of the list and hand back an arbitrary window instead of a page.
    offset = max(offset, 0)
    limit = max(limit, 0)

    with session_scope() as db:
        camp = db.get(LabelingCampaign, campaign_id)
        if not camp:
            raise FileNotFoundError("campaign not found")
        batch_dir = resolve_campaign_batch_dir(camp)
    images = _iter_batch_images(batch_dir)

    from as_platform.labeling.progress import get_assigned_task_ids, user_is_coordinator

    filter_ids: set[str] | None = None
    if assignee == "me" and user:
        filter_ids = get_assigned_task_ids(campaign_id, user.id)
        if not filter_ids and not user_is_coordinator(user):
            return {
                "tasks": [],
                "total": 0,
                "offset": offset,
                "limit": limit,
                "hint": "暂无分配给您的任务,请联系协调员在送标工作台均分任务",
            }
    if filter_ids is not None:
        images = [img for img in images if _task_id_for_image(img, batch_dir) in filter_ids]

    total = len(images)
    tasks: list[dict[str, Any]] = []
    for img in images[offset : offset + limit]:
        tid = _task_id_for_image(img, batch_dir)
        try:
            rel = img.relative_to(batch_dir).as_posix()
        except ValueError:
            # Image lives outside the batch directory: expose only its name.
            rel = img.name
        # Percent-encode the path for the URL but keep "/" as the separator.
        media_path = quote(rel, safe="/")
        tasks.append(
            {
                "id": tid,
                "data": {
                    "image": f"/api/v1/labeling/media/{campaign_id}/{media_path}",
                },
                "meta": {"filename": img.name, "relative_path": rel},
            }
        )

    out: dict[str, Any] = {"tasks": tasks, "total": total, "offset": offset, "limit": limit}
    if filter_ids is not None and user and assignee == "me":
        out["my_assigned"] = len(filter_ids)
    return out
def resolve_media_file(campaign_id: str, rel_path: str) -> Path:
    """Map a campaign-relative media path to a file inside the batch directory.

    Returns:
        The resolved path of the requested file.

    Raises:
        FileNotFoundError: the campaign does not exist, the target is not a
            regular file, or the resolved target is not located under the
            resolved batch directory.
        PermissionError: ``rel_path`` is absolute or contains a ``..`` part.
    """
    with session_scope() as session:
        campaign = session.get(LabelingCampaign, campaign_id)
        if not campaign:
            raise FileNotFoundError("campaign not found")
        batch_dir = resolve_campaign_batch_dir(campaign)

    requested = Path(rel_path)
    if requested.is_absolute() or ".." in requested.parts:
        raise PermissionError("invalid path")

    candidate = (batch_dir / requested).resolve()
    # Checked after resolve() so the final location is what gets compared.
    servable = candidate.is_file() and candidate.is_relative_to(batch_dir.resolve())
    if not servable:
        raise FileNotFoundError("media not found")
    return candidate
def get_annotation(campaign_id: str, task_id: str) -> dict[str, Any]:
    """Load the stored annotation JSON of one task.

    Args:
        campaign_id: Id of the labeling campaign.
        task_id: Task id; it becomes the file name ``<task_id>.json`` inside
            the campaign's annotations directory.

    Returns:
        The parsed JSON document, or the placeholder
        ``{"task_id": task_id, "result": None, "annotations": []}`` when no
        file exists for the task yet.

    Raises:
        PermissionError: ``task_id`` is empty or contains a path separator or
            NUL byte (it could otherwise address files outside the
            annotations directory).
        FileNotFoundError: no campaign with this id exists.
    """
    # task_id is interpolated into a file name below, so refuse anything that
    # could walk out of the annotations directory.
    tid = str(task_id)
    if not tid or any(ch in tid for ch in ("/", "\\", "\x00")):
        raise PermissionError("invalid task_id")

    with session_scope() as db:
        camp = db.get(LabelingCampaign, campaign_id)
        if not camp:
            raise FileNotFoundError("campaign not found")
        batch_dir = resolve_campaign_batch_dir(camp)

    path = _annotations_dir(batch_dir) / f"{task_id}.json"
    if not path.is_file():
        return {"task_id": task_id, "result": None, "annotations": []}
    return json.loads(path.read_text(encoding="utf-8"))
def save_annotation(
    campaign_id: str,
    task_id: str,
    payload: dict[str, Any],
    *,
    user: User | None = None,
) -> dict[str, Any]:
    """Persist the annotation of one task as ``<task_id>.json``.

    The stored document is ``payload`` plus server-side fields: ``task_id``,
    ``source`` (``"hsap"``), ``saved_at`` (UTC, ISO 8601) and, when ``user`` is
    given, ``completed_by_user_id`` / ``completed_at``. Server-side fields win
    over same-named keys in ``payload``. When ``user`` is given, the save is
    first authorised via ``assert_can_save_task`` and, if the stored document
    has a non-empty ``result``, the task is marked completed for that user.

    Returns:
        ``{"ok": True, "path": <path of the written file>}``.

    Raises:
        PermissionError: ``task_id`` is empty or contains a path separator or
            NUL byte (it could otherwise address files outside the
            annotations directory).
        FileNotFoundError: no campaign with this id exists.
    """
    # task_id is interpolated into a file name below, so refuse anything that
    # could write outside the annotations directory. Done first, before any
    # import or database work.
    tid = str(task_id)
    if not tid or any(ch in tid for ch in ("/", "\\", "\x00")):
        raise PermissionError("invalid task_id")

    from as_platform.labeling.progress import assert_can_save_task, mark_task_completed

    if user:
        assert_can_save_task(campaign_id, task_id, user)

    with session_scope() as db:
        camp = db.get(LabelingCampaign, campaign_id)
        if not camp:
            raise FileNotFoundError("campaign not found")
        batch_dir = resolve_campaign_batch_dir(camp)

    path = _annotations_dir(batch_dir) / f"{task_id}.json"
    now = datetime.now(timezone.utc).isoformat()
    extra: dict[str, Any] = {"source": "hsap", "saved_at": now}
    if user:
        extra["completed_by_user_id"] = user.id
        extra["completed_at"] = now

    out = {"task_id": task_id, **payload, **extra}
    # The id stored in the document must match the file it is saved under,
    # even if the payload carries its own "task_id" key.
    out["task_id"] = task_id
    path.write_text(json.dumps(out, ensure_ascii=False, indent=2), encoding="utf-8")

    if user and _annotation_has_result(path):
        mark_task_completed(campaign_id, task_id, user.id)
    return {"ok": True, "path": str(path)}
def _annotation_has_result(path: Path) -> bool:
if not path.is_file():
return False
try:
data = json.loads(path.read_text(encoding="utf-8"))
except (json.JSONDecodeError, OSError):
return False
result = data.get("result")
if result is None:
return False
if isinstance(result, list):
return len(result) > 0
if isinstance(result, dict):
return len(result) > 0
return bool(result)