- 统一标注引擎为 CVAT:客户端/配置/格式转换、iframe 标注页、docker-compose.cvat.yml 与 no_auth 补丁 - 移除 Label Studio 相关配置与构建脚本,清理 embedded.bak 备份与误提交的 node_modules - 新增「我的标注」:跨 Campaign 收件箱、逐张清单、CVAT frame 跳转 - 飞书任务分配:通讯录同步选人、按量分配、分配后 DM 通知(含 my-tasks 链接) - ADAS cuboid_7cls 数据湖接入:workflow 路径、register-batch、开标上传与标注同步 - 数据湖挂载 AS_DATA_LAKE_ROOT、datasets/adas 符号链接、reset_labeling 运维脚本 - 补充 docs/HANDOVER.md 项目交接文档 Co-authored-by: Cursor <cursoragent@cursor.com>
257 lines
9.0 KiB
Python
257 lines
9.0 KiB
Python
"""标注数据湖:批次目录、任务列表、标注 JSON、媒体文件(CVAT 为唯一标注引擎)。"""
|
||
from __future__ import annotations
|
||
|
||
import hashlib
|
||
import json
|
||
from datetime import datetime, timezone
|
||
from pathlib import Path
|
||
from typing import Any
|
||
from urllib.parse import quote
|
||
|
||
from as_platform.data.batch import IMG_EXTS
|
||
from as_platform.data.core import load_wf, proj_root, resolve_pack_dir
|
||
from as_platform.db.engine import session_scope
|
||
from as_platform.db.models import LabelingCampaign, User
|
||
from as_platform.labeling.scope import enrich_batch_labels, load_dms_registry
|
||
|
||
# 历史目录名保留,导出脚本仍读取 labels/ls_annotations/
|
||
ANNOTATIONS_DIRNAME = "ls_annotations"
|
||
|
||
|
||
def _load_campaign(campaign_id: str) -> LabelingCampaign | None:
|
||
with session_scope() as db:
|
||
return db.get(LabelingCampaign, campaign_id)
|
||
|
||
|
||
def resolve_campaign_batch_dir(camp: LabelingCampaign) -> Path:
|
||
wf = load_wf()
|
||
root = proj_root(wf, camp.project)
|
||
if camp.project == "dms":
|
||
import yaml
|
||
|
||
reg = yaml.safe_load((root / wf["projects"]["dms"]["registry"]).read_text(encoding="utf-8"))
|
||
tcfg = reg["tasks"][camp.task]
|
||
if camp.location == "sources":
|
||
if not camp.pack:
|
||
raise ValueError("sources 批次需要 pack")
|
||
pack_dir = resolve_pack_dir("dms", root, wf, camp.pack)
|
||
src_sub = (reg.get("ingest") or {}).get("sources_subdir", "sources")
|
||
return (pack_dir / tcfg["task_dir"] / src_sub / camp.batch).resolve()
|
||
if tcfg.get("type") == "multi" and camp.mode:
|
||
from as_platform.labeling.scope import _dms_registry_api
|
||
|
||
get_mode_config, resolve_task_id, _ = _dms_registry_api()
|
||
task_r, mode_r = resolve_task_id(camp.task, camp.mode)
|
||
mcfg = get_mode_config(task_r, mode_r, reg)
|
||
inbox_rel = mcfg.get("inbox")
|
||
if inbox_rel:
|
||
return (root / inbox_rel).resolve()
|
||
mode = camp.mode
|
||
if mode:
|
||
return (root / "inbox" / camp.task / mode / camp.batch).resolve()
|
||
return (root / "inbox" / camp.task / camp.batch).resolve()
|
||
if camp.project == "adas":
|
||
if not camp.task:
|
||
raise ValueError("adas campaign 需要 task")
|
||
return (root / "inbox" / camp.task / camp.batch).resolve()
|
||
if camp.location == "pack" and camp.pack:
|
||
try:
|
||
from as_platform.data.core import resolve_pack
|
||
|
||
rel = resolve_pack("lane", root, wf, camp.pack)
|
||
return (root / rel).resolve()
|
||
except ValueError:
|
||
return (root / camp.pack).resolve()
|
||
return (root / "inbox" / camp.batch).resolve()
|
||
|
||
|
||
def _iter_batch_images(batch_dir: Path) -> list[Path]:
|
||
if not batch_dir.is_dir():
|
||
return []
|
||
candidates: list[Path] = []
|
||
search_roots = [
|
||
batch_dir / "images",
|
||
batch_dir / "images" / "train",
|
||
batch_dir,
|
||
]
|
||
seen: set[str] = set()
|
||
for root in search_roots:
|
||
if not root.is_dir():
|
||
continue
|
||
for p in sorted(root.rglob("*")):
|
||
if not p.is_file() or p.suffix not in IMG_EXTS:
|
||
continue
|
||
key = str(p.resolve())
|
||
if key in seen:
|
||
continue
|
||
seen.add(key)
|
||
candidates.append(p.resolve())
|
||
return candidates
|
||
|
||
|
||
def _task_id_for_image(image_path: Path, batch_dir: Path) -> str:
|
||
try:
|
||
rel = image_path.relative_to(batch_dir)
|
||
stem = rel.as_posix()
|
||
except ValueError:
|
||
stem = image_path.stem
|
||
return hashlib.sha256(stem.encode()).hexdigest()[:16]
|
||
|
||
|
||
def _annotations_dir(batch_dir: Path) -> Path:
|
||
d = batch_dir / "labels" / ANNOTATIONS_DIRNAME
|
||
d.mkdir(parents=True, exist_ok=True)
|
||
return d
|
||
|
||
|
||
def campaign_bootstrap(campaign_id: str) -> dict[str, Any]:
|
||
with session_scope() as db:
|
||
camp = db.get(LabelingCampaign, campaign_id)
|
||
if not camp:
|
||
raise FileNotFoundError("campaign not found")
|
||
reg = load_dms_registry() if camp.project == "dms" else None
|
||
row = enrich_batch_labels(camp.to_dict(), reg)
|
||
try:
|
||
batch_dir = resolve_campaign_batch_dir(camp)
|
||
row["batch_path"] = str(batch_dir)
|
||
row["image_count"] = len(_iter_batch_images(batch_dir))
|
||
except Exception as e:
|
||
row["batch_path"] = None
|
||
row["image_count"] = 0
|
||
row["batch_error"] = str(e)
|
||
row["editor"] = "cvat"
|
||
row["cvat_task_id"] = camp.cvat_task_id
|
||
row["cvat_job_url"] = camp.cvat_job_url
|
||
return row
|
||
|
||
|
||
def campaign_tasks(
|
||
campaign_id: str,
|
||
*,
|
||
offset: int = 0,
|
||
limit: int = 50,
|
||
user: User | None = None,
|
||
assignee: str | None = None,
|
||
) -> dict[str, Any]:
|
||
with session_scope() as db:
|
||
camp = db.get(LabelingCampaign, campaign_id)
|
||
if not camp:
|
||
raise FileNotFoundError("campaign not found")
|
||
batch_dir = resolve_campaign_batch_dir(camp)
|
||
images = _iter_batch_images(batch_dir)
|
||
from as_platform.labeling.progress import get_assigned_task_ids, user_is_coordinator
|
||
|
||
filter_ids: set[str] | None = None
|
||
if assignee == "me" and user:
|
||
filter_ids = get_assigned_task_ids(campaign_id, user.id)
|
||
if not filter_ids and not user_is_coordinator(user):
|
||
return {
|
||
"tasks": [],
|
||
"total": 0,
|
||
"offset": offset,
|
||
"limit": limit,
|
||
"hint": "暂无分配给您的任务,请联系协调员在送标工作台均分任务",
|
||
}
|
||
|
||
if filter_ids is not None:
|
||
filtered = [img for img in images if _task_id_for_image(img, batch_dir) in filter_ids]
|
||
images = filtered
|
||
|
||
total = len(images)
|
||
slice_imgs = images[offset : offset + limit]
|
||
tasks: list[dict[str, Any]] = []
|
||
for img in slice_imgs:
|
||
tid = _task_id_for_image(img, batch_dir)
|
||
try:
|
||
rel = img.relative_to(batch_dir).as_posix()
|
||
except ValueError:
|
||
rel = img.name
|
||
media_path = quote(rel, safe="/")
|
||
tasks.append(
|
||
{
|
||
"id": tid,
|
||
"data": {
|
||
"image": f"/api/v1/labeling/media/{campaign_id}/{media_path}",
|
||
},
|
||
"meta": {"filename": img.name, "relative_path": rel},
|
||
}
|
||
)
|
||
out: dict[str, Any] = {"tasks": tasks, "total": total, "offset": offset, "limit": limit}
|
||
if filter_ids is not None and user and assignee == "me":
|
||
out["my_assigned"] = len(filter_ids)
|
||
return out
|
||
|
||
|
||
def resolve_media_file(campaign_id: str, rel_path: str) -> Path:
|
||
with session_scope() as db:
|
||
camp = db.get(LabelingCampaign, campaign_id)
|
||
if not camp:
|
||
raise FileNotFoundError("campaign not found")
|
||
batch_dir = resolve_campaign_batch_dir(camp)
|
||
clean = Path(rel_path)
|
||
if clean.is_absolute() or ".." in clean.parts:
|
||
raise PermissionError("invalid path")
|
||
target = (batch_dir / clean).resolve()
|
||
if not target.is_file() or not target.is_relative_to(batch_dir.resolve()):
|
||
raise FileNotFoundError("media not found")
|
||
return target
|
||
|
||
|
||
def get_annotation(campaign_id: str, task_id: str) -> dict[str, Any]:
|
||
with session_scope() as db:
|
||
camp = db.get(LabelingCampaign, campaign_id)
|
||
if not camp:
|
||
raise FileNotFoundError("campaign not found")
|
||
batch_dir = resolve_campaign_batch_dir(camp)
|
||
path = _annotations_dir(batch_dir) / f"{task_id}.json"
|
||
if not path.is_file():
|
||
return {"task_id": task_id, "result": None, "annotations": []}
|
||
data = json.loads(path.read_text(encoding="utf-8"))
|
||
return data
|
||
|
||
|
||
def save_annotation(
|
||
campaign_id: str,
|
||
task_id: str,
|
||
payload: dict[str, Any],
|
||
*,
|
||
user: User | None = None,
|
||
) -> dict[str, Any]:
|
||
from as_platform.labeling.progress import assert_can_save_task, mark_task_completed
|
||
|
||
if user:
|
||
assert_can_save_task(campaign_id, task_id, user)
|
||
with session_scope() as db:
|
||
camp = db.get(LabelingCampaign, campaign_id)
|
||
if not camp:
|
||
raise FileNotFoundError("campaign not found")
|
||
batch_dir = resolve_campaign_batch_dir(camp)
|
||
path = _annotations_dir(batch_dir) / f"{task_id}.json"
|
||
now = datetime.now(timezone.utc).isoformat()
|
||
extra: dict[str, Any] = {"source": "hsap", "saved_at": now}
|
||
if user:
|
||
extra["completed_by_user_id"] = user.id
|
||
extra["completed_at"] = now
|
||
out = {"task_id": task_id, **payload, **extra}
|
||
path.write_text(json.dumps(out, ensure_ascii=False, indent=2), encoding="utf-8")
|
||
if user and _annotation_has_result(path):
|
||
mark_task_completed(campaign_id, task_id, user.id)
|
||
return {"ok": True, "path": str(path)}
|
||
|
||
|
||
def _annotation_has_result(path: Path) -> bool:
|
||
if not path.is_file():
|
||
return False
|
||
try:
|
||
data = json.loads(path.read_text(encoding="utf-8"))
|
||
except (json.JSONDecodeError, OSError):
|
||
return False
|
||
result = data.get("result")
|
||
if result is None:
|
||
return False
|
||
if isinstance(result, list):
|
||
return len(result) > 0
|
||
if isinstance(result, dict):
|
||
return len(result) > 0
|
||
return bool(result)
|