feat: Unified Ingest SDK for DMS/ADAS promote, cuboid export and 3D fit

Replace subprocess build with promote_batch SDK, add ADAS cuboid export/fit/validate pipeline, stage normalization, and offline unit tests wired into smoke_labeling_api.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-06-16 09:58:35 +08:00
parent bc653d53a1
commit 0b8ade048e
42 changed files with 2074 additions and 104 deletions

View File

@@ -1,6 +1,7 @@
"""同步 inbox/sources 批次 batch.meta.yaml 的 stage与 Campaign 状态一致。"""
from __future__ import annotations
import json
from pathlib import Path
from as_platform.data.batch import read_meta, write_meta
@@ -18,6 +19,22 @@ def batch_has_yolo_labels(batch_dir: Path) -> bool:
return False
def batch_has_cuboid_labels(batch_dir: Path) -> bool:
    """Return True if the batch has an exported quaternion JSON with detections.

    Scans ``<batch_dir>/labels/quaternion_json/*.json`` and reports whether at
    least one file is a JSON object whose ``detections`` value is a non-empty
    list. Files that cannot be read, decoded or parsed, and files whose top
    level is not an object, are skipped instead of raising.

    Args:
        batch_dir: Root directory of the batch.

    Returns:
        True as soon as one qualifying file is found, otherwise False.
    """
    qdir = batch_dir / "labels" / "quaternion_json"
    if not qdir.is_dir():
        return False
    for path in qdir.glob("*.json"):
        try:
            data = json.loads(path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
            continue
        if not isinstance(data, dict):
            # A top-level list or scalar has no "detections" key to inspect.
            continue
        dets = data.get("detections")
        if isinstance(dets, list) and dets:
            return True
    return False
def batch_has_lane_labels(batch_dir: Path) -> bool:
"""批次是否已有 UFLD mask 清单list/train_gt.txt + annotations/*.png"""
list_path = batch_dir / "list" / "train_gt.txt"
@@ -29,6 +46,14 @@ def batch_has_lane_labels(batch_dir: Path) -> bool:
return any(ann_dir.rglob("*.png"))
def _batch_has_export_labels(project: str, batch_dir: Path) -> bool:
    """Run the label-presence check that belongs to ``project``.

    ``"lane"`` uses the lane check, ``"adas"`` the cuboid check, and every
    other project falls back to the YOLO check.
    """
    if project == "lane":
        checker = batch_has_lane_labels
    elif project == "adas":
        checker = batch_has_cuboid_labels
    else:
        checker = batch_has_yolo_labels
    return checker(batch_dir)
def update_campaign_batch_meta_stage(camp: LabelingCampaign, stage: str) -> bool:
try:
batch_dir = resolve_campaign_batch_dir(camp)
@@ -56,8 +81,23 @@ def update_campaign_batch_meta_stage_by_id(campaign_id: str, stage: str) -> bool
return update_campaign_batch_meta_stage(camp, stage)
def _advance_campaign_stage(campaign_id: str, stage: str) -> None:
    """Set a campaign's status to ``stage`` and mirror it into the batch meta.

    Does nothing when no campaign with the given id exists.
    """
    with session_scope() as session:
        campaign = session.get(LabelingCampaign, str(campaign_id))
        if campaign:
            campaign.status = stage
            session.flush()
            # Same stage value is propagated to the batch meta via the helper.
            update_campaign_batch_meta_stage(campaign, stage)
def _batch_has_calib(batch_dir: Path) -> bool:
    """Tell whether ``<batch_dir>/calib`` exists and holds a ``*.yaml`` or ``*.yml`` file."""
    calib_dir = batch_dir / "calib"
    if not calib_dir.is_dir():
        return False
    for pattern in ("*.yaml", "*.yml"):
        # One hit is enough; no need to materialise the whole listing.
        if next(iter(calib_dir.glob(pattern)), None) is not None:
            return True
    return False
def on_labeling_export_job_succeeded(job: dict) -> None:
"""导出 Job 成功且批次已有训练标签时进入 returned入库)。"""
"""导出 Job 成功且批次已有训练标签时进入 returned build)。"""
if job.get("action") != "labeling_export":
return
params = job.get("params") or {}
@@ -72,10 +112,46 @@ def on_labeling_export_job_succeeded(job: dict) -> None:
batch_dir = resolve_campaign_batch_dir(camp)
except Exception:
return
has_labels = (
batch_has_lane_labels(batch_dir)
if camp.project == "lane"
else batch_has_yolo_labels(batch_dir)
)
if has_labels:
update_campaign_batch_meta_stage_by_id(str(cid), "returned")
project = camp.project or "dms"
if _batch_has_export_labels(project, batch_dir):
_advance_campaign_stage(str(cid), "returned")
if project == "adas" and _batch_has_calib(batch_dir):
from as_platform.jobs.queue import enqueue_job
enqueue_job(
"cuboid_fit_3d",
{"campaign_id": str(cid)},
async_run=True,
)
def on_build_job_succeeded(job: dict) -> None:
    """Promote the campaign behind a finished build job to ``ingested``.

    Only ``build_dms`` / ``build_adas`` / ``build_lane`` jobs that carry a
    ``batch`` parameter are handled. The project comes from the job params
    or, when absent, from the action name. The newest campaign matching the
    batch, project and (if given) task is updated; if none matches, nothing
    happens.
    """
    default_project_by_action = {
        "build_dms": "dms",
        "build_adas": "adas",
        "build_lane": "lane",
    }
    action = job.get("action")
    if action not in default_project_by_action:
        return

    params = job.get("params") or {}
    batch = params.get("batch")
    if not batch:
        return

    # The fallback is always a non-empty string, so the project filter
    # below is applied unconditionally.
    project = params.get("project") or default_project_by_action[action]
    task = params.get("task")

    with session_scope() as db:
        query = db.query(LabelingCampaign).filter(LabelingCampaign.batch == str(batch))
        if task:
            query = query.filter(LabelingCampaign.task == str(task))
        query = query.filter(LabelingCampaign.project == str(project))
        campaign = query.order_by(LabelingCampaign.created_at.desc()).first()
        if campaign:
            campaign.status = "ingested"
            db.flush()
            update_campaign_batch_meta_stage(campaign, "ingested")

View File

@@ -0,0 +1,74 @@
"""ADAS class_id 映射BK2/MOON 单源)。"""
from __future__ import annotations
from pathlib import Path
from typing import Any
import yaml
from as_platform.config import WORKSPACE
# Registry YAML files consulted when resolving ADAS class names: the
# ADAS-specific registry first, then the shared labeling registry.
_ADAS_REGISTRY = WORKSPACE / "datasets" / "adas" / "adas.registry.yaml"
_LABELING_REGISTRY = WORKSPACE / "datasets" / "labeling.registry.yaml"
def load_adas_class_names() -> list[str]:
    """Return the ordered list of ADAS class names.

    Sources are tried in this order and the first non-empty one wins:

    1. ``classes.names`` in the ADAS registry YAML;
    2. ``profiles.cuboid_7cls.cvat_labels`` in the labeling registry YAML;
    3. the built-in ``CUBOID_7CLS_NAMES`` list from ``format_converter``.

    A registry whose root (or any intermediate key) is null or not a mapping
    is treated as "not configured" and the next source is tried.

    Returns:
        Class names as strings, in registry order.
    """

    def _mapping(value: Any) -> dict[str, Any]:
        # Null or non-dict YAML nodes behave like an empty mapping.
        return value if isinstance(value, dict) else {}

    if _ADAS_REGISTRY.is_file():
        reg = _mapping(yaml.safe_load(_ADAS_REGISTRY.read_text(encoding="utf-8")))
        names = _mapping(reg.get("classes")).get("names")
        if names:
            return [str(n) for n in names]
    if _LABELING_REGISTRY.is_file():
        reg = _mapping(yaml.safe_load(_LABELING_REGISTRY.read_text(encoding="utf-8")))
        profile = _mapping(_mapping(reg.get("profiles")).get("cuboid_7cls"))
        labels = profile.get("cvat_labels")
        if labels:
            return [str(n) for n in labels]
    # Imported lazily, as in the original, to keep the import local to the fallback.
    from as_platform.labeling.format_converter import CUBOID_7CLS_NAMES

    return list(CUBOID_7CLS_NAMES)
def class_name_to_id(name: str, class_map: dict[str, int] | None = None) -> int | None:
    """Look up the class id for ``name``.

    An exact key match is preferred; otherwise the first key that matches
    when both sides are lower-cased is used. When ``class_map`` is missing or
    empty, a map is built by enumerating ``load_adas_class_names()``.

    Returns:
        The class id, or None when the name is unknown.
    """
    lookup = class_map or {n: i for i, n in enumerate(load_adas_class_names())}
    try:
        return lookup[name]
    except KeyError:
        pass
    wanted = name.lower()
    return next((cid for key, cid in lookup.items() if key.lower() == wanted), None)
def build_class_map(names: list[str] | None = None) -> dict[str, int]:
    """Map each class name to its position in ``names``.

    When ``names`` is missing or empty, ``load_adas_class_names()`` supplies
    the list. A name that occurs more than once keeps its last position.
    """
    source = names or load_adas_class_names()
    mapping: dict[str, int] = {}
    for position, label in enumerate(source):
        mapping[str(label)] = position
    return mapping
def remap_class_id(old_names: list[str], new_names: list[str], class_id: int) -> int:
    """Translate ``class_id`` from the ``old_names`` ordering to ``new_names``.

    The label at ``old_names[class_id]`` is looked up in the new ordering,
    first exactly and then case-insensitively. The original ``class_id`` is
    returned unchanged when it is out of range or has no counterpart.
    """
    if not 0 <= class_id < len(old_names):
        return class_id
    label = old_names[class_id]
    target = build_class_map(new_names)
    exact = target.get(label)
    if exact is not None:
        return exact
    for candidate, new_id in target.items():
        if candidate.lower() == label.lower():
            return new_id
    return class_id
def normalize_detection_class(det: dict[str, Any], class_map: dict[str, int] | None = None) -> dict[str, Any]:
    """Make ``class_id`` and ``class_name`` of a detection agree with ``class_map``.

    * If the detection has a class name known to the map, ``class_id`` is set
      from the map.
    * If it has no class name but has a ``class_id``, ``class_name`` is filled
      in with the name that the map assigns to that id.
    * Otherwise the detection is returned untouched.

    The input dict is never mutated; a shallow copy is returned when a field
    is changed.

    Args:
        det: Detection dict that may carry ``class_name`` and/or ``class_id``.
        class_map: Name-to-id mapping; built from the registry when missing
            or empty.

    Returns:
        The original dict, or an updated shallow copy.
    """
    cmap = class_map or build_class_map()
    name = str(det.get("class_name") or "")
    cid = det.get("class_id")
    if name:
        mapped = class_name_to_id(name, cmap)
        if mapped is not None:
            det = dict(det)
            det["class_id"] = mapped
            det["class_name"] = name
    elif cid is not None:
        try:
            idx = int(cid)
        except (TypeError, ValueError):
            # A non-numeric id cannot be resolved; leave the detection as is.
            return det
        # Resolve by the mapped id value, not by key position: the map is not
        # guaranteed to be ordered 0..n-1.
        for known_name, known_id in cmap.items():
            if known_id == idx:
                det = dict(det)
                det["class_name"] = known_name
                break
    return det

View File

@@ -0,0 +1,174 @@
"""ls_annotations cuboid → labels/quaternion_json/*.jsonADAS MOON-3D 兼容格式)。"""
from __future__ import annotations
import hashlib
import json
from pathlib import Path
from typing import Any
import yaml
from as_platform.labeling.class_map import build_class_map, load_adas_class_names
from as_platform.labeling.format_converter import cuboid_item_to_detection
# File suffixes accepted as images when scanning a batch directory.
IMG_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".webp", ".JPG", ".JPEG", ".PNG"}
# Sub-directory of <batch>/labels that holds the per-task annotation JSON files.
ANNOTATIONS_DIRNAME = "ls_annotations"
def _load_cuboid_class_map() -> dict[str, int]:
    """Build the name-to-id map for cuboid export from the ADAS class names."""
    class_names = load_adas_class_names()
    return build_class_map(class_names)
def _task_id_for_image(image_path: Path, batch_dir: Path) -> str:
    """Derive a 16-hex-character task id from an image's location.

    The id is the SHA-256 prefix of the image path relative to ``batch_dir``
    (POSIX separators), or of the bare file name when the image does not live
    under ``batch_dir``.
    """
    try:
        key = image_path.relative_to(batch_dir).as_posix()
    except ValueError:
        # Image is outside the batch directory: fall back to its file name.
        key = image_path.name
    digest = hashlib.sha256(key.encode())
    return digest.hexdigest()[:16]
def _iter_batch_images(batch_dir: Path) -> list[Path]:
    """List the image files of a batch as resolved, de-duplicated paths.

    ``images/``, ``images/train/`` and the batch root are walked recursively
    in that order; within each root the paths are sorted. A file reachable
    from several roots is reported once, at its first occurrence.

    The suffix test is case-insensitive, so ``.Jpg`` or ``.BMP`` files are
    found as well as the spellings listed in ``IMG_EXTS``.

    Args:
        batch_dir: Root directory of the batch.

    Returns:
        Resolved image paths; empty when ``batch_dir`` is not a directory.
    """
    if not batch_dir.is_dir():
        return []
    known_exts = {ext.lower() for ext in IMG_EXTS}
    found: list[Path] = []
    seen: set[str] = set()
    for root in (batch_dir / "images", batch_dir / "images" / "train", batch_dir):
        if not root.is_dir():
            continue
        for p in sorted(root.rglob("*")):
            # Cheap suffix test first; is_file() costs a stat call.
            if p.suffix.lower() not in known_exts or not p.is_file():
                continue
            resolved = p.resolve()
            key = str(resolved)
            if key in seen:
                continue
            seen.add(key)
            found.append(resolved)
    return found
def _extract_result_regions(data: dict[str, Any]) -> list[dict[str, Any]]:
    """Pull the list of annotated regions out of an annotation document.

    A non-empty top-level ``result`` list is returned as is. Otherwise the
    ``result`` list of the first entry in ``annotations`` is used. An empty
    list is returned when neither is available.
    """
    direct = data.get("result")
    if isinstance(direct, list) and direct:
        return direct
    wrapped = data.get("annotations")
    if not (isinstance(wrapped, list) and wrapped):
        return []
    head = wrapped[0]
    if isinstance(head, dict):
        nested = head.get("result")
        if isinstance(nested, list):
            return nested
    return []
def _find_calib(batch_dir: Path) -> tuple[Path | None, list[list[float]] | None, list[int] | None]:
    """Locate the batch calibration file and read camera intrinsics from it.

    The first file of ``calib/*.yaml`` (sorted) followed by ``calib/*.yml``
    (sorted) is used. The intrinsic matrix is taken from a 3-element ``K``
    list when present, otherwise assembled from ``fx``/``fy``/``cx``/``cy``.

    Returns:
        ``(path, K, image_size)``. ``path`` is None when no calibration file
        exists; ``K`` and ``image_size`` are None when the file cannot be
        read or parsed, or does not provide them.
    """
    calib_dir = batch_dir / "calib"
    if not calib_dir.is_dir():
        return None, None, None
    candidates = [*sorted(calib_dir.glob("*.yaml")), *sorted(calib_dir.glob("*.yml"))]
    if not candidates:
        return None, None, None
    path = candidates[0]
    try:
        data = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
    except (OSError, yaml.YAMLError):
        return path, None, None

    raw_size = data.get("image_size")

    def size() -> list[int] | None:
        # Evaluated at return time, as the original does.
        return list(raw_size) if raw_size else None

    matrix = data.get("K")
    if matrix and isinstance(matrix, list) and len(matrix) == 3:
        return path, matrix, size()

    fx, fy, cx, cy = (data.get(key) for key in ("fx", "fy", "cx", "cy"))
    if not any(value is None for value in (fx, fy, cx, cy)):
        matrix = [
            [float(fx), 0.0, float(cx)],
            [0.0, float(fy), float(cy)],
            [0.0, 0.0, 1.0],
        ]
        return path, matrix, size()
    return path, None, size()
def _resolve_image_for_ann(data: dict[str, Any], batch_dir: Path, task_id: str) -> Path | None:
    """Find the image file that an annotation document refers to.

    Resolution order:

    1. If the document has an ``image`` value, try it as a path under
       ``images/`` and then under the batch root, and after each direct
       attempt search that root recursively.
    2. Otherwise, or when nothing was found, return the batch image whose
       derived task id equals ``task_id``.

    An ``image`` value that is an absolute path cannot be used as a recursive
    glob pattern (``Path.rglob`` rejects non-relative patterns), so only its
    file name is searched for in that case.

    Args:
        data: Parsed annotation document.
        batch_dir: Root directory of the batch.
        task_id: Task id, compared against ids derived from image paths.

    Returns:
        The image path, or None when no image can be resolved.
    """
    image_name = data.get("image")
    if image_name:
        raw = str(image_name)
        # Anchored (absolute) values would make rglob raise NotImplementedError.
        pattern = Path(raw).name if Path(raw).anchor else raw
        for root in (batch_dir / "images", batch_dir):
            candidate = root / raw
            if candidate.is_file():
                return candidate
            if not pattern:
                continue
            for p in root.rglob(pattern):
                if p.is_file():
                    return p
    for image_path in _iter_batch_images(batch_dir):
        if _task_id_for_image(image_path, batch_dir) == task_id:
            return image_path
    return None
def export_batch(batch_dir: Path) -> dict[str, Any]:
    """Export cuboid regions from ``labels/ls_annotations`` to ``labels/quaternion_json``.

    Every ``*.json`` annotation file is parsed, its ``cuboid`` regions are
    converted with ``cuboid_item_to_detection`` and one JSON payload is
    written, named after the resolved image's stem. The output directory is
    created when missing.

    Args:
        batch_dir: Root directory of the batch; resolved to an absolute path.

    Returns:
        A summary dict with:

        * ``written``: number of payload files written;
        * ``skipped_empty``: annotations without any convertible cuboid;
        * ``missing_ann``: annotations that could not be read or parsed, or
          whose image could not be resolved;
        * ``missing_calib``: True when no calibration file or no usable K
          was found;
        * ``calib``: calibration file path as a string, or None.
    """
    batch_dir = batch_dir.resolve()
    class_map = _load_cuboid_class_map()
    # Loaded once: the list is identical for every payload, and loading it
    # per annotation would re-read the registry for each file.
    text_prompts = load_adas_class_names()
    calib_path, K, calib_size = _find_calib(batch_dir)
    default_size = calib_size or [1920, 1080]
    ann_dir = batch_dir / "labels" / ANNOTATIONS_DIRNAME
    out_dir = batch_dir / "labels" / "quaternion_json"
    out_dir.mkdir(parents=True, exist_ok=True)

    written = 0
    skipped_empty = 0
    missing_ann = 0

    for ann_path in sorted(ann_dir.glob("*.json")):
        task_id = ann_path.stem
        try:
            data = json.loads(ann_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers JSON and text-decoding errors alike.
            missing_ann += 1
            continue
        if not isinstance(data, dict):
            # Not an annotation document; count it like an unreadable file.
            missing_ann += 1
            continue

        regions = _extract_result_regions(data)
        cuboids = [r for r in regions if isinstance(r, dict) and r.get("type") == "cuboid"]
        if not cuboids:
            skipped_empty += 1
            continue

        image_path = _resolve_image_for_ann(data, batch_dir, task_id)
        if not image_path:
            missing_ann += 1
            continue

        detections: list[dict[str, Any]] = []
        for item in cuboids:
            det = cuboid_item_to_detection(item, class_map, K=K)
            if det:
                detections.append(det)
        if not detections:
            skipped_empty += 1
            continue

        # Image size comes from the first cuboid region, else from the
        # calibration file, else the 1920x1080 default.
        first = cuboids[0]
        img_w = int(first.get("original_width") or default_size[0])
        img_h = int(first.get("original_height") or default_size[1])

        payload: dict[str, Any] = {
            "image": str(image_path),
            "image_stem": image_path.stem,
            "image_size": [img_w, img_h],
            "coordinate_frame": "opencv_camera",
            "boxes3d_format": "center_3d + dimensions_wlh + quaternion_wxyz",
            "text_prompts": list(text_prompts),
            "num_detections": len(detections),
            "detections": detections,
        }
        if K:
            payload["K"] = K
            payload["k_source"] = calib_path.name if calib_path else "fixed_calib"
        else:
            payload["k_source"] = "missing_calib"

        out_path = out_dir / f"{image_path.stem}.json"
        out_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
        written += 1

    return {
        "written": written,
        "skipped_empty": skipped_empty,
        "missing_ann": missing_ann,
        "missing_calib": calib_path is None or K is None,
        "calib": str(calib_path) if calib_path else None,
    }

View File

@@ -0,0 +1,95 @@
"""Batch-level cuboid 3D fit for quaternion_json."""
from __future__ import annotations
import hashlib
import json
from pathlib import Path
from typing import Any
IMG_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".webp"}
def _task_id_for_image(image_path: Path, batch_dir: Path) -> str:
    """Return the first 16 hex characters of a SHA-256 over the image's batch-relative path.

    The relative path uses POSIX separators; when the image is not located
    under ``batch_dir`` the bare file name is hashed instead.
    """
    try:
        relative = image_path.relative_to(batch_dir)
    except ValueError:
        hashed_text = image_path.name
    else:
        hashed_text = relative.as_posix()
    return hashlib.sha256(hashed_text.encode()).hexdigest()[:16]
def _load_ls_cuboid_points(batch_dir: Path, stem: str) -> list[list[float]]:
    """Collect the cuboid point lists annotated for the image called ``stem``.

    Annotation files under ``labels/ls_annotations`` are scanned in sorted
    order. A file belongs to the image when ``stem`` occurs in its ``image``
    value or equals the annotation file's own stem. From the first matching
    file that has usable cuboids, the first 16 values of every cuboid region
    with at least 16 point values are returned.

    Args:
        batch_dir: Root directory of the batch.
        stem: Image stem to look for.

    Returns:
        One list of 16 values per cuboid, or an empty list when nothing
        matches.
    """
    ann_dir = batch_dir / "labels" / "ls_annotations"
    if not stem or not ann_dir.is_dir():
        return []
    for ann_path in sorted(ann_dir.glob("*.json")):
        try:
            data = json.loads(ann_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            continue
        if not isinstance(data, dict):
            continue
        image_ref = str(data.get("image") or "")
        # Compare against the file stem instead of testing its truthiness,
        # which matched every annotation file.
        if stem not in image_ref and ann_path.stem != stem:
            continue
        regions = data.get("result")
        if not isinstance(regions, list):
            continue
        pts_list = []
        for region in regions:
            if not isinstance(region, dict) or region.get("type") != "cuboid":
                continue
            pts = list(region.get("points") or [])
            if len(pts) >= 16:
                pts_list.append(pts[:16])
        if pts_list:
            return pts_list
    return []
def fit_batch(batch_dir: Path) -> dict[str, Any]:
    """Run the cuboid 3D fit over every quaternion JSON of a batch, in place.

    For each ``labels/quaternion_json/*.json`` file that carries a ``K``
    matrix, every detection not yet marked ``fit_ok`` is passed to
    ``fit_cuboid_detection``. The points come from the matching annotation
    cuboid at the same index or, failing that, from the detection's
    ``box2d_xyxy`` corners repeated for both faces. The fit result is merged
    into the detection without overwriting an existing ``box2d_xyxy``, and
    the file is rewritten.

    Files without ``K`` are left untouched. Files that cannot be read or
    parsed are skipped, so one corrupt file does not abort a batch whose
    earlier files were already rewritten.

    Args:
        batch_dir: Root directory of the batch; resolved to an absolute path.

    Returns:
        Dict with ``updated_files``, ``detections``, ``fit_ok`` and
        ``fit_ok_ratio`` (``fit_ok / max(detections, 1)``).

    Raises:
        ValueError: If ``labels/quaternion_json`` does not exist.
    """
    from algorithms.adas_mono3d.fit_cuboid import fit_cuboid_detection

    batch_dir = batch_dir.resolve()
    qdir = batch_dir / "labels" / "quaternion_json"
    if not qdir.is_dir():
        raise ValueError(f"missing {qdir}")

    updated = 0
    fit_ok = 0
    total = 0
    for p in sorted(qdir.glob("*.json")):
        try:
            data = json.loads(p.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # Unreadable or corrupt payload: skip it, keep processing the rest.
            continue
        if not isinstance(data, dict):
            continue
        K = data.get("K")
        if not K:
            continue
        stem = data.get("image_stem") or p.stem
        cuboid_pts_list = _load_ls_cuboid_points(batch_dir, stem)

        new_dets = []
        for i, det in enumerate(data.get("detections") or []):
            det = dict(det)
            if not det.get("fit_ok"):
                class_name = str(det.get("class_name") or "car")
                points = cuboid_pts_list[i] if i < len(cuboid_pts_list) else None
                if not points:
                    box = det.get("box2d_xyxy") or []
                    if len(box) >= 4:
                        x1, y1, x2, y2 = box[:4]
                        # No annotated cuboid: use the 2D box corners for both faces.
                        points = [x1, y1, x2, y1, x1, y2, x2, y2, x1, y1, x2, y1, x1, y2, x2, y2]
                if points:
                    fitted = fit_cuboid_detection(points, K, class_name)
                    det.update(
                        {k: v for k, v in fitted.items() if k != "box2d_xyxy" or "box2d_xyxy" not in det}
                    )
            # NOTE(review): a detection without usable points is kept unfitted
            # rather than dropped; confirm this matches the intended nesting.
            new_dets.append(det)
            total += 1
            if det.get("fit_ok"):
                fit_ok += 1

        data["detections"] = new_dets
        data["num_detections"] = len(new_dets)
        p.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
        updated += 1

    return {
        "updated_files": updated,
        "detections": total,
        "fit_ok": fit_ok,
        "fit_ok_ratio": fit_ok / max(total, 1),
    }

View File

@@ -491,6 +491,71 @@ def convert_cvat_kitti_export_to_hsap(kitti_data: bytes, output_dir: Path) -> in
return count
# ═══════════════════════════════════════════════════════
# CVAT cuboid 16pt → HSAP quaternion_json detection (MVP)
# ═══════════════════════════════════════════════════════
# Built-in label names for the 7-class cuboid profile.
# NOTE(review): consumers appear to enumerate this list to derive class ids,
# so the order presumably matters; confirm before reordering or inserting.
CUBOID_7CLS_NAMES = [
"pedestrian",
"car",
"truck",
"bus",
"motorcycle",
"tricycle",
"traffic cone",
]
def cuboid_points_to_box2d(points: list[float]) -> list[float] | None:
    """Compute the axis-aligned 2D box around a cuboid's first eight (x, y) pairs.

    Args:
        points: Flat ``[x0, y0, x1, y1, ...]`` sequence; only the first 16
            values are used.

    Returns:
        ``[x_min, y_min, x_max, y_max]`` as floats, or None when fewer than
        16 values are given.
    """
    if len(points) < 16:
        return None
    head = points[:16]
    xs = [float(value) for value in head[0::2]]
    ys = [float(value) for value in head[1::2]]
    return [min(xs), min(ys), max(xs), max(ys)]
def cuboid_item_to_detection(
    item: dict[str, Any],
    class_map: dict[str, int],
    *,
    K: list[list[float]] | None = None,
) -> dict[str, Any] | None:
    """Convert one annotation cuboid entry into a quaternion_json detection.

    The result holds the class, a score of 1.0, the 2D box around the cuboid
    points and ``fit_ok=False``; no 3D fields are produced here.

    The label is matched against ``class_map`` exactly first, then
    case-insensitively. Points come from ``item["points"]``; when that holds
    fewer than 16 values, the named corner keys (``xtl1`` ... ``ybr2``) found
    in the item are appended. ``item`` itself is never modified.

    Args:
        item: Cuboid region from an annotation file.
        class_map: Class name to class id.
        K: Optional camera matrix; only recorded as ``K_used`` when given.

    Returns:
        The detection dict, or None when the label is unknown or fewer than
        16 point values are available.
    """
    label = str(item.get("label") or "")
    class_id = class_map.get(label)
    if class_id is None:
        lowered = label.lower()
        class_id = next(
            (cid for name, cid in class_map.items() if name.lower() == lowered),
            None,
        )
    if class_id is None:
        return None

    # Work on a copy: the fallback below must not grow the caller's list.
    points = list(item.get("points") or [])
    if len(points) < 16:
        corner_keys = (
            "xtl1", "ytl1", "xtr1", "ytr1", "xbl1", "ybl1", "xbr1", "ybr1",
            "xtl2", "ytl2", "xtr2", "ytr2", "xbl2", "ybl2", "xbr2", "ybr2",
        )
        points.extend(float(item[key]) for key in corner_keys if key in item)

    box2d = cuboid_points_to_box2d(points)
    if not box2d:
        return None

    det: dict[str, Any] = {
        "class_id": class_id,
        "class_name": label,
        "score": 1.0,
        "box2d_xyxy": box2d,
        "fit_ok": False,
    }
    if K:
        det["K_used"] = True
    return det
# ═══════════════════════════════════════════════════════
# ADAS 3D Quaternion JSON → CVAT cuboid XML
# ═══════════════════════════════════════════════════════

View File

@@ -18,6 +18,7 @@ from as_platform.labeling.batch_stage import (
on_labeling_export_job_succeeded,
update_campaign_batch_meta_stage,
)
from as_platform.labeling.stage import effective_stage, matches_stage_filter
from as_platform.labeling.scope import (
enrich_batch_labels,
format_scope_key,
@@ -120,11 +121,14 @@ def list_labeling_batches(
def _append(b: dict[str, Any]) -> None:
if b.get("registry_only"):
return
if stage and b.get("stage") != stage:
raw_stage = b.get("stage")
eff = effective_stage(raw_stage)
if stage and not matches_stage_filter(raw_stage, stage):
return
if b.get("stage") not in allowed_stages:
if eff not in allowed_stages and raw_stage not in allowed_stages:
return
row = enrich_batch_labels(b, reg)
row["stage"] = eff or raw_stage
cid = _campaign_id(
row["project"], row.get("task") or "", row.get("mode"), row["batch"], row.get("location") or "inbox"
)
@@ -470,6 +474,48 @@ def trigger_labeling_export(campaign_id: str) -> dict[str, Any]:
return {"ok": True, "job": job, "export_job": ej, "export_default": row.get("export_default")}
def get_batch_export_stats(campaign_id: str) -> dict[str, Any]:
    """Summarise the export state of a campaign's batch.

    For ADAS campaigns the batch is validated with
    ``validate_adas_cuboid_batch(..., allow_partial_3d=True)``, and the
    quaternion file count, fit ratio, calibration presence, raw stats and
    warnings are reported. For any other project only the presence of YOLO
    and cuboid labels is reported.

    Args:
        campaign_id: Id of the labeling campaign.

    Returns:
        A dict whose keys depend on the project, as described above.

    Raises:
        FileNotFoundError: If the campaign does not exist.
    """
    from as_platform.labeling.annotate import resolve_campaign_batch_dir
    from as_platform.data.promote.validate.adas_cuboid import validate_adas_cuboid_batch
    from as_platform.labeling.batch_stage import batch_has_cuboid_labels, batch_has_yolo_labels

    with session_scope() as db:
        camp = db.get(LabelingCampaign, campaign_id)
        if not camp:
            raise FileNotFoundError("campaign not found")
        project = camp.project
        batch_dir = resolve_campaign_batch_dir(camp)

    if project == "adas":
        _errors, warnings, stats = validate_adas_cuboid_batch(batch_dir, allow_partial_3d=True)
        calib_dir = batch_dir / "calib"
        # Accept both YAML suffixes, so a batch calibrated with a .yml file
        # is not reported as missing calibration.
        has_calib = calib_dir.is_dir() and any(
            True for pattern in ("*.yaml", "*.yml") for _ in calib_dir.glob(pattern)
        )
        return {
            "project": "adas",
            "campaign_id": campaign_id,
            "pack_default": "adas_moon3d_v1",
            "quaternion_files": stats.get("quaternion_files", 0),
            "fit_ok_ratio": stats.get("fit_ok_ratio", 0),
            "missing_calib": not has_calib,
            "stats": stats,
            "warnings": warnings,
        }
    return {
        "project": project,
        "campaign_id": campaign_id,
        "has_yolo": batch_has_yolo_labels(batch_dir),
        "has_cuboid": batch_has_cuboid_labels(batch_dir),
    }
def trigger_cuboid_fit(campaign_id: str) -> dict[str, Any]:
    """Queue an asynchronous ``cuboid_fit_3d`` job for an ADAS campaign.

    Raises:
        FileNotFoundError: If the campaign does not exist.
        ValueError: If the campaign's project is not ``adas``.
    """
    campaign = get_campaign(campaign_id)
    if not campaign:
        raise FileNotFoundError("campaign not found")
    if campaign.get("project") != "adas":
        raise ValueError("cuboid_fit_3d 仅适用于 ADAS")
    queued = enqueue_job("cuboid_fit_3d", {"campaign_id": campaign_id}, async_run=True)
    return {"ok": True, "job": queued}
# ═══════════════════════════════════════════════════════
# CVAT 集成辅助
# ═══════════════════════════════════════════════════════

View File

@@ -0,0 +1,29 @@
"""标注批次 stage 读时归一化(兼容旧 pipeline"""
from __future__ import annotations
# Legacy stage name -> current stage name, applied when a stage is read.
STAGE_ALIASES: dict[str, str] = {
"review_approved": "labeling_submitted",
}
# Stage names considered canonical.
# NOTE(review): presumably listed in lifecycle order; nothing in the visible
# code depends on the order — confirm before relying on it.
CANONICAL_STAGES = (
"raw_pool",
"out_for_labeling",
"in_review",
"review_rejected",
"labeling_submitted",
"returned",
"ingested",
)
def effective_stage(stage: str | None) -> str | None:
    """Translate a stored stage into its current name.

    Aliased legacy names are replaced through ``STAGE_ALIASES``; any other
    non-empty value is returned unchanged. ``None`` and ``""`` are passed
    through as they are.
    """
    if stage:
        return STAGE_ALIASES.get(stage, stage)
    return stage
def matches_stage_filter(batch_stage: str | None, filter_stage: str | None) -> bool:
    """Decide whether a batch passes a stage filter.

    An empty filter accepts everything. Otherwise the batch passes when
    either its normalized stage or its raw stage equals the filter value.
    """
    if not filter_stage:
        return True
    normalized = effective_stage(batch_stage)
    if normalized == filter_stage:
        return True
    return batch_stage == filter_stage