feat: Unified Ingest SDK for DMS/ADAS promote, cuboid export and 3D fit

Replace subprocess build with promote_batch SDK, add ADAS cuboid export/fit/validate pipeline, stage normalization, and offline unit tests wired into smoke_labeling_api.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-06-16 09:58:35 +08:00
parent bc653d53a1
commit 0b8ade048e
42 changed files with 2074 additions and 104 deletions

View File

@@ -0,0 +1,11 @@
from as_platform.data.promote.base import PackPromoteAdapter, PromoteContext, PromoteResult
from as_platform.data.promote.registry import get_promote_adapter
from as_platform.data.promote.runner import promote_batch
# Names this module re-exports from the promote submodules imported above.
__all__ = [
    "PackPromoteAdapter",
    "PromoteContext",
    "PromoteResult",
    "get_promote_adapter",
    "promote_batch",
]

View File

@@ -0,0 +1,152 @@
"""ADAS cuboid MOON-3D pack promote adapter."""
from __future__ import annotations
import json
import os
import shutil
from datetime import datetime, timezone
from pathlib import Path
from as_platform.data.batch import read_meta, write_meta
from as_platform.data.promote.base import PackPromoteAdapter, PromoteContext, PromoteResult
from as_platform.data.promote.manifest import refresh_adas_lists
from as_platform.data.promote.validate.adas_cuboid import validate_adas_cuboid_batch
from as_platform.labeling.class_map import build_class_map, load_adas_class_names, normalize_detection_class
# Lower-case file suffixes counted as images when tallying a promoted batch.
IMG_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".webp"}
def _link_or_copy(src: Path, dst: Path, *, copy: bool = False) -> None:
    """Place ``src`` at ``dst`` as a hard link, falling back to a copy.

    The parent directory of ``dst`` is created when missing. An existing file
    or symlink at ``dst`` is removed first. An existing real directory at
    ``dst`` is merged into when ``src`` is a directory as well.

    Args:
        src: Existing file or directory to expose at ``dst``.
        dst: Destination path.
        copy: When true, always copy instead of trying a hard link first.

    Raises:
        IsADirectoryError: ``dst`` is a real directory but ``src`` is not.
        OSError: The copy itself fails.
    """
    dst.parent.mkdir(parents=True, exist_ok=True)
    if dst.is_dir() and not dst.is_symlink():
        # ``unlink`` cannot remove a directory; keep it so ``copytree`` can
        # merge into it, and refuse to silently drop a file inside it.
        if not src.is_dir():
            raise IsADirectoryError(f"cannot replace directory with a file: {dst}")
    elif dst.exists() or dst.is_symlink():
        dst.unlink()

    def _copy() -> None:
        if src.is_dir():
            shutil.copytree(src, dst, dirs_exist_ok=True)
        else:
            shutil.copy2(src, dst)

    if copy:
        _copy()
        return
    try:
        os.link(src, dst)
    except OSError:
        # Hard links fail across filesystems and for directories.
        _copy()
def _sync_tree(src: Path, dst: Path, *, copy: bool = False) -> int:
    """Mirror the files under ``src`` into ``dst`` and count the new ones.

    Files whose destination already exists are left alone. Returns the number
    of files that were linked or copied; 0 when ``src`` is not a directory.
    """
    if not src.is_dir():
        return 0
    created = 0
    for entry in sorted(src.rglob("*")):
        if entry.is_file():
            destination = dst / entry.relative_to(src)
            if destination.exists():
                continue
            _link_or_copy(entry, destination, copy=copy)
            created += 1
    return created
def _normalize_quaternion_json(dest_batch: Path) -> int:
    """Rewrite every quaternion JSON under ``dest_batch`` with registry classes.

    Each detection is passed through ``normalize_detection_class``, and
    ``text_prompts`` / ``num_detections`` are refreshed to match.

    Args:
        dest_batch: Batch directory containing ``labels/quaternion_json``.

    Returns:
        Number of JSON files rewritten; 0 when the directory is absent.
    """
    qdir = dest_batch / "labels" / "quaternion_json"
    if not qdir.is_dir():
        return 0
    # Load the class names once and reuse them for the map and the prompts.
    names = load_adas_class_names()
    cmap = build_class_map(names)
    updated = 0
    for p in sorted(qdir.glob("*.json")):
        data = json.loads(p.read_text(encoding="utf-8"))
        dets = [normalize_detection_class(det, cmap) for det in data.get("detections") or []]
        data["detections"] = dets
        data["text_prompts"] = names
        data["num_detections"] = len(dets)
        # Write to a sibling temp file and swap it in: an in-place write would
        # go through a hard link and also modify the file it was linked from.
        tmp = p.with_name(p.name + ".tmp")
        try:
            tmp.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
            os.replace(tmp, p)
        finally:
            tmp.unlink(missing_ok=True)
        updated += 1
    return updated
class AdasCuboidPromoteAdapter(PackPromoteAdapter):
    """Promote an ADAS cuboid batch into ``packs/<pack>/sources/<batch>``."""

    project = "adas"

    def validate(self, ctx: PromoteContext) -> list[str]:
        """Validate the batch and stash the warnings on ``ctx.extra``.

        Returns the list of validation errors; empty when validation passes or
        ``ctx.skip_validate`` is set.
        """
        if ctx.skip_validate:
            return []
        errors, warnings, _stats = validate_adas_cuboid_batch(
            ctx.batch_dir,
            allow_partial_3d=ctx.allow_partial_3d,
        )
        # Assign rather than setdefault: a reused ``extra`` dict must not keep
        # reporting the warnings of an earlier batch.
        ctx.extra["validate_warnings"] = warnings
        return errors

    def promote(self, ctx: PromoteContext) -> PromoteResult:
        """Copy the batch into the pack, normalize labels and refresh lists.

        Returns a failed result when no quaternion JSON export exists. On a dry
        run nothing is written and only the destination path is reported.
        Otherwise any existing destination is replaced, batch metadata is
        stamped as ``ingested`` on both copies and the pack lists are rebuilt.
        """
        warnings = list(ctx.extra.get("validate_warnings") or [])
        qdir = ctx.batch_dir / "labels" / "quaternion_json"
        if not qdir.is_dir() or not any(qdir.glob("*.json")):
            return PromoteResult(
                ok=False,
                project=ctx.project,
                task=ctx.task,
                batch=ctx.batch,
                pack=ctx.pack,
                warnings=["missing quaternion_json export"],
            )
        pack_dir = ctx.project_root / "packs" / ctx.pack
        dest = pack_dir / "sources" / ctx.batch
        if ctx.dry_run:
            return PromoteResult(
                ok=True,
                project=ctx.project,
                task=ctx.task,
                batch=ctx.batch,
                pack=ctx.pack,
                dest_path=str(dest),
                # Surface validation warnings on dry runs as well.
                warnings=warnings,
                detail={"dry_run": True},
            )
        if dest.exists():
            shutil.rmtree(dest)
        dest.mkdir(parents=True, exist_ok=True)
        copied = 0
        for sub in ("images", "calib", "labels"):
            src_sub = ctx.batch_dir / sub
            if src_sub.is_dir():
                copied += _sync_tree(src_sub, dest / sub)
        normalized = _normalize_quaternion_json(dest)
        meta = read_meta(ctx.batch_dir) or {}
        meta.update({
            "stage": "ingested",
            "project": ctx.project,
            "task": ctx.task,
            "batch": ctx.batch,
            "pack": ctx.pack,
            "ingested_at": datetime.now(timezone.utc).isoformat(),
            "pipeline_version": 2,
        })
        # The same metadata is written to the pack copy and to the source batch.
        write_meta(dest, meta)
        write_meta(ctx.batch_dir, meta)
        manifest = refresh_adas_lists(pack=ctx.pack)
        images_dir = dest / "images"
        img_count = 0
        if images_dir.is_dir():
            img_count = sum(1 for f in images_dir.rglob("*") if f.suffix.lower() in IMG_EXTS)
        return PromoteResult(
            ok=True,
            project=ctx.project,
            task=ctx.task,
            batch=ctx.batch,
            pack=ctx.pack,
            dest_path=str(dest),
            images=img_count,
            labels=normalized,
            manifest_paths=[manifest.get("train_list", ""), manifest.get("val_list", "")],
            warnings=warnings,
            detail={"copied_files": copied, "normalized_json": normalized, **manifest},
        )

View File

@@ -0,0 +1,56 @@
"""Pack promote adapter base types."""
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import asdict, dataclass, field
from pathlib import Path
from typing import Any
@dataclass
class PromoteContext:
    """Inputs handed to a pack promote adapter for a single batch."""

    # Identifiers of the project, task, batch and target pack.
    project: str
    task: str
    batch: str
    pack: str
    # Directory holding the batch to promote.
    batch_dir: Path
    # Root directory of the project.
    project_root: Path
    # Request a dry run; what is skipped is up to the adapter.
    dry_run: bool = False
    # Ask the adapter to skip its validation step.
    skip_validate: bool = False
    # Presumably relaxes 3D fit requirements during validation — confirm against the adapter.
    allow_partial_3d: bool = False
    # Whether derived manifests should be refreshed after promoting (adapter-specific).
    refresh: bool = True
    # Free-form adapter-specific options; a fresh dict per instance by default.
    extra: dict[str, Any] = field(default_factory=dict)
@dataclass
class PromoteResult:
    """Outcome of one promote run as reported by an adapter."""

    ok: bool
    project: str
    task: str
    batch: str
    pack: str
    dest_path: str = ""
    images: int = 0
    labels: int = 0
    manifest_paths: list[str] = field(default_factory=list)
    warnings: list[str] = field(default_factory=list)
    stage: str = "ingested"
    detail: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the result as a plain dict, with ``ok`` taken from the instance."""
        return {**asdict(self), "ok": self.ok}
class PackPromoteAdapter(ABC):
    """Abstract interface for per-project pack promote adapters."""

    # Project key the adapter serves; empty on the base class.
    project: str = ""

    @abstractmethod
    def promote(self, ctx: PromoteContext) -> PromoteResult:
        """Promote the batch described by ``ctx`` and report the outcome."""
        raise NotImplementedError

    @abstractmethod
    def validate(self, ctx: PromoteContext) -> list[str]:
        """Return list of error messages; empty means pass."""
        raise NotImplementedError

View File

@@ -0,0 +1,62 @@
"""DMS YOLO pack promote adapter."""
from __future__ import annotations
import sys
from pathlib import Path
from as_platform.data.promote.base import PackPromoteAdapter, PromoteContext, PromoteResult
from as_platform.data.promote.manifest import refresh_dms_yaml
from as_platform.data.promote.validate.dms_yolo import validate_dms_task
# Put datasets/dms/scripts (located via the fifth ancestor directory of this
# file) at the front of sys.path so modules living there can be imported.
# NOTE(review): parents[4] presumably resolves to the repository root — confirm.
_DMS_SCRIPTS = Path(__file__).resolve().parents[4] / "datasets" / "dms" / "scripts"
if str(_DMS_SCRIPTS) not in sys.path:
    sys.path.insert(0, str(_DMS_SCRIPTS))
class DmsYoloPromoteAdapter(PackPromoteAdapter):
    """Promote a DMS YOLO batch by delegating to ``ingest_incremental``."""

    project = "dms"

    def validate(self, ctx: PromoteContext) -> list[str]:
        """Return DMS task validation errors, or nothing when validation is skipped."""
        return [] if ctx.skip_validate else validate_dms_task(ctx.task)

    def promote(self, ctx: PromoteContext) -> PromoteResult:
        """Run ``promote_inbox_batch`` for the batch and wrap its summary.

        Returns a failed result when ``ctx.batch_dir`` is not a directory.
        The task YAML is refreshed afterwards unless this is a dry run,
        refreshing is disabled, or validation was skipped.
        """
        # Imported lazily; resolved through the sys.path entry added at module import.
        from ingest_incremental import promote_inbox_batch

        identity = {
            "project": ctx.project,
            "task": ctx.task,
            "batch": ctx.batch,
            "pack": ctx.pack,
        }
        if not ctx.batch_dir.is_dir():
            return PromoteResult(
                ok=False,
                warnings=[f"batch_dir missing: {ctx.batch_dir}"],
                **identity,
            )

        pack_dir = ctx.project_root / "packs" / ctx.pack
        pack_dir.mkdir(parents=True, exist_ok=True)
        do_refresh = ctx.refresh and not ctx.dry_run
        detail = promote_inbox_batch(
            root=ctx.project_root,
            task=ctx.task,
            pack=ctx.pack,
            src=ctx.batch_dir,
            mode=ctx.extra.get("mode"),
            dry_run=ctx.dry_run,
            refresh=do_refresh,
        )
        if do_refresh and not ctx.skip_validate:
            refresh_dms_yaml(task=ctx.task)

        return PromoteResult(
            ok=True,
            dest_path=str(pack_dir),
            labels=int(detail.get("added") or 0),
            detail=detail,
            **identity,
        )

View File

@@ -0,0 +1,93 @@
"""Refresh ADAS / DMS pack manifests after promote."""
from __future__ import annotations
import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
import yaml
from as_platform.data.core import load_wf, proj_root
def _collect_adas_stems(sources_root: Path) -> list[str]:
    """Return the sorted, de-duplicated sample stems found under ``sources_root``.

    Every non-hidden batch directory contributes the stems of its
    ``labels/quaternion_json/*.json`` files. A batch without that directory
    contributes the stems of the .jpg/.jpeg/.png files under ``images``.
    """
    if not sources_root.is_dir():
        return []
    image_suffixes = {".jpg", ".jpeg", ".png"}
    found: set[str] = set()
    for batch_dir in sources_root.iterdir():
        if batch_dir.name.startswith(".") or not batch_dir.is_dir():
            continue
        qdir = batch_dir / "labels" / "quaternion_json"
        if qdir.is_dir():
            found.update(label.stem for label in qdir.glob("*.json"))
            continue
        img_root = batch_dir / "images"
        if img_root.is_dir():
            found.update(
                image.stem
                for image in img_root.rglob("*")
                if image.is_file() and image.suffix.lower() in image_suffixes
            )
    return sorted(found)
def refresh_adas_lists(wf: dict | None = None, *, pack: str = "adas_moon3d_v1") -> dict[str, Any]:
    """Rebuild the train/val stem lists and the pack index of an ADAS pack.

    The validation ratio defaults to 0.1 and is overridden by
    ``split.val_ratio`` in the project registry file when that file exists.
    The first ``int(len(stems) * ratio)`` sorted stems become the validation
    set. Returns the written paths together with the train/val counts.
    """
    workflow = wf or load_wf()
    root = proj_root(workflow, "adas")
    pack_dir = root / "packs" / pack
    sources = pack_dir / "sources"
    lists_dir = pack_dir / "lists"
    lists_dir.mkdir(parents=True, exist_ok=True)

    stems = _collect_adas_stems(sources)

    val_ratio = 0.1
    reg_path = root / workflow["projects"]["adas"]["registry"]
    if reg_path.is_file():
        registry = yaml.safe_load(reg_path.read_text(encoding="utf-8")) or {}
        split_cfg = registry.get("split") or {}
        val_ratio = float(split_cfg.get("val_ratio", 0.1))

    # A single stem (or none) always goes to training.
    if len(stems) > 1:
        n_val = max(0, int(len(stems) * val_ratio))
    else:
        n_val = 0
    val_stems, train_stems = stems[:n_val], stems[n_val:]

    train_path = lists_dir / "train_stems.txt"
    val_path = lists_dir / "val_stems.txt"
    for list_path, names in ((train_path, train_stems), (val_path, val_stems)):
        # One stem per line, newline-terminated; an empty list gives an empty file.
        list_path.write_text("".join(f"{name}\n" for name in names), encoding="utf-8")

    manifest_dir = pack_dir / "manifests"
    manifest_dir.mkdir(parents=True, exist_ok=True)
    index_path = manifest_dir / "pack_index.yaml"
    if sources.is_dir():
        batches = [
            {"batch": entry.name, "path": str(entry)}
            for entry in sorted(sources.iterdir())
            if entry.is_dir() and not entry.name.startswith(".")
        ]
    else:
        batches = []
    index = {
        "pack": pack,
        "updated_at": datetime.now(timezone.utc).isoformat(),
        "batches": batches,
        "train_stems": len(train_stems),
        "val_stems": len(val_stems),
    }
    index_path.write_text(yaml.dump(index, allow_unicode=True, sort_keys=False), encoding="utf-8")

    return {
        "train_list": str(train_path),
        "val_list": str(val_path),
        "pack_index": str(index_path),
        "train_count": len(train_stems),
        "val_count": len(val_stems),
    }
def refresh_dms_yaml(wf: dict | None = None, task: str | None = None) -> None:
    """Run the DMS project's ``scripts/refresh_yaml.py`` in a child interpreter.

    ``--task <task>`` is appended when ``task`` is given. The script runs with
    the project root as working directory; a non-zero exit raises
    ``subprocess.CalledProcessError``.
    """
    import subprocess
    import sys

    root = proj_root(wf or load_wf(), "dms")
    task_args = ["--task", task] if task else []
    subprocess.check_call(
        [sys.executable, str(root / "scripts" / "refresh_yaml.py"), *task_args],
        cwd=str(root),
    )

View File

@@ -0,0 +1,18 @@
"""Pack promote adapter registry."""
from __future__ import annotations
from as_platform.data.promote.adas_cuboid import AdasCuboidPromoteAdapter
from as_platform.data.promote.base import PackPromoteAdapter
from as_platform.data.promote.dms_yolo import DmsYoloPromoteAdapter
# Adapter instances created once at import time; lookups match on ``adapter.project``.
ADAPTERS: tuple[PackPromoteAdapter, ...] = (
    DmsYoloPromoteAdapter(),
    AdasCuboidPromoteAdapter(),
)
def get_promote_adapter(project: str) -> PackPromoteAdapter:
    """Return the first registered adapter whose ``project`` equals ``project``.

    Raises:
        ValueError: No adapter is registered for ``project``.
    """
    found = next((candidate for candidate in ADAPTERS if candidate.project == project), None)
    if found is None:
        raise ValueError(f"no promote adapter for project={project}")
    return found

View File

@@ -0,0 +1,126 @@
"""Unified pack promote entrypoint."""
from __future__ import annotations
from pathlib import Path
from typing import Any
from as_platform.data.batch import read_meta, write_meta
from as_platform.data.catalog_cache import invalidate_catalog_cache
from as_platform.data.core import load_wf, proj_root
from as_platform.data.promote.base import PromoteContext, PromoteResult
from as_platform.data.promote.registry import get_promote_adapter
from as_platform.db.engine import session_scope
from as_platform.db.models import LabelingCampaign
from as_platform.jobs.runner import _auto_snapshot
from as_platform.labeling.annotate import resolve_campaign_batch_dir
def _resolve_batch_dir(
    project: str,
    task: str,
    batch: str,
    *,
    location: str = "inbox",
) -> Path:
    """Return the resolved ``<project root>/inbox/<task>/<batch>`` path.

    Raises:
        ValueError: ``location`` is anything other than ``"inbox"``.
    """
    root = proj_root(load_wf(), project)
    if location != "inbox":
        raise ValueError(f"unsupported location: {location}")
    # Every project currently uses the same inbox layout.
    return (root / "inbox" / task / batch).resolve()
def _update_campaign_ingested(project: str, task: str, batch: str) -> None:
    """Best-effort: mark the newest matching campaign and its batch as ingested.

    Sets ``status = "ingested"`` on the most recently created
    ``LabelingCampaign`` for (project, task, batch), then stamps that
    campaign's batch metadata with ``stage = "ingested"`` and
    ``pipeline_version = 2``. Failures never propagate; they are logged as
    warnings so a successful promote is not turned into an error.
    """
    import logging

    log = logging.getLogger(__name__)
    try:
        with session_scope() as db:
            camp = (
                db.query(LabelingCampaign)
                .filter(
                    LabelingCampaign.project == project,
                    LabelingCampaign.task == task,
                    LabelingCampaign.batch == batch,
                )
                .order_by(LabelingCampaign.created_at.desc())
                .first()
            )
            if camp:
                camp.status = "ingested"
                db.flush()
                try:
                    batch_dir = resolve_campaign_batch_dir(camp)
                    meta = read_meta(batch_dir) or {}
                    meta["stage"] = "ingested"
                    meta["pipeline_version"] = 2
                    write_meta(batch_dir, meta)
                except Exception:
                    # A metadata failure must not undo the status update above.
                    log.warning(
                        "could not update batch meta for %s/%s/%s",
                        project, task, batch, exc_info=True,
                    )
    except Exception:
        log.warning(
            "could not mark campaign ingested for %s/%s/%s",
            project, task, batch, exc_info=True,
        )
def promote_batch(
    project: str,
    *,
    task: str,
    batch: str | None = None,
    pack: str | None = None,
    batch_dir: Path | str | None = None,
    dry_run: bool = False,
    skip_validate: bool = False,
    allow_partial_3d: bool = False,
    refresh: bool = True,
    all_sources: bool = False,
    extra: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Promote inbox batch into training pack (SDK entry).

    Args:
        project: Key under ``projects`` in the workflow config.
        task: Task name; required.
        batch: Batch name; required.
        pack: Target pack; defaults to the project's ``base_pack``.
        batch_dir: Explicit batch directory; defaults to the inbox location.
        dry_run: Forwarded to the adapter; also skips the post-promote updates.
        skip_validate: Forwarded to the adapter.
        allow_partial_3d: Forwarded to the adapter.
        refresh: Forwarded to the adapter.
        all_sources: Not supported here; a true value raises.
        extra: Adapter-specific options. The mapping is copied, so adapters
            cannot modify the caller's dict.

    Returns:
        The adapter result as a dict, plus ``stdout`` (the JSON dump of that
        dict) and an empty ``stderr``.

    Raises:
        ValueError: A required argument is missing, the batch directory does
            not exist, validation reports errors, or the adapter fails.
        KeyError: ``project`` is not present in the workflow config.
    """
    import json

    wf = load_wf()
    pcfg = wf["projects"][project]
    pack_name = pack or pcfg.get("base_pack")
    if not pack_name:
        raise ValueError(f"project {project} missing pack")
    if not task:
        raise ValueError("task required")
    if all_sources:
        raise ValueError("all_sources promote not yet in SDK; use CLI ingest_incremental")
    if not batch:
        raise ValueError("batch required")
    root = proj_root(wf, project)
    bdir = Path(batch_dir).resolve() if batch_dir else _resolve_batch_dir(project, task, batch)
    if not bdir.is_dir():
        raise ValueError(f"batch_dir not found: {bdir}")
    adapter = get_promote_adapter(project)
    ctx = PromoteContext(
        project=project,
        task=task,
        batch=batch,
        pack=pack_name,
        batch_dir=bdir,
        project_root=root,
        dry_run=dry_run,
        skip_validate=skip_validate,
        allow_partial_3d=allow_partial_3d,
        refresh=refresh,
        # Copy: adapters write into ctx.extra, and a dict reused across calls
        # would otherwise carry state from one batch to the next.
        extra=dict(extra or {}),
    )
    val_errors = adapter.validate(ctx)
    if val_errors:
        raise ValueError("; ".join(val_errors))
    result: PromoteResult = adapter.promote(ctx)
    if not result.ok:
        raise ValueError(result.warnings[0] if result.warnings else "promote failed")
    if not dry_run:
        _update_campaign_ingested(project, task, batch)
        invalidate_catalog_cache()
        if project == "dms":
            _auto_snapshot("dms", task=task)
    out = result.to_dict()
    # ``stdout`` is serialized before ``stdout``/``stderr`` are added to ``out``.
    out["stdout"] = json.dumps(out, ensure_ascii=False)
    out["stderr"] = ""
    return out

View File

@@ -0,0 +1,81 @@
"""ADAS cuboid batch validation before promote."""
from __future__ import annotations
import json
from pathlib import Path
from as_platform.labeling.class_map import load_adas_class_names
def validate_adas_cuboid_batch(
    batch_dir: Path,
    *,
    allow_partial_3d: bool = False,
    min_fit_ratio: float = 0.8,
) -> tuple[list[str], list[str], dict]:
    """Return (errors, warnings, stats).

    Checks every ``labels/quaternion_json/*.json`` file under ``batch_dir``:
    each must parse as a JSON object, every detection needs a ``class_id``
    inside the class registry, files must carry ``K`` when ``calib/*.yaml``
    exists, and the share of detections with ``fit_ok`` must reach
    ``min_fit_ratio`` unless ``allow_partial_3d`` is set.

    Malformed content (undecodable bytes, non-object JSON, non-object
    detections, non-numeric ``class_id``) is reported as an error rather than
    raised. ``stats`` is empty when the directory or its files are missing.
    """
    errors: list[str] = []
    warnings: list[str] = []
    qdir = batch_dir / "labels" / "quaternion_json"
    if not qdir.is_dir():
        errors.append(f"missing labels/quaternion_json under {batch_dir}")
        return errors, warnings, {}
    files = sorted(qdir.glob("*.json"))
    if not files:
        errors.append("no quaternion_json files")
        return errors, warnings, {}
    # Loaded only once there is something to check against it.
    expected_names = load_adas_class_names()
    n_classes = len(expected_names)
    total_dets = 0
    fit_ok = 0
    has_k = 0
    files_with_dets = 0
    for p in files:
        try:
            data = json.loads(p.read_text(encoding="utf-8"))
        except (OSError, ValueError) as e:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            errors.append(f"{p.name}: invalid json ({e})")
            continue
        if not isinstance(data, dict):
            errors.append(f"{p.name}: invalid json (top-level value is not an object)")
            continue
        dets = data.get("detections") or []
        if not dets:
            warnings.append(f"{p.name}: empty detections (skipped)")
            continue
        files_with_dets += 1
        if data.get("K"):
            has_k += 1
        prompts = data.get("text_prompts") or []
        if prompts and list(prompts) != expected_names:
            warnings.append(f"{p.name}: text_prompts order differs from registry")
        for det in dets:
            total_dets += 1
            if not isinstance(det, dict):
                errors.append(f"{p.name}: detection is not an object")
                continue
            cid = det.get("class_id")
            try:
                cid_valid = cid is not None and 0 <= int(cid) < n_classes
            except (TypeError, ValueError):
                # Non-numeric ids such as "car" are invalid, not a crash.
                cid_valid = False
            if not cid_valid:
                errors.append(f"{p.name}: invalid class_id {cid}")
            if det.get("fit_ok"):
                fit_ok += 1
    stats = {
        "quaternion_files": len(files),
        "files_with_detections": files_with_dets,
        "detections": total_dets,
        "fit_ok_ratio": fit_ok / max(total_dets, 1),
        "has_k_ratio": has_k / max(files_with_dets, 1),
    }
    if files_with_dets == 0:
        errors.append("no quaternion json with detections")
    calib_dir = batch_dir / "calib"
    if calib_dir.is_dir() and list(calib_dir.glob("*.yaml")):
        if files_with_dets > 0 and has_k < files_with_dets:
            errors.append(f"calib present but only {has_k}/{files_with_dets} annotated json have K")
    if not allow_partial_3d and total_dets > 0:
        ratio = fit_ok / total_dets
        if ratio < min_fit_ratio:
            errors.append(
                f"fit_ok ratio {ratio:.2f} < {min_fit_ratio} (use allow_partial_3d for pilot)"
            )
    return errors, warnings, stats

View File

@@ -0,0 +1,18 @@
"""DMS YOLO batch validation wrapper."""
from __future__ import annotations
import subprocess
import sys
from pathlib import Path
from as_platform.config import WORKSPACE
def validate_dms_task(task: str | None) -> list[str]:
    """Run ``scripts/validate_dms_tasks.py`` and report a failure as one error.

    ``--task <task>`` is appended when ``task`` is given. Returns an empty list
    on exit code 0; otherwise a single-item list holding stderr, else stdout,
    else a generic message with the exit code.
    """
    script = WORKSPACE / "scripts" / "validate_dms_tasks.py"
    task_args = ["--task", task] if task else []
    proc = subprocess.run(
        [sys.executable, str(script), *task_args],
        capture_output=True,
        text=True,
    )
    if proc.returncode == 0:
        return []
    return [proc.stderr or proc.stdout or f"validate_dms_tasks failed exit {proc.returncode}"]