feat: HSAP platform v2 — modular navigation, quality review, audit log, world model simulation

Major changes: - New frontend (platform/web/): Vite + React 18 + TypeScript + Tailwind - 4-module navigation: 数据送标 / 模型管理 / 车队管理 / 系统管理 - Data catalog with charts (DMS/ADAS/Lane 3-tab view) - Quality review workflow (标注质检): Good/Fine/Bad scoring with auto-advance - Audit enhancements: batch operations, rejection categories, Feishu notifications - Operation audit log (操作日志) - World model simulation studio (仿真工坊) - Dataset version management with snapshots and diff - ADAS 7-class dataset integration (138K images organized + compressed) - User management with Feishu integration and pagination - CRUD/search/filter on all pages, card layout redesign - PIL-optimized image overlay rendering - Auto-snapshot on build, in_review workflow stage - Removed embedded algorithm code (now in workspace)
2026-06-03 11:40:21 +08:00
parent 7c43b44c57
commit e72bc061c5
5487 changed files with 979207 additions and 6197 deletions
--- a/datasets/dms/scripts/export_ls_to_yolo.py
+++ b/datasets/dms/scripts/export_ls_to_yolo.py
@@ -0,0 +1,347 @@
+#!/usr/bin/env python3
+"""Label Studio ls_annotations JSON → YOLO detect / YOLO pose txt."""
+from __future__ import annotations
+
+import argparse
+import hashlib
+import json
+import sys
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+# Directory containing this script; main() adds it to sys.path.
+SCRIPT_DIR = Path(__file__).resolve().parent
+# Parent of the scripts directory; configs and the registry are located relative to it.
+DMS_ROOT = SCRIPT_DIR.parent
+CONFIG_DIR = DMS_ROOT / "configs"
+# Registry file parsed by _load_registry().
+REGISTRY_PATH = DMS_ROOT / "datasets.registry.yaml"
+# Directory searched by _load_kpt_label_map() for keypoint manifests.
+KPT_ORDER_DIR = CONFIG_DIR / "keypoint_order"
+
+# File suffixes that _iter_batch_images() accepts as images.
+IMG_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".webp", ".JPG", ".JPEG", ".PNG"}
+# Sub-directory of <batch_dir>/labels that export_batch() reads annotation JSON from.
+ANNOTATIONS_DIRNAME = "ls_annotations"
+
+
+def _load_registry() -> dict[str, Any]:
+    """Load the dataset registry from ``REGISTRY_PATH``.
+
+    Returns:
+        The parsed YAML mapping, or an empty dict when the file holds an
+        empty document (``yaml.safe_load`` yields ``None`` in that case).
+    """
+    return yaml.safe_load(REGISTRY_PATH.read_text(encoding="utf-8")) or {}
+
+
+def _resolve_task_config(task: str, mode: str | None = None) -> dict[str, Any]:
+    """Return the registry configuration for *task* (and optional *mode*).
+
+    The task/mode pair is first normalised by ``task_registry.resolve_task_id``
+    and then looked up with ``task_registry.get_mode_config``.
+    """
+    # Imported lazily: main() puts SCRIPT_DIR on sys.path before this runs.
+    from task_registry import get_mode_config, resolve_task_id
+
+    registry = _load_registry()
+    canonical_task, canonical_mode = resolve_task_id(task, mode)
+    config = get_mode_config(canonical_task, canonical_mode, registry)
+    return config
+
+
+def _class_name_to_id(names: list[str] | dict[int | str, str]) -> dict[str, int]:
+    """Invert a class-name table into a ``name -> class id`` lookup.
+
+    Args:
+        names: Either a list (the position is the class id) or a mapping of
+            class id to class name.
+
+    Returns:
+        Mapping from class name to integer class id.
+    """
+    if not isinstance(names, dict):
+        return {label: position for position, label in enumerate(names)}
+    lookup: dict[str, int] = {}
+    for raw_id, label in names.items():
+        key = str(label)
+        lookup[key] = int(raw_id)
+    return lookup
+
+
+def _load_kpt_label_map(task: str) -> dict[str, int]:
+    """Load the keypoint ``label -> id`` mapping for *task*.
+
+    ``<task>_37.yaml`` is preferred; ``<task>.yaml`` is the fallback. Both are
+    looked up in ``KPT_ORDER_DIR``.
+
+    Returns:
+        Mapping from keypoint label to keypoint id; empty when the manifest
+        has no ``keypoints`` entries.
+
+    Raises:
+        FileNotFoundError: If neither manifest file exists.
+    """
+    candidates = (KPT_ORDER_DIR / f"{task}_37.yaml", KPT_ORDER_DIR / f"{task}.yaml")
+    path = next((candidate for candidate in candidates if candidate.is_file()), None)
+    if path is None:
+        raise FileNotFoundError(f"keypoint manifest not found for task {task}")
+    # safe_load returns None for an empty document; treat that as "no keypoints"
+    # instead of failing on None.get().
+    data = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
+    return {str(item["label"]): int(item["id"]) for item in data.get("keypoints") or []}
+
+
+def _task_id_for_image(image_path: Path, batch_dir: Path) -> str:
+    """Derive the 16-hex-character task id used to name an image's annotation file.
+
+    The id is the truncated SHA-256 of the image's POSIX path relative to
+    *batch_dir*; when the image is not located under *batch_dir* the bare
+    file stem is hashed instead.
+    """
+    try:
+        key = image_path.relative_to(batch_dir).as_posix()
+    except ValueError:
+        key = image_path.stem
+    digest = hashlib.sha256(key.encode())
+    return digest.hexdigest()[:16]
+
+
+def _iter_batch_images(batch_dir: Path) -> list[Path]:
+    """Collect every image file below *batch_dir*.
+
+    ``images/`` and ``images/train/`` are walked first and the batch root
+    last, so files in those folders come first in the result. Each file is
+    reported once, as a resolved path.
+
+    Args:
+        batch_dir: Root directory of the batch.
+
+    Returns:
+        Resolved image paths; an empty list when *batch_dir* is not a directory.
+    """
+    if not batch_dir.is_dir():
+        return []
+    search_roots = (
+        batch_dir / "images",
+        batch_dir / "images" / "train",
+        batch_dir,
+    )
+    found: list[Path] = []
+    seen: set[str] = set()
+    for root in search_roots:
+        if not root.is_dir():
+            continue
+        for p in sorted(root.rglob("*")):
+            # Compare the lower-cased suffix so mixed-case extensions such as
+            # ".Jpg", ".BMP" or ".WEBP" are not silently skipped.
+            if p.suffix.lower() not in IMG_EXTS or not p.is_file():
+                continue
+            resolved = p.resolve()
+            key = str(resolved)
+            if key in seen:
+                continue
+            seen.add(key)
+            found.append(resolved)
+    return found
+
+
+def _label_out_path(image_path: Path, batch_dir: Path) -> Path:
+    """Map an image path to its label ``.txt`` path under ``<batch_dir>/labels``.
+
+    A leading ``images`` component is dropped, and a following
+    ``train`` / ``val`` / ``test`` component is kept as the split folder.
+    Images outside *batch_dir* are placed by file name only.
+    """
+    try:
+        relative = image_path.relative_to(batch_dir)
+    except ValueError:
+        relative = Path(image_path.name)
+    segments = list(relative.parts)
+    if segments[:1] == ["images"]:
+        segments = segments[1:]
+    labels_root = batch_dir / "labels"
+    if segments and segments[0] in ("train", "val", "test"):
+        labels_root = labels_root / segments[0]
+        segments = segments[1:]
+    return labels_root / Path(*segments).with_suffix(".txt")
+
+
+def _extract_result_regions(data: dict[str, Any]) -> list[dict[str, Any]]:
+    """Pull the region list out of an annotation JSON object.
+
+    A non-empty top-level ``result`` list wins; otherwise the ``result`` list
+    of the first entry in ``annotations`` is used. Returns ``[]`` when
+    neither is available.
+    """
+    direct = data.get("result")
+    if isinstance(direct, list) and direct:
+        return direct
+    annotations = data.get("annotations")
+    if not isinstance(annotations, list) or not annotations:
+        return []
+    head = annotations[0]
+    if not isinstance(head, dict):
+        return []
+    nested = head.get("result")
+    return nested if isinstance(nested, list) else []
+
+
+def _clamp01(v: float) -> float:
+    """Limit *v* to the closed interval [0.0, 1.0]."""
+    capped = min(1.0, v)
+    return max(0.0, capped)
+
+
+def _ls_rect_to_yolo_bbox(value: dict[str, Any]) -> tuple[float, float, float, float]:
+    """Convert a rectangle region value into a normalised YOLO box.
+
+    Args:
+        value: Region value with ``x``, ``y`` (top-left corner), ``width`` and
+            ``height`` on a 0-100 scale.
+
+    Returns:
+        ``(cx, cy, w, h)`` with every component in [0, 1].
+
+    NOTE(review): any ``rotation`` field in *value* is ignored.
+    """
+    x = float(value["x"])
+    y = float(value["y"])
+    w = float(value["width"])
+    h = float(value["height"])
+    # Clip the box edges to the image before deriving centre and size.
+    # Clamping centre and size independently leaves a box that overhangs the
+    # image border extending outside [0, 1].
+    left = max(0.0, min(100.0, x))
+    top = max(0.0, min(100.0, y))
+    right = max(0.0, min(100.0, x + w))
+    bottom = max(0.0, min(100.0, y + h))
+    nw = max(0.0, right - left) / 100.0
+    nh = max(0.0, bottom - top) / 100.0
+    cx = (left + right) / 200.0
+    cy = (top + bottom) / 200.0
+    return cx, cy, nw, nh
+
+
+def _ls_point_to_yolo_xy(value: dict[str, Any]) -> tuple[float, float]:
+    """Scale a keypoint's ``x`` / ``y`` from the 0-100 range to clamped [0, 1] fractions."""
+    x_frac = _clamp01(float(value["x"]) / 100.0)
+    y_frac = _clamp01(float(value["y"]) / 100.0)
+    return x_frac, y_frac
+
+
+def _bbox_center(bbox: tuple[float, float, float, float]) -> tuple[float, float]:
+    """Return the ``(cx, cy)`` centre of a ``(cx, cy, w, h)`` box."""
+    center_x, center_y, _width, _height = bbox
+    return center_x, center_y
+
+
+def _parse_rectangles(
+    regions: list[dict[str, Any]],
+    class_map: dict[str, int],
+) -> list[dict[str, Any]]:
+    """Extract the ``rectanglelabels`` regions whose label is a known class.
+
+    Only the first label of each region is considered. Regions of another
+    type, without labels, or with a label missing from *class_map* are
+    skipped.
+
+    Returns:
+        One dict per kept region with ``class_id``, ``bbox`` (normalised
+        ``cx, cy, w, h``) and ``region_id``.
+    """
+    parsed: list[dict[str, Any]] = []
+    for region in regions:
+        if region.get("type") != "rectanglelabels":
+            continue
+        payload = region.get("value") or {}
+        names = payload.get("rectanglelabels") or []
+        if not names:
+            continue
+        name = str(names[0])
+        if name not in class_map:
+            continue
+        yolo_box = _ls_rect_to_yolo_bbox(payload)
+        parsed.append(
+            {
+                "class_id": class_map[name],
+                "bbox": yolo_box,
+                "region_id": region.get("id"),
+            }
+        )
+    return parsed
+
+
+def _parse_keypoints(
+    regions: list[dict[str, Any]],
+    kpt_map: dict[str, int],
+) -> list[dict[str, Any]]:
+    """Extract the keypoint regions whose label is listed in *kpt_map*.
+
+    Regions of type ``keypointlabels`` or ``keypoint`` are accepted; only the
+    first label of each region is considered.
+
+    Returns:
+        One dict per kept region with ``index`` (keypoint id), normalised
+        ``x`` / ``y`` and ``region_id``.
+    """
+    accepted_types = ("keypointlabels", "keypoint")
+    parsed: list[dict[str, Any]] = []
+    for region in regions:
+        if region.get("type") not in accepted_types:
+            continue
+        payload = region.get("value") or {}
+        names = payload.get("keypointlabels") or []
+        if not names:
+            continue
+        name = str(names[0])
+        if name not in kpt_map:
+            continue
+        px, py = _ls_point_to_yolo_xy(payload)
+        parsed.append(
+            {
+                "index": kpt_map[name],
+                "x": px,
+                "y": py,
+                "region_id": region.get("id"),
+            }
+        )
+    return parsed
+
+
+def _assign_keypoints_to_boxes(
+    boxes: list[dict[str, Any]],
+    points: list[dict[str, Any]],
+) -> dict[int | None, list[dict[str, Any]]]:
+    """Group keypoints by the box they belong to.
+
+    With no boxes all points go under the key ``None``; with a single box all
+    points go to index 0. Otherwise each point is attached to the box whose
+    centre is nearest (squared Euclidean distance, first box wins a tie).
+
+    Returns:
+        Mapping from box index (or ``None``) to the points assigned to it.
+    """
+    if not boxes:
+        return {None: points}
+    if len(boxes) == 1:
+        return {0: points}
+
+    groups: dict[int, list[dict[str, Any]]] = {index: [] for index in range(len(boxes))}
+    for point in points:
+        nearest = 0
+        nearest_sq = float("inf")
+        for index, box in enumerate(boxes):
+            center_x, center_y = _bbox_center(box["bbox"])
+            dist_sq = (point["x"] - center_x) ** 2 + (point["y"] - center_y) ** 2
+            if dist_sq < nearest_sq:
+                nearest, nearest_sq = index, dist_sq
+        groups[nearest].append(point)
+    return groups
+
+
+def _format_detect_line(class_id: int, bbox: tuple[float, float, float, float]) -> str:
+    """Render one YOLO detect label line: class id followed by ``cx cy w h`` with 6 decimals."""
+    center_x, center_y, width, height = bbox
+    coords = " ".join(f"{component:.6f}" for component in (center_x, center_y, width, height))
+    return f"{class_id} {coords}"
+
+
+def _format_pose_line(
+    class_id: int,
+    bbox: tuple[float, float, float, float],
+    points: list[dict[str, Any]],
+    nk: int,
+) -> str:
+    """Render one YOLO pose label line.
+
+    The line is the detect line followed by ``x y v`` for each of the *nk*
+    keypoint slots. Annotated keypoints get ``v = 2``; slots without an
+    annotation are written as ``0 0 0``. Points whose index falls outside
+    ``[0, nk)`` are ignored.
+    """
+    blank = (0.0, 0.0, 0.0)
+    slots: list[tuple[float, float, float]] = [blank for _ in range(nk)]
+    for point in points:
+        slot = int(point["index"])
+        if slot < 0 or slot >= nk:
+            continue
+        slots[slot] = (point["x"], point["y"], 2.0)
+    fields = _format_detect_line(class_id, bbox).split()
+    for triple in slots:
+        fields.extend(f"{component:.6f}" for component in triple)
+    return " ".join(fields)
+
+
+def convert_regions_to_yolo_lines(
+    regions: list[dict[str, Any]],
+    *,
+    mode: str,
+    class_map: dict[str, int],
+    kpt_map: dict[str, int] | None = None,
+    kpt_shape: list[int] | None = None,
+) -> list[str]:
+    """Turn annotation regions into YOLO label lines.
+
+    Args:
+        regions: Region dicts from an annotation result.
+        mode: ``"detect"`` or ``"pose"``.
+        class_map: Class name to class id.
+        kpt_map: Keypoint label to keypoint id (pose only).
+        kpt_shape: Keypoint shape; element 0 is the keypoint count (pose only).
+
+    Returns:
+        One label line per recognised rectangle; empty when there are none.
+
+    Raises:
+        ValueError: For an unknown *mode*, or pose mode without
+            *kpt_map* / *kpt_shape*.
+    """
+    if mode == "detect":
+        return [
+            _format_detect_line(box["class_id"], box["bbox"])
+            for box in _parse_rectangles(regions, class_map)
+        ]
+
+    if mode != "pose":
+        raise ValueError(f"unsupported mode: {mode}")
+    if not kpt_map or not kpt_shape:
+        raise ValueError("pose mode requires kpt_map and kpt_shape")
+
+    num_keypoints = int(kpt_shape[0])
+    boxes = _parse_rectangles(regions, class_map)
+    points = _parse_keypoints(regions, kpt_map)
+    if not boxes:
+        return []
+
+    by_box = _assign_keypoints_to_boxes(boxes, points)
+    return [
+        _format_pose_line(box["class_id"], box["bbox"], by_box.get(index, []), num_keypoints)
+        for index, box in enumerate(boxes)
+    ]
+
+
+def export_batch(
+    batch_dir: Path,
+    task: str,
+    *,
+    mode: str,
+    task_mode: str | None = None,
+    out_subdir: str | None = None,
+) -> dict[str, Any]:
+    """Export every annotated image of a batch to YOLO label files.
+
+    For each image the annotation ``labels/ls_annotations/<task_id>.json`` is
+    read, converted and written as a ``.txt`` label file.
+
+    Args:
+        batch_dir: Batch root directory.
+        task: Task id used to resolve class names and keypoint manifests.
+        mode: ``"detect"`` or ``"pose"``.
+        task_mode: Optional registry mode forwarded to the task lookup.
+        out_subdir: When set, labels go to ``<batch_dir>/<out_subdir>/<stem>.txt``
+            instead of the automatically derived path.
+
+    Returns:
+        Summary dict with ``written``, ``skipped_empty`` and ``missing_ann``
+        counters plus the echoed inputs.
+    """
+    batch_dir = batch_dir.resolve()
+    tcfg = _resolve_task_config(task, task_mode)
+    class_map = _class_name_to_id(tcfg.get("names") or {})
+    is_pose = mode == "pose"
+    kpt_map: dict[str, int] | None = _load_kpt_label_map(task) if is_pose else None
+    kpt_shape: list[int] | None = list(tcfg.get("kpt_shape") or [37, 3]) if is_pose else None
+
+    ann_dir = batch_dir / "labels" / ANNOTATIONS_DIRNAME
+    stats = {"written": 0, "skipped_empty": 0, "missing_ann": 0}
+
+    for image_path in _iter_batch_images(batch_dir):
+        ann_path = ann_dir / f"{_task_id_for_image(image_path, batch_dir)}.json"
+        if not ann_path.is_file():
+            stats["missing_ann"] += 1
+            continue
+        payload = json.loads(ann_path.read_text(encoding="utf-8"))
+        regions = _extract_result_regions(payload)
+        lines: list[str] = []
+        if regions:
+            lines = convert_regions_to_yolo_lines(
+                regions,
+                mode=mode,
+                class_map=class_map,
+                kpt_map=kpt_map,
+                kpt_shape=kpt_shape,
+            )
+        if not lines:
+            # Covers both "no regions" and "no regions of a known class".
+            stats["skipped_empty"] += 1
+            continue
+        target = _label_out_path(image_path, batch_dir)
+        if out_subdir:
+            # Explicit override: sub-directory relative to batch_dir + file name.
+            target = batch_dir / out_subdir / f"{image_path.stem}.txt"
+        target.parent.mkdir(parents=True, exist_ok=True)
+        target.write_text("\n".join(lines) + "\n", encoding="utf-8")
+        stats["written"] += 1
+
+    return {
+        "ok": True,
+        "batch_dir": str(batch_dir),
+        "task": task,
+        "mode": mode,
+        **stats,
+        "out_subdir": out_subdir or "auto",
+    }
+
+
+def main(argv: list[str] | None = None) -> int:
+    """CLI entry point: export one batch and print the JSON summary.
+
+    Returns:
+        0 when at least one label file was written, otherwise 1.
+    """
+    cli = argparse.ArgumentParser(description="Export Label Studio annotations to YOLO txt")
+    cli.add_argument("--batch-dir", type=Path, required=True)
+    cli.add_argument("--task", required=True)
+    cli.add_argument("--mode", choices=("detect", "pose"), required=True)
+    cli.add_argument("--task-mode", default=None, help="dam batch_0516 / batch_0417 等")
+    cli.add_argument("--out-subdir", default="labels/train")
+    opts = cli.parse_args(argv)
+
+    # task_registry is imported by bare module name, so this directory must be importable.
+    script_dir = str(SCRIPT_DIR)
+    if script_dir not in sys.path:
+        sys.path.insert(0, script_dir)
+
+    summary = export_batch(
+        opts.batch_dir,
+        opts.task,
+        mode=opts.mode,
+        task_mode=opts.task_mode,
+        out_subdir=opts.out_subdir,
+    )
+    print(json.dumps(summary, ensure_ascii=False, indent=2))
+    if summary["written"] > 0:
+        return 0
+    return 1
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
--- a/datasets/dms/scripts/ingest_incremental.py
+++ b/datasets/dms/scripts/ingest_incremental.py
@@ -236,7 +236,7 @@ def ingest_yolo(

    if is_voc:
        if tcfg["type"] != "detect":
-            raise SystemExit("VOC xml 仅支持 detect 任务（dam / dam_0417）")
+            raise SystemExit("VOC xml 仅支持 detect 任务（dam 各批次等）")
        staging = data_root.parent / "_staging_voc" / task
        staging_parent = staging.parent
        if staging.exists() and not dry_run:
@@ -384,7 +384,11 @@ def ingest_one(
    src: Path,
    args: argparse.Namespace,
 ) -> dict:
-    tcfg = reg["tasks"][task]
+    from task_registry import get_mode_config, resolve_task_id
+
+    submode = getattr(args, "mode", None) or getattr(args, "submode", None)
+    task, submode = resolve_task_id(task, submode)
+    tcfg = get_mode_config(task, submode, reg)
    pack = getattr(args, "pack", None) or "dms_v1"
    data_root = pack_task_data_root(root, pack, tcfg["task_dir"])
    sk = split_kwargs(reg, args)
@@ -449,23 +453,42 @@ def ingest_all_sources(root: Path, reg: dict, task: str, args: argparse.Namespac


 def ingest_all_inbox(root: Path, reg: dict, args: argparse.Namespace) -> None:
+    """Ingest every batch directory found in each registry task's inbox.
+
+    ``multi`` tasks have one inbox per mode (resolved by
+    ``task_registry.inbox_dir``); other tasks have a single inbox. Each
+    sub-directory of an inbox is passed to ``ingest_one`` and, unless
+    ``args.dry_run`` is set, recorded with ``append_log``.
+    """
+    from task_registry import inbox_dir
+
+    pack = getattr(args, "pack", None) or "dms_v1"
     for task, tcfg in reg["tasks"].items():
-        inbox = root / tcfg.get("inbox", f"inbox/{task}")
-        if not inbox.is_dir():
-            continue
-        batches = sorted(d for d in inbox.iterdir() if d.is_dir())
-        if not batches:
-            continue
-        print(f"\n>>> inbox {task}: {len(batches)} batch(es)")
-        for batch in batches:
-            ingest_one(root, reg, task, batch, args)
-            if not args.dry_run:
-                append_log(root, {"src": str(batch), "task": task, "pack": pack, "via": "inbox"})
+        if tcfg.get("type") == "multi":
+            # One inbox per mode of a multi task.
+            for mode in (tcfg.get("modes") or {}):
+                inbox = inbox_dir(root, task, mode, reg)
+                if not inbox.is_dir():
+                    continue
+                batches = sorted(d for d in inbox.iterdir() if d.is_dir())
+                if not batches:
+                    continue
+                print(f"\n>>> inbox {task}/{mode}: {len(batches)} batch(es)")
+                # The mode is handed to ingest_one through args; it is not reset afterwards.
+                # NOTE(review): ingest_one prefers args.mode over args.submode when both are set.
+                args.submode = mode
+                for batch in batches:
+                    ingest_one(root, reg, task, batch, args)
+                    if not args.dry_run:
+                        append_log(root, {"src": str(batch), "task": task, "mode": mode, "pack": pack, "via": "inbox"})
+        else:
+            # Single-inbox task.
+            inbox = inbox_dir(root, task, None, reg)
+            if not inbox.is_dir():
+                continue
+            batches = sorted(d for d in inbox.iterdir() if d.is_dir())
+            if not batches:
+                continue
+            print(f"\n>>> inbox {task}: {len(batches)} batch(es)")
+            for batch in batches:
+                ingest_one(root, reg, task, batch, args)
+                if not args.dry_run:
+                    append_log(root, {"src": str(batch), "task": task, "pack": pack, "via": "inbox"})


 def main() -> None:
    p = argparse.ArgumentParser(description="DMS 全任务增量接入")
-    p.add_argument("--task", help="registry 任务名；与 --all-inbox 二选一")
+    p.add_argument("--task", help="registry 任务名（forward 等 multi 任务需配合 --submode）")
+    p.add_argument("--submode", choices=("detect", "classify"), help="multi 任务子模式，如 forward 的 detect/classify")
    p.add_argument("--src", type=Path, help="新数据目录")
    p.add_argument("--all-inbox", action="store_true", help="处理所有 inbox/<task>/* 批次")
    p.add_argument("--all-sources", action="store_true", help="处理任务 data/sources/* 下所有待合并批次")
--- a/datasets/dms/scripts/migrate_dam_layout.py
+++ b/datasets/dms/scripts/migrate_dam_layout.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+"""将 dam / dam_0417 合并为 dam/batch_0516、dam/batch_0417（默认符号链接）。"""
+from __future__ import annotations
+
+import argparse
+import shutil
+from pathlib import Path
+
+
+def link_or_move(src: Path, dst: Path, *, move: bool) -> None:
+    """Expose the directory *src* at *dst*, by moving it or by symlinking to it.
+
+    Nothing is done (a message is printed) when *src* is not a directory or
+    when something already occupies *dst*.
+
+    Args:
+        src: Existing source directory.
+        dst: Target path; missing parent directories are created.
+        move: True to move *src* to *dst*; False to create a symlink at *dst*
+            pointing to the resolved *src*.
+    """
+    if not src.is_dir():
+        print(f"  skip（不存在）: {src}")
+        return
+    # exists() follows symlinks and reports False for a dangling link, which
+    # would make symlink_to() raise FileExistsError; check is_symlink() as well.
+    if dst.exists() or dst.is_symlink():
+        print(f"  已存在: {dst}")
+        return
+    dst.parent.mkdir(parents=True, exist_ok=True)
+    if move:
+        shutil.move(str(src), str(dst))
+        print(f"  moved {src} -> {dst}")
+    else:
+        target = src.resolve()
+        dst.symlink_to(target)
+        print(f"  symlink {dst} -> {target}")
+
+
+def migrate_pack(pack_dir: Path, *, move: bool) -> None:
+    """Re-home ``dam`` and ``dam_0417`` as ``dam/batch_0516`` and ``dam/batch_0417``.
+
+    Args:
+        pack_dir: Pack directory containing the ``dam`` / ``dam_0417`` folders.
+        move: Forwarded to ``link_or_move``; True moves data, False symlinks it.
+    """
+    dam_root = pack_dir / "dam"
+    dam_0417 = pack_dir / "dam_0417"
+    batch_0516 = dam_root / "batch_0516"
+    batch_0417 = dam_root / "batch_0417"
+
+    # Both targets already exist: nothing left to migrate.
+    if batch_0516.exists() and batch_0417.exists():
+        print("  dam 已迁移，跳过")
+        return
+
+    # dam is currently a flat YOLO layout (images/labels directly under its root)
+    if dam_root.is_dir() and (dam_root / "images").is_dir() and not batch_0516.exists():
+        # batch_0516 lives inside dam, so the old dam is first moved aside to a
+        # sibling stash and an empty dam directory is recreated in its place.
+        stash = pack_dir / "_dam_stash_0516"
+        if stash.exists():
+            # A stash left over from an earlier run is removed before reuse.
+            print(f"  清理旧 stash: {stash}")
+            if stash.is_symlink():
+                stash.unlink()
+            else:
+                shutil.rmtree(stash)
+        shutil.move(str(dam_root), str(stash))
+        dam_root.mkdir(parents=True)
+        link_or_move(stash, batch_0516, move=move)
+
+    if dam_0417.is_dir():
+        link_or_move(dam_0417, batch_0417, move=move)
+
+
+def migrate_inbox(dms_root: Path, *, move: bool) -> None:
+    """Re-home the ``inbox/dam`` and ``inbox/dam_0417`` folders under ``inbox/dam/``.
+
+    Args:
+        dms_root: DMS root directory containing ``inbox/``.
+        move: Forwarded to ``link_or_move``; True moves data, False symlinks it.
+    """
+    inbox_root = dms_root / "inbox"
+    mapping = (
+        ("dam", "dam/batch_0516"),
+        ("dam_0417", "dam/batch_0417"),
+    )
+    targets = [inbox_root / new for _, new in mapping]
+    for old, new in mapping:
+        src = inbox_root / old
+        dst = inbox_root / new
+        if not src.is_dir():
+            continue
+        if src in dst.parents:
+            # dst lies inside src. Linking directly would create a link that
+            # points at its own parent, and shutil.move refuses to move a
+            # directory into itself. Move src aside first, as migrate_pack does.
+            migrated = [
+                t for t in targets
+                if src in t.parents and (t.exists() or t.is_symlink())
+            ]
+            if migrated:
+                # src already holds a migrated batch folder: treat as done.
+                print(f"  已存在: {migrated[0]}")
+                continue
+            stash = inbox_root / f"_{old}_stash_{dst.name}"
+            if stash.exists() or stash.is_symlink():
+                # Never overwrite a stash that may hold the only copy of the data.
+                print(f"  skip（stash 已存在）: {stash}")
+                continue
+            shutil.move(str(src), str(stash))
+            src.mkdir(parents=True)
+            src = stash
+        link_or_move(src, dst, move=move)
+
+
+def main() -> None:
+    """CLI entry point: migrate the pack directory and, if given, the DMS inbox."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--pack-dir", type=Path, required=True)
+    parser.add_argument("--dms-root", type=Path, help="datasets/dms 根，迁移 inbox")
+    parser.add_argument("--move", action="store_true")
+    opts = parser.parse_args()
+
+    migrate_pack(opts.pack_dir.resolve(), move=opts.move)
+    dms_root = opts.dms_root
+    if dms_root:
+        migrate_inbox(dms_root.resolve(), move=opts.move)
+    print("完成。请运行 refresh_yaml.py --task dam 并刷新 catalog。")
+
+
+if __name__ == "__main__":
+    main()
--- a/datasets/dms/scripts/migrate_forward_layout.py
+++ b/datasets/dms/scripts/migrate_forward_layout.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+"""将 isa / isa_class 目录迁入 forward/detect、forward/classify（默认符号链接，保留原数据）。"""
+from __future__ import annotations
+
+import argparse
+import shutil
+from pathlib import Path
+
+
+def link_or_move(src: Path, dst: Path, *, move: bool) -> None:
+    """Expose the directory *src* at *dst*, by moving it or by symlinking to it.
+
+    Nothing is done (a message is printed) when *src* is not a directory or
+    when something already occupies *dst*.
+
+    Args:
+        src: Existing source directory.
+        dst: Target path; missing parent directories are created.
+        move: True to move *src* to *dst*; False to create a symlink at *dst*
+            pointing to the resolved *src*.
+    """
+    if not src.is_dir():
+        print(f"  skip（不存在）: {src}")
+        return
+    # exists() follows symlinks and reports False for a dangling link, which
+    # would make symlink_to() raise FileExistsError; check is_symlink() as well.
+    if dst.exists() or dst.is_symlink():
+        print(f"  已存在: {dst}")
+        return
+    dst.parent.mkdir(parents=True, exist_ok=True)
+    if move:
+        shutil.move(str(src), str(dst))
+        print(f"  moved {src} -> {dst}")
+    else:
+        target = src.resolve()
+        dst.symlink_to(target)
+        print(f"  symlink {dst} -> {target}")
+
+
+def migrate_pack(pack_dir: Path, *, move: bool) -> None:
+    """Expose ``isa`` and ``isa_class`` as ``forward/detect`` and ``forward/classify``.
+
+    Args:
+        pack_dir: Pack directory containing the ``isa`` / ``isa_class`` folders.
+        move: Forwarded to ``link_or_move``; True moves data, False symlinks it.
+    """
+    forward_root = pack_dir / "forward"
+    forward_root.mkdir(parents=True, exist_ok=True)
+    for legacy_name, mode_dir in (("isa", "detect"), ("isa_class", "classify")):
+        link_or_move(pack_dir / legacy_name, forward_root / mode_dir, move=move)
+
+
+def migrate_inbox(dms_root: Path, *, move: bool) -> None:
+    """Expose ``inbox/isa`` and ``inbox/isa_class`` under ``inbox/forward/``.
+
+    Args:
+        dms_root: DMS root directory containing ``inbox/``.
+        move: Forwarded to ``link_or_move``; True moves data, False symlinks it.
+    """
+    inbox_root = dms_root / "inbox"
+    renames = {
+        "isa": "forward/detect",
+        "isa_class": "forward/classify",
+    }
+    for legacy_name, target in renames.items():
+        legacy_dir = inbox_root / legacy_name
+        if not legacy_dir.is_dir():
+            continue
+        link_or_move(legacy_dir, inbox_root / target, move=move)
+
+
+def main() -> None:
+    """CLI entry point: migrate the pack directory and, if given, the DMS inbox."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--pack-dir", type=Path, required=True, help="如 .../packs/dms_v1")
+    parser.add_argument("--dms-root", type=Path, help="datasets/dms 根，迁移 inbox")
+    parser.add_argument("--move", action="store_true", help="移动而非符号链接")
+    opts = parser.parse_args()
+
+    migrate_pack(opts.pack_dir.resolve(), move=opts.move)
+    dms_root = opts.dms_root
+    if dms_root:
+        migrate_inbox(dms_root.resolve(), move=opts.move)
+    print("完成。请运行 refresh_yaml.py 并刷新平台 catalog。")
+
+
+if __name__ == "__main__":
+    main()
--- a/datasets/dms/scripts/refresh_yaml.py
+++ b/datasets/dms/scripts/refresh_yaml.py
@@ -1,6 +1,5 @@
 #!/usr/bin/env python3
 """按 workflow active_packs 生成 manifests/yaml_active/*.yaml（可多包合并 train/val）。"""
-
 from __future__ import annotations

 import argparse
@@ -11,10 +10,8 @@ import yaml

 SCRIPT_DIR = Path(__file__).resolve().parent
 sys.path.insert(0, str(SCRIPT_DIR))
-from pack_registry import (  # noqa: E402
-    load_active_pack_names,
-    resolve_pack_dir,
-)
+from pack_registry import load_active_pack_names, resolve_pack_dir  # noqa: E402
+from task_registry import get_mode_config, load_registry, train_yaml_key  # noqa: E402


 def fmt_names(names) -> str:
@@ -39,13 +36,13 @@ def pack_task_root(root: Path, pack_name: str, task_dir: str) -> Path:


 def build_detect_pose_yaml(
-    task: str,
-    tcfg: dict,
+    yaml_key: str,
+    mcfg: dict,
    root: Path,
    pack_names: list[str],
    typ: str,
 ) -> str:
-    task_dir = tcfg["task_dir"]
+    task_dir = mcfg["task_dir"]
    bases = []
    train_paths = []
    val_paths = []
@@ -59,28 +56,28 @@ def build_detect_pose_yaml(
        val_paths.append(str((base / "images" / "val").resolve()))

    if not bases:
-        raise SystemExit(f"{task}: 无可用数据包目录")
+        raise SystemExit(f"{yaml_key}: 无可用数据包目录")

    lines = [
-        f"# {task} — packs: {', '.join(pack_names)}",
+        f"# {yaml_key} — packs: {', '.join(pack_names)}",
        f"path: {bases[0]}",
        yaml_list("train", train_paths),
        yaml_list("val", val_paths),
        "",
    ]
    if typ == "pose":
-        lines.insert(4, f"kpt_shape: {tcfg.get('kpt_shape', [37, 3])}")
+        lines.insert(4, f"kpt_shape: {mcfg.get('kpt_shape', [37, 3])}")
    else:
-        lines.extend([f"nc: {tcfg['nc']}", fmt_names(tcfg["names"]), ""])
+        lines.extend([f"nc: {mcfg['nc']}", fmt_names(mcfg["names"]), ""])
    return "\n".join(lines)


-def build_classify_yaml(task: str, tcfg: dict, root: Path, pack_names: list[str]) -> str:
-    task_dir = tcfg["task_dir"]
+def build_classify_yaml(yaml_key: str, mcfg: dict, root: Path, pack_names: list[str]) -> str:
+    task_dir = mcfg["task_dir"]
    if len(pack_names) > 1:
-        print(f"  warn {task}: classify 暂用首个包 {pack_names[0]}（多包请先合并目录）")
+        print(f"  warn {yaml_key}: classify 暂用首个包 {pack_names[0]}（多包请先合并目录）")
    base = pack_task_root(root, pack_names[0], task_dir)
-    return f"""# {task} — pack: {pack_names[0]}
+    return f"""# {yaml_key} — pack: {pack_names[0]}
 path: {base.resolve()}
 train: train
 val: val
@@ -88,39 +85,49 @@ test: test
 """


+def iter_yaml_jobs(reg: dict, only_task: str | None = None):
+    """Yield ``(yaml_key, config)`` for every YAML file to generate.
+
+    A ``multi`` task yields one job per mode, keyed by ``train_yaml_key``;
+    any other task yields a single job keyed by the task name.
+
+    Args:
+        reg: Parsed registry mapping.
+        only_task: When given, restrict the jobs to this registry task.
+
+    Raises:
+        SystemExit: If *only_task* is not a registry task.
+    """
+    registry = load_registry(reg)
+    if only_task:
+        if only_task not in registry:
+            raise SystemExit(f"未知 task: {only_task}")
+        selected = {only_task: registry[only_task]}
+    else:
+        selected = registry
+    for name, cfg in selected.items():
+        if cfg.get("type") != "multi":
+            yield name, cfg
+            continue
+        for mode in (cfg.get("modes") or {}):
+            mode_cfg = get_mode_config(name, mode, reg)
+            yield train_yaml_key(name, mode, reg), mode_cfg
+
+
 def main() -> None:
    p = argparse.ArgumentParser()
    p.add_argument("--root", type=Path, default=SCRIPT_DIR.parent)
    p.add_argument("--packs", help="逗号分隔，覆盖 workflow active_packs")
-    p.add_argument("--task", help="只生成某一任务")
+    p.add_argument("--task", help="只生成某一任务（multi 会生成全部 mode）")
    args = p.parse_args()
    root = args.root.resolve()
    reg = yaml.safe_load((root / "datasets.registry.yaml").read_text(encoding="utf-8"))
    cli = [x.strip() for x in args.packs.split(",")] if args.packs else None
    pack_names = load_active_pack_names(root, cli)
    if not pack_names:
-        raise SystemExit("active_packs 为空，请编辑 ML/workflow.registry.yaml 或 --packs")
+        raise SystemExit("active_packs 为空，请编辑 workflow.registry.yaml 或 --packs")

    out_dir = root / "manifests" / "yaml_active"
    out_dir.mkdir(parents=True, exist_ok=True)
    print(f"active_packs: {pack_names}")

-    tasks = reg["tasks"]
-    if args.task:
-        if args.task not in tasks:
-            raise SystemExit(f"未知 task: {args.task}")
-        tasks = {args.task: tasks[args.task]}
-
-    for task, tcfg in tasks.items():
-        typ = tcfg["type"]
+    for yaml_key, mcfg in iter_yaml_jobs(reg, args.task):
+        typ = mcfg["type"]
        if typ in ("detect", "pose"):
-            content = build_detect_pose_yaml(task, tcfg, root, pack_names, typ)
+            content = build_detect_pose_yaml(yaml_key, mcfg, root, pack_names, typ)
        elif typ == "classify":
-            content = build_classify_yaml(task, tcfg, root, pack_names)
+            content = build_classify_yaml(yaml_key, mcfg, root, pack_names)
        else:
-            print(f"  skip {task}: type {typ}")
+            print(f"  skip {yaml_key}: type {typ}")
            continue
-        out = out_dir / f"{task}.yaml"
+        out = out_dir / f"{yaml_key}.yaml"
        out.write_text(content, encoding="utf-8")
        print(f"  wrote {out.relative_to(root)}")

--- a/datasets/dms/scripts/task_registry.py
+++ b/datasets/dms/scripts/task_registry.py
@@ -0,0 +1,166 @@
+"""DMS 任务注册表：domain 分组、multi 任务（前向 detect+classify）、旧 ID 别名。"""
+from __future__ import annotations
+
+from typing import Any
+
+# Display label for each task domain (used for ``domain_label`` in iter_catalog_tasks).
+DOMAIN_LABELS = {
+    "dms": "舱内 DMS",
+    "forward": "前向 ADAS",
+}
+
+# Report names / legacy directory names -> (task, mode); looked up by map_report_task.
+REPORT_TASK_ALIASES: dict[str, tuple[str, str | None]] = {
+    "isa": ("forward", "detect"),
+    "isa_detect": ("forward", "detect"),
+    "isa_class": ("forward", "classify"),
+    "isa_class_0116": ("forward", "classify"),
+    "dam_0417": ("dam", "batch_0417"),
+}
+
+# Legacy task ids -> (task, default mode); looked up by resolve_task_id.
+LEGACY_TASK_ALIASES: dict[str, tuple[str, str | None]] = {
+    "isa": ("forward", "detect"),
+    "isa_class": ("forward", "classify"),
+    "dam_0417": ("dam", "batch_0417"),
+}
+
+
+def load_registry(reg: dict) -> dict[str, Any]:
+    """Return the ``tasks`` table of a parsed registry, or ``{}`` when it is missing or empty."""
+    tasks = reg.get("tasks")
+    return tasks if tasks else {}
+
+
+def resolve_task_id(task: str, mode: str | None = None) -> tuple[str, str | None]:
+    """Map a user-supplied or legacy task id to ``(canonical_task, mode)``.
+
+    For a legacy alias the alias's mode is used only when *mode* is not
+    given; ids that are not aliases are returned unchanged.
+    """
+    alias = LEGACY_TASK_ALIASES.get(task)
+    if alias is None:
+        return task, mode
+    canonical_task, default_mode = alias
+    return canonical_task, mode or default_mode
+
+
+def report_task_key(task: str, mode: str | None = None) -> str:
+    """Return the task column name used in the catalog report CSV.
+
+    Known ``(task, mode)`` pairs map back to their legacy report names; any
+    other input is returned as the *task* argument unchanged.
+    """
+    canonical, resolved_mode = resolve_task_id(task, mode)
+    report_names = {
+        ("forward", "detect"): "isa",
+        ("forward", "classify"): "isa_class",
+        ("dam", "batch_0516"): "dam",
+        ("dam", "batch_0417"): "dam_0417",
+    }
+    return report_names.get((canonical, resolved_mode), task)
+
+
+def train_yaml_key(task: str, mode: str | None, reg: dict) -> str:
+    """Return the file name (without ``.yaml``) used by manifests/yaml_active and train.sh.
+
+    ``multi`` tasks produce ``<task>__<mode>``; other tasks produce the task
+    name itself.
+
+    Raises:
+        ValueError: If the task is ``multi`` and no mode is given.
+        KeyError: If the task is not in the registry.
+    """
+    task, mode = resolve_task_id(task, mode)
+    is_multi = load_registry(reg)[task].get("type") == "multi"
+    if not is_multi:
+        return task
+    if mode:
+        return f"{task}__{mode}"
+    raise ValueError(f"任务 {task} 需指定 mode（detect / classify）")
+
+
+def get_mode_config(task: str, mode: str | None, reg: dict) -> dict[str, Any]:
+    """Return the effective configuration for a task, or for one mode of a ``multi`` task.
+
+    For non-multi tasks this is a copy of the task config with ``task`` set
+    and ``mode`` set to ``None``. For ``multi`` tasks it is a copy of the
+    mode's config with ``task``, ``mode``, ``task_dir``
+    (``<task_dir>/<subdir>``), ``domain`` and ``label`` filled in.
+
+    Raises:
+        ValueError: If a ``multi`` task is given no mode or an unknown mode.
+        KeyError: If the task is not in the registry.
+    """
+    task, mode = resolve_task_id(task, mode)
+    tcfg = load_registry(reg)[task]
+    if tcfg.get("type") != "multi":
+        flat = dict(tcfg)
+        flat["task"] = task
+        flat["mode"] = None
+        return flat
+
+    modes = tcfg.get("modes") or {}
+    if not mode:
+        raise ValueError(f"任务 {task} 需指定 mode")
+    if mode not in modes:
+        raise ValueError(f"未知 mode: {task}/{mode}")
+
+    mcfg = dict(modes[mode])
+    base_dir = tcfg.get("task_dir", task)
+    sub_dir = mcfg.get("subdir", mode)
+    mcfg.update(
+        task=task,
+        mode=mode,
+        task_dir=f"{base_dir}/{sub_dir}",
+        domain=tcfg.get("domain"),
+    )
+    # A mode-level label takes precedence over the task-level label.
+    mcfg["label"] = mcfg.get("label") or tcfg.get("label")
+    return mcfg
+
+
+def task_data_dir(pack_dir, task: str, mode: str | None, reg: dict):
+    """Return ``<pack_dir>/<task_dir>`` for the resolved task/mode configuration."""
+    from pathlib import Path
+
+    relative_dir = get_mode_config(task, mode, reg)["task_dir"]
+    return Path(pack_dir) / relative_dir
+
+
+def inbox_dir(root, task: str, mode: str | None, reg: dict):
+    """Return the inbox directory of a task, or of one mode of a ``multi`` task.
+
+    A configured ``inbox`` path (relative to *root*) wins; otherwise the
+    default is ``inbox/<task>`` or ``inbox/<task>/<mode>``.
+
+    Raises:
+        KeyError: If the task is not in the registry, or a ``multi`` task is
+            given no mode or an unknown mode.
+    """
+    from pathlib import Path
+
+    task, mode = resolve_task_id(task, mode)
+    tcfg = load_registry(reg)[task]
+    if tcfg.get("type") != "multi":
+        return Path(root) / (tcfg.get("inbox") or f"inbox/{task}")
+
+    # Keep raising KeyError as before, but say which task/mode is at fault
+    # instead of surfacing an opaque KeyError('') or KeyError('modes').
+    modes = tcfg.get("modes") or {}
+    if not mode:
+        raise KeyError(f"任务 {task} 需指定 mode")
+    if mode not in modes:
+        raise KeyError(f"未知 mode: {task}/{mode}")
+    mcfg = modes[mode] or {}
+    rel = mcfg.get("inbox") or f"inbox/{task}/{mode}"
+    return Path(root) / rel
+
+
+def iter_catalog_tasks(reg: dict) -> list[tuple[str, dict[str, Any]]]:
+    """Build the top-level task list for the catalog.
+
+    Returns:
+        ``(task, entry)`` pairs in registry order. ``multi`` tasks carry a
+        ``modes`` table; other tasks carry ``nc`` / ``names`` directly. The
+        ``packs`` and ``class_counts`` containers start empty.
+    """
+    catalog: list[tuple[str, dict[str, Any]]] = []
+    for task, tcfg in load_registry(reg).items():
+        domain = tcfg.get("domain", "dms")
+        task_type = tcfg.get("type")
+        entry: dict[str, Any] = {
+            "domain": domain,
+            "domain_label": DOMAIN_LABELS.get(domain, domain),
+            "label": tcfg.get("label", task),
+            "type": task_type,
+        }
+        if task_type == "multi":
+            entry["modes"] = {
+                mode: {
+                    "label": mcfg.get("label", mode),
+                    "type": mcfg.get("type"),
+                    "nc": mcfg.get("nc"),
+                    "names": mcfg.get("names"),
+                    "packs": [],
+                    "class_counts": {},
+                }
+                for mode, mcfg in (tcfg.get("modes") or {}).items()
+            }
+        else:
+            entry.update(
+                nc=tcfg.get("nc"),
+                names=tcfg.get("names"),
+                packs=[],
+                class_counts={},
+            )
+        catalog.append((task, entry))
+    return catalog
+
+
+def map_report_task(report_name: str) -> tuple[str, str | None]:
+    """Translate a report task name to ``(task, mode)``; unknown names map to ``(name, None)``."""
+    return REPORT_TASK_ALIASES.get(report_name, (report_name, None))
+
+
+def task_defs_for_pending(reg: dict) -> dict[str, Any]:
+    """Build the ``task_defs`` mapping for the platform's pending API.
+
+    ``multi`` tasks get a ``modes`` table with one definition per mode; other
+    tasks get ``nc`` / ``names`` / ``task_dir`` directly.
+    """
+    defs: dict[str, Any] = {}
+    for task, tcfg in load_registry(reg).items():
+        base_dir = tcfg.get("task_dir", task)
+        domain = tcfg.get("domain", "dms")
+        label = tcfg.get("label", task)
+        if tcfg.get("type") != "multi":
+            defs[task] = {
+                "type": tcfg.get("type"),
+                "domain": domain,
+                "label": label,
+                "nc": tcfg.get("nc"),
+                "names": tcfg.get("names"),
+                "task_dir": base_dir,
+            }
+            continue
+        mode_defs = {}
+        for mode_name, mode_cfg in (tcfg.get("modes") or {}).items():
+            mode_defs[mode_name] = {
+                "type": mode_cfg.get("type"),
+                "nc": mode_cfg.get("nc"),
+                "names": mode_cfg.get("names"),
+                "task_dir": f"{base_dir}/{mode_cfg.get('subdir', mode_name)}",
+            }
+        defs[task] = {
+            "type": "multi",
+            "domain": domain,
+            "label": label,
+            "modes": mode_defs,
+        }
+    return defs
--- a/datasets/dms/scripts/test_export_ls_to_yolo.py
+++ b/datasets/dms/scripts/test_export_ls_to_yolo.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python3
+"""export_ls_to_yolo 单元测试（无 pytest 依赖）。"""
+from __future__ import annotations
+
+import hashlib
+import json
+import sys
+import tempfile
+from pathlib import Path
+
+SCRIPT_DIR = Path(__file__).resolve().parent
+sys.path.insert(0, str(SCRIPT_DIR))
+
+from export_ls_to_yolo import (  # noqa: E402
+    convert_regions_to_yolo_lines,
+    export_batch,
+)
+from ingest_incremental import validate_detect_label, validate_pose_label  # noqa: E402
+
+
+def _task_id(rel: str) -> str:
+    return hashlib.sha256(rel.encode("utf-8")).hexdigest()[:16]  # first 16 hex chars of SHA-256(rel)
+
+
+def test_detect_conversion() -> None:
+    """One rectangle region must yield exactly one well-formed detect line."""
+    face_box = {
+        "x": 10.0,
+        "y": 20.0,
+        "width": 30.0,
+        "height": 40.0,
+        "rectanglelabels": ["face"],
+    }
+    class_map = {"face": 0, "eye_open": 1}
+    yolo_lines = convert_regions_to_yolo_lines(
+        [{"type": "rectanglelabels", "value": face_box}],
+        mode="detect",
+        class_map=class_map,
+    )
+    assert len(yolo_lines) == 1
+    fields = yolo_lines[0].split()
+    assert len(fields) == 5
+    assert fields[0] == "0"
+    # Expected box centre, scaled by 1/100 from the region values above:
+    # cx = (10+15)/100 and cy = (20+20)/100.
+    for idx, expected in ((1, 0.25), (2, 0.40)):
+        assert abs(float(fields[idx]) - expected) < 1e-5
+    # NOTE(review): 4 is presumably the class count — confirm against validate_detect_label.
+    problem = validate_detect_label("\n".join(yolo_lines), 4)
+    assert problem is None, problem
+
+
+def test_pose_conversion() -> None:
+    """A face box plus two keypoints must yield one valid pose line."""
+    num_kpts, kpt_dims = 37, 3
+
+    def _keypoint(label: str, x: float, y: float) -> dict:
+        return {
+            "type": "keypointlabels",
+            "value": {"x": x, "y": y, "width": 0.5, "keypointlabels": [label]},
+        }
+
+    face_box = {
+        "type": "rectanglelabels",
+        "value": {
+            "x": 10.0,
+            "y": 20.0,
+            "width": 30.0,
+            "height": 40.0,
+            "rectanglelabels": ["face"],
+        },
+    }
+    regions = [face_box, _keypoint("kp_01", 35.6, 52.9), _keypoint("kp_10", 50.0, 50.0)]
+    pose_lines = convert_regions_to_yolo_lines(
+        regions,
+        mode="pose",
+        class_map={"face": 0},
+        kpt_map={f"kp_{idx:02d}": idx for idx in range(num_kpts)},
+        kpt_shape=[num_kpts, kpt_dims],
+    )
+    assert len(pose_lines) == 1
+    fields = pose_lines[0].split()
+    # 5 leading fields, then kpt_dims fields for each of the num_kpts slots.
+    assert len(fields) == 5 + num_kpts * kpt_dims  # 116
+    assert fields[0] == "0"
+    # kp_01 sits at index 1, i.e. fields 5+3 .. 5+5.
+    assert abs(float(fields[8]) - 0.356) < 1e-3
+    assert abs(float(fields[9]) - 0.529) < 1e-3
+    assert fields[10] == "2.000000"
+    problem = validate_pose_label("\n".join(pose_lines), [num_kpts, kpt_dims])
+    assert problem is None, problem
+
+
+def test_export_batch_end_to_end() -> None:
+    """Run export_batch on a one-image temp batch and check the written label file."""
+    rel_image = "images/train/sample.jpg"
+    task_id = _task_id(rel_image)
+    face_box = {
+        "x": 10.0,
+        "y": 20.0,
+        "width": 30.0,
+        "height": 40.0,
+        "rectanglelabels": ["face"],
+    }
+    keypoint = {"x": 25.0, "y": 40.0, "width": 0.5, "keypointlabels": ["kp_00"]}
+    annotation = {
+        "task_id": task_id,
+        "result": [
+            {"type": "rectanglelabels", "value": face_box},
+            {"type": "keypointlabels", "value": keypoint},
+        ],
+    }
+
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        batch_root = Path(tmp_dir)
+        image_file = batch_root / rel_image
+        image_file.parent.mkdir(parents=True)
+        # Stub image: only the 3-byte JPEG signature (presumably never decoded — confirm).
+        image_file.write_bytes(b"\xff\xd8\xff")
+
+        annotations_dir = batch_root / "labels" / "ls_annotations"
+        annotations_dir.mkdir(parents=True)
+        payload = json.dumps(annotation)
+        (annotations_dir / f"{task_id}.json").write_text(payload, encoding="utf-8")
+
+        summary = export_batch(batch_root, "addw_face", mode="pose")
+        assert summary["written"] == 1
+        label_file = batch_root / "labels" / "train" / "sample.txt"
+        assert label_file.is_file()
+        fields = label_file.read_text().strip().split()
+        assert len(fields) == 116
+
+
+def main() -> int:
+    """Run every test in order, then report success and return exit code 0."""
+    for case in (test_detect_conversion, test_pose_conversion, test_export_batch_end_to_end):
+        case()
+    print("OK export_ls_to_yolo tests")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())  # main()'s return value becomes the process exit status
--- a/datasets/dms/scripts/train.sh
+++ b/datasets/dms/scripts/train.sh
@@ -17,20 +17,21 @@ TASK="${1:?用法: $0 <task> [full|continue]}"
 TRAIN_MODE="${2:-full}"

 REG="$DATASET_ROOT/datasets.registry.yaml"
-YAML="$DATASET_ROOT/manifests/yaml_active/${TASK}.yaml"
 VERSIONS="$DATASET_ROOT/manifests/train_versions.yaml"
+SUBMODE="${SUBMODE:-}"

-if [[ ! -f "$YAML" ]]; then
-  echo "找不到 yaml: $YAML"
-  exit 1
-fi
-
-read -r TYPE MODE MODEL EPOCHS LR0 IMGSZ RUN_SUFFIX <<< "$(python3 - <<PY
-import yaml
+read -r YAML_KEY TYPE MODE MODEL EPOCHS LR0 IMGSZ RUN_SUFFIX <<< "$(python3 - <<PY
+import sys
 from pathlib import Path
+import yaml
+sys.path.insert(0, str(Path("$DATASET_ROOT/scripts")))
+from task_registry import get_mode_config, resolve_task_id, train_yaml_key
 reg = yaml.safe_load(Path("$REG").read_text())
-tcfg = reg["tasks"]["$TASK"]
-typ = tcfg["type"]
+task, sub = resolve_task_id("$TASK", "$SUBMODE" or None)
+mcfg = get_mode_config(task, sub, reg)
+typ = mcfg["type"]
+yaml_key = train_yaml_key(task, sub, reg)
+print(yaml_key, end=" ")
 train_mode = "$TRAIN_MODE" if "$TRAIN_MODE" in ("full", "continue") else reg.get("train", {}).get("mode", "full")
 t = reg.get("train", {}).get(typ, reg.get("train_defaults", {}).get(typ, {}))
 if train_mode == "continue":
@@ -49,6 +50,12 @@ print(typ, mode, model, epochs, lr0, imgsz, suffix)
 PY
 )"

+YAML="$DATASET_ROOT/manifests/yaml_active/${YAML_KEY}.yaml"
+if [[ ! -f "$YAML" ]]; then
+  echo "找不到 yaml: $YAML（请先 refresh_yaml.py）"
+  exit 1
+fi
+
 # continue 模式：warm_start 为空则读 train_versions.yaml
 if [[ "$TRAIN_MODE" == "continue" && ( "$MODEL" == "null" || "$MODEL" == "None" || -z "$MODEL" ) ]]; then
  MODEL=$(python3 - <<PY 2>/dev/null || true
@@ -57,7 +64,7 @@ from pathlib import Path
 p = Path("$VERSIONS")
 if p.is_file():
    v = yaml.safe_load(p.read_text()) or {}
-    c = v.get("$TASK", {}).get("current")
+    c = v.get("$YAML_KEY", {}).get("current")
    if c: print(c)
 PY
 )
@@ -68,9 +75,9 @@ if [[ "$TRAIN_MODE" == "continue" && ( -z "$MODEL" || "$MODEL" == "null" ) ]]; t
  exit 1
 fi

-RUN_NAME="${TASK}_${RUN_SUFFIX}_$(date +%Y%m%d)"
+RUN_NAME="${YAML_KEY}_${RUN_SUFFIX}_$(date +%Y%m%d)"

-echo "task=$TASK type=$TYPE yolo_mode=$MODE train_mode=$TRAIN_MODE"
+echo "task=$TASK submode=$SUBMODE yaml_key=$YAML_KEY type=$TYPE yolo_mode=$MODE train_mode=$TRAIN_MODE"
 echo "data=$YAML"
 echo "model=$MODEL epochs=$EPOCHS lr0=$LR0 imgsz=$IMGSZ name=$RUN_NAME"

@@ -93,4 +100,4 @@ yolo "$MODE" train \

 BEST="runs/${MODE}/${RUN_NAME}/weights/best.pt"
 echo "完成: $BEST"
-echo "请更新 manifests/train_versions.yaml 中 $TASK.current = $BEST"
+echo "请更新 manifests/train_versions.yaml 中 $YAML_KEY.current = $BEST"