feat: HSAP platform v2 — modular navigation, quality review, audit log, world model simulation

Major changes:
- New frontend (platform/web/): Vite + React 18 + TypeScript + Tailwind
- 4-module navigation: 数据送标 / 模型管理 / 车队管理 / 系统管理
- Data catalog with charts (DMS/ADAS/Lane 3-tab view)
- Quality review workflow (标注质检): Good/Fine/Bad scoring with auto-advance
- Audit enhancements: batch operations, rejection categories, Feishu notifications
- Operation audit log (操作日志)
- World model simulation studio (仿真工坊)
- Dataset version management with snapshots and diff
- ADAS 7-class dataset integration (138K images organized + compressed)
- User management with Feishu integration and pagination
- CRUD/search/filter on all pages, card layout redesign
- PIL-optimized image overlay rendering
- Auto-snapshot on build, in_review workflow stage
- Removed embedded algorithm code (now in workspace)
This commit is contained in:
2026-06-03 11:40:21 +08:00
parent 7c43b44c57
commit e72bc061c5
5487 changed files with 979207 additions and 6197 deletions

View File

@@ -0,0 +1,347 @@
#!/usr/bin/env python3
"""Label Studio ls_annotations JSON → YOLO detect / YOLO pose txt."""
from __future__ import annotations
import argparse
import hashlib
import json
import sys
from pathlib import Path
from typing import Any
import yaml
# Directory that contains this script.
SCRIPT_DIR = Path(__file__).resolve().parent
# Parent of the script directory; configs and the dataset registry are located relative to it.
DMS_ROOT = SCRIPT_DIR.parent
CONFIG_DIR = DMS_ROOT / "configs"
# YAML registry read by _load_registry().
REGISTRY_PATH = DMS_ROOT / "datasets.registry.yaml"
# Directory holding the keypoint-order manifests read by _load_kpt_label_map().
KPT_ORDER_DIR = CONFIG_DIR / "keypoint_order"
# File suffixes that _iter_batch_images() accepts as images.
IMG_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".webp", ".JPG", ".JPEG", ".PNG"}
# Name of the sub-directory under <batch>/labels that holds the per-image annotation JSON files.
ANNOTATIONS_DIRNAME = "ls_annotations"
def _load_registry() -> dict[str, Any]:
    """Read ``REGISTRY_PATH`` as UTF-8 text and return the parsed YAML document."""
    registry_text = REGISTRY_PATH.read_text(encoding="utf-8")
    return yaml.safe_load(registry_text)
def _resolve_task_config(task: str, mode: str | None = None) -> dict[str, Any]:
    """Return the registry configuration for ``task`` / ``mode``.

    The task id is first canonicalised with ``task_registry.resolve_task_id``
    and the resulting pair is looked up with ``task_registry.get_mode_config``.
    """
    # Imported at call time rather than at module import time.
    from task_registry import get_mode_config, resolve_task_id

    registry = _load_registry()
    canonical_task, canonical_mode = resolve_task_id(task, mode)
    return get_mode_config(canonical_task, canonical_mode, registry)
def _class_name_to_id(names: list[str] | dict[int | str, str]) -> dict[str, int]:
    """Build a ``class name -> class id`` lookup.

    A list maps each name to its position; a dict is treated as
    ``{id: name}`` and inverted, coercing ids to ``int`` and names to ``str``.
    """
    if not isinstance(names, dict):
        return {label: position for position, label in enumerate(names)}
    inverted: dict[str, int] = {}
    for class_id, label in names.items():
        inverted[str(label)] = int(class_id)
    return inverted
def _load_kpt_label_map(task: str) -> dict[str, int]:
    """Load the ``keypoint label -> keypoint index`` map for ``task``.

    Looks for ``<task>_37.yaml`` first and falls back to ``<task>.yaml``,
    both inside ``KPT_ORDER_DIR``. Each entry of the manifest's ``keypoints``
    list must provide ``label`` and ``id``.

    Raises:
        FileNotFoundError: neither manifest file exists.
    """
    candidates = (
        KPT_ORDER_DIR / f"{task}_37.yaml",
        KPT_ORDER_DIR / f"{task}.yaml",
    )
    manifest = next((path for path in candidates if path.is_file()), None)
    if manifest is None:
        raise FileNotFoundError(f"keypoint manifest not found for task {task}")
    # safe_load() returns None for an empty document; treat that as "no keypoints"
    # instead of failing with AttributeError on None.get().
    data = yaml.safe_load(manifest.read_text(encoding="utf-8")) or {}
    return {str(item["label"]): int(item["id"]) for item in data.get("keypoints") or []}
def _task_id_for_image(image_path: Path, batch_dir: Path) -> str:
    """Return the 16-hex-character id used to name an image's annotation file.

    The id is the truncated SHA-256 of the image path relative to
    ``batch_dir`` (POSIX separators). When the image is not located under
    ``batch_dir`` the bare file stem is hashed instead.
    """
    try:
        hash_source = image_path.relative_to(batch_dir).as_posix()
    except ValueError:
        hash_source = image_path.stem
    digest = hashlib.sha256(hash_source.encode())
    return digest.hexdigest()[:16]
def _iter_batch_images(batch_dir: Path) -> list[Path]:
    """Collect the resolved paths of all image files found under ``batch_dir``.

    ``images/``, ``images/train/`` and the batch directory itself are scanned
    recursively in that order; each file is reported once, at the position of
    its first occurrence. Returns an empty list when ``batch_dir`` is not a
    directory.

    The suffix is compared case-insensitively so that ``.BMP``, ``.Webp`` or
    ``.Jpg`` files are not silently skipped.
    """
    if not batch_dir.is_dir():
        return []
    search_roots = (
        batch_dir / "images",
        batch_dir / "images" / "train",
        batch_dir,
    )
    found: list[Path] = []
    seen: set[str] = set()
    for root in search_roots:
        if not root.is_dir():
            continue
        for entry in sorted(root.rglob("*")):
            if not entry.is_file() or entry.suffix.lower() not in IMG_EXTS:
                continue
            # Resolve once; the resolved path is both the de-duplication key
            # and the value handed back to the caller.
            resolved = entry.resolve()
            key = str(resolved)
            if key in seen:
                continue
            seen.add(key)
            found.append(resolved)
    return found
def _label_out_path(image_path: Path, batch_dir: Path) -> Path:
    """Derive the ``.txt`` label path for an image.

    The image path relative to ``batch_dir`` (or just its file name when it
    lies outside the batch) is mirrored under ``<batch_dir>/labels`` with a
    leading ``images`` component removed. A leading ``train`` / ``val`` /
    ``test`` component is kept as the split directory.
    """
    try:
        segments = list(image_path.relative_to(batch_dir).parts)
    except ValueError:
        segments = list(Path(image_path.name).parts)
    if segments and segments[0] == "images":
        segments = segments[1:]
    labels_root = batch_dir / "labels"
    if segments and segments[0] in ("train", "val", "test"):
        split, *remainder = segments
        return labels_root / split / Path(*remainder).with_suffix(".txt")
    return labels_root / Path(*segments).with_suffix(".txt")
def _extract_result_regions(data: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the list of annotated regions stored in an annotation document.

    A non-empty top-level ``result`` list wins; otherwise the ``result`` list
    of the first entry in ``annotations`` is used. An empty list is returned
    when neither is present.
    """
    top_level = data.get("result")
    if isinstance(top_level, list) and top_level:
        return top_level
    annotations = data.get("annotations")
    if not (isinstance(annotations, list) and annotations):
        return []
    head = annotations[0]
    if not isinstance(head, dict):
        return []
    nested = head.get("result")
    return nested if isinstance(nested, list) else []
def _clamp01(v: float) -> float:
    """Limit ``v`` to the closed interval [0.0, 1.0]."""
    capped = min(1.0, v)
    return max(0.0, capped)
def _ls_rect_to_yolo_bbox(value: dict[str, Any]) -> tuple[float, float, float, float]:
    """Convert a percent-based rectangle into a normalised YOLO box.

    ``value`` supplies ``x``, ``y`` (top-left corner) and ``width``,
    ``height``, all expressed in percent of the image size.

    Returns:
        ``(cx, cy, w, h)`` with every component in [0, 1].

    The rectangle's edges are clipped to the image before the centre and size
    are derived. Clamping centre and size independently would leave a box that
    overhangs the image border still extending outside [0, 1].
    """
    left = float(value["x"])
    top = float(value["y"])
    right = left + float(value["width"])
    bottom = top + float(value["height"])

    # Clip each edge to the 0..100 percent range of the image.
    left, right = (min(100.0, max(0.0, edge)) for edge in (left, right))
    top, bottom = (min(100.0, max(0.0, edge)) for edge in (top, bottom))

    # A rectangle entirely outside the image collapses to zero size.
    clipped_w = max(0.0, right - left)
    clipped_h = max(0.0, bottom - top)
    cx = (left + clipped_w / 2.0) / 100.0
    cy = (top + clipped_h / 2.0) / 100.0
    return cx, cy, clipped_w / 100.0, clipped_h / 100.0
def _ls_point_to_yolo_xy(value: dict[str, Any]) -> tuple[float, float]:
    """Convert a percent-based point (``x``, ``y``) into coordinates clamped to [0, 1]."""
    x_norm = _clamp01(float(value["x"]) / 100.0)
    y_norm = _clamp01(float(value["y"]) / 100.0)
    return x_norm, y_norm
def _bbox_center(bbox: tuple[float, float, float, float]) -> tuple[float, float]:
    """Return the ``(cx, cy)`` part of a ``(cx, cy, w, h)`` box."""
    center_x, center_y, _width, _height = bbox
    return (center_x, center_y)
def _parse_rectangles(
    regions: list[dict[str, Any]],
    class_map: dict[str, int],
) -> list[dict[str, Any]]:
    """Extract the labelled rectangles from ``regions``.

    Only regions of type ``rectanglelabels`` whose first label is a key of
    ``class_map`` are kept. Each result carries ``class_id``, the normalised
    ``bbox`` and the source ``region_id``.
    """
    parsed: list[dict[str, Any]] = []
    for region in regions:
        if region.get("type") != "rectanglelabels":
            continue
        payload = region.get("value") or {}
        names = payload.get("rectanglelabels") or []
        if not names:
            continue
        first_name = str(names[0])
        if first_name not in class_map:
            continue
        parsed.append(
            {
                "class_id": class_map[first_name],
                "bbox": _ls_rect_to_yolo_bbox(payload),
                "region_id": region.get("id"),
            }
        )
    return parsed
def _parse_keypoints(
    regions: list[dict[str, Any]],
    kpt_map: dict[str, int],
) -> list[dict[str, Any]]:
    """Extract the labelled keypoints from ``regions``.

    Regions of type ``keypointlabels`` or ``keypoint`` are considered; the
    first entry of ``value["keypointlabels"]`` must be a key of ``kpt_map``.
    Each result carries the keypoint ``index``, normalised ``x`` / ``y`` and
    the source ``region_id``.
    """
    accepted_types = ("keypointlabels", "keypoint")
    parsed: list[dict[str, Any]] = []
    for region in regions:
        if region.get("type") not in accepted_types:
            continue
        payload = region.get("value") or {}
        names = payload.get("keypointlabels") or []
        if not names:
            continue
        first_name = str(names[0])
        if first_name not in kpt_map:
            continue
        x_norm, y_norm = _ls_point_to_yolo_xy(payload)
        parsed.append(
            {
                "index": kpt_map[first_name],
                "x": x_norm,
                "y": y_norm,
                "region_id": region.get("id"),
            }
        )
    return parsed
def _assign_keypoints_to_boxes(
    boxes: list[dict[str, Any]],
    points: list[dict[str, Any]],
) -> dict[int | None, list[dict[str, Any]]]:
    """Group keypoints by the index of the box they belong to.

    With no boxes every point is filed under the key ``None``; with a single
    box every point goes to index ``0``. Otherwise each point is attached to
    the box whose centre is nearest (squared Euclidean distance, first box
    wins ties).
    """
    if not boxes:
        return {None: points}
    if len(boxes) == 1:
        return {0: points}
    buckets: dict[int, list[dict[str, Any]]] = {}
    for slot in range(len(boxes)):
        buckets[slot] = []
    for point in points:
        nearest_slot = 0
        nearest_sq = float("inf")
        for slot, box in enumerate(boxes):
            center_x, center_y = _bbox_center(box["bbox"])
            sq_dist = (point["x"] - center_x) ** 2 + (point["y"] - center_y) ** 2
            if sq_dist < nearest_sq:
                nearest_slot, nearest_sq = slot, sq_dist
        buckets[nearest_slot].append(point)
    return buckets
def _format_detect_line(class_id: int, bbox: tuple[float, float, float, float]) -> str:
    """Render one YOLO detect label line: class id followed by cx, cy, w, h with six decimals."""
    cx, cy, w, h = bbox
    fields = [f"{class_id}"]
    fields.extend(f"{component:.6f}" for component in (cx, cy, w, h))
    return " ".join(fields)
def _format_pose_line(
    class_id: int,
    bbox: tuple[float, float, float, float],
    points: list[dict[str, Any]],
    nk: int,
) -> str:
    """Render one YOLO pose label line.

    The line starts with the detect fields and is followed by ``nk``
    ``x y v`` triplets. Keypoints present in ``points`` are written with
    visibility ``2``; missing ones stay ``0 0 0``. Points whose index falls
    outside ``[0, nk)`` are ignored.
    """
    triplets: list[tuple[float, float, float]] = [(0.0, 0.0, 0.0) for _ in range(nk)]
    for point in points:
        position = int(point["index"])
        if position < 0 or position >= nk:
            continue
        triplets[position] = (point["x"], point["y"], 2.0)
    tokens = _format_detect_line(class_id, bbox).split()
    tokens += [f"{component:.6f}" for triplet in triplets for component in triplet]
    return " ".join(tokens)
def convert_regions_to_yolo_lines(
    regions: list[dict[str, Any]],
    *,
    mode: str,
    class_map: dict[str, int],
    kpt_map: dict[str, int] | None = None,
    kpt_shape: list[int] | None = None,
) -> list[str]:
    """Convert annotation regions into YOLO label lines.

    Args:
        regions: region dicts taken from an annotation ``result`` list.
        mode: ``"detect"`` (one line per rectangle) or ``"pose"`` (one line
            per rectangle followed by its keypoints).
        class_map: rectangle label -> class id.
        kpt_map: keypoint label -> keypoint index (pose only).
        kpt_shape: its first element is the number of keypoints (pose only).

    Returns:
        The label lines; empty in pose mode when no rectangle was found.

    Raises:
        ValueError: unknown ``mode``, or pose mode without ``kpt_map`` /
            ``kpt_shape``.
    """
    if mode == "detect":
        return [
            _format_detect_line(box["class_id"], box["bbox"])
            for box in _parse_rectangles(regions, class_map)
        ]
    if mode != "pose":
        raise ValueError(f"unsupported mode: {mode}")
    if not kpt_map or not kpt_shape:
        raise ValueError("pose mode requires kpt_map and kpt_shape")

    keypoint_count = int(kpt_shape[0])
    boxes = _parse_rectangles(regions, class_map)
    points = _parse_keypoints(regions, kpt_map)
    if not boxes:
        return []
    grouped = _assign_keypoints_to_boxes(boxes, points)
    return [
        _format_pose_line(box["class_id"], box["bbox"], grouped.get(slot, []), keypoint_count)
        for slot, box in enumerate(boxes)
    ]
def export_batch(
    batch_dir: Path,
    task: str,
    *,
    mode: str,
    task_mode: str | None = None,
    out_subdir: str | None = None,
) -> dict[str, Any]:
    """Write YOLO label files for every annotated image of a batch.

    For each image found under ``batch_dir`` the annotation file
    ``labels/ls_annotations/<task id>.json`` is converted and written as a
    ``.txt`` label file.

    Args:
        batch_dir: batch root directory.
        task: registry task id; also used to find the keypoint manifest.
        mode: ``"detect"`` or ``"pose"``.
        task_mode: optional registry sub-mode passed to the task lookup.
        out_subdir: when given, labels go to ``<batch_dir>/<out_subdir>/<image
            stem>.txt`` (only the stem is used, sub-folders are flattened);
            otherwise the path mirrors the image location under ``labels/``.

    Returns:
        A summary dict with the counters ``written``, ``skipped_empty`` and
        ``missing_ann`` plus the echoed arguments.
    """
    batch_dir = batch_dir.resolve()
    task_cfg = _resolve_task_config(task, task_mode)
    class_map = _class_name_to_id(task_cfg.get("names") or {})

    kpt_map: dict[str, int] | None = None
    kpt_shape: list[int] | None = None
    if mode == "pose":
        kpt_map = _load_kpt_label_map(task)
        kpt_shape = list(task_cfg.get("kpt_shape") or [37, 3])

    annotations_root = batch_dir / "labels" / ANNOTATIONS_DIRNAME
    written = skipped_empty = missing_ann = 0

    for image_path in _iter_batch_images(batch_dir):
        annotation_file = annotations_root / f"{_task_id_for_image(image_path, batch_dir)}.json"
        if not annotation_file.is_file():
            missing_ann += 1
            continue

        document = json.loads(annotation_file.read_text(encoding="utf-8"))
        regions = _extract_result_regions(document)
        if not regions:
            skipped_empty += 1
            continue

        label_lines = convert_regions_to_yolo_lines(
            regions,
            mode=mode,
            class_map=class_map,
            kpt_map=kpt_map,
            kpt_shape=kpt_shape,
        )
        if not label_lines:
            skipped_empty += 1
            continue

        target = _label_out_path(image_path, batch_dir)
        if out_subdir:
            # Explicit override: sub-directory relative to batch_dir + file name.
            target = batch_dir / out_subdir / f"{image_path.stem}.txt"
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text("\n".join(label_lines) + "\n", encoding="utf-8")
        written += 1

    summary: dict[str, Any] = {
        "ok": True,
        "batch_dir": str(batch_dir),
        "task": task,
        "mode": mode,
        "written": written,
        "skipped_empty": skipped_empty,
        "missing_ann": missing_ann,
        "out_subdir": out_subdir or "auto",
    }
    return summary
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point.

    Parses the arguments, runs :func:`export_batch`, prints its summary as
    JSON and returns ``0`` when at least one label file was written, ``1``
    otherwise.
    """
    cli = argparse.ArgumentParser(description="Export Label Studio annotations to YOLO txt")
    cli.add_argument("--batch-dir", type=Path, required=True)
    cli.add_argument("--task", required=True)
    cli.add_argument("--mode", choices=("detect", "pose"), required=True)
    cli.add_argument("--task-mode", default=None, help="dam batch_0516 / batch_0417 等")
    cli.add_argument("--out-subdir", default="labels/train")
    options = cli.parse_args(argv)

    # Make the sibling modules in the script directory importable.
    script_dir = str(SCRIPT_DIR)
    if script_dir not in sys.path:
        sys.path.insert(0, script_dir)

    summary = export_batch(
        options.batch_dir,
        options.task,
        mode=options.mode,
        task_mode=options.task_mode,
        out_subdir=options.out_subdir,
    )
    print(json.dumps(summary, ensure_ascii=False, indent=2))
    if summary["written"] > 0:
        return 0
    return 1
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -236,7 +236,7 @@ def ingest_yolo(
if is_voc:
if tcfg["type"] != "detect":
raise SystemExit("VOC xml 仅支持 detect 任务dam / dam_0417")
raise SystemExit("VOC xml 仅支持 detect 任务dam 各批次等")
staging = data_root.parent / "_staging_voc" / task
staging_parent = staging.parent
if staging.exists() and not dry_run:
@@ -384,7 +384,11 @@ def ingest_one(
src: Path,
args: argparse.Namespace,
) -> dict:
tcfg = reg["tasks"][task]
from task_registry import get_mode_config, resolve_task_id
submode = getattr(args, "mode", None) or getattr(args, "submode", None)
task, submode = resolve_task_id(task, submode)
tcfg = get_mode_config(task, submode, reg)
pack = getattr(args, "pack", None) or "dms_v1"
data_root = pack_task_data_root(root, pack, tcfg["task_dir"])
sk = split_kwargs(reg, args)
@@ -449,23 +453,42 @@ def ingest_all_sources(root: Path, reg: dict, task: str, args: argparse.Namespac
def ingest_all_inbox(root: Path, reg: dict, args: argparse.Namespace) -> None:
from task_registry import inbox_dir
pack = getattr(args, "pack", None) or "dms_v1"
for task, tcfg in reg["tasks"].items():
inbox = root / tcfg.get("inbox", f"inbox/{task}")
if not inbox.is_dir():
continue
batches = sorted(d for d in inbox.iterdir() if d.is_dir())
if not batches:
continue
print(f"\n>>> inbox {task}: {len(batches)} batch(es)")
for batch in batches:
ingest_one(root, reg, task, batch, args)
if not args.dry_run:
append_log(root, {"src": str(batch), "task": task, "pack": pack, "via": "inbox"})
if tcfg.get("type") == "multi":
for mode in (tcfg.get("modes") or {}):
inbox = inbox_dir(root, task, mode, reg)
if not inbox.is_dir():
continue
batches = sorted(d for d in inbox.iterdir() if d.is_dir())
if not batches:
continue
print(f"\n>>> inbox {task}/{mode}: {len(batches)} batch(es)")
args.submode = mode
for batch in batches:
ingest_one(root, reg, task, batch, args)
if not args.dry_run:
append_log(root, {"src": str(batch), "task": task, "mode": mode, "pack": pack, "via": "inbox"})
else:
inbox = inbox_dir(root, task, None, reg)
if not inbox.is_dir():
continue
batches = sorted(d for d in inbox.iterdir() if d.is_dir())
if not batches:
continue
print(f"\n>>> inbox {task}: {len(batches)} batch(es)")
for batch in batches:
ingest_one(root, reg, task, batch, args)
if not args.dry_run:
append_log(root, {"src": str(batch), "task": task, "pack": pack, "via": "inbox"})
def main() -> None:
p = argparse.ArgumentParser(description="DMS 全任务增量接入")
p.add_argument("--task", help="registry 任务名;与 --all-inbox 二选一")
p.add_argument("--task", help="registry 任务名forward 等 multi 任务需配合 --submode")
p.add_argument("--submode", choices=("detect", "classify"), help="multi 任务子模式,如 forward 的 detect/classify")
p.add_argument("--src", type=Path, help="新数据目录")
p.add_argument("--all-inbox", action="store_true", help="处理所有 inbox/<task>/* 批次")
p.add_argument("--all-sources", action="store_true", help="处理任务 data/sources/* 下所有待合并批次")

View File

@@ -0,0 +1,77 @@
#!/usr/bin/env python3
"""将 dam / dam_0417 合并为 dam/batch_0516、dam/batch_0417默认符号链接"""
from __future__ import annotations
import argparse
import shutil
from pathlib import Path
def link_or_move(src: Path, dst: Path, *, move: bool) -> None:
    """Expose directory ``src`` at ``dst`` by moving it or by symlinking to it.

    Nothing happens (a message is printed) when ``src`` is not a directory or
    when ``dst`` is already present. Missing parents of ``dst`` are created.

    Args:
        src: existing source directory.
        dst: destination path.
        move: ``True`` moves the directory; ``False`` creates a symlink at
            ``dst`` pointing to the resolved ``src``.
    """
    if not src.is_dir():
        print(f" skip不存在: {src}")
        return
    # exists() follows symlinks and is False for a dangling link, in which
    # case symlink_to() would fail with FileExistsError; check the link too.
    if dst.exists() or dst.is_symlink():
        print(f" 已存在: {dst}")
        return
    dst.parent.mkdir(parents=True, exist_ok=True)
    if move:
        shutil.move(str(src), str(dst))
        print(f" moved {src} -> {dst}")
        return
    target = src.resolve()
    dst.symlink_to(target)
    print(f" symlink {dst} -> {target}")
def migrate_pack(pack_dir: Path, *, move: bool) -> None:
    """Re-arrange ``<pack>/dam`` and ``<pack>/dam_0417`` into ``dam/batch_0516`` and ``dam/batch_0417``.

    Does nothing when both batch directories already exist. A ``dam``
    directory that still has ``images/`` at its root is first moved aside to
    ``<pack>/_dam_stash_0516`` (an existing stash is deleted beforehand) and
    then exposed as ``dam/batch_0516``. ``dam_0417`` is exposed as
    ``dam/batch_0417``.
    """
    dam_root = pack_dir / "dam"
    legacy_0417 = pack_dir / "dam_0417"
    target_0516 = dam_root / "batch_0516"
    target_0417 = dam_root / "batch_0417"

    if target_0516.exists() and target_0417.exists():
        print(" dam 已迁移,跳过")
        return

    # dam currently uses the flat YOLO layout (images/labels directly under its root).
    has_flat_layout = (
        dam_root.is_dir()
        and (dam_root / "images").is_dir()
        and not target_0516.exists()
    )
    if has_flat_layout:
        stash = pack_dir / "_dam_stash_0516"
        if stash.exists():
            print(f" 清理旧 stash: {stash}")
            if not stash.is_symlink():
                shutil.rmtree(stash)
            else:
                stash.unlink()
        shutil.move(str(dam_root), str(stash))
        dam_root.mkdir(parents=True)
        link_or_move(stash, target_0516, move=move)

    if legacy_0417.is_dir():
        link_or_move(legacy_0417, target_0417, move=move)
def migrate_inbox(dms_root: Path, *, move: bool) -> None:
    """Re-arrange ``inbox/dam`` and ``inbox/dam_0417`` into ``inbox/dam/batch_0516`` and ``inbox/dam/batch_0417``.

    ``inbox/dam`` -> ``inbox/dam/batch_0516`` places the destination inside
    the source. Handing that pair straight to ``link_or_move`` would create a
    symlink that points at its own parent (link mode) or ask ``shutil.move``
    to move a directory into itself (move mode). For such nested targets the
    current children of the source are first moved to a sibling stash
    directory, mirroring what ``migrate_pack`` does, and the stash is then
    exposed at the destination. Children that are themselves new-layout
    destinations are left where they are, and an existing stash is never
    deleted.

    Args:
        dms_root: directory that contains ``inbox/``.
        move: forwarded to ``link_or_move``.
    """
    inbox_root = dms_root / "inbox"
    layout = (
        ("dam", "dam/batch_0516"),
        ("dam_0417", "dam/batch_0417"),
    )
    for old, new in layout:
        src = inbox_root / old
        dst = inbox_root / new
        if not src.is_dir():
            continue
        if src not in dst.parents:
            link_or_move(src, dst, move=move)
            continue

        # Destination is nested inside the source directory.
        if dst.exists() or dst.is_symlink():
            print(f" 已存在: {dst}")
            continue
        stash = inbox_root / f"_{old}_stash_{dst.name}"
        if stash.exists() or stash.is_symlink():
            # A stash left by an earlier run may hold the only copy of the data.
            print(f" skip stash 已存在: {stash}")
            continue
        # Names directly under src that belong to the new layout must stay in place.
        reserved = {
            (inbox_root / rel).name
            for _, rel in layout
            if (inbox_root / rel).parent == src
        }
        stash.mkdir(parents=True)
        for child in sorted(src.iterdir()):
            if child.name not in reserved:
                shutil.move(str(child), str(stash / child.name))
        link_or_move(stash, dst, move=move)
def main() -> None:
    """Parse the command line and run the pack migration, plus the inbox migration when ``--dms-root`` is given."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--pack-dir", type=Path, required=True)
    parser.add_argument("--dms-root", type=Path, help="datasets/dms 根,迁移 inbox")
    parser.add_argument("--move", action="store_true")
    options = parser.parse_args()

    pack_dir = options.pack_dir.resolve()
    migrate_pack(pack_dir, move=options.move)
    if options.dms_root:
        dms_root = options.dms_root.resolve()
        migrate_inbox(dms_root, move=options.move)
    print("完成。请运行 refresh_yaml.py --task dam 并刷新 catalog。")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,57 @@
#!/usr/bin/env python3
"""将 isa / isa_class 目录迁入 forward/detect、forward/classify默认符号链接保留原数据"""
from __future__ import annotations
import argparse
import shutil
from pathlib import Path
def link_or_move(src: Path, dst: Path, *, move: bool) -> None:
    """Expose directory ``src`` at ``dst`` by moving it or by symlinking to it.

    Nothing happens (a message is printed) when ``src`` is not a directory or
    when ``dst`` is already present. Missing parents of ``dst`` are created.

    Args:
        src: existing source directory.
        dst: destination path.
        move: ``True`` moves the directory; ``False`` creates a symlink at
            ``dst`` pointing to the resolved ``src``.
    """
    if not src.is_dir():
        print(f" skip不存在: {src}")
        return
    # exists() follows symlinks and is False for a dangling link, in which
    # case symlink_to() would fail with FileExistsError; check the link too.
    if dst.exists() or dst.is_symlink():
        print(f" 已存在: {dst}")
        return
    dst.parent.mkdir(parents=True, exist_ok=True)
    if move:
        shutil.move(str(src), str(dst))
        print(f" moved {src} -> {dst}")
        return
    target = src.resolve()
    dst.symlink_to(target)
    print(f" symlink {dst} -> {target}")
def migrate_pack(pack_dir: Path, *, move: bool) -> None:
    """Expose ``<pack>/isa`` as ``forward/detect`` and ``<pack>/isa_class`` as ``forward/classify``.

    The ``forward`` directory is created when missing.
    """
    forward_root = pack_dir / "forward"
    forward_root.mkdir(parents=True, exist_ok=True)
    for legacy_name, mode_dir in (("isa", "detect"), ("isa_class", "classify")):
        link_or_move(pack_dir / legacy_name, forward_root / mode_dir, move=move)
def migrate_inbox(dms_root: Path, *, move: bool) -> None:
    """Expose ``inbox/isa`` and ``inbox/isa_class`` as ``inbox/forward/detect`` and ``inbox/forward/classify``.

    Legacy directories that do not exist are skipped silently.
    """
    inbox_root = dms_root / "inbox"
    renames = {
        "isa": "forward/detect",
        "isa_class": "forward/classify",
    }
    for legacy_name, new_rel in renames.items():
        legacy_dir = inbox_root / legacy_name
        target_dir = inbox_root / new_rel
        if not legacy_dir.is_dir():
            continue
        link_or_move(legacy_dir, target_dir, move=move)
def main() -> None:
    """Parse the command line and run the pack migration, plus the inbox migration when ``--dms-root`` is given."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--pack-dir", type=Path, required=True, help="如 .../packs/dms_v1")
    parser.add_argument("--dms-root", type=Path, help="datasets/dms 根,迁移 inbox")
    parser.add_argument("--move", action="store_true", help="移动而非符号链接")
    options = parser.parse_args()

    pack_dir = options.pack_dir.resolve()
    migrate_pack(pack_dir, move=options.move)
    if options.dms_root:
        dms_root = options.dms_root.resolve()
        migrate_inbox(dms_root, move=options.move)
    print("完成。请运行 refresh_yaml.py 并刷新平台 catalog。")
if __name__ == "__main__":
main()

View File

@@ -1,6 +1,5 @@
#!/usr/bin/env python3
"""按 workflow active_packs 生成 manifests/yaml_active/*.yaml可多包合并 train/val"""
from __future__ import annotations
import argparse
@@ -11,10 +10,8 @@ import yaml
SCRIPT_DIR = Path(__file__).resolve().parent
sys.path.insert(0, str(SCRIPT_DIR))
from pack_registry import ( # noqa: E402
load_active_pack_names,
resolve_pack_dir,
)
from pack_registry import load_active_pack_names, resolve_pack_dir # noqa: E402
from task_registry import get_mode_config, load_registry, train_yaml_key # noqa: E402
def fmt_names(names) -> str:
@@ -39,13 +36,13 @@ def pack_task_root(root: Path, pack_name: str, task_dir: str) -> Path:
def build_detect_pose_yaml(
task: str,
tcfg: dict,
yaml_key: str,
mcfg: dict,
root: Path,
pack_names: list[str],
typ: str,
) -> str:
task_dir = tcfg["task_dir"]
task_dir = mcfg["task_dir"]
bases = []
train_paths = []
val_paths = []
@@ -59,28 +56,28 @@ def build_detect_pose_yaml(
val_paths.append(str((base / "images" / "val").resolve()))
if not bases:
raise SystemExit(f"{task}: 无可用数据包目录")
raise SystemExit(f"{yaml_key}: 无可用数据包目录")
lines = [
f"# {task} — packs: {', '.join(pack_names)}",
f"# {yaml_key} — packs: {', '.join(pack_names)}",
f"path: {bases[0]}",
yaml_list("train", train_paths),
yaml_list("val", val_paths),
"",
]
if typ == "pose":
lines.insert(4, f"kpt_shape: {tcfg.get('kpt_shape', [37, 3])}")
lines.insert(4, f"kpt_shape: {mcfg.get('kpt_shape', [37, 3])}")
else:
lines.extend([f"nc: {tcfg['nc']}", fmt_names(tcfg["names"]), ""])
lines.extend([f"nc: {mcfg['nc']}", fmt_names(mcfg["names"]), ""])
return "\n".join(lines)
def build_classify_yaml(task: str, tcfg: dict, root: Path, pack_names: list[str]) -> str:
task_dir = tcfg["task_dir"]
def build_classify_yaml(yaml_key: str, mcfg: dict, root: Path, pack_names: list[str]) -> str:
task_dir = mcfg["task_dir"]
if len(pack_names) > 1:
print(f" warn {task}: classify 暂用首个包 {pack_names[0]}(多包请先合并目录)")
print(f" warn {yaml_key}: classify 暂用首个包 {pack_names[0]}(多包请先合并目录)")
base = pack_task_root(root, pack_names[0], task_dir)
return f"""# {task} — pack: {pack_names[0]}
return f"""# {yaml_key} — pack: {pack_names[0]}
path: {base.resolve()}
train: train
val: val
@@ -88,39 +85,49 @@ test: test
"""
def iter_yaml_jobs(reg: dict, only_task: str | None = None):
    """Yield ``(yaml_key, config)`` pairs, one per YAML file to generate.

    A ``multi`` task contributes one pair per mode, keyed by
    ``train_yaml_key``; any other task contributes a single pair keyed by
    its task name. ``only_task`` restricts the iteration to that task and
    aborts with ``SystemExit`` when it is unknown.
    """
    registry_tasks = load_registry(reg)
    if only_task:
        if only_task not in registry_tasks:
            raise SystemExit(f"未知 task: {only_task}")
        selected = {only_task: registry_tasks[only_task]}
    else:
        selected = registry_tasks

    for name, cfg in selected.items():
        if cfg.get("type") != "multi":
            yield name, cfg
            continue
        for mode in (cfg.get("modes") or {}):
            mode_cfg = get_mode_config(name, mode, reg)
            yaml_key = train_yaml_key(name, mode, reg)
            yield yaml_key, mode_cfg
def main() -> None:
p = argparse.ArgumentParser()
p.add_argument("--root", type=Path, default=SCRIPT_DIR.parent)
p.add_argument("--packs", help="逗号分隔,覆盖 workflow active_packs")
p.add_argument("--task", help="只生成某一任务")
p.add_argument("--task", help="只生成某一任务multi 会生成全部 mode")
args = p.parse_args()
root = args.root.resolve()
reg = yaml.safe_load((root / "datasets.registry.yaml").read_text(encoding="utf-8"))
cli = [x.strip() for x in args.packs.split(",")] if args.packs else None
pack_names = load_active_pack_names(root, cli)
if not pack_names:
raise SystemExit("active_packs 为空,请编辑 ML/workflow.registry.yaml 或 --packs")
raise SystemExit("active_packs 为空,请编辑 workflow.registry.yaml 或 --packs")
out_dir = root / "manifests" / "yaml_active"
out_dir.mkdir(parents=True, exist_ok=True)
print(f"active_packs: {pack_names}")
tasks = reg["tasks"]
if args.task:
if args.task not in tasks:
raise SystemExit(f"未知 task: {args.task}")
tasks = {args.task: tasks[args.task]}
for task, tcfg in tasks.items():
typ = tcfg["type"]
for yaml_key, mcfg in iter_yaml_jobs(reg, args.task):
typ = mcfg["type"]
if typ in ("detect", "pose"):
content = build_detect_pose_yaml(task, tcfg, root, pack_names, typ)
content = build_detect_pose_yaml(yaml_key, mcfg, root, pack_names, typ)
elif typ == "classify":
content = build_classify_yaml(task, tcfg, root, pack_names)
content = build_classify_yaml(yaml_key, mcfg, root, pack_names)
else:
print(f" skip {task}: type {typ}")
print(f" skip {yaml_key}: type {typ}")
continue
out = out_dir / f"{task}.yaml"
out = out_dir / f"{yaml_key}.yaml"
out.write_text(content, encoding="utf-8")
print(f" wrote {out.relative_to(root)}")

View File

@@ -0,0 +1,166 @@
"""DMS 任务注册表domain 分组、multi 任务(前向 detect+classify、旧 ID 别名。"""
from __future__ import annotations
from typing import Any
# Display label for each domain id; used by iter_catalog_tasks().
DOMAIN_LABELS = {
    "dms": "舱内 DMS",
    "forward": "前向 ADAS",
}
# Report / legacy directory name -> (task, mode); consulted by map_report_task().
REPORT_TASK_ALIASES: dict[str, tuple[str, str | None]] = {
    "isa": ("forward", "detect"),
    "isa_detect": ("forward", "detect"),
    "isa_class": ("forward", "classify"),
    "isa_class_0116": ("forward", "classify"),
    "dam_0417": ("dam", "batch_0417"),
}
# Legacy task id -> (canonical task, default mode); consulted by resolve_task_id().
LEGACY_TASK_ALIASES: dict[str, tuple[str, str | None]] = {
    "isa": ("forward", "detect"),
    "isa_class": ("forward", "classify"),
    "dam_0417": ("dam", "batch_0417"),
}
def load_registry(reg: dict) -> dict[str, Any]:
    """Return the ``tasks`` mapping of a parsed registry, or an empty dict when it is missing or empty."""
    tasks = reg.get("tasks")
    return tasks or {}
def resolve_task_id(task: str, mode: str | None = None) -> tuple[str, str | None]:
    """Map a user-supplied or historical task id to ``(canonical_task, mode)``.

    Ids listed in ``LEGACY_TASK_ALIASES`` are replaced by their canonical
    task; an explicit ``mode`` takes precedence over the alias' default mode.
    Other ids are returned unchanged.
    """
    alias = LEGACY_TASK_ALIASES.get(task)
    if alias is None:
        return task, mode
    canonical_task, default_mode = alias
    return canonical_task, mode or default_mode
def report_task_key(task: str, mode: str | None = None) -> str:
    """Return the task column name used in the catalog report CSV.

    The four known ``(task, mode)`` pairs map to their historical report
    names; anything else yields the ``task`` argument as given.
    """
    report_names = {
        ("forward", "detect"): "isa",
        ("forward", "classify"): "isa_class",
        ("dam", "batch_0516"): "dam",
        ("dam", "batch_0417"): "dam_0417",
    }
    resolved = resolve_task_id(task, mode)
    return report_names.get(resolved, task)
def train_yaml_key(task: str, mode: str | None, reg: dict) -> str:
    """Return the file name (without ``.yaml``) used under manifests/yaml_active and by train.sh.

    ``multi`` tasks yield ``<task>__<mode>``; other tasks yield the canonical
    task name.

    Raises:
        ValueError: a ``multi`` task was given without a mode.
        KeyError: the task is not in the registry.
    """
    task, mode = resolve_task_id(task, mode)
    task_cfg = load_registry(reg)[task]
    if task_cfg.get("type") != "multi":
        return task
    if not mode:
        raise ValueError(f"任务 {task} 需指定 modedetect / classify")
    return f"{task}__{mode}"
def get_mode_config(task: str, mode: str | None, reg: dict) -> dict[str, Any]:
    """Return the effective configuration for ``task`` / ``mode``.

    For a non-``multi`` task this is a copy of the task entry with ``task``
    set and ``mode`` set to ``None``. For a ``multi`` task it is a copy of
    the mode entry extended with ``task``, ``mode``, a combined ``task_dir``
    (``<task dir>/<mode subdir>``), the task's ``domain`` and a ``label``
    that falls back to the task label.

    Raises:
        ValueError: a ``multi`` task without a mode, or an unknown mode.
        KeyError: the task is not in the registry.
    """
    task, mode = resolve_task_id(task, mode)
    task_cfg = load_registry(reg)[task]
    if task_cfg.get("type") != "multi":
        return {**task_cfg, "task": task, "mode": None}

    modes = task_cfg.get("modes") or {}
    if not mode:
        raise ValueError(f"任务 {task} 需指定 mode")
    if mode not in modes:
        raise ValueError(f"未知 mode: {task}/{mode}")

    mode_entry = modes[mode]
    parent_dir = task_cfg.get("task_dir", task)
    sub_dir = mode_entry.get("subdir", mode)
    return {
        **mode_entry,
        "task": task,
        "mode": mode,
        "task_dir": f"{parent_dir}/{sub_dir}",
        "domain": task_cfg.get("domain"),
        "label": mode_entry.get("label") or task_cfg.get("label"),
    }
def task_data_dir(pack_dir, task: str, mode: str | None, reg: dict):
    """Return ``<pack_dir>/<task_dir>`` for the given task / mode as a ``Path``."""
    from pathlib import Path

    relative_dir = get_mode_config(task, mode, reg)["task_dir"]
    return Path(pack_dir) / relative_dir
def inbox_dir(root, task: str, mode: str | None, reg: dict):
    """Return the inbox directory for a task (and mode) below ``root``.

    The configured ``inbox`` value is used when present; the default is
    ``inbox/<task>/<mode>`` for ``multi`` tasks and ``inbox/<task>``
    otherwise. A ``multi`` task with a missing or unknown mode raises
    ``KeyError``.
    """
    from pathlib import Path

    task, mode = resolve_task_id(task, mode)
    task_cfg = load_registry(reg)[task]
    if task_cfg.get("type") != "multi":
        return Path(root) / (task_cfg.get("inbox") or f"inbox/{task}")
    mode_cfg = task_cfg["modes"][mode or ""]
    relative = mode_cfg.get("inbox") or f"inbox/{task}/{mode}"
    return Path(root) / relative
def iter_catalog_tasks(reg: dict) -> list[tuple[str, dict[str, Any]]]:
    """Build the top-level task list for the catalog.

    Each item is ``(task, entry)``. ``entry`` holds the domain, its display
    label, the task label and type. ``multi`` tasks get a ``modes`` mapping
    with one sub-entry per mode; other tasks carry ``nc`` / ``names``
    directly. ``packs`` and ``class_counts`` start out empty.
    """
    catalog: list[tuple[str, dict[str, Any]]] = []
    for task, task_cfg in load_registry(reg).items():
        domain = task_cfg.get("domain", "dms")
        task_type = task_cfg.get("type")
        entry: dict[str, Any] = {
            "domain": domain,
            "domain_label": DOMAIN_LABELS.get(domain, domain),
            "label": task_cfg.get("label", task),
            "type": task_type,
        }
        if task_type == "multi":
            entry["modes"] = {
                mode: {
                    "label": mode_cfg.get("label", mode),
                    "type": mode_cfg.get("type"),
                    "nc": mode_cfg.get("nc"),
                    "names": mode_cfg.get("names"),
                    "packs": [],
                    "class_counts": {},
                }
                for mode, mode_cfg in (task_cfg.get("modes") or {}).items()
            }
        else:
            entry.update(
                nc=task_cfg.get("nc"),
                names=task_cfg.get("names"),
                packs=[],
                class_counts={},
            )
        catalog.append((task, entry))
    return catalog
def map_report_task(report_name: str) -> tuple[str, str | None]:
    """Translate a report task name into ``(task, mode)``; unknown names map to ``(report_name, None)``."""
    return REPORT_TASK_ALIASES.get(report_name, (report_name, None))
def task_defs_for_pending(reg: dict) -> dict[str, Any]:
    """Build the ``task_defs`` mapping served by the platform's pending API.

    Every registry task yields an entry with ``type``, ``domain`` and
    ``label``. ``multi`` tasks add a ``modes`` mapping whose sub-entries
    carry ``type``, ``nc``, ``names`` and the combined ``task_dir``; other
    tasks carry ``nc``, ``names`` and ``task_dir`` directly.
    """
    definitions: dict[str, Any] = {}
    for task, task_cfg in load_registry(reg).items():
        domain = task_cfg.get("domain", "dms")
        label = task_cfg.get("label", task)
        if task_cfg.get("type") != "multi":
            definitions[task] = {
                "type": task_cfg.get("type"),
                "domain": domain,
                "label": label,
                "nc": task_cfg.get("nc"),
                "names": task_cfg.get("names"),
                "task_dir": task_cfg.get("task_dir", task),
            }
            continue
        mode_defs = {}
        for mode, mode_cfg in (task_cfg.get("modes") or {}).items():
            mode_defs[mode] = {
                "type": mode_cfg.get("type"),
                "nc": mode_cfg.get("nc"),
                "names": mode_cfg.get("names"),
                "task_dir": f"{task_cfg.get('task_dir', task)}/{mode_cfg.get('subdir', mode)}",
            }
        definitions[task] = {
            "type": "multi",
            "domain": domain,
            "label": label,
            "modes": mode_defs,
        }
    return definitions

View File

@@ -0,0 +1,143 @@
#!/usr/bin/env python3
"""export_ls_to_yolo 单元测试(无 pytest 依赖)。"""
from __future__ import annotations
import hashlib
import json
import sys
import tempfile
from pathlib import Path
SCRIPT_DIR = Path(__file__).resolve().parent
sys.path.insert(0, str(SCRIPT_DIR))
from export_ls_to_yolo import ( # noqa: E402
convert_regions_to_yolo_lines,
export_batch,
)
from ingest_incremental import validate_detect_label, validate_pose_label # noqa: E402
def _task_id(rel: str) -> str:
    """Return the first 16 hex characters of the SHA-256 of ``rel``."""
    digest = hashlib.sha256(rel.encode())
    return digest.hexdigest()[:16]
def test_detect_conversion() -> None:
    """A single labelled rectangle becomes one valid five-field detect line."""
    face_box = {
        "x": 10.0,
        "y": 20.0,
        "width": 30.0,
        "height": 40.0,
        "rectanglelabels": ["face"],
    }
    regions = [{"type": "rectanglelabels", "value": face_box}]
    lines = convert_regions_to_yolo_lines(
        regions,
        mode="detect",
        class_map={"face": 0, "eye_open": 1},
    )
    assert len(lines) == 1
    fields = lines[0].split()
    assert len(fields) == 5
    assert fields[0] == "0"
    assert abs(float(fields[1]) - 0.25) < 1e-5  # cx = (10+15)/100
    assert abs(float(fields[2]) - 0.40) < 1e-5  # cy = (20+20)/100
    problem = validate_detect_label("\n".join(lines), 4)
    assert problem is None, problem
def test_pose_conversion() -> None:
    """One rectangle plus two keypoints becomes a single valid 116-field pose line."""
    face_region = {
        "type": "rectanglelabels",
        "value": {
            "x": 10.0,
            "y": 20.0,
            "width": 30.0,
            "height": 40.0,
            "rectanglelabels": ["face"],
        },
    }
    kp_01_region = {
        "type": "keypointlabels",
        "value": {"x": 35.6, "y": 52.9, "width": 0.5, "keypointlabels": ["kp_01"]},
    }
    kp_10_region = {
        "type": "keypointlabels",
        "value": {"x": 50.0, "y": 50.0, "width": 0.5, "keypointlabels": ["kp_10"]},
    }
    kpt_map = {f"kp_{index:02d}": index for index in range(37)}
    lines = convert_regions_to_yolo_lines(
        [face_region, kp_01_region, kp_10_region],
        mode="pose",
        class_map={"face": 0},
        kpt_map=kpt_map,
        kpt_shape=[37, 3],
    )
    assert len(lines) == 1
    fields = lines[0].split()
    assert len(fields) == 116
    assert fields[0] == "0"
    # kp_01 has index 1, so its x / y / v occupy fields 8, 9 and 10.
    assert abs(float(fields[8]) - 0.356) < 1e-3
    assert abs(float(fields[9]) - 0.529) < 1e-3
    assert fields[10] == "2.000000"
    problem = validate_pose_label("\n".join(lines), [37, 3])
    assert problem is None, problem
def test_export_batch_end_to_end() -> None:
    """``export_batch`` writes ``labels/train/sample.txt`` for one annotated image in a temporary batch."""
    with tempfile.TemporaryDirectory() as tmp:
        batch = Path(tmp)
        img_rel = "images/train/sample.jpg"
        image_file = batch / img_rel
        image_file.parent.mkdir(parents=True)
        image_file.write_bytes(b"\xff\xd8\xff")

        tid = _task_id(img_rel)
        face_region = {
            "type": "rectanglelabels",
            "value": {
                "x": 10.0,
                "y": 20.0,
                "width": 30.0,
                "height": 40.0,
                "rectanglelabels": ["face"],
            },
        }
        keypoint_region = {
            "type": "keypointlabels",
            "value": {"x": 25.0, "y": 40.0, "width": 0.5, "keypointlabels": ["kp_00"]},
        }
        annotation = {"task_id": tid, "result": [face_region, keypoint_region]}

        ann_dir = batch / "labels" / "ls_annotations"
        ann_dir.mkdir(parents=True)
        (ann_dir / f"{tid}.json").write_text(json.dumps(annotation), encoding="utf-8")

        summary = export_batch(batch, "addw_face", mode="pose")
        assert summary["written"] == 1
        label_file = batch / "labels" / "train" / "sample.txt"
        assert label_file.is_file()
        fields = label_file.read_text().strip().split()
        assert len(fields) == 116
def main() -> int:
    """Run the three tests in order, print a success line and return ``0``."""
    for case in (
        test_detect_conversion,
        test_pose_conversion,
        test_export_batch_end_to_end,
    ):
        case()
    print("OK export_ls_to_yolo tests")
    return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -17,20 +17,21 @@ TASK="${1:?用法: $0 <task> [full|continue]}"
TRAIN_MODE="${2:-full}"
REG="$DATASET_ROOT/datasets.registry.yaml"
YAML="$DATASET_ROOT/manifests/yaml_active/${TASK}.yaml"
VERSIONS="$DATASET_ROOT/manifests/train_versions.yaml"
SUBMODE="${SUBMODE:-}"
if [[ ! -f "$YAML" ]]; then
echo "找不到 yaml: $YAML"
exit 1
fi
read -r TYPE MODE MODEL EPOCHS LR0 IMGSZ RUN_SUFFIX <<< "$(python3 - <<PY
import yaml
read -r YAML_KEY TYPE MODE MODEL EPOCHS LR0 IMGSZ RUN_SUFFIX <<< "$(python3 - <<PY
import sys
from pathlib import Path
import yaml
sys.path.insert(0, str(Path("$DATASET_ROOT/scripts")))
from task_registry import get_mode_config, resolve_task_id, train_yaml_key
reg = yaml.safe_load(Path("$REG").read_text())
tcfg = reg["tasks"]["$TASK"]
typ = tcfg["type"]
task, sub = resolve_task_id("$TASK", "$SUBMODE" or None)
mcfg = get_mode_config(task, sub, reg)
typ = mcfg["type"]
yaml_key = train_yaml_key(task, sub, reg)
print(yaml_key, end=" ")
train_mode = "$TRAIN_MODE" if "$TRAIN_MODE" in ("full", "continue") else reg.get("train", {}).get("mode", "full")
t = reg.get("train", {}).get(typ, reg.get("train_defaults", {}).get(typ, {}))
if train_mode == "continue":
@@ -49,6 +50,12 @@ print(typ, mode, model, epochs, lr0, imgsz, suffix)
PY
)"
YAML="$DATASET_ROOT/manifests/yaml_active/${YAML_KEY}.yaml"
if [[ ! -f "$YAML" ]]; then
echo "找不到 yaml: $YAML(请先 refresh_yaml.py"
exit 1
fi
# continue 模式warm_start 为空则读 train_versions.yaml
if [[ "$TRAIN_MODE" == "continue" && ( "$MODEL" == "null" || "$MODEL" == "None" || -z "$MODEL" ) ]]; then
MODEL=$(python3 - <<PY 2>/dev/null || true
@@ -57,7 +64,7 @@ from pathlib import Path
p = Path("$VERSIONS")
if p.is_file():
v = yaml.safe_load(p.read_text()) or {}
c = v.get("$TASK", {}).get("current")
c = v.get("$YAML_KEY", {}).get("current")
if c: print(c)
PY
)
@@ -68,9 +75,9 @@ if [[ "$TRAIN_MODE" == "continue" && ( -z "$MODEL" || "$MODEL" == "null" ) ]]; t
exit 1
fi
RUN_NAME="${TASK}_${RUN_SUFFIX}_$(date +%Y%m%d)"
RUN_NAME="${YAML_KEY}_${RUN_SUFFIX}_$(date +%Y%m%d)"
echo "task=$TASK type=$TYPE yolo_mode=$MODE train_mode=$TRAIN_MODE"
echo "task=$TASK submode=$SUBMODE yaml_key=$YAML_KEY type=$TYPE yolo_mode=$MODE train_mode=$TRAIN_MODE"
echo "data=$YAML"
echo "model=$MODEL epochs=$EPOCHS lr0=$LR0 imgsz=$IMGSZ name=$RUN_NAME"
@@ -93,4 +100,4 @@ yolo "$MODE" train \
BEST="runs/${MODE}/${RUN_NAME}/weights/best.pt"
echo "完成: $BEST"
echo "请更新 manifests/train_versions.yaml 中 $TASK.current = $BEST"
echo "请更新 manifests/train_versions.yaml 中 $YAML_KEY.current = $BEST"