Major changes: - New frontend (platform/web/): Vite + React 18 + TypeScript + Tailwind - 4-module navigation: 数据送标 / 模型管理 / 车队管理 / 系统管理 - Data catalog with charts (DMS/ADAS/Lane 3-tab view) - Quality review workflow (标注质检): Good/Fine/Bad scoring with auto-advance - Audit enhancements: batch operations, rejection categories, Feishu notifications - Operation audit log (操作日志) - World model simulation studio (仿真工坊) - Dataset version management with snapshots and diff - ADAS 7-class dataset integration (138K images organized + compressed) - User management with Feishu integration and pagination - CRUD/search/filter on all pages, card layout redesign - PIL-optimized image overlay rendering - Auto-snapshot on build, in_review workflow stage - Removed embedded algorithm code (now in workspace)
383 lines
13 KiB
Python
383 lines
13 KiB
Python
"""审核单关联的送标/回传数据:解析范围、列举图像、渲染 GT 叠加。"""
|
||
from __future__ import annotations
|
||
|
||
import hashlib
|
||
import io
|
||
from dataclasses import dataclass
|
||
from pathlib import Path
|
||
from typing import Any, Sequence
|
||
|
||
import yaml
|
||
from PIL import Image, ImageDraw, ImageFont
|
||
|
||
from as_platform.data.batch import IMG_EXTS
|
||
from as_platform.data.core import load_wf, proj_root, resolve_pack_dir
|
||
|
||
IMAGE_EXTS = tuple(ext.lower() for ext in IMG_EXTS) + tuple(ext.upper() for ext in IMG_EXTS if ext.islower())
|
||
|
||
|
||
@dataclass(frozen=True)
|
||
class ImageRef:
|
||
image_path: Path
|
||
label_path: Path | None
|
||
batch: str
|
||
location: str
|
||
split: str
|
||
|
||
@property
|
||
def id(self) -> str:
|
||
key = f"{self.image_path}|{self.label_path or ''}"
|
||
return hashlib.sha256(key.encode()).hexdigest()[:16]
|
||
|
||
|
||
def _find_image(images_dir: Path, stem: str) -> Path | None:
|
||
for ext in IMAGE_EXTS:
|
||
p = images_dir / f"{stem}{ext}"
|
||
if p.is_file():
|
||
return p
|
||
return None
|
||
|
||
|
||
def _parse_yolo_line(line: str) -> dict[str, Any] | None:
|
||
parts = line.strip().split()
|
||
if len(parts) < 5:
|
||
return None
|
||
try:
|
||
class_id = int(float(parts[0]))
|
||
cx, cy, w, h = map(float, parts[1:5])
|
||
except Exception:
|
||
return None
|
||
keypoints: list[tuple[float, float, float]] = []
|
||
rest = parts[5:]
|
||
if len(rest) >= 3:
|
||
n = len(rest) // 3
|
||
for i in range(n):
|
||
keypoints.append((float(rest[i * 3]), float(rest[i * 3 + 1]), float(rest[i * 3 + 2])))
|
||
return {"class_id": class_id, "bbox": (cx, cy, w, h), "keypoints": keypoints}
|
||
|
||
|
||
def parse_label_file(label_path: Path) -> list[dict[str, Any]]:
|
||
if not label_path.is_file():
|
||
return []
|
||
out: list[dict[str, Any]] = []
|
||
for raw in label_path.read_text(encoding="utf-8", errors="ignore").splitlines():
|
||
parsed = _parse_yolo_line(raw)
|
||
if parsed is not None:
|
||
out.append(parsed)
|
||
return out
|
||
|
||
|
||
def _load_font(size: int) -> ImageFont.ImageFont:
|
||
for p in (
|
||
"/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
|
||
"/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc",
|
||
"/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
|
||
):
|
||
if Path(p).exists():
|
||
try:
|
||
return ImageFont.truetype(p, size)
|
||
except Exception:
|
||
continue
|
||
return ImageFont.load_default()
|
||
|
||
|
||
def _yolo_bbox_to_xyxy(bbox: tuple[float, float, float, float], width: int, height: int) -> tuple[int, int, int, int]:
|
||
cx, cy, bw, bh = bbox
|
||
x1 = int((cx - bw / 2.0) * width)
|
||
y1 = int((cy - bh / 2.0) * height)
|
||
x2 = int((cx + bw / 2.0) * width)
|
||
y2 = int((cy + bh / 2.0) * height)
|
||
return (
|
||
max(0, min(width - 1, x1)),
|
||
max(0, min(height - 1, y1)),
|
||
max(0, min(width - 1, x2)),
|
||
max(0, min(height - 1, y2)),
|
||
)
|
||
|
||
|
||
def render_overlay(
|
||
image_path: Path,
|
||
label_path: Path | None,
|
||
class_names: dict[int, str],
|
||
*,
|
||
max_size: int | None = None,
|
||
) -> bytes:
|
||
with Image.open(image_path) as im:
|
||
base = im.convert("RGB")
|
||
if max_size and max(base.size) > max_size:
|
||
base.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
|
||
|
||
over = base.copy()
|
||
draw = ImageDraw.Draw(over)
|
||
w, h = over.size
|
||
font = _load_font(max(12, min(18, w // 40)))
|
||
anns = parse_label_file(label_path) if label_path else []
|
||
|
||
palette = [
|
||
(220, 20, 60),
|
||
(30, 144, 255),
|
||
(50, 205, 50),
|
||
(255, 165, 0),
|
||
(186, 85, 211),
|
||
(0, 206, 209),
|
||
]
|
||
for ann in anns:
|
||
cid = ann["class_id"]
|
||
color = palette[cid % len(palette)]
|
||
x1, y1, x2, y2 = _yolo_bbox_to_xyxy(ann["bbox"], w, h)
|
||
draw.rectangle((x1, y1, x2, y2), outline=color, width=max(2, w // 320))
|
||
label = class_names.get(cid, f"class_{cid}")
|
||
draw.text((x1 + 2, max(0, y1 - 16)), label, fill=color, font=font)
|
||
for kx, ky, kv in ann.get("keypoints") or []:
|
||
if kv <= 0:
|
||
continue
|
||
px, py = int(kx * w), int(ky * h)
|
||
r = max(2, w // 400)
|
||
draw.ellipse((px - r, py - r, px + r, py + r), outline=color, fill=color)
|
||
|
||
buf = io.BytesIO()
|
||
over.save(buf, format="JPEG", quality=88)
|
||
return buf.getvalue()
|
||
|
||
|
||
def _collect_from_split(batch_dir: Path, split: str, *, batch: str, location: str) -> list[ImageRef]:
|
||
if split:
|
||
images_dir = batch_dir / "images" / split
|
||
labels_dir = batch_dir / "labels" / split
|
||
else:
|
||
images_dir = batch_dir / "images"
|
||
labels_dir = batch_dir / "labels"
|
||
|
||
out: list[ImageRef] = []
|
||
if not images_dir.is_dir():
|
||
return out
|
||
|
||
label_stems: dict[str, Path] = {}
|
||
if labels_dir.is_dir():
|
||
for lp in labels_dir.glob("*.txt"):
|
||
label_stems[lp.stem] = lp
|
||
|
||
seen: set[str] = set()
|
||
for p in sorted(images_dir.iterdir()):
|
||
if not p.is_file() or p.suffix.lower() not in {e.lower() for e in IMG_EXTS}:
|
||
continue
|
||
stem = p.stem
|
||
seen.add(stem)
|
||
out.append(
|
||
ImageRef(
|
||
image_path=p.resolve(),
|
||
label_path=label_stems.get(stem),
|
||
batch=batch,
|
||
location=location,
|
||
split=split or "root",
|
||
)
|
||
)
|
||
|
||
for stem, lp in sorted(label_stems.items()):
|
||
if stem in seen:
|
||
continue
|
||
img = _find_image(images_dir, stem)
|
||
if img:
|
||
out.append(
|
||
ImageRef(
|
||
image_path=img.resolve(),
|
||
label_path=lp.resolve(),
|
||
batch=batch,
|
||
location=location,
|
||
split=split or "root",
|
||
)
|
||
)
|
||
return out
|
||
|
||
|
||
def collect_batch_images(batch_dir: Path, *, batch: str, location: str) -> list[ImageRef]:
|
||
if not batch_dir.is_dir():
|
||
return []
|
||
refs: list[ImageRef] = []
|
||
for split in ("train", "val", "test", ""):
|
||
refs.extend(_collect_from_split(batch_dir, split, batch=batch, location=location))
|
||
# 去重(flat 与 train 可能重叠)
|
||
dedup: dict[str, ImageRef] = {}
|
||
for ref in refs:
|
||
dedup[str(ref.image_path)] = ref
|
||
return sorted(dedup.values(), key=lambda r: (r.batch, r.split, r.image_path.name))
|
||
|
||
|
||
def _dms_task_cfg(root: Path, wf: dict, task: str) -> tuple[dict, dict]:
|
||
reg_path = root / wf["projects"]["dms"]["registry"]
|
||
reg = yaml.safe_load(reg_path.read_text(encoding="utf-8"))
|
||
if task not in reg.get("tasks", {}):
|
||
raise ValueError(f"未知 task: {task}")
|
||
return reg, reg["tasks"][task]
|
||
|
||
|
||
def resolve_approval_scope(action: str, params: dict[str, Any]) -> dict[str, Any]:
|
||
"""解析审核单对应的数据目录与类别名。"""
|
||
p = params or {}
|
||
wf = load_wf()
|
||
|
||
if action in ("build_dms", "register_batch"):
|
||
task = p.get("task")
|
||
if not task:
|
||
raise ValueError("缺少 task 参数")
|
||
root = proj_root(wf, "dms")
|
||
reg, tcfg = _dms_task_cfg(root, wf, task)
|
||
src_sub = (reg.get("ingest") or {}).get("sources_subdir", "sources")
|
||
pack = p.get("pack") or "dms_v2"
|
||
batches: list[dict[str, Any]] = []
|
||
|
||
location = p.get("location", "inbox")
|
||
if action == "build_dms" and p.get("all_sources"):
|
||
location = "sources"
|
||
if action == "build_dms" and p.get("batch") and not p.get("all_sources"):
|
||
location = "inbox"
|
||
|
||
if location == "inbox":
|
||
batch_name = p.get("batch")
|
||
if batch_name:
|
||
batches.append({"path": root / "inbox" / task / batch_name, "batch": batch_name, "location": "inbox"})
|
||
else:
|
||
ib = root / "inbox" / task
|
||
if ib.is_dir():
|
||
for d in sorted(ib.iterdir()):
|
||
if d.is_dir() and not d.name.startswith("."):
|
||
batches.append({"path": d, "batch": d.name, "location": "inbox"})
|
||
else:
|
||
pack_dir = resolve_pack_dir("dms", root, wf, pack)
|
||
src_root = pack_dir / tcfg.get("task_dir", task) / src_sub
|
||
batch_name = p.get("batch")
|
||
if batch_name:
|
||
batches.append({"path": src_root / batch_name, "batch": batch_name, "location": "sources"})
|
||
elif src_root.is_dir():
|
||
for d in sorted(src_root.iterdir()):
|
||
if d.is_dir() and d.name not in ("_ingested", "_merged") and not d.name.startswith("."):
|
||
batches.append({"path": d, "batch": d.name, "location": "sources"})
|
||
|
||
names = tcfg.get("names") or {}
|
||
class_names = {int(k): v for k, v in names.items()} if isinstance(names, dict) else {i: n for i, n in enumerate(names)}
|
||
return {
|
||
"project": "dms",
|
||
"task": task,
|
||
"pack": pack,
|
||
"scope_label": f"DMS · {task} · {pack}" + (" · 全部 sources" if location == "sources" and not p.get("batch") else ""),
|
||
"class_names": class_names,
|
||
"batches": batches,
|
||
}
|
||
|
||
if action == "delivery_ingest":
|
||
data_path = (p.get("data_path") or "").strip()
|
||
if not data_path:
|
||
raise ValueError("缺少 data_path 参数")
|
||
src = Path(data_path)
|
||
project = p.get("project") or "dms"
|
||
task = p.get("task") or ""
|
||
batch_name = p.get("batch_name") or src.name
|
||
scope_label = f"数据送标入湖 · {project}"
|
||
if task:
|
||
scope_label += f" · {task}"
|
||
scope_label += f" · {batch_name}"
|
||
return {
|
||
"project": project,
|
||
"task": task or None,
|
||
"pack": None,
|
||
"scope_label": scope_label,
|
||
"class_names": {},
|
||
"batches": [
|
||
{
|
||
"path": src,
|
||
"batch": batch_name,
|
||
"location": "delivery",
|
||
}
|
||
],
|
||
}
|
||
|
||
if action in ("train_dms", "promote_dms", "eval_dms"):
|
||
task = p.get("task")
|
||
if not task:
|
||
raise ValueError("缺少 task 参数")
|
||
root = proj_root(wf, "dms")
|
||
_, tcfg = _dms_task_cfg(root, wf, task)
|
||
pack = p.get("pack") or "dms_v2"
|
||
pack_dir = resolve_pack_dir("dms", root, wf, pack)
|
||
task_dir = pack_dir / tcfg.get("task_dir", task)
|
||
batches = [{"path": task_dir, "batch": f"{pack}/{task}", "location": "pack"}]
|
||
names = tcfg.get("names") or {}
|
||
class_names = {int(k): v for k, v in names.items()} if isinstance(names, dict) else {i: n for i, n in enumerate(names)}
|
||
label = "模型晋级" if action == "promote_dms" else ("评估" if action == "eval_dms" else "训练")
|
||
return {
|
||
"project": "dms",
|
||
"task": task,
|
||
"pack": pack,
|
||
"scope_label": f"DMS · {task} · {pack} · pack 数据({label})",
|
||
"class_names": class_names,
|
||
"batches": batches,
|
||
}
|
||
|
||
raise ValueError(f"暂不支持预览的动作: {action}")
|
||
|
||
|
||
def list_scope_images(scope: dict[str, Any], *, offset: int = 0, limit: int = 60) -> dict[str, Any]:
|
||
all_refs: list[ImageRef] = []
|
||
for b in scope.get("batches") or []:
|
||
batch_dir = Path(b["path"])
|
||
all_refs.extend(
|
||
collect_batch_images(batch_dir, batch=b.get("batch", batch_dir.name), location=b.get("location", ""))
|
||
)
|
||
|
||
dedup: dict[str, ImageRef] = {str(r.image_path): r for r in all_refs}
|
||
ordered = sorted(dedup.values(), key=lambda r: (r.batch, r.split, r.image_path.name))
|
||
total = len(ordered)
|
||
page = ordered[offset : offset + limit]
|
||
|
||
items = []
|
||
for ref in page:
|
||
anns = parse_label_file(ref.label_path) if ref.label_path else []
|
||
items.append(
|
||
{
|
||
"id": ref.id,
|
||
"batch": ref.batch,
|
||
"location": ref.location,
|
||
"split": ref.split,
|
||
"filename": ref.image_path.name,
|
||
"has_label": ref.label_path is not None and ref.label_path.is_file(),
|
||
"box_count": len(anns),
|
||
"missing_label": ref.label_path is None or not ref.label_path.is_file(),
|
||
}
|
||
)
|
||
return {"total": total, "offset": offset, "limit": limit, "items": items}
|
||
|
||
|
||
def find_image_ref(scope: dict[str, Any], image_id: str) -> ImageRef | None:
|
||
"""线性查找;审核场景批次有限,可接受。"""
|
||
batches = scope.get("batches") or []
|
||
for b in batches:
|
||
batch_dir = Path(b["path"])
|
||
refs = collect_batch_images(batch_dir, batch=b.get("batch", batch_dir.name), location=b.get("location", ""))
|
||
for ref in refs:
|
||
if ref.id == image_id:
|
||
return ref
|
||
return None
|
||
|
||
|
||
def image_to_item(ref: ImageRef) -> dict[str, Any]:
|
||
anns = parse_label_file(ref.label_path) if ref.label_path else []
|
||
return {
|
||
"id": ref.id,
|
||
"batch": ref.batch,
|
||
"location": ref.location,
|
||
"split": ref.split,
|
||
"filename": ref.image_path.name,
|
||
"has_label": ref.label_path is not None and ref.label_path.is_file(),
|
||
"box_count": len(anns),
|
||
"missing_label": ref.label_path is None or not ref.label_path.is_file(),
|
||
"annotations": [
|
||
{
|
||
"class_id": a["class_id"],
|
||
"class_name": None,
|
||
"bbox": a["bbox"],
|
||
"keypoints": a.get("keypoints") or [],
|
||
}
|
||
for a in anns
|
||
],
|
||
}
|