Files
HSAP/platform/as_platform/labeling/service.py
Chengfang Lu e72bc061c5 feat: HSAP platform v2 — modular navigation, quality review, audit log, world model simulation
Major changes:
- New frontend (platform/web/): Vite + React 18 + TypeScript + Tailwind
- 4-module navigation: 数据送标 / 模型管理 / 车队管理 / 系统管理
- Data catalog with charts (DMS/ADAS/Lane 3-tab view)
- Quality review workflow (标注质检): Good/Fine/Bad scoring with auto-advance
- Audit enhancements: batch operations, rejection categories, Feishu notifications
- Operation audit log (操作日志)
- World model simulation studio (仿真工坊)
- Dataset version management with snapshots and diff
- ADAS 7-class dataset integration (138K images organized + compressed)
- User management with Feishu integration and pagination
- CRUD/search/filter on all pages, card layout redesign
- PIL-optimized image overlay rendering
- Auto-snapshot on build, in_review workflow stage
- Removed embedded algorithm code (now in workspace)
2026-06-03 11:40:21 +08:00

402 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Campaign 与 pending 批次合并列表。"""
from __future__ import annotations
import hashlib
import json
import uuid
from datetime import datetime, timezone
from typing import Any
from as_platform.config import WORKSPACE
from as_platform.data.core import get_pending_report, load_wf
from as_platform.db.engine import session_scope
from as_platform.db.models import LabelingCampaign, LabelingExportJob, User
from as_platform.jobs.queue import enqueue_job, get_job
from as_platform.labeling.annotate import resolve_editor_xml, sync_campaign_config_xml
from as_platform.labeling.batch_stage import (
on_labeling_export_job_succeeded,
update_campaign_batch_meta_stage,
)
from as_platform.labeling.scope import (
enrich_batch_labels,
format_scope_key,
load_dms_registry,
load_labeling_registry,
)
def _campaign_id(project: str, task: str, mode: str | None, batch: str, location: str) -> str:
    """Derive the deterministic campaign id for one batch.

    The id is the first 20 hex characters of the SHA-256 digest of
    ``"<scope_key>:<batch>:<location>"``, where the scope key is produced by
    ``format_scope_key(project, task, mode)``.  The same inputs therefore
    always map to the same id.
    """
    scope_key = format_scope_key(project, task, mode)
    seed = f"{scope_key}:{batch}:{location}".encode()
    digest = hashlib.sha256(seed).hexdigest()
    return digest[:20]
def _parse_scope_key(scope_key: str) -> tuple[str, str, str | None]:
parts = scope_key.split(":")
if parts[0] == "lane":
return "lane", parts[1] if len(parts) > 1 else "lane_v1", None
if len(parts) >= 3:
return "dms", parts[1], parts[2]
if len(parts) == 2:
return "dms", parts[1], None
return "dms", parts[-1], None
def _registry_fallback_batches(wf: dict, reg: dict) -> list[dict[str, Any]]:
    """Build inbox batch rows for the ``dms`` profiles of the labeling registry.

    These rows cover batches that are configured in the labeling registry but
    were not picked up by the pending scan (for example an empty inbox).

    Args:
        wf: Workflow configuration, passed to ``proj_root`` to locate the
            ``dms`` project root.
        reg: DMS task registry, forwarded to ``task_registry.inbox_dir``.

    Returns:
        One row per ``dms`` profile that has a scope key.  When the resolved
        inbox directory does not exist the row is a placeholder with zero
        counts and ``registry_only=True``; otherwise it is whatever
        ``enrich_batch`` returns.  Every row additionally carries ``mode``
        and ``scope_key``.
    """
    import sys
    from pathlib import Path

    from as_platform.data.batch import enrich_batch
    from as_platform.data.core import proj_root

    profiles = load_labeling_registry().get("profiles") or {}
    dms_root = proj_root(wf, "dms")
    rows: list[dict[str, Any]] = []

    for prof in profiles.values():
        scope_key = prof.get("scope_key") or ""
        if not scope_key:
            # Without a scope key the task and batch would both be "" and the
            # batch directory would collapse to the inbox root itself, which
            # would then be listed as a bogus batch.
            continue
        project, task, mode = _parse_scope_key(scope_key)
        if project != "dms":
            continue

        batch = mode or task
        if mode:
            try:
                # task_registry lives in the workspace scripts directory, so
                # that directory has to be importable first.
                scripts = WORKSPACE / "datasets" / "dms" / "scripts"
                if str(scripts) not in sys.path:
                    sys.path.insert(0, str(scripts))
                from task_registry import inbox_dir, resolve_task_id

                task_r, mode_r = resolve_task_id(task, mode)
                batch_dir = inbox_dir(dms_root, task_r, mode_r, reg)
            except Exception:
                # Deliberate best-effort: if the workspace helper is missing
                # or fails, fall back to the conventional inbox layout.
                batch_dir = dms_root / "inbox" / task / mode
        else:
            batch_dir = dms_root / "inbox" / task / batch

        if not isinstance(batch_dir, Path) or not batch_dir.is_dir():
            row = {
                "project": project,
                "task": task,
                "mode": mode,
                "batch": batch,
                "stage": "raw_pool",
                "location": "inbox",
                "path": str(batch_dir) if batch_dir else "",
                "counts": {"images": 0, "labels": 0},
                "registry_only": True,
            }
        else:
            row = enrich_batch(
                batch_dir,
                project=project,
                task=task,
                pack=None,
                batch=batch,
                location="inbox",
            )
        row["mode"] = mode
        row["scope_key"] = scope_key
        rows.append(row)

    return rows
def _attach_campaign_state(row: dict[str, Any], cid: str) -> dict[str, Any]:
    """Annotate one listing row with its campaign's stored state and progress.

    Adds ``campaign_id`` and ``campaign_status`` (``"not_opened"`` when no
    ``LabelingCampaign`` record exists), the assignee fields when a record
    exists, and the progress summary for campaigns whose status is
    ``in_progress`` or ``labeling_submitted``.  Mutates and returns ``row``.
    """
    with session_scope() as db:
        camp = db.get(LabelingCampaign, cid)
        opened = bool(camp)
        status = camp.status if camp else "not_opened"
        if camp:
            row["assigned_to_user_id"] = camp.assigned_to_user_id
            row["assigned_to_name"] = camp.assigned_to_name
    row["campaign_id"] = cid
    row["campaign_status"] = status
    if opened and status in ("in_progress", "labeling_submitted"):
        try:
            from as_platform.labeling.progress import campaign_progress_summary

            row.update(campaign_progress_summary(cid))
        except Exception:
            # Deliberate best-effort: a failing progress lookup must not
            # break the listing, so fall back to zeroed counters.
            row.update({"total_tasks": 0, "completed_tasks": 0, "assigned_tasks": 0})
    return row


def list_labeling_batches(
    *,
    stage: str | None = None,
    offset: int = 0,
    limit: int = 20,
) -> dict[str, Any]:
    """Return one page of labeling batches merged from pending scan and registry.

    Batches come from the pending report first and then from the registry
    fallback rows; entries that map to an already-seen campaign id are
    dropped.  Only batches in a labeling-related stage are listed.

    Args:
        stage: When truthy, keep only batches whose ``stage`` equals it.
        offset: Index of the first row of the page; negative values are
            treated as 0.
        limit: Page size; values below 1 are treated as 1.

    Returns:
        A dict with ``items`` (the page), ``total`` (number of rows before
        paging), the ``offset`` and ``limit`` exactly as passed in, and the
        pending report's ``updated_at``.
    """
    wf = load_wf()
    report = get_pending_report(wf)
    reg = load_dms_registry()
    allowed_stages = (
        "raw_pool",
        "out_for_labeling",
        "returned",
        "labeling_submitted",
        "in_review",
        "review_approved",
        "review_rejected",
    )

    candidates: list[tuple[str, dict[str, Any]]] = []
    seen: set[str] = set()

    def _collect(b: dict[str, Any]) -> None:
        batch_stage = b.get("stage")
        if stage and batch_stage != stage:
            return
        if batch_stage not in allowed_stages:
            return
        row = enrich_batch_labels(b, reg)
        cid = _campaign_id(
            row["project"],
            row.get("task") or "",
            row.get("mode"),
            row["batch"],
            row.get("location") or "inbox",
        )
        if cid in seen:
            return
        seen.add(cid)
        candidates.append((cid, row))

    for b in report.get("batches", []):
        _collect(b)
    for b in _registry_fallback_batches(wf, reg):
        _collect(b)

    # Campaign state needs a DB round trip (and possibly a progress lookup)
    # per row, so it is attached only to the rows that are actually returned.
    start = max(0, offset)
    window = candidates[start : start + max(1, limit)]
    page = [_attach_campaign_state(row, cid) for cid, row in window]

    return {
        "items": page,
        "total": len(candidates),
        "offset": offset,
        "limit": limit,
        "updated_at": report.get("updated_at"),
    }
def open_campaign(
    *,
    project: str,
    task: str,
    batch: str,
    mode: str | None = None,
    pack: str | None = None,
    location: str = "inbox",
) -> dict[str, Any]:
    """Create or re-open the labeling campaign for a batch.

    A missing campaign is created with status ``in_progress`` and the editor
    XML resolved for the scope.  An existing one is reset to ``in_progress``,
    gets a fresh ``updated_at`` and has its config XML synced.  In both cases
    the batch meta stage is moved to ``out_for_labeling``.

    Returns:
        The campaign as a dict (including ``config_xml``), passed through
        ``enrich_batch_labels`` and with ``stage`` set to
        ``"out_for_labeling"``.
    """
    cid = _campaign_id(project, task, mode, batch, location)
    config_xml = resolve_editor_xml(project, task, mode)
    now = datetime.now(timezone.utc)

    with session_scope() as db:
        camp = db.get(LabelingCampaign, cid)
        if camp:
            camp.status = "in_progress"
            camp.updated_at = now
            sync_campaign_config_xml(camp)
        else:
            camp = LabelingCampaign(
                id=cid,
                project=project,
                task=task,
                mode=mode,
                batch=batch,
                pack=pack,
                location=location,
                status="in_progress",
                config_xml=config_xml,
                created_at=now,
                updated_at=now,
            )
            db.add(camp)
        db.flush()
        payload = camp.to_dict()
        payload["config_xml"] = camp.config_xml
        update_campaign_batch_meta_stage(camp, "out_for_labeling")

    # Only dms rows are enriched against the DMS task registry.
    registry = load_dms_registry() if project == "dms" else None
    enriched = enrich_batch_labels(payload, registry)
    enriched["stage"] = "out_for_labeling"
    return enriched
def get_campaign(campaign_id: str) -> dict[str, Any] | None:
    """Load one campaign as an enriched dict, or ``None`` if it does not exist.

    The result is ``camp.to_dict()`` plus ``config_xml``, passed through
    ``enrich_batch_labels`` (with the DMS registry for ``dms`` campaigns).
    """
    with session_scope() as db:
        camp = db.get(LabelingCampaign, campaign_id)
        if not camp:
            return None
        payload = camp.to_dict()
        payload["config_xml"] = camp.config_xml

    is_dms = payload.get("project") == "dms"
    registry = load_dms_registry() if is_dms else None
    return enrich_batch_labels(payload, registry)
def _export_job_id() -> str:
return f"lej-{uuid.uuid4().hex[:16]}"
def _record_export_job(campaign_id: str, action: str, job: dict[str, Any]) -> dict[str, Any]:
    """Persist a ``LabelingExportJob`` row that tracks a queued job.

    Args:
        campaign_id: Campaign the job belongs to.
        action: Action name stored on the record.
        job: Queue job dict; its ``id`` and ``status`` are copied over, with
            ``"queued"`` used when the status is missing or empty.

    Returns:
        The stored record as returned by ``get_export_job``, or a minimal
        dict with the ids and action when that lookup returns nothing.
    """
    export_id = _export_job_id()
    queue_job_id = job.get("id")
    created = datetime.now(timezone.utc)

    with session_scope() as db:
        db.add(
            LabelingExportJob(
                id=export_id,
                campaign_id=campaign_id,
                action=action,
                job_id=queue_job_id,
                status=job.get("status") or "queued",
                created_at=created,
            )
        )

    # Read the record back in a separate session, after the one above closed.
    stored = get_export_job(export_id)
    if stored:
        return stored
    return {
        "id": export_id,
        "campaign_id": campaign_id,
        "action": action,
        "job_id": queue_job_id,
    }
def _sync_export_job_from_queue(ej: LabelingExportJob) -> None:
if not ej.job_id:
return
job = get_job(ej.job_id)
if not job:
return
ej.status = job.get("status") or ej.status
if job.get("finished_at"):
try:
ej.finished_at = datetime.fromisoformat(str(job["finished_at"]).replace("Z", "+00:00"))
except Exception:
pass
if job.get("result") is not None:
ej.result_json = json.dumps(job.get("result"), ensure_ascii=False)
if ej.action == "labeling_export" and ej.status in ("succeeded", "completed"):
on_labeling_export_job_succeeded(
{"action": "labeling_export", "params": {"campaign_id": ej.campaign_id}}
)
def get_export_job(export_job_id: str) -> dict[str, Any] | None:
    """Return one export-job record as a dict, refreshed from the job queue.

    Returns ``None`` when no record with that id exists.
    """
    with session_scope() as db:
        record = db.get(LabelingExportJob, export_job_id)
        if record:
            _sync_export_job_from_queue(record)
            db.flush()
            return record.to_dict()
        return None
def list_campaign_export_jobs(campaign_id: str, *, limit: int = 30) -> dict[str, Any]:
    """List a campaign's most recent export jobs, newest first.

    Records whose action is ``labeling_ml_predict`` are excluded.  Each
    record is refreshed from the job queue before being serialized.

    Returns:
        ``{"items": [...], "campaign_id": campaign_id}`` with at most
        ``limit`` items.
    """
    with session_scope() as db:
        query = db.query(LabelingExportJob).filter_by(campaign_id=campaign_id)
        query = query.filter(LabelingExportJob.action != "labeling_ml_predict")
        query = query.order_by(LabelingExportJob.created_at.desc()).limit(limit)
        records = query.all()

        for record in records:
            _sync_export_job_from_queue(record)
        db.flush()

        serialized = [record.to_dict() for record in records]

    return {"items": serialized, "campaign_id": campaign_id}
def list_labeling_assignees() -> dict[str, Any]:
    """List users who may be assigned as batch owner (labeling-related roles).

    Active users are read ordered by name; a user qualifies when at least one
    of their role codes is a labeler, engineer or admin code.

    Returns:
        ``{"items": [...]}`` where each item has ``id``, ``name`` (falling
        back to ``user-<id>``) and the sorted list of the user's role codes.
    """
    eligible_codes = {"labeler", "internal_labeler", "vendor_labeler", "engineer", "admin"}
    assignees: list[dict[str, Any]] = []

    with session_scope() as db:
        active_users = db.query(User).filter(User.is_active.is_(True)).order_by(User.name).all()
        for user in active_users:
            codes = {role.code for role in (user.roles or [])}
            if codes.isdisjoint(eligible_codes):
                continue
            assignees.append(
                {
                    "id": user.id,
                    "name": user.name or f"user-{user.id}",
                    "roles": sorted(codes),
                }
            )

    return {"items": assignees}
def _find_batch_for_campaign_id(campaign_id: str) -> dict[str, Any] | None:
    """Look up the pending/registry batch row behind a deterministic campaign id.

    The id is recomputed for every pending batch and every registry fallback
    row (in that order); the first row whose id matches is returned, or
    ``None`` when there is no match.
    """
    wf = load_wf()
    reg = load_dms_registry()
    report = get_pending_report(wf)
    candidates: list[dict[str, Any]] = [
        *(report.get("batches") or []),
        *_registry_fallback_batches(wf, reg),
    ]

    def _row_campaign_id(row: dict[str, Any]) -> str:
        # Missing fields fall back to the same defaults for every row.
        return _campaign_id(
            row.get("project") or "dms",
            row.get("task") or "",
            row.get("mode"),
            row.get("batch") or "",
            row.get("location") or "inbox",
        )

    return next((row for row in candidates if _row_campaign_id(row) == campaign_id), None)
def ensure_campaign_record(campaign_id: str) -> None:
    """Make sure a ``LabelingCampaign`` record exists before submit/export.

    When the record is missing (the user never explicitly opened the batch
    for labeling) the matching batch is looked up and the campaign is opened
    for it.

    Raises:
        FileNotFoundError: No pending or registry batch maps to the id.
        ValueError: The match is a registry-only placeholder without a real
            inbox batch directory.
    """
    with session_scope() as db:
        already_recorded = bool(db.get(LabelingCampaign, campaign_id))
    if already_recorded:
        return

    batch = _find_batch_for_campaign_id(campaign_id)
    if not batch:
        raise FileNotFoundError("campaign not found")
    if batch.get("registry_only"):
        raise ValueError("该条目为任务模板占位,无真实 inbox 批次目录,请先送标入湖或从「进入标注」开启真实批次")

    open_campaign(
        project=batch.get("project") or "dms",
        task=batch.get("task") or "",
        batch=batch["batch"],
        mode=batch.get("mode"),
        pack=batch.get("pack"),
        location=batch.get("location") or "inbox",
    )
def assign_campaign(campaign_id: str, user_id: int | None) -> dict[str, Any]:
    """Assign a campaign to a user, or clear the assignment.

    Args:
        campaign_id: Id of an existing campaign.
        user_id: User to assign; ``None`` clears both assignee fields.

    Returns:
        The updated campaign dict passed through ``enrich_batch_labels``.

    Raises:
        FileNotFoundError: The campaign does not exist.
        ValueError: ``user_id`` does not match any user.
    """
    now = datetime.now(timezone.utc)

    with session_scope() as db:
        camp = db.get(LabelingCampaign, campaign_id)
        if not camp:
            raise FileNotFoundError("campaign not found")

        assignee_name = None
        if user_id is not None:
            user = db.get(User, user_id)
            if not user:
                raise ValueError(f"用户不存在: {user_id}")
            assignee_name = user.name

        # With user_id None both fields end up None, i.e. unassigned.
        camp.assigned_to_user_id = user_id
        camp.assigned_to_name = assignee_name
        camp.updated_at = now
        db.flush()
        payload = camp.to_dict()

    registry = load_dms_registry() if payload.get("project") == "dms" else None
    return enrich_batch_labels(payload, registry)
def submit_campaign(campaign_id: str) -> dict[str, Any]:
    """Submit a campaign for review.

    The campaign record is created first if it does not exist yet, then its
    status and the batch meta stage are both moved to ``in_review``.

    Returns:
        The campaign dict passed through ``enrich_batch_labels`` with
        ``stage`` set to ``"in_review"``.

    Raises:
        FileNotFoundError: The campaign cannot be found or created.
    """
    ensure_campaign_record(campaign_id)
    submitted_at = datetime.now(timezone.utc)

    with session_scope() as db:
        camp = db.get(LabelingCampaign, campaign_id)
        if not camp:
            raise FileNotFoundError("campaign not found")
        camp.status = "in_review"
        camp.updated_at = submitted_at
        db.flush()
        payload = camp.to_dict()
        update_campaign_batch_meta_stage(camp, "in_review")

    registry = load_dms_registry() if payload.get("project") == "dms" else None
    enriched = enrich_batch_labels(payload, registry)
    enriched["stage"] = "in_review"
    return enriched
def trigger_labeling_export(campaign_id: str) -> dict[str, Any]:
    """Enqueue an asynchronous ``labeling_export`` job for a campaign.

    The job parameters carry the campaign id plus the campaign's
    ``export_default``, ``scope_key`` and ``batch``.  The enqueued job is
    also recorded as an export-job row.

    Returns:
        A dict with ``ok``, the queue ``job``, the recorded ``export_job``
        and the campaign's ``export_default``.

    Raises:
        FileNotFoundError: The campaign does not exist.
    """
    campaign = get_campaign(campaign_id)
    if not campaign:
        raise FileNotFoundError("campaign not found")

    export_default = campaign.get("export_default")
    params = {
        "campaign_id": campaign_id,
        "export_default": export_default,
        "scope_key": campaign.get("scope_key"),
        "batch": campaign.get("batch"),
    }
    job = enqueue_job("labeling_export", params, async_run=True)
    export_job = _record_export_job(campaign_id, "labeling_export", job)

    return {
        "ok": True,
        "job": job,
        "export_job": export_job,
        "export_default": export_default,
    }