"""Campaign 与 pending 批次合并列表。""" from __future__ import annotations import hashlib import json import uuid from datetime import datetime, timezone from typing import Any from as_platform.config import WORKSPACE from as_platform.data.core import get_pending_report, load_wf from as_platform.db.engine import session_scope from as_platform.db.models import LabelingCampaign, LabelingExportJob, User from as_platform.jobs.queue import enqueue_job, get_job from as_platform.labeling.annotate import resolve_editor_xml, sync_campaign_config_xml from as_platform.labeling.batch_stage import ( on_labeling_export_job_succeeded, update_campaign_batch_meta_stage, ) from as_platform.labeling.scope import ( enrich_batch_labels, format_scope_key, load_dms_registry, load_labeling_registry, ) def _campaign_id(project: str, task: str, mode: str | None, batch: str, location: str) -> str: sk = format_scope_key(project, task, mode) raw = f"{sk}:{batch}:{location}" return hashlib.sha256(raw.encode()).hexdigest()[:20] def _parse_scope_key(scope_key: str) -> tuple[str, str, str | None]: parts = scope_key.split(":") if parts[0] == "lane": return "lane", parts[1] if len(parts) > 1 else "lane_v1", None if len(parts) >= 3: return "dms", parts[1], parts[2] if len(parts) == 2: return "dms", parts[1], None return "dms", parts[-1], None def _registry_fallback_batches(wf: dict, reg: dict) -> list[dict[str, Any]]: """labeling.registry 中有配置但 pending 未扫到的批次(如空 inbox)。""" from pathlib import Path from as_platform.data.batch import enrich_batch from as_platform.data.core import proj_root profiles = load_labeling_registry().get("profiles") or {} rows: list[dict[str, Any]] = [] dms_root = proj_root(wf, "dms") for _pk, prof in profiles.items(): scope_key = prof.get("scope_key") or "" project, task, mode = _parse_scope_key(scope_key) if project != "dms": continue batch = mode or task batch_dir = None if mode: try: import sys scripts = WORKSPACE / "datasets" / "dms" / "scripts" if str(scripts) not in sys.path: sys.path.insert(0, str(scripts)) from task_registry import inbox_dir, resolve_task_id task_r, mode_r = resolve_task_id(task, mode) batch_dir = inbox_dir(dms_root, task_r, mode_r, reg) except Exception: batch_dir = dms_root / "inbox" / task / mode else: batch_dir = dms_root / "inbox" / task / batch if not isinstance(batch_dir, Path) or not batch_dir.is_dir(): row = { "project": project, "task": task, "mode": mode, "batch": batch, "stage": "raw_pool", "location": "inbox", "path": str(batch_dir) if batch_dir else "", "counts": {"images": 0, "labels": 0}, "registry_only": True, } else: row = enrich_batch( batch_dir, project=project, task=task, pack=None, batch=batch, location="inbox", ) row["mode"] = mode row["scope_key"] = scope_key rows.append(row) return rows def list_labeling_batches( *, stage: str | None = None, offset: int = 0, limit: int = 20, ) -> dict[str, Any]: wf = load_wf() report = get_pending_report(wf) reg = load_dms_registry() items: list[dict[str, Any]] = [] seen: set[str] = set() allowed_stages = ("raw_pool", "out_for_labeling", "returned", "labeling_submitted", "in_review", "review_approved", "review_rejected") def _append(b: dict[str, Any]) -> None: if stage and b.get("stage") != stage: return if b.get("stage") not in allowed_stages: return row = enrich_batch_labels(b, reg) cid = _campaign_id( row["project"], row.get("task") or "", row.get("mode"), row["batch"], row.get("location") or "inbox" ) key = f"{cid}" if key in seen: return seen.add(key) with session_scope() as db: camp = db.get(LabelingCampaign, cid) status = camp.status if camp else "not_opened" if camp: row["assigned_to_user_id"] = camp.assigned_to_user_id row["assigned_to_name"] = camp.assigned_to_name row["campaign_id"] = cid row["campaign_status"] = status if camp and status in ("in_progress", "labeling_submitted"): try: from as_platform.labeling.progress import campaign_progress_summary row.update(campaign_progress_summary(cid)) except Exception: row.update({"total_tasks": 0, "completed_tasks": 0, "assigned_tasks": 0}) items.append(row) for b in report.get("batches", []): _append(b) for b in _registry_fallback_batches(wf, reg): _append(b) total = len(items) page = items[max(0, offset) : max(0, offset) + max(1, limit)] return { "items": page, "total": total, "offset": offset, "limit": limit, "updated_at": report.get("updated_at"), } def open_campaign( *, project: str, task: str, batch: str, mode: str | None = None, pack: str | None = None, location: str = "inbox", ) -> dict[str, Any]: cid = _campaign_id(project, task, mode, batch, location) config_xml = resolve_editor_xml(project, task, mode) now = datetime.now(timezone.utc) with session_scope() as db: camp = db.get(LabelingCampaign, cid) if not camp: camp = LabelingCampaign( id=cid, project=project, task=task, mode=mode, batch=batch, pack=pack, location=location, status="in_progress", config_xml=config_xml, created_at=now, updated_at=now, ) db.add(camp) else: camp.status = "in_progress" camp.updated_at = now sync_campaign_config_xml(camp) db.flush() out = camp.to_dict() out["config_xml"] = camp.config_xml update_campaign_batch_meta_stage(camp, "out_for_labeling") reg = load_dms_registry() if project == "dms" else None row = enrich_batch_labels(out, reg) row["stage"] = "out_for_labeling" return row def get_campaign(campaign_id: str) -> dict[str, Any] | None: with session_scope() as db: camp = db.get(LabelingCampaign, campaign_id) if not camp: return None row = camp.to_dict() row["config_xml"] = camp.config_xml reg = load_dms_registry() if row.get("project") == "dms" else None return enrich_batch_labels(row, reg) def _export_job_id() -> str: return f"lej-{uuid.uuid4().hex[:16]}" def _record_export_job(campaign_id: str, action: str, job: dict[str, Any]) -> dict[str, Any]: ej_id = _export_job_id() job_id = job.get("id") now = datetime.now(timezone.utc) with session_scope() as db: ej = LabelingExportJob( id=ej_id, campaign_id=campaign_id, action=action, job_id=job_id, status=job.get("status") or "queued", created_at=now, ) db.add(ej) out = get_export_job(ej_id) return out or {"id": ej_id, "campaign_id": campaign_id, "action": action, "job_id": job_id} def _sync_export_job_from_queue(ej: LabelingExportJob) -> None: if not ej.job_id: return job = get_job(ej.job_id) if not job: return ej.status = job.get("status") or ej.status if job.get("finished_at"): try: ej.finished_at = datetime.fromisoformat(str(job["finished_at"]).replace("Z", "+00:00")) except Exception: pass if job.get("result") is not None: ej.result_json = json.dumps(job.get("result"), ensure_ascii=False) if ej.action == "labeling_export" and ej.status in ("succeeded", "completed"): on_labeling_export_job_succeeded( {"action": "labeling_export", "params": {"campaign_id": ej.campaign_id}} ) def get_export_job(export_job_id: str) -> dict[str, Any] | None: with session_scope() as db: ej = db.get(LabelingExportJob, export_job_id) if not ej: return None _sync_export_job_from_queue(ej) db.flush() return ej.to_dict() def list_campaign_export_jobs(campaign_id: str, *, limit: int = 30) -> dict[str, Any]: with session_scope() as db: rows = ( db.query(LabelingExportJob) .filter_by(campaign_id=campaign_id) .filter(LabelingExportJob.action != "labeling_ml_predict") .order_by(LabelingExportJob.created_at.desc()) .limit(limit) .all() ) for ej in rows: _sync_export_job_from_queue(ej) db.flush() items = [ej.to_dict() for ej in rows] return {"items": items, "campaign_id": campaign_id} def list_labeling_assignees() -> dict[str, Any]: """可指派为批次负责人的用户(标注相关角色)。""" role_codes = ("labeler", "internal_labeler", "vendor_labeler", "engineer", "admin") with session_scope() as db: users = ( db.query(User) .filter(User.is_active.is_(True)) .order_by(User.name) .all() ) items = [] for u in users: codes = {r.code for r in (u.roles or [])} if codes.intersection(role_codes): items.append({"id": u.id, "name": u.name or f"user-{u.id}", "roles": sorted(codes)}) return {"items": items} def _find_batch_for_campaign_id(campaign_id: str) -> dict[str, Any] | None: """由确定性 campaign_id 反查 pending / registry 批次行。""" wf = load_wf() reg = load_dms_registry() candidates: list[dict[str, Any]] = [] report = get_pending_report(wf) candidates.extend(report.get("batches") or []) candidates.extend(_registry_fallback_batches(wf, reg)) for b in candidates: cid = _campaign_id( b.get("project") or "dms", b.get("task") or "", b.get("mode"), b.get("batch") or "", b.get("location") or "inbox", ) if cid == campaign_id: return b return None def ensure_campaign_record(campaign_id: str) -> None: """提交/导出前保证 DB 中有 LabelingCampaign(未点「进入标注」时自动创建)。""" with session_scope() as db: if db.get(LabelingCampaign, campaign_id): return batch = _find_batch_for_campaign_id(campaign_id) if not batch: raise FileNotFoundError("campaign not found") if batch.get("registry_only"): raise ValueError("该条目为任务模板占位,无真实 inbox 批次目录,请先送标入湖或从「进入标注」开启真实批次") open_campaign( project=batch.get("project") or "dms", task=batch.get("task") or "", batch=batch["batch"], mode=batch.get("mode"), pack=batch.get("pack"), location=batch.get("location") or "inbox", ) def assign_campaign(campaign_id: str, user_id: int | None) -> dict[str, Any]: now = datetime.now(timezone.utc) with session_scope() as db: camp = db.get(LabelingCampaign, campaign_id) if not camp: raise FileNotFoundError("campaign not found") if user_id is None: camp.assigned_to_user_id = None camp.assigned_to_name = None else: user = db.get(User, user_id) if not user: raise ValueError(f"用户不存在: {user_id}") camp.assigned_to_user_id = user_id camp.assigned_to_name = user.name camp.updated_at = now db.flush() out = camp.to_dict() reg = load_dms_registry() if out.get("project") == "dms" else None return enrich_batch_labels(out, reg) def submit_campaign(campaign_id: str) -> dict[str, Any]: ensure_campaign_record(campaign_id) now = datetime.now(timezone.utc) with session_scope() as db: camp = db.get(LabelingCampaign, campaign_id) if not camp: raise FileNotFoundError("campaign not found") camp.status = "in_review" camp.updated_at = now db.flush() out = camp.to_dict() update_campaign_batch_meta_stage(camp, "in_review") reg = load_dms_registry() if out.get("project") == "dms" else None row = enrich_batch_labels(out, reg) row["stage"] = "in_review" return row def trigger_labeling_export(campaign_id: str) -> dict[str, Any]: row = get_campaign(campaign_id) if not row: raise FileNotFoundError("campaign not found") job = enqueue_job( "labeling_export", { "campaign_id": campaign_id, "export_default": row.get("export_default"), "scope_key": row.get("scope_key"), "batch": row.get("batch"), }, async_run=True, ) ej = _record_export_job(campaign_id, "labeling_export", job) return {"ok": True, "job": job, "export_job": ej, "export_default": row.get("export_default")}