Files
HSAP/platform/as_platform/deliveries/scan.py
Chengfang Lu 483e027482 feat: 合并 Docker Compose、标注表格优化与部署文档
将 platform + CVAT 合并为单文件 docker-compose.yml,完善 .env 与 init/dev_up 脚本;
新增 docs/DEPLOY.md 与更新 README 以支持新机器部署;含数据湖示例、车队地图、
紧凑表格 UI、ADAS det_7cls 路径与批次台账等近期改动。

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-16 17:06:31 +08:00

231 lines
8.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""扫描 inbox / 数据湖目录,与批次台账对齐。"""
from __future__ import annotations
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from as_platform.data.core import load_wf, proj_root, register_batch
from as_platform.db.engine import session_scope
from as_platform.db.models import BatchDelivery, BatchIndex, User
from as_platform.deliveries.service import _new_delivery_id, _normalize_task
def _utcnow() -> datetime:
    """Return the current moment as a timezone-aware datetime in UTC."""
    return datetime.now(tz=timezone.utc)
def _dir_mtime_iso(path: Path) -> str | None:
    """Return the modification date of *path* as ``YYYY-MM-DD`` (UTC).

    Returns ``None`` when an ``OSError`` is raised while reading or
    converting the timestamp (for example, when the path does not exist).
    """
    try:
        modified_at = datetime.fromtimestamp(path.stat().st_mtime, tz=timezone.utc)
        return f"{modified_at:%Y-%m-%d}"
    except OSError:
        return None
def _scan_project_inbox(project: str, wf: dict | None = None) -> list[dict[str, Any]]:
    """List every batch directory found under ``<project root>/inbox/<task>/<batch>``.

    Each batch is reported together with its image/label counts and whether it
    is already present in the delivery ledger (``BatchDelivery``) and in the
    workbench index (non-archived ``BatchIndex`` rows).

    Args:
        project: Project key, passed to ``proj_root`` and used to filter both tables.
        wf: Workflow configuration; ``load_wf()`` is called when it is falsy.

    Returns:
        One dict per batch directory, ordered by task directory name and then
        batch directory name. Empty when the project has no ``inbox`` directory.
    """
    from as_platform.data.batch import count_images, count_label_files, dms_has_labels

    wf = wf or load_wf()
    inbox = proj_root(wf, project) / "inbox"
    if not inbox.is_dir():
        return []

    # Copy the columns we need into plain dicts while the session is open, so
    # the directory walk below never touches an ORM instance and therefore does
    # not depend on how the session treats instances once it has been closed.
    with session_scope() as db:
        deliveries: dict[tuple[str, str, str, str], dict[str, Any]] = {
            (row.project, row.task or "", row.mode or "", row.batch_name): {
                "id": row.id,
                "status": row.status,
                "collection_start": row.collection_start,
                "collection_end": row.collection_end,
                "created_at": row.created_at,
            }
            for row in db.query(BatchDelivery).filter(BatchDelivery.project == project).all()
        }
        indexed = {
            (row.task or "", row.batch)
            for row in db.query(BatchIndex).filter(
                BatchIndex.project == project,
                BatchIndex.archived.is_(False),
            ).all()
        }

    items: list[dict[str, Any]] = []
    for task_dir in sorted(inbox.iterdir()):
        if not task_dir.is_dir():
            continue
        task_name = task_dir.name
        for batch_dir in sorted(task_dir.iterdir()):
            if not batch_dir.is_dir():
                continue
            batch_name = batch_dir.name

            # Count images at the batch root first; fall back to an "images"
            # sub-directory when the root yields nothing.
            img_count = count_images(batch_dir)
            if not img_count and (batch_dir / "images").is_dir():
                img_count = count_images(batch_dir / "images")

            labels_dir = batch_dir / "labels"
            lbl_count = count_label_files(labels_dir) if labels_dir.is_dir() else 0
            has_labels = lbl_count > 0 or dms_has_labels(batch_dir)
            # Only files counted under labels/ promote a batch to "returned";
            # labels detected solely by dms_has_labels leave it in "raw_pool".
            stage_hint = "returned" if lbl_count > 0 else "raw_pool"

            # NOTE(review): the lookup uses an empty mode, so only ledger rows
            # without a mode can match an inbox directory — presumably the
            # inbox layout carries no mode; confirm against the ledger writers.
            delivery = deliveries.get((project, task_name, "", batch_name))
            in_index = (task_name, batch_name) in indexed
            created_at = delivery["created_at"] if delivery else None

            items.append({
                "project": project,
                "task": task_name,
                "mode": None,
                "batch": batch_name,
                "batch_name": batch_name,
                "path": str(batch_dir),
                "data_path": str(batch_dir),
                "images": img_count,
                "labels": lbl_count,
                "has_labels": has_labels,
                "stage_hint": stage_hint,
                "source_type": "inbox_scan",
                "delivery_id": delivery["id"] if delivery else None,
                "delivery_status": delivery["status"] if delivery else None,
                "in_ledger": delivery is not None,
                "in_workbench": in_index,
                # Without a ledger row, the directory mtime stands in for the start date.
                "collection_start": delivery["collection_start"] if delivery else _dir_mtime_iso(batch_dir),
                "collection_end": delivery["collection_end"] if delivery else None,
                "created_at": created_at.isoformat() if created_at else None,
                "needs_ledger": delivery is None,
                "needs_workbench": not in_index,
            })
    return items
def scan_delivery_sources(*, projects: list[str] | None = None) -> dict[str, Any]:
    """Scan project inboxes and report how they line up with the ledger and workbench.

    Args:
        projects: Project keys to scan; ``["dms", "adas", "lane"]`` is used
            when the argument is ``None`` or empty.

    Returns:
        A dict with the scanned ``items``, their ``count``, how many of them
        still need a ledger entry (``needs_ledger``) or a workbench entry
        (``needs_workbench``), and the UTC ``scanned_at`` timestamp.
    """
    targets = projects if projects else ["dms", "adas", "lane"]
    workflow = load_wf()  # loaded once and shared by every project scan

    found: list[dict[str, Any]] = [
        entry
        for name in targets
        for entry in _scan_project_inbox(name, workflow)
    ]
    missing_ledger = len([entry for entry in found if entry.get("needs_ledger")])
    missing_workbench = len([entry for entry in found if entry.get("needs_workbench")])

    return {
        "items": found,
        "count": len(found),
        "needs_ledger": missing_ledger,
        "needs_workbench": missing_workbench,
        "scanned_at": _utcnow().isoformat(),
    }
def register_scanned_to_ledger(
    items: list[dict[str, Any]],
    user: User,
    *,
    sync_workbench: bool = True,
) -> dict[str, Any]:
    """Register scan results in the delivery ledger and optionally sync the workbench.

    Batches that already sit in the inbox are recorded with status ``in_lake``.
    An item is skipped when it has no batch name, no data path, or when its
    data path is not an existing directory.

    Args:
        items: Scan result dicts. The keys read are ``project``, ``task``,
            ``mode``, ``batch_name``/``batch``, ``data_path``/``path``,
            ``stage_hint``, ``collection_start``, ``collection_end``,
            ``images``/``estimated_count`` and ``source_type``.
        user: Recorded as owner and submitter of newly created ledger rows.
        sync_workbench: When true, each processed batch is also passed to
            ``register_batch``. This step is best-effort: a failure is logged
            and does not abort the run.

    Returns:
        ``{"ok": True, "created": int, "updated": int,
        "synced_workbench": int, "items": [...]}`` where ``items`` holds
        ``to_dict()`` of every ledger row that was created or updated.
    """
    import logging

    logger = logging.getLogger(__name__)

    created = 0
    updated = 0
    synced = 0
    out_items: list[dict[str, Any]] = []

    for raw in items:
        project = (raw.get("project") or "dms").strip()
        task = _normalize_task(project, raw.get("task"))
        mode = (raw.get("mode") or "").strip() or None
        batch_name = (raw.get("batch_name") or raw.get("batch") or "").strip()
        data_path = (raw.get("data_path") or raw.get("path") or "").strip()
        if not batch_name or not data_path:
            continue
        if not Path(data_path).is_dir():
            continue

        stage_hint = raw.get("stage_hint") or "raw_pool"
        # Fall back to the directory mtime when the item carries no start date.
        collection_start = (raw.get("collection_start") or "").strip() or _dir_mtime_iso(Path(data_path))
        collection_end = (raw.get("collection_end") or "").strip() or None
        estimated = raw.get("images")
        if estimated is None:
            estimated = raw.get("estimated_count")

        with session_scope() as db:
            rec = (
                db.query(BatchDelivery)
                .filter_by(project=project, task=task, mode=mode, batch_name=batch_name)
                .first()
            )
            if not rec:
                rec = BatchDelivery(
                    id=_new_delivery_id(),
                    project=project,
                    task=task,
                    mode=mode,
                    batch_name=batch_name,
                    source_type=(raw.get("source_type") or "inbox_scan"),
                    collection_start=collection_start,
                    collection_end=collection_end,
                    data_path=data_path,
                    estimated_count=int(estimated) if estimated not in (None, "") else None,
                    status="in_lake",
                    inbox_path=data_path,
                    owner_user_id=user.id,
                    owner_name=user.name,
                    submitted_by_user_id=user.id,
                    submitted_by_name=user.name,
                )
                db.add(rec)
                created += 1
            else:
                # Only these three states are moved to in_lake; any other
                # status is left as it is.
                if rec.status in ("draft", "rejected", "ingest_failed"):
                    rec.status = "in_lake"
                # Fill blanks only; values already stored are never overwritten.
                if not rec.inbox_path:
                    rec.inbox_path = data_path
                if not rec.data_path:
                    rec.data_path = data_path
                if collection_start and not rec.collection_start:
                    rec.collection_start = collection_start
                if estimated not in (None, "") and not rec.estimated_count:
                    rec.estimated_count = int(estimated)
                if not rec.source_type:
                    rec.source_type = "inbox_scan"
                rec.updated_at = _utcnow()
                updated += 1
            db.flush()
            out_items.append(rec.to_dict())

        # Runs after the ledger session block above has exited. The sync stays
        # best-effort, but failures are logged instead of silently discarded so
        # a lower "synced_workbench" count can be diagnosed.
        if sync_workbench and stage_hint in ("raw_pool", "returned"):
            try:
                register_batch(
                    None,
                    project,
                    task,
                    batch_name,
                    stage=stage_hint,
                    location="inbox",
                )
            except Exception:
                logger.exception(
                    "workbench sync failed for project=%s task=%s batch=%s",
                    project,
                    task,
                    batch_name,
                )
            else:
                synced += 1

    return {
        "ok": True,
        "created": created,
        "updated": updated,
        "synced_workbench": synced,
        "items": out_items,
    }
def bridge_delivery_to_workbench(delivery_id: str) -> dict[str, Any]:
    """Sync a ledger entry that is ``in_lake`` into the labeling-workbench index.

    The batch is registered with stage ``returned`` when the delivery's inbox
    path (or, failing that, its data path) has a non-empty ``labels``
    directory, and with stage ``raw_pool`` otherwise.

    Args:
        delivery_id: Primary key of the ``BatchDelivery`` row.

    Returns:
        ``{"ok": True, "delivery_id": ..., "batch": ...}`` where ``batch`` is
        taken from the ``register_batch`` result.

    Raises:
        ValueError: If the delivery does not exist or its status is not ``in_lake``.
    """
    with session_scope() as db:
        delivery = db.get(BatchDelivery, delivery_id)
        if not delivery:
            raise ValueError("送标申请不存在")
        if delivery.status != "in_lake":
            raise ValueError(f"当前状态不可同步工作台: {delivery.status}")
        # Copy plain values out so nothing below needs the ORM instance.
        project, task, batch_name = delivery.project, delivery.task, delivery.batch_name
        source_dir = delivery.inbox_path or delivery.data_path

    labels_present = False
    if source_dir:
        labels_dir = Path(source_dir) / "labels"
        # Any entry inside labels/ counts, whatever its type.
        labels_present = labels_dir.is_dir() and any(labels_dir.iterdir())
    stage = "returned" if labels_present else "raw_pool"

    outcome = register_batch(None, project, task, batch_name, stage=stage, location="inbox")
    return {"ok": True, "delivery_id": delivery_id, "batch": outcome.get("batch")}