将 platform + CVAT 合并为单文件 docker-compose.yml,完善 .env 与 init/dev_up 脚本;新增 docs/DEPLOY.md 并更新 README 以支持新机器部署;含数据湖示例、车队地图、紧凑表格 UI、ADAS det_7cls 路径与批次台账等近期改动。 Co-authored-by: Cursor <cursoragent@cursor.com>
231 lines
8.6 KiB
Python
231 lines
8.6 KiB
Python
"""扫描 inbox / 数据湖目录,与批次台账对齐。"""
|
||
from __future__ import annotations
|
||
|
||
from datetime import datetime, timezone
|
||
from pathlib import Path
|
||
from typing import Any
|
||
|
||
from as_platform.data.core import load_wf, proj_root, register_batch
|
||
from as_platform.db.engine import session_scope
|
||
from as_platform.db.models import BatchDelivery, BatchIndex, User
|
||
from as_platform.deliveries.service import _new_delivery_id, _normalize_task
|
||
|
||
|
||
def _utcnow() -> datetime:
    """Return the current moment as a timezone-aware ``datetime`` in UTC."""
    return datetime.now(tz=timezone.utc)
|
||
|
||
|
||
def _dir_mtime_iso(path: Path) -> str | None:
    """Return the modification date of *path* as a ``YYYY-MM-DD`` string (UTC).

    Returns ``None`` when an ``OSError`` is raised while reading or converting
    the modification time (for example, the path does not exist).
    """
    try:
        modified = datetime.fromtimestamp(path.stat().st_mtime, tz=timezone.utc)
        return f"{modified:%Y-%m-%d}"
    except OSError:
        return None
|
||
|
||
|
||
def _scan_project_inbox(project: str, wf: dict | None = None) -> list[dict[str, Any]]:
    """Scan ``<project root>/inbox/<task>/<batch>`` and describe each batch found.

    Every second-level directory under the project's ``inbox`` folder is
    treated as one batch. For each batch the image and label files are counted
    and the result is cross-checked against two tables:

    * ``BatchDelivery`` (the ledger) -> ``in_ledger`` / ``needs_ledger``
    * non-archived ``BatchIndex`` rows (the workbench) -> ``in_workbench`` /
      ``needs_workbench``

    Args:
        project: Project name used to resolve the project root and to filter
            both tables.
        wf: Already-loaded workflow config; ``load_wf()`` is called when falsy.

    Returns:
        One dict per batch directory, ordered by task directory name and then
        by batch directory name. Empty when the inbox directory is missing.
    """
    from as_platform.data.batch import count_images, count_label_files, dms_has_labels

    wf = wf or load_wf()
    inbox = proj_root(wf, project) / "inbox"
    if not inbox.is_dir():
        return []

    with session_scope() as db:
        # Copy the needed columns into plain dicts while the session is open.
        # Reading ORM attributes after the session scope ends can raise
        # DetachedInstanceError when the session expires objects on commit.
        deliveries: dict[tuple[str, str, str, str], dict[str, Any]] = {
            (r.project, r.task or "", r.mode or "", r.batch_name): {
                "id": r.id,
                "status": r.status,
                "collection_start": r.collection_start,
                "collection_end": r.collection_end,
                "created_at": r.created_at.isoformat() if r.created_at else None,
            }
            for r in db.query(BatchDelivery).filter(BatchDelivery.project == project).all()
        }
        indexed = {
            (r.task or "", r.batch)
            for r in db.query(BatchIndex).filter(
                BatchIndex.project == project,
                BatchIndex.archived.is_(False),
            ).all()
        }

    items: list[dict[str, Any]] = []
    for task_dir in sorted(inbox.iterdir()):
        if not task_dir.is_dir():
            continue
        for batch_dir in sorted(task_dir.iterdir()):
            if not batch_dir.is_dir():
                continue
            task_name = task_dir.name
            batch_name = batch_dir.name

            # Images may sit directly in the batch dir or under "images/".
            img_count = count_images(batch_dir)
            images_dir = batch_dir / "images"
            if not img_count and images_dir.is_dir():
                img_count = count_images(images_dir)

            labels_dir = batch_dir / "labels"
            lbl_count = count_label_files(labels_dir) if labels_dir.is_dir() else 0
            has_labels = lbl_count > 0 or dms_has_labels(batch_dir)
            # Only files under "labels/" promote the batch to "returned";
            # labels detected solely by dms_has_labels() do not.
            stage_hint = "returned" if lbl_count > 0 else "raw_pool"

            # Inbox directories carry no mode, so only ledger rows whose mode
            # is empty/NULL can match here.
            delivery = deliveries.get((project, task_name, "", batch_name))
            in_ledger = delivery is not None
            in_index = (task_name, batch_name) in indexed

            items.append({
                "project": project,
                "task": task_name,
                "mode": None,
                "batch": batch_name,
                "batch_name": batch_name,
                "path": str(batch_dir),
                "data_path": str(batch_dir),
                "images": img_count,
                "labels": lbl_count,
                "has_labels": has_labels,
                "stage_hint": stage_hint,
                "source_type": "inbox_scan",
                "delivery_id": delivery["id"] if in_ledger else None,
                "delivery_status": delivery["status"] if in_ledger else None,
                "in_ledger": in_ledger,
                "in_workbench": in_index,
                # Fall back to the directory mtime when no ledger row exists.
                "collection_start": (
                    delivery["collection_start"] if in_ledger else _dir_mtime_iso(batch_dir)
                ),
                "collection_end": delivery["collection_end"] if in_ledger else None,
                "created_at": delivery["created_at"] if in_ledger else None,
                "needs_ledger": not in_ledger,
                "needs_workbench": not in_index,
            })
    return items
|
||
|
||
|
||
def scan_delivery_sources(*, projects: list[str] | None = None) -> dict[str, Any]:
    """Scan project inboxes and report how they line up with ledger and workbench.

    Args:
        projects: Project names to scan; when falsy, ``dms``, ``adas`` and
            ``lane`` are scanned.

    Returns:
        A dict with the per-batch ``items``, their ``count``, how many still
        need a ledger entry (``needs_ledger``) or a workbench entry
        (``needs_workbench``), and the UTC scan time as ``scanned_at``.
    """
    targets = projects or ["dms", "adas", "lane"]
    workflow = load_wf()  # loaded once and shared by every project scan

    found: list[dict[str, Any]] = [
        entry
        for name in targets
        for entry in _scan_project_inbox(name, workflow)
    ]
    missing_ledger = len([entry for entry in found if entry.get("needs_ledger")])
    missing_workbench = len([entry for entry in found if entry.get("needs_workbench")])

    return {
        "items": found,
        "count": len(found),
        "needs_ledger": missing_ledger,
        "needs_workbench": missing_workbench,
        "scanned_at": _utcnow().isoformat(),
    }
|
||
|
||
|
||
def register_scanned_to_ledger(
    items: list[dict[str, Any]],
    user: User,
    *,
    sync_workbench: bool = True,
) -> dict[str, Any]:
    """Record scanned batches in the ledger and optionally sync the workbench.

    For each item a ``BatchDelivery`` row is looked up by
    ``(project, task, mode, batch_name)``. A missing row is created with
    status ``in_lake``. An existing row is moved to ``in_lake`` only when its
    status is ``draft``, ``rejected`` or ``ingest_failed``, and only its empty
    fields are filled in. Items without a batch name, without a data path, or
    whose data path is not an existing directory are skipped.

    Args:
        items: Scan results. Keys read: ``project`` (default ``"dms"``),
            ``task``, ``mode``, ``batch_name``/``batch``,
            ``data_path``/``path``, ``stage_hint`` (default ``"raw_pool"``),
            ``collection_start``, ``collection_end``,
            ``images``/``estimated_count`` and ``source_type``.
        user: Recorded as owner and submitter of newly created rows.
        sync_workbench: When true, batches whose stage hint is ``raw_pool`` or
            ``returned`` are also passed to ``register_batch``.

    Returns:
        ``{"ok": True, "created", "updated", "synced_workbench", "items"}``,
        where ``items`` holds ``to_dict()`` of every row touched.
    """
    import logging

    log = logging.getLogger(__name__)

    created = 0
    updated = 0
    synced = 0
    out_items: list[dict[str, Any]] = []

    for raw in items:
        project = (raw.get("project") or "dms").strip()
        task = _normalize_task(project, raw.get("task"))
        mode = (raw.get("mode") or "").strip() or None
        batch_name = (raw.get("batch_name") or raw.get("batch") or "").strip()
        data_path = (raw.get("data_path") or raw.get("path") or "").strip()
        if not batch_name or not data_path:
            continue
        if not Path(data_path).is_dir():
            continue

        stage_hint = raw.get("stage_hint") or "raw_pool"
        collection_start = (
            (raw.get("collection_start") or "").strip() or _dir_mtime_iso(Path(data_path))
        )
        collection_end = (raw.get("collection_end") or "").strip() or None
        # Prefer the scanned image count; fall back to a supplied estimate.
        estimated = raw.get("images")
        if estimated is None:
            estimated = raw.get("estimated_count")

        with session_scope() as db:
            rec = (
                db.query(BatchDelivery)
                .filter_by(project=project, task=task, mode=mode, batch_name=batch_name)
                .first()
            )
            if not rec:
                rec = BatchDelivery(
                    id=_new_delivery_id(),
                    project=project,
                    task=task,
                    mode=mode,
                    batch_name=batch_name,
                    source_type=(raw.get("source_type") or "inbox_scan"),
                    collection_start=collection_start,
                    collection_end=collection_end,
                    data_path=data_path,
                    estimated_count=int(estimated) if estimated not in (None, "") else None,
                    status="in_lake",
                    inbox_path=data_path,
                    owner_user_id=user.id,
                    owner_name=user.name,
                    submitted_by_user_id=user.id,
                    submitted_by_name=user.name,
                )
                db.add(rec)
                created += 1
            else:
                # Existing row: advance only pre-lake statuses and fill blank
                # fields without overwriting values already recorded.
                if rec.status in ("draft", "rejected", "ingest_failed"):
                    rec.status = "in_lake"
                if not rec.inbox_path:
                    rec.inbox_path = data_path
                if not rec.data_path:
                    rec.data_path = data_path
                if collection_start and not rec.collection_start:
                    rec.collection_start = collection_start
                if estimated not in (None, "") and not rec.estimated_count:
                    rec.estimated_count = int(estimated)
                if not rec.source_type:
                    rec.source_type = "inbox_scan"
                rec.updated_at = _utcnow()
                updated += 1
            db.flush()
            # Serialise while the session is still open.
            out_items.append(rec.to_dict())

        if sync_workbench and stage_hint in ("raw_pool", "returned"):
            try:
                register_batch(
                    None,
                    project,
                    task,
                    batch_name,
                    stage=stage_hint,
                    location="inbox",
                )
            except Exception:
                # Best-effort: a workbench failure must not abort the ledger
                # registration of the remaining items, but it is logged so it
                # no longer disappears silently.
                log.exception(
                    "workbench sync failed for %s/%s/%s", project, task, batch_name
                )
            else:
                synced += 1

    return {
        "ok": True,
        "created": created,
        "updated": updated,
        "synced_workbench": synced,
        "items": out_items,
    }
|
||
|
||
|
||
def bridge_delivery_to_workbench(delivery_id: str) -> dict[str, Any]:
    """Register an ``in_lake`` ledger delivery in the labeling-workbench index.

    The batch is registered at stage ``returned`` when the delivery's inbox
    path (or, failing that, its data path) contains a non-empty ``labels``
    directory; otherwise it is registered at stage ``raw_pool``.

    Args:
        delivery_id: Primary key of the ``BatchDelivery`` row.

    Returns:
        ``{"ok": True, "delivery_id": ..., "batch": ...}`` where ``batch`` is
        taken from the ``register_batch`` result.

    Raises:
        ValueError: If the delivery does not exist or its status is not
            ``in_lake``.
    """
    with session_scope() as db:
        delivery = db.get(BatchDelivery, delivery_id)
        if not delivery:
            raise ValueError("送标申请不存在")
        if delivery.status != "in_lake":
            raise ValueError(f"当前状态不可同步工作台: {delivery.status}")
        # Copy plain values out; the code below uses only these locals.
        project, task, batch_name = delivery.project, delivery.task, delivery.batch_name
        source_dir = delivery.inbox_path or delivery.data_path

    labels_present = False
    if source_dir:
        labels_dir = Path(source_dir) / "labels"
        # Any entry inside "labels/" counts as labels being present.
        labels_present = labels_dir.is_dir() and any(labels_dir.iterdir())
    stage = "returned" if labels_present else "raw_pool"

    registered = register_batch(None, project, task, batch_name, stage=stage, location="inbox")
    return {"ok": True, "delivery_id": delivery_id, "batch": registered.get("batch")}
|