39 lines
1.3 KiB
Python
39 lines
1.3 KiB
Python
|
|
"""数据整理:校验摘要写入 batch.meta。"""
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
from pathlib import Path
|
||
|
|
from typing import Any
|
||
|
|
|
||
|
|
from as_platform.data.batch import META_FILENAME, count_images, count_label_files, read_meta, write_meta
|
||
|
|
|
||
|
|
|
||
|
|
def organize_batch(batch_dir: Path, *, task: str | None = None) -> dict[str, Any]:
|
||
|
|
"""生成整理报告并合并进 batch.meta.yaml。"""
|
||
|
|
batch_dir = batch_dir.resolve()
|
||
|
|
if not batch_dir.is_dir():
|
||
|
|
raise FileNotFoundError(batch_dir)
|
||
|
|
|
||
|
|
images = count_images(batch_dir / "images") + count_images(batch_dir / "images" / "train")
|
||
|
|
labels = count_label_files(batch_dir / "labels") + count_label_files(batch_dir / "labels" / "train")
|
||
|
|
|
||
|
|
report: dict[str, Any] = {
|
||
|
|
"task": task,
|
||
|
|
"images": images,
|
||
|
|
"labels": labels,
|
||
|
|
"pair_ratio": round(labels / images, 3) if images else 0,
|
||
|
|
"ready_for_ingest": images > 0 and labels > 0,
|
||
|
|
"issues": [],
|
||
|
|
}
|
||
|
|
if images and not labels:
|
||
|
|
report["issues"].append("missing_labels")
|
||
|
|
if labels and not images:
|
||
|
|
report["issues"].append("missing_images")
|
||
|
|
|
||
|
|
meta = read_meta(batch_dir) or {}
|
||
|
|
meta["organize_report"] = report
|
||
|
|
meta.setdefault("counts", {})
|
||
|
|
meta["counts"]["images"] = images
|
||
|
|
meta["counts"]["labels"] = labels
|
||
|
|
write_meta(batch_dir, meta)
|
||
|
|
return report
|