2026-05-25 16:59:59 +08:00
|
|
|
|
"""Adapter registry and auto detection for uploaded datasets."""
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
|
|
from as_platform.data.ingest.base import IngestAdapter, IngestContext, NormalizedDataset
|
|
|
|
|
|
from as_platform.data.ingest.dms_coco import DmsCocoAdapter
|
2026-06-03 11:40:21 +08:00
|
|
|
|
from as_platform.data.ingest.dms_inbox_raw import DmsInboxRawAdapter
|
2026-05-25 16:59:59 +08:00
|
|
|
|
from as_platform.data.ingest.dms_yolo import DmsYoloAdapter
|
|
|
|
|
|
from as_platform.data.ingest.lane_lines import LaneLinesAdapter
|
|
|
|
|
|
from as_platform.data.ingest.lane_mask import LaneMaskAdapter
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class UnknownFormatError(ValueError):
    """Raised when no registered adapter recognises an uploaded dataset."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Registry of ingest adapters, instantiated once at import time.
# Order matters: detection walks this tuple front to back and stops at the
# first adapter that accepts the dataset, so earlier entries take precedence.
ADAPTERS: tuple[IngestAdapter, ...] = tuple(
    adapter_cls()
    for adapter_cls in (
        DmsYoloAdapter,
        DmsCocoAdapter,
        DmsInboxRawAdapter,
        LaneMaskAdapter,
        LaneLinesAdapter,
    )
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def available_formats(project: str) -> list[str]:
    """Return the format ids of all registered adapters that serve *project*.

    The ids are listed in the same order as their adapters appear in
    ``ADAPTERS``. An empty list is returned when no adapter lists the project.
    """
    # Lazy filter first, then project to the id, so each adapter is checked
    # and read in registry order.
    serving = (adapter for adapter in ADAPTERS if project in adapter.projects)
    return [adapter.format_id for adapter in serving]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def detect_adapter(ctx: IngestContext) -> IngestAdapter:
    """Pick the first registered adapter that accepts the dataset in *ctx*.

    Adapters are tried in ``ADAPTERS`` order. An adapter is consulted only
    when ``ctx.project`` is among its ``projects``; the first one whose
    ``can_handle(ctx)`` is truthy is returned.

    Raises:
        UnknownFormatError: No eligible adapter accepted the dataset. The
            message reports the project, task, source path and the formats
            available for the project; for the ``"dms"`` project it also
            appends a hint about the expected directory layout.
    """
    for candidate in ADAPTERS:
        # Project membership is tested first so can_handle() is never called
        # on an adapter that does not serve this project.
        if ctx.project in candidate.projects and candidate.can_handle(ctx):
            return candidate

    # Only DMS uploads get the extra layout hint; other projects get no suffix.
    hint = (
        (
            ";DMS 送标/inbox 请使用批次根目录,且至少包含 images/train/*.jpg"
            "(或已标注的 images/+labels/、COCO annotations/)"
        )
        if ctx.project == "dms"
        else ""
    )
    raise UnknownFormatError(
        f"unable to detect format for project={ctx.project}, task={ctx.task}, "
        f"source={ctx.source_path}. supported={available_formats(ctx.project)}{hint}"
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def inspect_uploaded_dataset(
    project: str,
    task: str | None,
    source_path: str | Path,
) -> NormalizedDataset:
    """Detect the format of an uploaded dataset and return its inspection result.

    Args:
        project: Project identifier; limits which adapters are considered.
        task: Optional task name, passed to the adapter through the context.
        source_path: Location of the uploaded dataset. It is resolved to an
            absolute path before anything else happens.

    Returns:
        The object produced by the selected adapter's ``inspect``, with its
        ``format_id`` set to that adapter's ``format_id``.

    Raises:
        FileNotFoundError: The resolved source path does not exist.
        UnknownFormatError: Propagated from ``detect_adapter`` when no adapter
            accepts the dataset.
    """
    resolved_source = Path(source_path).resolve()
    ctx = IngestContext(project=project, task=task, source_path=resolved_source)
    if not ctx.source_path.exists():
        raise FileNotFoundError(f"source path not found: {ctx.source_path}")

    chosen = detect_adapter(ctx)
    dataset = chosen.inspect(ctx)
    # Overwrite whatever the adapter reported so the result always carries the
    # id of the adapter that actually produced it.
    dataset.format_id = chosen.format_id
    return dataset
|