# platform/as_platform/data/ingest/dms_yolo.py

"""DMS YOLO-style dataset adapter."""
from __future__ import annotations

from pathlib import Path

from as_platform.data.ingest.base import IngestAdapter, IngestContext, NormalizedDataset

# Recognised image file suffixes. The upper-case entries are left in place so
# existing membership tests against this set keep working; `_count_images`
# itself compares the lower-cased suffix and therefore accepts any casing.
IMG_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".webp", ".JPG", ".JPEG", ".PNG"}


def _count_images(path: Path) -> int:
    """Return the number of image files found anywhere under *path*.

    A file counts as an image when its suffix, compared case-insensitively,
    is listed in ``IMG_EXTS`` (so ``.BMP`` or ``.Jpg`` are counted as well).
    The search is recursive.

    Args:
        path: Directory to scan.

    Returns:
        The number of matching regular files, or 0 when *path* is not an
        existing directory.
    """
    if not path.is_dir():
        return 0
    return sum(
        1
        for p in path.rglob("*")
        # Suffix test first: it is a pure string check, so the filesystem
        # stat behind is_file() only runs for image-looking names.
        if p.suffix.lower() in IMG_EXTS and p.is_file()
    )


def _count_txt(path: Path) -> int:
    """Return the number of regular ``*.txt`` files found anywhere under *path*.

    The search is recursive. A *path* that is not an existing directory
    yields 0.
    """
    total = 0
    if path.is_dir():
        for candidate in path.rglob("*.txt"):
            # rglob can also match directories whose name ends in ".txt".
            if candidate.is_file():
                total += 1
    return total


class DmsYoloAdapter(IngestAdapter):
    """Ingest adapter for datasets stored as ``images/`` and ``labels/`` folders.

    Both a flat layout and a layout with ``train``/``val``/``test``
    sub-folders under each of the two directories are handled.
    """

    format_id = "dms_yolo"
    projects = ("dms",)

    def can_handle(self, ctx: IngestContext) -> bool:
        """Tell whether ``ctx.source_path`` has the expected directory layout.

        Returns:
            True when ``images/`` and ``labels/`` both exist as directories,
            or when ``images/train`` and ``labels/train`` both do.
        """
        base = ctx.source_path
        images_dir = base / "images"
        labels_dir = base / "labels"
        if images_dir.is_dir() and labels_dir.is_dir():
            return True
        # A split layout already satisfies the check above; this second
        # probe is kept so the decision logic matches the original exactly.
        return (images_dir / "train").is_dir() and (labels_dir / "train").is_dir()

    def inspect(self, ctx: IngestContext) -> NormalizedDataset:
        """Count images and label files and summarise them.

        Images are counted per split under ``images/<split>`` and label
        ``.txt`` files under ``labels/<split>``. When no split folder holds
        any image (respectively label), everything under ``images/``
        (respectively ``labels/``) is counted as the train split.

        Returns:
            A ``NormalizedDataset`` carrying the per-split image counts, the
            total image and label counts, and a warning for each of "no train
            images" and "no train labels" that applies.
        """
        base = ctx.source_path
        images_dir = base / "images"
        labels_dir = base / "labels"
        splits = ("train", "val", "test")

        image_counts = {name: _count_images(images_dir / name) for name in splits}
        if not any(image_counts.values()):
            # fallback single-folder dataset
            image_counts["train"] = _count_images(images_dir)

        label_counts = {name: _count_txt(labels_dir / name) for name in splits}
        if not any(label_counts.values()):
            label_counts["train"] = _count_txt(labels_dir)

        notes: list[str] = []
        if not image_counts["train"]:
            notes.append("train split has no images")
        if not label_counts["train"]:
            notes.append("train split has no labels")

        return NormalizedDataset(
            format_id=self.format_id,
            project=ctx.project,
            task=ctx.task,
            source_path=str(base),
            split_counts=image_counts,
            sample_count=sum(image_counts.values()),
            annotation_count=sum(label_counts.values()),
            artifacts=["images/", "labels/"],
            warnings=notes,
        )
feat: initial HSAP platform Huaxu Sentinel Active Safety Platform with embedded algorithm code, Docker Compose setup, and vendored dataset scaffolds for clone-and-run. Co-authored-by: Cursor <cursoragent@cursor.com> 2026-05-25 16:59:59 +08:00			`"""DMS YOLO-style dataset adapter."""`
			`from __future__ import annotations`

			`from pathlib import Path`

			`from as_platform.data.ingest.base import IngestAdapter, IngestContext, NormalizedDataset`

			`IMG_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".webp", ".JPG", ".JPEG", ".PNG"}`


			`def _count_images(path: Path) -> int:`
			`if not path.is_dir():`
			`return 0`
			`return sum(1 for p in path.rglob("*") if p.is_file() and p.suffix in IMG_EXTS)`


			`def _count_txt(path: Path) -> int:`
			`if not path.is_dir():`
			`return 0`
			`return sum(1 for p in path.rglob("*.txt") if p.is_file())`


			`class DmsYoloAdapter(IngestAdapter):`
			`format_id = "dms_yolo"`
			`projects = ("dms",)`

			`def can_handle(self, ctx: IngestContext) -> bool:`
			`root = ctx.source_path`
			`return (`
			`(root / "images").is_dir()`
			`and (root / "labels").is_dir()`
			`) or (`
			`(root / "images" / "train").is_dir()`
			`and (root / "labels" / "train").is_dir()`
			`)`

			`def inspect(self, ctx: IngestContext) -> NormalizedDataset:`
			`root = ctx.source_path`
			`train_images = _count_images(root / "images" / "train")`
			`val_images = _count_images(root / "images" / "val")`
			`test_images = _count_images(root / "images" / "test")`
			`if train_images + val_images + test_images == 0:`
			`# fallback single-folder dataset`
			`train_images = _count_images(root / "images")`
			`train_labels = _count_txt(root / "labels" / "train")`
			`val_labels = _count_txt(root / "labels" / "val")`
			`test_labels = _count_txt(root / "labels" / "test")`
			`if train_labels + val_labels + test_labels == 0:`
			`train_labels = _count_txt(root / "labels")`

			`warnings: list[str] = []`
			`if train_images == 0:`
			`warnings.append("train split has no images")`
			`if train_labels == 0:`
			`warnings.append("train split has no labels")`

			`return NormalizedDataset(`
			`format_id=self.format_id,`
			`project=ctx.project,`
			`task=ctx.task,`
			`source_path=str(root),`
			`split_counts={"train": train_images, "val": val_images, "test": test_images},`
			`sample_count=train_images + val_images + test_images,`
			`annotation_count=train_labels + val_labels + test_labels,`
			`artifacts=["images/", "labels/"],`
			`warnings=warnings,`
			`)`