feat: initial HSAP platform
Huaxu Sentinel Active Safety Platform with embedded algorithm code, Docker Compose setup, and vendored dataset scaffolds for clone-and-run. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
88
platform/as_platform/data/ingest/dms_coco.py
Normal file
88
platform/as_platform/data/ingest/dms_coco.py
Normal file
@@ -0,0 +1,88 @@
|
||||
"""DMS COCO-format adapter."""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from as_platform.data.ingest.base import IngestAdapter, IngestContext, NormalizedDataset
|
||||
|
||||
COCO_NAMES = ("instances_train.json", "instances_val.json", "instances_test.json", "annotations.json")
|
||||
|
||||
|
||||
def _read_json(path: Path) -> dict[str, Any] | None:
|
||||
try:
|
||||
return json.loads(path.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
return None
|
||||
|
||||
|
||||
class DmsCocoAdapter(IngestAdapter):
|
||||
format_id = "dms_coco"
|
||||
projects = ("dms",)
|
||||
|
||||
def _find_coco_files(self, root: Path) -> list[Path]:
|
||||
files: list[Path] = []
|
||||
for name in COCO_NAMES:
|
||||
p = root / "annotations" / name
|
||||
if p.is_file():
|
||||
files.append(p)
|
||||
for name in COCO_NAMES:
|
||||
p = root / name
|
||||
if p.is_file():
|
||||
files.append(p)
|
||||
return files
|
||||
|
||||
def can_handle(self, ctx: IngestContext) -> bool:
|
||||
root = ctx.source_path
|
||||
return len(self._find_coco_files(root)) > 0
|
||||
|
||||
def inspect(self, ctx: IngestContext) -> NormalizedDataset:
|
||||
root = ctx.source_path
|
||||
files = self._find_coco_files(root)
|
||||
split_counts = {"train": 0, "val": 0, "test": 0}
|
||||
ann_count = 0
|
||||
categories: set[str] = set()
|
||||
warnings: list[str] = []
|
||||
for f in files:
|
||||
data = _read_json(f)
|
||||
if not data:
|
||||
warnings.append(f"failed to parse {f.name}")
|
||||
continue
|
||||
images = data.get("images") or []
|
||||
anns = data.get("annotations") or []
|
||||
cats = data.get("categories") or []
|
||||
ann_count += len(anns)
|
||||
for c in cats:
|
||||
name = c.get("name")
|
||||
if isinstance(name, str):
|
||||
categories.add(name)
|
||||
lower = f.name.lower()
|
||||
if "train" in lower:
|
||||
split_counts["train"] += len(images)
|
||||
elif "val" in lower:
|
||||
split_counts["val"] += len(images)
|
||||
elif "test" in lower:
|
||||
split_counts["test"] += len(images)
|
||||
else:
|
||||
split_counts["train"] += len(images)
|
||||
|
||||
return NormalizedDataset(
|
||||
format_id=self.format_id,
|
||||
project=ctx.project,
|
||||
task=ctx.task,
|
||||
source_path=str(root),
|
||||
split_counts=split_counts,
|
||||
sample_count=sum(split_counts.values()),
|
||||
annotation_count=ann_count,
|
||||
artifacts=[self._artifact_name(root, f) for f in files],
|
||||
warnings=warnings,
|
||||
extra={"categories": sorted(categories)},
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _artifact_name(root: Path, path: Path) -> str:
|
||||
try:
|
||||
return str(path.relative_to(root))
|
||||
except ValueError:
|
||||
return path.name
|
||||
Reference in New Issue
Block a user