Files
HSAP/datasets/lane.embedded.bak/scripts/rename_ufld_dataset.py
Chengfang Lu e72bc061c5 feat: HSAP platform v2 — modular navigation, quality review, audit log, world model simulation
Major changes:
- New frontend (platform/web/): Vite + React 18 + TypeScript + Tailwind
- 4-module navigation: 数据送标 / 模型管理 / 车队管理 / 系统管理
- Data catalog with charts (DMS/ADAS/Lane 3-tab view)
- Quality review workflow (标注质检): Good/Fine/Bad scoring with auto-advance
- Audit enhancements: batch operations, rejection categories, Feishu notifications
- Operation audit log (操作日志)
- World model simulation studio (仿真工坊)
- Dataset version management with snapshots and diff
- ADAS 7-class dataset integration (138K images organized + compressed)
- User management with Feishu integration and pagination
- CRUD/search/filter on all pages, card layout redesign
- PIL-optimized image overlay rendering
- Auto-snapshot on build, in_review workflow stage
- Removed embedded algorithm code (now in workspace)
2026-06-03 11:40:21 +08:00

326 lines
10 KiB
Python

#!/usr/bin/env python3
"""
Rename lane0_copy/UFLD assets to a clearer layout and refresh index files.

Conventions
-----------
- Top-level sources: src_<type>_<device>_<YYYYMMDD> (seg_label/ mirrors the tree)
- Clips: clip_XX, scene_XX, unit_XX, driver_XXX_30fps, video_<id>
- Frames: frame_XXXXXX.jpg / .png (strip legacy _new suffix)
- Camera frames: frame_cam_<id>, frame_ts_<timestamp>

Usage:
    python3 rename_ufld_dataset.py --dry-run   # default when no flag is given
    python3 rename_ufld_dataset.py --apply
"""
from __future__ import annotations
import argparse
import json
import os
import re
import shutil
from collections import defaultdict
from pathlib import Path
# Absolute location of this script; everything else is resolved relative to it.
_THIS_FILE = Path(__file__).resolve()
SCRIPT_DIR = _THIS_FILE.parent
# Default dataset root: the "UFLD" folder that sits next to the scripts folder.
UFLD_ROOT = SCRIPT_DIR.parent.joinpath("UFLD")
# Legacy directory name -> normalised "src_*" name.
# transform_dir_component() consults this table before any pattern rule, and
# main() copies it verbatim into rename_manifest.json, so entry order is kept.
TOP_LEVEL_MAP: dict[str, str] = {
    "100HF": "src_freeway_100hf_day",
    "60HF_night": "src_freeway_60hf_night",
    "crv_lane": "src_vehicle_crv_lane",
    "culane_data": "src_culane",
    "dvr_0422_zxc": "src_dvr_zxc_20250422",
    "dvr_0424_zxc": "src_dvr_zxc_20250424",
    "dvr_0425_buick": "src_dvr_buick_20250425",
    "dvr_0503_buick": "src_dvr_buick_20250503",
    "jiqing_highway": "src_road_jiqing",
    "pic_0507_zk282": "src_cam_zk282_20250507",
    "pic_0511_zk282": "src_cam_zk282_20250511",
    "pic_0514_zk282": "src_cam_zk282_20250514",
    "pic_0613_zk282": "src_cam_zk282_20250613",
    "pic_0620_zxc": "src_cam_zxc_20250620",
    "pic_0624_zxc": "src_cam_zxc_20250624",
    "pic_0628_zxc": "src_cam_zxc_20250628",
    "pic_1009_zk282_front30dig": "src_cam_zk282_20241009_front30deg",
    "pic_1209_zk282": "src_cam_zk282_20241209",
    "pic_250211_zk282": "src_cam_zk282_20250211",
    "pic_250515_zk425": "src_cam_zk425_20250515",
    "pic_250609_zk425": "src_cam_zk425_20250609",
    "shaoyang_data": "src_road_shaoyang",
    "vil": "src_vil",
}
# Index/label list files that live directly in the dataset root. Their
# contents are rewritten by update_index_files(); the files themselves are
# never renamed.
INDEX_FILES = [
    "train_val_gt.txt",
    "test_gt.txt",
    "test.txt",
    "test.json",
    "train_val.json",
    "test_label.json",
]

# Base names that must keep their name wherever they appear. This is the same
# collection as INDEX_FILES, kept as a set for O(1) membership tests.
SKIP_BASENAMES = set(INDEX_FILES)
def transform_dir_component(name: str) -> str:
    """Return the normalised name for one directory component.

    The explicit TOP_LEVEL_MAP table is consulted first; after that the
    pattern rules below are tried in order and the first one that matches
    decides the result. A name that matches nothing is returned unchanged.
    """
    explicit = TOP_LEVEL_MAP.get(name)
    if explicit is not None:
        return explicit

    # sceneN -> scene_NN
    hit = re.match(r"^scene(\d+)$", name, re.I)
    if hit:
        return "scene_{:02d}".format(int(hit.group(1)))

    # dvr_N -> unit_NN
    hit = re.match(r"^dvr_(\d+)$", name, re.I)
    if hit:
        return "unit_{:02d}".format(int(hit.group(1)))

    # Bare number -> clip_NN (":02d" only pads, so larger numbers pass through).
    hit = re.match(r"^(\d+)$", name)
    if hit:
        return "clip_{:02d}".format(int(hit.group(1)))

    # driver_N_30frame -> driver_NNN_30fps
    hit = re.match(r"^driver_(\d+)_30frame$", name, re.I)
    if hit:
        return "driver_{:03d}_30fps".format(int(hit.group(1)))

    # Directories named after a video file: drop the extension, add a prefix.
    if name.upper().endswith(".MP4"):
        stem = name[: -len(".MP4")]
        return "video_" + stem

    # <seq>_Road<road>_Trim<trim>_frames -> road_<road>_trim_TTT_seq_SS
    hit = re.match(r"^(\d+)_Road(\d+)_Trim(\d+)_frames$", name, re.I)
    if hit:
        seq, road, trim = hit.groups()
        return "road_{}_trim_{:03d}_seq_{:02d}".format(road, int(trim), int(seq))

    if name == "image_curve":
        return "curve"

    # Every highway_<n> directory collapses onto the same name.
    if re.match(r"^highway_\d+$", name):
        return "highway"

    # img_<a>_<stream>_batch<b> -> batch_BB_stream<stream>
    hit = re.match(r"^img_(\d+)_(\d+)_batch(\d+)$", name, re.I)
    if hit:
        _, stream, batch = hit.groups()
        return "batch_{:02d}_stream{}".format(int(batch), int(stream))

    # pic_<a>_<tag>_batch<b> -> batch_BB_<tag>
    hit = re.match(r"^pic_(\d+)_([a-z]+)_batch(\d+)$", name, re.I)
    if hit:
        _, tag, batch = hit.groups()
        return "batch_{:02d}_{}".format(int(batch), tag)

    # Fallback: any other name that contains "batch<digits>" somewhere.
    hit = re.search(r"batch(\d+)", name, re.I)
    if hit:
        return "batch_{:02d}".format(int(hit.group(1)))

    return name
def transform_filename(name: str) -> str:
    """Return the normalised file name for *name* (the last path component).

    Names listed in SKIP_BASENAMES are returned untouched. Otherwise the
    extension is split off, a legacy ``_new`` suffix is removed from the stem
    and the stem is rewritten by the first matching rule:

    - all digits                      -> ``frame_NNNNNN`` (zero-padded to 6)
    - ``camera_msg_<id>``             -> ``frame_cam_<id>``
    - ``camera_front_6mm_<id>``       -> ``frame_cam_<id>``
    - ``camera_<ts>`` (1+ underscores) -> ``frame_ts_<ts>``
    - ``frame_<n>``                   -> ``frame_NNNNNN`` (zero-padded to 6)

    Anything else keeps its stem (minus ``_new``) and extension.

    ``*.lines.txt`` label files are treated as having the compound extension
    ``.lines.txt`` so they receive exactly the same stem as the image they
    belong to, e.g. ``00010.jpg`` -> ``frame_000010.jpg`` and
    ``00010.lines.txt`` -> ``frame_000010.lines.txt``.
    """
    if name in SKIP_BASENAMES:
        return name

    # os.path.splitext() only ever returns the final ".txt", so the compound
    # label suffix has to be peeled off explicitly; otherwise the stem would
    # be "00010.lines" and no rule below could match it.
    label_suffix = ".lines.txt"
    if name.endswith(label_suffix):
        base, ext = name[: -len(label_suffix)], label_suffix
    else:
        base, ext = os.path.splitext(name)

    # Strip the legacy marker, but never reduce the stem to an empty string.
    if base.endswith("_new") and base != "_new":
        base = base[: -len("_new")]

    m = re.match(r"^(\d+)$", base)
    if m:
        return f"frame_{int(m.group(1)):06d}{ext}"
    m = re.match(r"^camera_msg_(\d+)$", base, re.I)
    if m:
        return f"frame_cam_{m.group(1)}{ext}"
    m = re.match(r"^camera_front_6mm_(\d+)$", base, re.I)
    if m:
        return f"frame_cam_{m.group(1)}{ext}"
    m = re.match(r"^camera_+(\d+)$", base, re.I)
    if m:
        return f"frame_ts_{m.group(1)}{ext}"
    # "frame_<a>_<b>" deliberately falls through to the final return: it does
    # not match the single-number rule below and is kept as is.
    m = re.match(r"^frame_(\d+)$", base, re.I)
    if m:
        return f"frame_{int(m.group(1)):06d}{ext}"
    return f"{base}{ext}"
def transform_rel_path(rel: str) -> str:
    """Map a dataset-relative path onto the new naming layout.

    Backslashes are normalised to ``/`` and leading slashes are dropped. A
    leading ``seg_label`` component is kept verbatim. Every remaining
    component except the last is rewritten with transform_dir_component();
    the last component is always treated as a file name and rewritten with
    transform_filename().

    Args:
        rel: Path relative to the dataset root, with ``/`` or ``\\``
            separators.

    Returns:
        The transformed relative path using ``/`` separators, or the
        (normalised) input unchanged when it is empty.
    """
    # Normalise separators first so that a leading backslash is stripped too.
    rel = rel.replace("\\", "/").lstrip("/")
    if not rel:
        return rel

    parts = rel.split("/")
    out: list[str] = []
    if parts[0] == "seg_label":
        out.append("seg_label")
        parts = parts[1:]

    if parts:
        # A path with a single component is a file sitting directly in the
        # root (or directly under seg_label/); it must not go through the
        # directory rules, which would e.g. turn "x.MP4" into "video_x".
        out.extend(transform_dir_component(comp) for comp in parts[:-1])
        out.append(transform_filename(parts[-1]))
    return "/".join(out)
def collect_file_mappings(root: Path) -> dict[str, str]:
    """Walk *root* and collect ``old relative path -> new relative path``.

    Files whose base name is in SKIP_BASENAMES are ignored, and so is every
    file whose transformed path equals its current path. All paths in the
    result use ``/`` as separator.
    """
    renames: dict[str, str] = {}
    for current_dir, _subdirs, filenames in os.walk(root):
        prefix = os.path.relpath(current_dir, root)
        # relpath() reports the root itself as "."; treat that as "no prefix".
        prefix = "" if prefix == "." else prefix
        for filename in filenames:
            if filename in SKIP_BASENAMES:
                continue
            source = f"{prefix}/{filename}" if prefix else filename
            source = source.replace("\\", "/")
            target = transform_rel_path(source)
            if target == source:
                continue
            renames[source] = target
    return renames
def apply_renames(root: Path, mapping: dict[str, str], dry_run: bool) -> tuple[int, int]:
    """Move every mapped file below *root* to its new relative path.

    Entries are processed deepest path first (ties broken alphabetically).
    An entry whose source is not a regular file is skipped silently. If the
    target already exists and is a different file, a ``COLLISION`` line is
    printed and the entry is counted as an error. With ``dry_run`` nothing is
    touched on disk, but the counters are computed the same way.

    Returns:
        ``(ok, err)``: the number of files (that would be) renamed and the
        number of collisions.
    """
    renamed = 0
    failed = 0
    deepest_first = sorted(mapping, key=lambda rel: (-rel.count("/"), rel))
    for source_rel in deepest_first:
        target_rel = mapping[source_rel]
        source = root / source_rel
        target = root / target_rel
        if not source.is_file():
            continue
        # An existing target is only acceptable when it is the source itself.
        clash = target.exists() and target.resolve() != source.resolve()
        if clash:
            print(f"COLLISION: {source_rel} -> {target_rel} (target exists)")
            failed += 1
            continue
        if not dry_run:
            target.parent.mkdir(parents=True, exist_ok=True)
            os.rename(source, target)
        renamed += 1
    return renamed, failed
def prune_empty_dirs(root: Path, dry_run: bool) -> int:
    """Remove directories below *root* that are empty or become empty.

    The tree is walked bottom-up. A directory is pruned when it holds no
    files and every one of its sub-directories has itself been pruned, so a
    whole chain of nested empty directories is removed in a single call.
    *root* itself is never removed.

    Args:
        root: Directory whose descendants are examined.
        dry_run: When true, nothing is deleted; the return value is the
            number of directories that would be removed.

    Returns:
        The number of directories removed (or that would be removed).
    """
    removed = 0
    # With topdown=False os.walk() captures each directory's `dirs` list
    # before visiting the children, so it still names sub-directories that
    # were deleted a moment ago. Track what was pruned instead of trusting it.
    pruned: set[Path] = set()
    for dirpath, dirs, files in os.walk(root, topdown=False):
        p = Path(dirpath)
        if p == root or files:
            continue
        if any((p / d) not in pruned for d in dirs):
            continue
        if not dry_run:
            try:
                p.rmdir()
            except OSError:
                # Best effort: the directory may have been repopulated or be
                # protected; leave it (and therefore its parents) in place.
                continue
        pruned.add(p)
        removed += 1
    return removed
def replace_in_line(line: str, mapping: dict[str, str]) -> str:
    """Rewrite every old path found in *line* to its new path.

    Mapping entries are applied longest old path first. For each entry:

    1. every occurrence of ``"/" + old`` becomes ``"/" + new``;
    2. an *old* that starts the text and is followed by a space or a tab is
       replaced;
    3. an *old* that is the whole text, or that starts the text and is
       followed by a newline, is replaced.
    """
    result = line
    longest_first = sorted(mapping.items(), key=lambda pair: -len(pair[0]))
    for source, target in longest_first:
        result = result.replace("/" + source, "/" + target)
        # Paths written without a leading slash are only recognised at the
        # very start of the text.
        if result.startswith((source + " ", source + "\t")):
            result = target + result[len(source):]
        if result == source or result.startswith(source + "\n"):
            result = target + result[len(source):]
    return result
def update_index_files(root: Path, mapping: dict[str, str], dry_run: bool) -> None:
    """Rewrite the paths inside the index files found directly in *root*.

    For every name in INDEX_FILES that exists, the old paths are replaced via
    replace_in_line(): ``.json`` files are processed as one piece of text, all
    other files line by line. Before the first rewrite a ``<name>.bak`` copy
    is created; an existing backup is never overwritten.

    Args:
        root: Dataset root containing the index files.
        mapping: ``old relative path -> new relative path``.
        dry_run: When true the function does nothing.
    """
    if dry_run:
        return

    # replace_in_line() already handles both the "/old" and the bare "old"
    # spelling of each entry, so the mapping is passed through as is; adding
    # slash-prefixed duplicates would only double the number of passes.
    # NOTE(review): a relative path inside a JSON string (preceded by a quote
    # rather than "/") is not rewritten by replace_in_line() -- confirm that
    # the JSON index files store paths with a leading slash.
    for name in INDEX_FILES:
        path = root / name
        if not path.is_file():
            continue
        text = path.read_text(encoding="utf-8", errors="replace")
        if name.endswith(".json"):
            new_text = replace_in_line(text, mapping)
        else:
            new_text = "".join(
                replace_in_line(ln, mapping) for ln in text.splitlines(keepends=True)
            )
        backup = path.with_suffix(path.suffix + ".bak")
        if not backup.exists():
            shutil.copy2(path, backup)
        path.write_text(new_text, encoding="utf-8")
def check_collisions(mapping: dict[str, str]) -> list[str]:
    """Describe every target path that more than one source maps to.

    Returns one ``"<new> <= [<old>, ...]"`` string per contested target, in
    the order in which the targets first appear in *mapping*.
    """
    sources_by_target: dict[str, list[str]] = {}
    for source, target in mapping.items():
        sources_by_target.setdefault(target, []).append(source)

    report: list[str] = []
    for target, sources in sources_by_target.items():
        if len(sources) > 1:
            report.append(f"{target} <= {sources}")
    return report
def main() -> None:
    """Command-line entry point: plan the renames and optionally apply them.

    Without ``--apply`` the run is a dry run: the mapping is computed,
    collisions are reported and a few sample renames are printed, but nothing
    on disk changes. With ``--apply`` the files are moved, emptied directories
    are pruned, the index files are rewritten and ``rename_manifest.json`` is
    written into the dataset root.

    ``--dry-run`` and ``--apply`` are mutually exclusive; argparse rejects a
    command line that contains both.

    Raises:
        SystemExit: when ``--apply`` is requested while several sources map
            to the same target.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--root", type=Path, default=UFLD_ROOT)
    # The two modes must not be combined: a dry-run rename pass followed by a
    # real index rewrite would leave the index pointing at paths that do not
    # exist.
    mode = ap.add_mutually_exclusive_group()
    mode.add_argument("--dry-run", action="store_true")
    mode.add_argument("--apply", action="store_true")
    args = ap.parse_args()
    dry_run = not args.apply  # dry run is the default when no flag is given

    root = args.root.resolve()
    print(f"Root: {root}")
    mapping = collect_file_mappings(root)
    print(f"File path mappings: {len(mapping)}")

    collisions = check_collisions(mapping)
    if collisions:
        print(f"WARNING: {len(collisions)} target collisions (showing 20)")
        for c in collisions[:20]:
            print("  ", c)
        if not dry_run:
            raise SystemExit("Abort: fix collisions before apply")

    ok, err = apply_renames(root, mapping, dry_run=dry_run)
    print(f"Renames: ok={ok} err={err} dry_run={dry_run}")

    if args.apply:
        empty = prune_empty_dirs(root, dry_run=False)
        print(f"Removed {empty} empty directories")

        # Only rewrite index entries for renames that actually happened;
        # entries that collided or were skipped keep their old, still valid
        # path in the index files.
        applied = mapping
        if ok != len(mapping):
            applied = {
                old: new
                for old, new in mapping.items()
                if (root / new).is_file() and not (root / old).is_file()
            }
            print(
                f"WARNING: {len(mapping) - len(applied)} mappings were not applied; "
                "their index entries are left unchanged"
            )
        update_index_files(root, applied, dry_run=False)

        meta = {
            "root": str(root),
            "files_renamed": ok,
            "mapping_count": len(mapping),
            "top_level_map": TOP_LEVEL_MAP,
        }
        (root / "rename_manifest.json").write_text(
            json.dumps(
                {"meta": meta, "sample": dict(list(mapping.items())[:50])},
                indent=2,
                ensure_ascii=False,
            ),
            encoding="utf-8",
        )
        print("Updated index files (backups: *.bak)")
    else:
        samples = list(mapping.items())[:8]
        for a, b in samples:
            print(f"  {a}\n    -> {b}")


if __name__ == "__main__":
    main()