839 lines
33 KiB
Python
Executable File
839 lines
33 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Decode 61-frame windows around issue frame ids for D4Q2 network-share cases."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import io
|
|
import json
|
|
import re
|
|
import sys
|
|
from collections import Counter, deque
|
|
from dataclasses import dataclass
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import Any, Iterable, Optional
|
|
|
|
import cv2
|
|
import numpy as np
|
|
|
|
# Absolute location of this script and the directory two levels above the
# script's own directory, which the default input path below is built from.
FILE = Path(__file__).resolve()
ROOT = FILE.parents[2]

# Put ROOT on the import path (once) so the ``tools.*`` import below resolves
# when this file is executed directly as a script.
_root_entry = str(ROOT)
if _root_entry not in sys.path:
    sys.path.append(_root_entry)
|
|
|
|
from tools.model_inference.adapters.video_dir_inference_utils import (
|
|
get_video_frame_info,
|
|
read_video_frame_index,
|
|
)
|
|
|
|
# pdcl_pyclip is an optional dependency: when it cannot be imported the three
# names are bound to None, and iter_mcap_frames() raises ImportError on use.
try:
    from pdcl_pyclip.decoder_struct import StructDecoder
    from pdcl_pyclip.msg_camera import VideoMessage
    from pdcl_pyclip.reader import ClipReader
except ImportError:
    ClipReader = StructDecoder = VideoMessage = None
|
|
|
|
|
|
# Substrings of an issue's data address that mark it as a network-share issue.
NETWORK_SHARE_MARKERS = ("hfs.minieye.tech", "192.168.2.122")

# Default number of frames kept on each side of the target frame, and the
# resulting total window length (target frame included).
WINDOW_RADIUS = 30
WINDOW_SIZE = 2 * WINDOW_RADIUS + 1

# Default locations used by the command-line interface.
DEFAULT_INPUT_JSON = ROOT.joinpath("tools", "feishu_project", "dongying_d4q2_zhibao_issue_list.json")
DEFAULT_DOWNLOAD_ROOT = Path("/data1/dongying/Mono3d/D4Q2/feishu_project/downloaded_issue_data")
DEFAULT_OUTPUT_ROOT = Path("/data1/dongying/Mono3d/D4Q2/feishu_project/decoded_issue_frame_windows")

# Patterns used to parse the free-text frame-id field of an issue.
FRAME_ID_CAMERA4_RE = re.compile(r"camera4\s*:\s*(\d+)", flags=re.IGNORECASE)
FRAME_ID_ANY_CAMERA_RE = re.compile(r"(camera\d+)\s*:\s*(\d+)", flags=re.IGNORECASE)
PURE_DIGIT_RE = re.compile(r"^\d+$")
|
|
|
|
|
|
@dataclass(frozen=True)
class TargetFrame:
    """Immutable frame selector parsed from an issue's frame-id text."""

    # Camera selector: a lowercase "cameraN" name, or "any" when the text was
    # a bare number (see parse_target_frame).
    camera: str
    # Numeric frame id to locate in the recorded data.
    frame_id: int
    # The stripped text the selector was parsed from.
    raw_text: str
|
|
|
|
|
|
@dataclass(frozen=True)
class CaseSource:
    """Immutable description of one discovered case and the files to decode it from."""

    # Identity of the issue the case belongs to.
    issue_id: int
    issue_name: str

    # Directory layout: <download_root>/issue_<id>/path_*/<case_dir>.
    issue_dir: Path
    path_dir: Path
    case_dir: Path
    # case_dir expressed relative to the download root; reused below the output root.
    relative_case_dir: Path

    # Which frame (and camera) the issue points at.
    target_frame: TargetFrame
    # True when every frame should be decoded instead of a window around the target.
    decode_all: bool

    # "mcap_stream" or "camera4_bin", together with the files backing that mode.
    source_mode: str
    source_paths: tuple[Path, ...]
|
|
|
|
|
|
@dataclass
class CaseResult:
    """Outcome of discovering or processing one case, as recorded in the manifest."""

    # Identity and location of the case (paths are stored as strings).
    issue_id: int
    issue_name: str
    case_dir: str
    relative_case_dir: str

    # What was requested: camera selector, frame id, and "window" or "all".
    target_camera: str
    target_frame_id: int | None
    decode_mode: str

    # Where the frames came from and where they were written.
    source_mode: str | None
    source_paths: list[str]
    output_dir: str

    # Status code plus a human-readable explanation.
    status: str
    detail: str

    # Details of the match; left at their defaults when not applicable.
    matched_field: str | None = None
    matched_frame_idx: int | None = None
    matched_topic: str | None = None
    extracted_count: int = 0

    def to_dict(self) -> dict[str, Any]:
        """Return the result as a plain dict, keys in field-declaration order."""
        exported = (
            "issue_id",
            "issue_name",
            "case_dir",
            "relative_case_dir",
            "target_camera",
            "target_frame_id",
            "decode_mode",
            "source_mode",
            "source_paths",
            "output_dir",
            "status",
            "detail",
            "matched_field",
            "matched_frame_idx",
            "matched_topic",
            "extracted_count",
        )
        return {key: getattr(self, key) for key in exported}
|
|
|
|
|
|
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the decoder script.

    Returns:
        The namespace produced by ``argparse`` for the current ``sys.argv``.
    """
    parser = argparse.ArgumentParser(
        description="Decode 61-frame windows around issue frame ids for downloaded D4Q2 network-share cases."
    )
    add = parser.add_argument

    # Path-like options; defaults are stored as strings.
    path_defaults = (
        ("--input-json", DEFAULT_INPUT_JSON),
        ("--download-root", DEFAULT_DOWNLOAD_ROOT),
        ("--output-root", DEFAULT_OUTPUT_ROOT),
    )
    for flag, default_path in path_defaults:
        add(flag, default=str(default_path))
    add("--manifest-path", default="")

    # Repeatable issue-id selectors; each stays None when never given.
    repeatable_ids = (
        ("--issue-id", "issue_ids"),
        ("--decode-all-issue-id", "decode_all_issue_ids"),
    )
    for flag, dest in repeatable_ids:
        add(flag, action="append", dest=dest, type=int)

    add("--window-radius", type=int, default=WINDOW_RADIUS)
    add("--jpg-quality", type=int, default=95)

    for flag in ("--dry-run", "--skip-existing"):
        add(flag, action="store_true")

    return parser.parse_args()
|
|
|
|
|
|
def ensure_dir(path: Path, dry_run: bool) -> None:
    """Create ``path`` (and any missing parents) unless this is a dry run.

    Args:
        path: Directory to create; an already existing directory is accepted.
        dry_run: When true, nothing is touched on disk.
    """
    if not dry_run:
        path.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
def load_issue_items(path: Path) -> list[dict[str, Any]]:
    """Read the issue-list JSON file and return the value of its ``"items"`` key.

    Args:
        path: UTF-8 encoded JSON file whose top level is an object with an ``"items"`` entry.

    Raises:
        KeyError: If the document has no ``"items"`` key.
    """
    with path.open("r", encoding="utf-8") as handle:
        document = json.load(handle)
    return document["items"]
|
|
|
|
|
|
def parse_target_frame(frame_text: object) -> TargetFrame | None:
    """Parse an issue's frame-id field into a ``TargetFrame``.

    Patterns are tried in this order:

    1. a ``camera4: <digits>`` entry anywhere in the text (preferred even when
       other cameras are listed as well);
    2. text consisting only of digits, which yields the selector ``"any"``;
    3. the first ``cameraN: <digits>`` entry of any camera.

    Args:
        frame_text: Raw field value; converted with ``str()`` and stripped.

    Returns:
        The parsed selector, or ``None`` when the field is ``None``, blank, or
        matches none of the patterns.
    """
    if frame_text is None:
        return None
    text = str(frame_text).strip()
    if not text:
        return None

    preferred = FRAME_ID_CAMERA4_RE.search(text)
    if preferred is not None:
        return TargetFrame(camera="camera4", frame_id=int(preferred.group(1)), raw_text=text)

    if PURE_DIGIT_RE.fullmatch(text) is not None:
        return TargetFrame(camera="any", frame_id=int(text), raw_text=text)

    generic = FRAME_ID_ANY_CAMERA_RE.search(text)
    if generic is None:
        return None
    camera_name, frame_digits = generic.group(1), generic.group(2)
    return TargetFrame(camera=camera_name.lower(), frame_id=int(frame_digits), raw_text=text)
|
|
|
|
|
|
def is_network_share_issue(item: dict[str, Any]) -> bool:
    """Tell whether the issue's data address contains a network-share marker.

    Args:
        item: Issue record; the address is read from its ``"问题数据地址"`` key and
            a missing or empty value is treated as an empty string.
    """
    address = str(item.get("问题数据地址") or "")
    for marker in NETWORK_SHARE_MARKERS:
        if marker in address:
            return True
    return False
|
|
|
|
|
|
def find_candidate_mcaps(case_dir: Path) -> list[Path]:
    """Return the sorted MCAP files of a case, excluding ``_PB_`` files.

    Files directly inside ``case_dir`` take precedence; the directory tree is
    searched recursively only when no top-level file qualifies. The ``.mcap``
    extension is matched case-insensitively in both passes (previously the
    recursive pass was case-sensitive and missed e.g. nested ``.MCAP`` files).

    Args:
        case_dir: Case directory to inspect.

    Returns:
        Sorted list of matching files; empty when there are none.
    """

    def is_candidate(path: Path) -> bool:
        # Cheap name checks first; is_file() needs a filesystem call.
        return path.suffix.lower() == ".mcap" and "_PB_" not in path.name and path.is_file()

    root_level = sorted(path for path in case_dir.iterdir() if is_candidate(path))
    if root_level:
        return root_level
    return sorted(path for path in case_dir.rglob("*") if is_candidate(path))
|
|
|
|
|
|
def find_camera4_bin(case_dir: Path) -> Path | None:
    """Locate the shallowest ``camera4.bin`` below ``case_dir``.

    Candidates are ranked by nesting depth relative to ``case_dir`` and then
    by their path string.

    Returns:
        The best-ranked path, or ``None`` when no ``camera4.bin`` exists.
    """

    def rank(candidate: Path) -> tuple[int, str]:
        depth = len(candidate.relative_to(case_dir).parts)
        return depth, str(candidate)

    return min(case_dir.rglob("camera4.bin"), key=rank, default=None)
|
|
|
|
|
|
def _skipped_case_result(
    issue_id: int,
    issue_name: str,
    *,
    status: str,
    detail: str,
    decode_mode: str,
    target_frame: TargetFrame | None = None,
    case_dir: str = "",
    relative_case_dir: str = "",
) -> CaseResult:
    """Build the ``CaseResult`` recorded for a case that is skipped during discovery."""
    return CaseResult(
        issue_id=issue_id,
        issue_name=issue_name,
        case_dir=case_dir,
        relative_case_dir=relative_case_dir,
        target_camera="" if target_frame is None else target_frame.camera,
        target_frame_id=None if target_frame is None else target_frame.frame_id,
        decode_mode=decode_mode,
        source_mode=None,
        source_paths=[],
        output_dir="",
        status=status,
        detail=detail,
    )


def discover_case_sources(
    items: list[dict[str, Any]],
    download_root: Path,
    issue_filter: set[int] | None,
    decode_all_issue_filter: set[int],
) -> tuple[list[CaseSource], list[CaseResult]]:
    """Walk the download tree and collect decodable cases for network-share issues.

    The expected layout is ``<download_root>/issue_<id>/path_*/<case_dir>/``.
    For every case directory, non-PB MCAP files are preferred; ``camera4.bin``
    is used only when no MCAP file is found.

    Args:
        items: Issue records; ``"id"``, ``"name"`` and ``"问题发生frameid"`` are read.
        download_root: Root directory holding the ``issue_<id>`` folders.
        issue_filter: When non-empty, only these issue ids are considered.
        decode_all_issue_filter: Issue ids whose cases are decoded in full
            instead of as a window.

    Returns:
        ``(discovered, skipped)``: the cases ready for processing and one
        ``CaseResult`` for each issue, path directory or case that was skipped.
        Issues excluded by ``issue_filter`` or that are not network-share
        issues appear in neither list.
    """
    discovered: list[CaseSource] = []
    skipped: list[CaseResult] = []

    for item in items:
        issue_id = int(item["id"])
        if issue_filter and issue_id not in issue_filter:
            continue
        if not is_network_share_issue(item):
            continue

        issue_name = str(item["name"])
        issue_dir = download_root / f"issue_{issue_id}"
        raw_frame_text = item.get("问题发生frameid")
        target_frame = parse_target_frame(raw_frame_text)
        if target_frame is None:
            # This status is always reported with decode mode "window".
            skipped.append(
                _skipped_case_result(
                    issue_id,
                    issue_name,
                    status="skipped_missing_frame_id",
                    detail=f"unparseable frame id: {raw_frame_text!r}",
                    decode_mode="window",
                )
            )
            continue

        decode_all = issue_id in decode_all_issue_filter
        # Keyword arguments shared by every remaining skip record of this issue.
        common = {
            "decode_mode": "all" if decode_all else "window",
            "target_frame": target_frame,
        }

        if target_frame.camera not in {"camera4", "camera1", "any"}:
            skipped.append(
                _skipped_case_result(
                    issue_id,
                    issue_name,
                    status="skipped_unsupported_camera",
                    detail=f"unsupported camera selector in frame id: {target_frame.raw_text}",
                    **common,
                )
            )
            continue

        if not issue_dir.is_dir():
            skipped.append(
                _skipped_case_result(
                    issue_id,
                    issue_name,
                    status="skipped_missing_issue_dir",
                    detail=f"issue download directory not found: {issue_dir}",
                    case_dir=str(issue_dir),
                    relative_case_dir=str(issue_dir.name),
                    **common,
                )
            )
            continue

        path_dirs = sorted(
            path for path in issue_dir.iterdir() if path.is_dir() and path.name.startswith("path_")
        )
        if not path_dirs:
            skipped.append(
                _skipped_case_result(
                    issue_id,
                    issue_name,
                    status="skipped_no_path_cases",
                    detail="no path_* directories found for network-share issue",
                    case_dir=str(issue_dir),
                    relative_case_dir=str(issue_dir.name),
                    **common,
                )
            )
            continue

        for path_dir in path_dirs:
            case_dirs = sorted(path for path in path_dir.iterdir() if path.is_dir())
            if not case_dirs:
                skipped.append(
                    _skipped_case_result(
                        issue_id,
                        issue_name,
                        status="skipped_empty_path_dir",
                        detail="path_* directory does not contain any case subdirectory",
                        case_dir=str(path_dir),
                        relative_case_dir=str(path_dir.relative_to(download_root)),
                        **common,
                    )
                )
                continue

            for case_dir in case_dirs:
                relative_case_dir = case_dir.relative_to(download_root)

                # Prefer MCAP streams; fall back to camera4.bin only when none exist.
                candidate_mcaps = find_candidate_mcaps(case_dir)
                if candidate_mcaps:
                    source_mode = "mcap_stream"
                    source_paths: tuple[Path, ...] = tuple(candidate_mcaps)
                else:
                    camera4_bin = find_camera4_bin(case_dir)
                    if camera4_bin is None:
                        skipped.append(
                            _skipped_case_result(
                                issue_id,
                                issue_name,
                                status="skipped_no_source",
                                detail="no non-PB .mcap or camera4.bin found under case directory",
                                case_dir=str(case_dir),
                                relative_case_dir=str(relative_case_dir),
                                **common,
                            )
                        )
                        continue
                    source_mode = "camera4_bin"
                    source_paths = (camera4_bin,)

                discovered.append(
                    CaseSource(
                        issue_id=issue_id,
                        issue_name=issue_name,
                        issue_dir=issue_dir,
                        path_dir=path_dir,
                        case_dir=case_dir,
                        relative_case_dir=relative_case_dir,
                        target_frame=target_frame,
                        decode_all=decode_all,
                        source_mode=source_mode,
                        source_paths=source_paths,
                    )
                )

    return discovered, skipped
|
|
|
|
|
|
def _plane_to_ndarray(plane) -> np.ndarray:
|
|
stride = plane.line_size
|
|
height = plane.height
|
|
width = plane.width
|
|
array = np.frombuffer(plane, dtype=np.uint8)
|
|
if stride == width:
|
|
return array.reshape(height, width)
|
|
return array.reshape(height, stride)[:, :width]
|
|
|
|
|
|
def _h265_payload_to_bgr(payload: bytes) -> np.ndarray:
    """Decode the first video frame contained in ``payload`` into a BGR image.

    The payload is opened with PyAV from memory. The first three planes of
    the decoded frame are repacked into an NV12 buffer (Y plane on top,
    interleaved U/V rows below) and converted with OpenCV.

    NOTE(review): this assumes three 8-bit planes with U and V at half the
    width of Y (4:2:0 planar); other pixel formats are not handled — confirm
    against the recorded streams.

    Args:
        payload: Encoded video bytes for one message.

    Returns:
        The decoded image in BGR channel order.

    Raises:
        ImportError: If PyAV is not installed.
        ValueError: If the payload yields no video frame.
    """
    try:
        import av
    except ImportError as exc:
        raise ImportError("PyAV is required for MCAP frame decoding.") from exc

    # The context manager closes the container on every exit path; it was
    # previously left open, leaking one demuxer per decoded frame.
    with av.open(io.BytesIO(payload)) as container:
        for frame in container.decode(video=0):
            y_plane = _plane_to_ndarray(frame.planes[0])
            u_plane = _plane_to_ndarray(frame.planes[1])
            v_plane = _plane_to_ndarray(frame.planes[2])

            # Interleave U and V column-wise: U0 V0 U1 V1 ...
            chroma_rows, chroma_cols = u_plane.shape
            uv_plane = np.zeros((chroma_rows, chroma_cols * 2), dtype=np.uint8)
            uv_plane[:, 0::2] = u_plane
            uv_plane[:, 1::2] = v_plane

            # Only the first decoded frame is used.
            yuv_image = np.concatenate((y_plane, uv_plane), axis=0)
            return cv2.cvtColor(yuv_image, cv2.COLOR_YUV2BGR_NV12)
    raise ValueError("decode failed: no video frame in payload")
|
|
|
|
|
|
def iter_mcap_frames(mcap_paths: Iterable[Path], topic_candidates: tuple[str, ...]) -> Iterable[dict[str, Any]]:
    """Yield one record per video message found in the given MCAP files.

    Files are read in the order given. Messages that do not decode to a
    ``VideoMessage``, or whose ``frame_id`` is missing, are skipped.

    Args:
        mcap_paths: MCAP files to read.
        topic_candidates: Currently unused; messages are not filtered by
            topic here, so callers filter the yielded records themselves.

    Yields:
        Dicts with the keys ``source_path``, ``topic``, ``frame_id`` (int),
        ``payload`` and ``timestamp`` (string).

    Raises:
        ImportError: On first iteration, if ``pdcl_pyclip`` could not be imported.
    """
    if any(dependency is None for dependency in (ClipReader, StructDecoder, VideoMessage)):
        raise ImportError("pdcl_pyclip is required for MCAP extraction.")

    struct_decoder = StructDecoder()
    for mcap_path in mcap_paths:
        clip = ClipReader(str(mcap_path))
        for schema, channel, msg in clip.iter_messages():
            message = struct_decoder.decode(schema, channel, msg)
            if not isinstance(message, VideoMessage):
                continue
            raw_frame_id = getattr(message, "frame_id", None)
            if raw_frame_id is None:
                continue
            record = {
                "source_path": mcap_path,
                "topic": getattr(channel, "topic", ""),
                "frame_id": int(raw_frame_id),
                "payload": message.payload,
                "timestamp": str(getattr(message, "timestamp", "")),
            }
            yield record
|
|
|
|
|
|
def topic_matches_camera(topic: str, target_camera: str) -> bool:
    """Tell whether a message topic belongs to the requested camera.

    Matching is case-insensitive. The selector ``"any"`` accepts every topic
    containing ``"camera"``. A concrete selector such as ``"camera1"`` must
    appear in the topic without a digit directly after it, so ``"camera1"``
    no longer matches ``"camera10"`` or ``"camera12"`` as the previous plain
    substring test did.

    Args:
        topic: Topic name of the message; ``None`` or empty is treated as ``""``.
        target_camera: ``"any"`` or a camera name like ``"camera4"``.
    """
    normalized_topic = str(topic or "").lower()
    selector = str(target_camera).lower()
    if selector == "any":
        return "camera" in normalized_topic
    # Negative lookahead: reject a match whose camera number continues.
    return re.search(re.escape(selector) + r"(?!\d)", normalized_topic) is not None
|
|
|
|
|
|
def collect_mcap_window(
    mcap_paths: tuple[Path, ...],
    target_camera: str,
    target_frame_id: int,
    window_radius: int,
) -> tuple[list[dict[str, Any]], str, str]:
    """Collect up to ``window_radius`` frames on each side of the target frame.

    Frames are streamed from the MCAP files and filtered by camera selector.
    Up to ``window_radius`` frames preceding the target are kept in a bounded
    buffer; after the target is seen, up to ``window_radius`` further frames
    are appended and reading stops. The window is shorter when the stream
    starts or ends too close to the target.

    With ``window_radius == 0`` exactly the target frame is returned
    (previously one extra trailing frame was included).

    Args:
        mcap_paths: MCAP files to scan, in order.
        target_camera: Camera selector passed to ``topic_matches_camera``.
        target_frame_id: Frame id to centre the window on.
        window_radius: Maximum number of frames kept before and after the target.

    Returns:
        ``(frames, "frame_id", matched_topic)`` where ``matched_topic`` is the
        topic of the target frame.

    Raises:
        FileNotFoundError: If no matching frame has ``target_frame_id``.
    """
    buffer_before: deque[dict[str, Any]] = deque(maxlen=window_radius)
    selected: list[dict[str, Any]] = []
    trailing_needed = window_radius
    found = False
    matched_topic = ""

    for frame in iter_mcap_frames(mcap_paths, tuple()):
        if not topic_matches_camera(frame["topic"], target_camera):
            continue
        if found:
            selected.append(frame)
            trailing_needed -= 1
        elif frame["frame_id"] == target_frame_id:
            selected = [*buffer_before, frame]
            found = True
            matched_topic = str(frame["topic"])
        else:
            buffer_before.append(frame)
        # Checked after every frame so that a zero radius stops right at the target.
        if found and trailing_needed <= 0:
            break

    if not found:
        raise FileNotFoundError(f"target frame_id {target_frame_id} not found in MCAP sources for camera selector {target_camera}")

    return selected, "frame_id", matched_topic
|
|
|
|
|
|
def collect_mcap_all_frames(
    mcap_paths: tuple[Path, ...],
    target_camera: str,
) -> tuple[list[dict[str, Any]], str]:
    """Collect every MCAP frame whose topic matches the camera selector.

    Args:
        mcap_paths: MCAP files to scan, in order.
        target_camera: Camera selector passed to ``topic_matches_camera``.

    Returns:
        ``(frames, matched_topic)`` where ``matched_topic`` is the first
        non-empty topic among the collected frames.

    Raises:
        FileNotFoundError: If no frame matches the selector.
    """
    selected: list[dict[str, Any]] = [
        frame
        for frame in iter_mcap_frames(mcap_paths, tuple())
        if topic_matches_camera(frame["topic"], target_camera)
    ]
    if not selected:
        raise FileNotFoundError(f"no MCAP frames found for camera selector {target_camera}")

    topic_names = (str(frame["topic"]) for frame in selected)
    matched_topic = next((name for name in topic_names if name), "")
    return selected, matched_topic
|
|
|
|
|
|
def find_video_frame_match(index_payload: Optional[dict[str, Any]], target_frame_id: int) -> tuple[int | None, str | None]:
    """Find the video frame index whose recorded frame id equals the target.

    The ``"frame_id"`` field is searched across all frames first, then
    ``"cve_frame_id"``; a field is searched only when it is listed in the
    payload's ``"fields"``. Values that cannot be converted to ``int`` are
    ignored.

    Args:
        index_payload: Frame index of a video file, or ``None`` / empty.
        target_frame_id: Frame id to look for.

    Returns:
        ``(frame_idx, field_name)`` for the first match, otherwise ``(None, None)``.
    """
    no_match: tuple[int | None, str | None] = (None, None)
    if not index_payload:
        return no_match

    declared_fields = index_payload.get("fields", {}) or {}
    frame_total = len(index_payload.get("index", []) or [])

    for field_name in ("frame_id", "cve_frame_id"):
        if field_name not in declared_fields:
            continue
        for frame_idx in range(frame_total):
            info = get_video_frame_info(index_payload, frame_idx)
            if info is None:
                continue
            value = info.get(field_name)
            try:
                candidate = int(value)
            except (TypeError, ValueError):
                continue
            if candidate == target_frame_id:
                return frame_idx, field_name
    return no_match
|
|
|
|
|
|
def collect_video_window(
    video_path: Path,
    target_frame_id: int,
    window_radius: int,
) -> tuple[list[dict[str, Any]], str, int]:
    """Read the frames around the target frame from a video file.

    The target is located through the video's frame index, then frames from
    ``matched_idx - window_radius`` (clamped at 0) to
    ``matched_idx + window_radius`` are read sequentially. Reading stops
    early when the video ends, so the window may be shorter than requested.

    Args:
        video_path: Video file to read (e.g. ``camera4.bin``).
        target_frame_id: Frame id to centre the window on.
        window_radius: Maximum number of frames kept before and after the target.

    Returns:
        ``(frames, matched_field, matched_frame_idx)``; every frame dict holds
        ``source_path``, ``frame_idx``, ``frame_id``, ``cve_frame_id``,
        ``timestamp`` and the decoded ``image``.

    Raises:
        FileNotFoundError: If the frame id is not in the index, or the video
            could not be read up to the matched frame.
        RuntimeError: If the video file cannot be opened.
    """
    index_payload = read_video_frame_index(video_path)
    matched_frame_idx, matched_field = find_video_frame_match(index_payload, target_frame_id)
    if matched_frame_idx is None or matched_field is None:
        raise FileNotFoundError(f"target frame_id {target_frame_id} not found in video index")

    start_idx = max(0, matched_frame_idx - window_radius)
    end_idx = matched_frame_idx + window_radius

    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        raise RuntimeError(f"failed to open video file: {video_path}")

    selected: list[dict[str, Any]] = []
    try:
        cap.set(cv2.CAP_PROP_POS_FRAMES, start_idx)
        for current_idx in range(start_idx, end_idx + 1):
            ret, frame = cap.read()
            if not ret:
                break
            frame_info = get_video_frame_info(index_payload, current_idx) if index_payload else None
            selected.append(
                {
                    "source_path": video_path,
                    "frame_idx": current_idx,
                    "frame_id": None if frame_info is None else frame_info.get("frame_id"),
                    "cve_frame_id": None if frame_info is None else frame_info.get("cve_frame_id"),
                    "timestamp": "" if frame_info is None else str(frame_info.get("timestamp", "")),
                    "image": frame,
                }
            )
    finally:
        cap.release()

    # A window that stops before the target is useless; report it instead of
    # silently returning a truncated (possibly empty) list.
    if len(selected) <= matched_frame_idx - start_idx:
        raise FileNotFoundError(
            f"target frame index {matched_frame_idx} could not be read from video file: {video_path}"
        )

    return selected, matched_field, matched_frame_idx
|
|
|
|
|
|
def collect_video_all_frames(video_path: Path) -> tuple[list[dict[str, Any]], int]:
    """Read every decodable frame of a video file into memory.

    Per-frame metadata is taken from the video's frame index when one is
    available; otherwise the id fields are ``None`` and the timestamp is ``""``.

    Args:
        video_path: Video file to read.

    Returns:
        ``(frames, count)`` where ``count == len(frames)``.

    Raises:
        RuntimeError: If the video file cannot be opened.
        FileNotFoundError: If not a single frame could be read.
    """
    index_payload = read_video_frame_index(video_path)
    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        raise RuntimeError(f"failed to open video file: {video_path}")

    frames: list[dict[str, Any]] = []
    try:
        ok, image = cap.read()
        while ok:
            position = len(frames)
            info = get_video_frame_info(index_payload, position) if index_payload else None
            has_info = info is not None
            frames.append(
                {
                    "source_path": video_path,
                    "frame_idx": position,
                    "frame_id": info.get("frame_id") if has_info else None,
                    "cve_frame_id": info.get("cve_frame_id") if has_info else None,
                    "timestamp": str(info.get("timestamp", "")) if has_info else "",
                    "image": image,
                }
            )
            ok, image = cap.read()
    finally:
        cap.release()

    if not frames:
        raise FileNotFoundError(f"no readable frames found in video file: {video_path}")
    return frames, len(frames)
|
|
|
|
|
|
def save_decoded_frames(
    issue_id: int,
    output_dir: Path,
    frames: list[dict[str, Any]],
    target_frame_id: int,
    window_radius: int,
    jpg_quality: int,
    dry_run: bool,
    decode_all: bool,
) -> int:
    """Write the collected frames as JPEG files and return how many were handled.

    Files go to ``<output_dir>/frames_all`` or ``<output_dir>/frames_window``
    and are named ``<issue_id>_<camera>_<frame id>.jpg``. The camera token is
    taken from the frame's topic (``cameraN``), else from the stem of its
    source file, else ``"camera"``. The frame-id token is ``frame_id``, else
    ``cve_frame_id``.

    Frames carrying neither id used to share the single name ``..._na.jpg``
    and overwrote each other; they now get ``na_idx<NNNNNN>`` built from the
    frame's ``frame_idx`` (or its position in ``frames``), so every frame is
    kept.

    Args:
        issue_id: Issue id used as filename prefix.
        output_dir: Case output directory.
        frames: Frame dicts holding either a decoded ``image`` or an encoded
            ``payload`` that is decoded on demand.
        target_frame_id: Accepted for interface compatibility; not used.
        window_radius: Accepted for interface compatibility; not used.
        jpg_quality: JPEG quality passed to OpenCV.
        dry_run: When true, nothing is decoded or written.
        decode_all: Selects the ``frames_all`` or ``frames_window`` subdirectory.

    Returns:
        ``len(frames)``.

    Raises:
        IOError: If OpenCV fails to write an image.
    """
    if dry_run:
        return len(frames)

    images_dir = output_dir / ("frames_all" if decode_all else "frames_window")
    ensure_dir(images_dir, dry_run=False)
    encode_params = [int(cv2.IMWRITE_JPEG_QUALITY), int(jpg_quality)]

    for position, frame in enumerate(frames):
        frame_id = frame.get("frame_id")
        if frame_id is None:
            frame_id = frame.get("cve_frame_id")
        if frame_id is None:
            # No id available: fall back to a per-frame index so names stay unique.
            fallback_idx = frame.get("frame_idx")
            if fallback_idx is None:
                fallback_idx = position
            frame_id_token = f"na_idx{int(fallback_idx):06d}"
        else:
            frame_id_token = str(frame_id)

        topic = str(frame.get("topic") or "")
        camera_id_match = re.search(r"camera(\d+)", topic, re.IGNORECASE)
        if camera_id_match:
            camera_id_token = f"camera{camera_id_match.group(1)}"
        else:
            source_stem = Path(str(frame.get("source_path") or "")).stem
            camera_id_token = source_stem or "camera"

        target_path = images_dir / f"{issue_id}_{camera_id_token}_{frame_id_token}.jpg"

        image = frame.get("image")
        if image is None:
            # MCAP frames carry the encoded payload; decode it only when writing.
            image = _h265_payload_to_bgr(frame["payload"])
        if not cv2.imwrite(str(target_path), image, encode_params):
            raise IOError(f"failed to write image: {target_path}")

    return len(frames)
|
|
|
|
|
|
def process_case(case: CaseSource, output_root: Path, window_radius: int, jpg_quality: int, dry_run: bool, skip_existing: bool) -> CaseResult:
    """Decode one discovered case and describe the outcome.

    Flow:

    * With ``skip_existing``, a case whose image directory already contains
      files is reported as ``skipped_existing``.
    * ``mcap_stream`` cases are decoded from their MCAP files. If that fails
      for any reason and the camera selector is ``camera4`` or ``any``, a
      ``camera4.bin`` found under the case directory is used as fallback;
      otherwise the MCAP error is reported.
    * ``camera4_bin`` cases are decoded directly from the video file.
    * Any other source mode yields ``skipped_no_source``.

    No exception escapes: failures are returned as a result with status
    ``failed`` and the exception text as detail.

    Args:
        case: The case to process.
        output_root: Root under which ``case.relative_case_dir`` is recreated.
        window_radius: Frames kept on each side of the target in window mode.
        jpg_quality: JPEG quality for written images.
        dry_run: When true, frames are collected but nothing is written and
            successful results carry the status ``planned``.
        skip_existing: Skip cases that already have extracted images.

    Returns:
        The ``CaseResult`` for the manifest.
    """
    target = case.target_frame
    output_dir = output_root / case.relative_case_dir
    images_dir = output_dir / ("frames_all" if case.decode_all else "frames_window")

    def build_result(status: str, detail: str, **overrides: Any) -> CaseResult:
        # Fields shared by every result of this case; overrides replace or extend them.
        fields: dict[str, Any] = {
            "issue_id": case.issue_id,
            "issue_name": case.issue_name,
            "case_dir": str(case.case_dir),
            "relative_case_dir": str(case.relative_case_dir),
            "target_camera": target.camera,
            "target_frame_id": target.frame_id,
            "decode_mode": "all" if case.decode_all else "window",
            "source_mode": case.source_mode,
            "source_paths": [str(path) for path in case.source_paths],
            "output_dir": str(output_dir),
            "status": status,
            "detail": detail,
        }
        fields.update(overrides)
        return CaseResult(**fields)

    def build_decoded_result(done_status: str, detail: str, **overrides: Any) -> CaseResult:
        # A dry run reports the same information as a plan instead of a completed action.
        if dry_run:
            return build_result("planned", "would " + detail, **overrides)
        return build_result(done_status, detail, **overrides)

    def save(frames: list[dict[str, Any]]) -> int:
        return save_decoded_frames(
            case.issue_id, output_dir, frames, target.frame_id, window_radius, jpg_quality, dry_run, case.decode_all
        )

    if skip_existing and images_dir.is_dir():
        existing_images = [path for path in images_dir.iterdir() if path.is_file()]
        if existing_images:
            return build_result(
                "skipped_existing",
                "existing extracted frames found",
                extracted_count=len(existing_images),
            )

    try:
        if case.source_mode == "mcap_stream":
            try:
                if case.decode_all:
                    frames, matched_topic = collect_mcap_all_frames(case.source_paths, target.camera)
                    matched_field = None
                else:
                    frames, matched_field, matched_topic = collect_mcap_window(
                        case.source_paths,
                        target.camera,
                        target.frame_id,
                        window_radius,
                    )
                extracted_count = save(frames)
                return build_decoded_result(
                    "decoded_mcap",
                    f"decoded {extracted_count} frames from {len(case.source_paths)} mcap file(s)",
                    matched_field=matched_field,
                    matched_frame_idx=None,
                    matched_topic=matched_topic,
                    extracted_count=extracted_count,
                )
            except Exception as mcap_exc:
                fallback_camera4 = find_camera4_bin(case.case_dir)
                if fallback_camera4 is None or target.camera not in {"camera4", "any"}:
                    # No usable fallback: let the outer handler report the MCAP error.
                    raise

                if case.decode_all:
                    frames, _ = collect_video_all_frames(fallback_camera4)
                    matched_field, matched_frame_idx = None, None
                else:
                    frames, matched_field, matched_frame_idx = collect_video_window(
                        fallback_camera4,
                        target.frame_id,
                        window_radius,
                    )
                extracted_count = save(frames)
                return build_decoded_result(
                    "decoded_camera4_bin_fallback",
                    f"decoded {extracted_count} frames from camera4.bin fallback after mcap lookup failed: "
                    f"{type(mcap_exc).__name__}: {mcap_exc}",
                    source_mode="camera4_bin_fallback",
                    source_paths=[str(path) for path in case.source_paths] + [str(fallback_camera4)],
                    matched_field=matched_field,
                    matched_frame_idx=matched_frame_idx,
                    matched_topic=None,
                    extracted_count=extracted_count,
                )

        if case.source_mode == "camera4_bin":
            if case.decode_all:
                frames, _ = collect_video_all_frames(case.source_paths[0])
                matched_field, matched_frame_idx = None, None
            else:
                frames, matched_field, matched_frame_idx = collect_video_window(
                    case.source_paths[0], target.frame_id, window_radius
                )
            extracted_count = save(frames)
            return build_decoded_result(
                "decoded_camera4_bin",
                f"decoded {extracted_count} frames from camera4.bin",
                matched_field=matched_field,
                matched_frame_idx=matched_frame_idx,
                matched_topic=None,
                extracted_count=extracted_count,
            )

        return build_result("skipped_no_source", f"unsupported source mode: {case.source_mode}")
    except Exception as exc:
        # Per-case boundary: one bad case must not abort the whole batch.
        return build_result("failed", f"{type(exc).__name__}: {exc}")
|
|
|
|
|
|
def build_manifest(args: argparse.Namespace, input_json: Path, download_root: Path, output_root: Path, discovered: list[CaseSource], results: list[CaseResult]) -> dict[str, Any]:
    """Assemble the JSON-serialisable manifest describing one run.

    Args:
        args: Parsed command-line options echoed into the manifest.
        input_json: Issue-list file that was read.
        download_root: Root of the downloaded issue data.
        output_root: Root of the decoded output.
        discovered: Cases found during discovery (only their count is stored).
        results: Results of all cases, skipped ones included.

    Returns:
        A dict with the run settings, a per-status count under ``"summary"``
        and every result under ``"cases"``.
    """
    status_counts: Counter = Counter()
    for result in results:
        status_counts[result.status] += 1

    radius = args.window_radius
    generated_at = datetime.now().astimezone().isoformat(timespec="seconds")
    case_dicts = [result.to_dict() for result in results]

    manifest: dict[str, Any] = {
        "generated_at": generated_at,
        "input_json": str(input_json),
        "download_root": str(download_root),
        "output_root": str(output_root),
        "issue_filter": args.issue_ids or [],
        "decode_all_issue_filter": args.decode_all_issue_ids or [],
        "window_radius": radius,
        "window_size": radius * 2 + 1,
        "jpg_quality": args.jpg_quality,
        "dry_run": args.dry_run,
        "skip_existing": args.skip_existing,
        "total_discovered_cases": len(discovered),
        "summary": dict(status_counts),
        "cases": case_dicts,
    }
    return manifest
|
|
|
|
|
|
def print_summary(manifest: dict[str, Any]) -> None:
    """Print the run settings and the per-status counts of a manifest to stdout.

    Settings are printed one ``key: value`` line each in a fixed order,
    followed by the status counts sorted by status name.
    """
    headline_keys = (
        "input_json",
        "download_root",
        "output_root",
        "window_size",
        "dry_run",
        "total_discovered_cases",
    )
    for key in headline_keys:
        print(f"{key}: {manifest[key]}")

    status_counts = manifest["summary"]
    for status in sorted(status_counts):
        print(f"{status}: {status_counts[status]}")
|
|
|
|
|
|
def main() -> int:
    """Run discovery and decoding for the configured issues and write the manifest.

    Outside dry-run mode the manifest is written to ``--manifest-path`` or,
    when that is empty, to ``decode_manifest.json`` under the output root.
    A summary is printed in both modes.

    Returns:
        Always ``0``; per-case failures are recorded in the manifest.
    """
    args = parse_args()
    input_json, download_root, output_root = (
        Path(raw).resolve() for raw in (args.input_json, args.download_root, args.output_root)
    )
    if args.manifest_path:
        manifest_path = Path(args.manifest_path).resolve()
    else:
        manifest_path = output_root / "decode_manifest.json"

    # An absent --issue-id means "no filter" (None); decode-all defaults to an empty set.
    issue_filter = set(args.issue_ids) if args.issue_ids else None
    decode_all_issue_filter = set(args.decode_all_issue_ids or [])

    items = load_issue_items(input_json)
    discovered, skipped = discover_case_sources(items, download_root, issue_filter, decode_all_issue_filter)

    # Skipped entries come first, followed by one result per processed case.
    results = list(skipped)
    results.extend(
        process_case(case, output_root, args.window_radius, args.jpg_quality, args.dry_run, args.skip_existing)
        for case in discovered
    )

    manifest = build_manifest(args, input_json, download_root, output_root, discovered, results)
    if not args.dry_run:
        ensure_dir(manifest_path.parent, dry_run=False)
        serialized = json.dumps(manifest, ensure_ascii=False, indent=2) + "\n"
        manifest_path.write_text(serialized, encoding="utf-8")

    print_summary(manifest)
    label = "manifest (not written in dry-run)" if args.dry_run else "manifest"
    print(f"{label}: {manifest_path}")
    return 0
|
|
|
|
|
|
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|