Files
2026-06-24 09:35:46 +08:00

589 lines
21 KiB
Python
Executable File

"""
Data parser for ground truth and detection results.
Supports both TXT and JSON formats.
TXT GT format: normalized xywh bbox + 47-dim 3D labels (47-dim label format per CLAUDE.md)
JSON GT format: absolute pixel box2d, 3d_ori=[x3d,y3d,z3d,l,h,w,rot_y,xc,yc,...], 3d_front/back/left/right=[x3d,y3d,z3d,alpha,xc,yc,score,is_visible]
TXT Det format: class_name conf x1 y1 x2 y2 coord_sys x3d y3d z3d l h w rot_y face_type
JSON Det format: type/type_name, score, box2d (absolute pixels), xyzlhwyaw=[x3d,y3d,z3d,l,h,w,rot_y], face_cls
"""
import json
import numpy as np
from ..class_config import CLASS_NAMES, CLASS_NAME_TO_ID, CLASSES_3D, FACE_3D_CLASSES
def yaw_to_radians(yaw_value, coord_system):
    """Return the parsed yaw as a float expressed in radians.

    Args:
        yaw_value: yaw as parsed from the annotation (number or numeric string).
        coord_system: coordinate system the value was read from. Values read
            for ``'ego'`` are converted from degrees to radians; for any other
            value the yaw is returned unchanged apart from the ``float`` cast.

    Returns:
        float, yaw in radians.
    """
    angle = float(yaw_value)
    return float(np.deg2rad(angle)) if coord_system == 'ego' else angle
class GroundTruthParser:
    """Parse ground truth annotation files (TXT or JSON).

    Every parsed annotation is a dict with the keys ``label``, ``bbox_2d``
    (pixel ``[x1, y1, x2, y2]``), ``has_3d`` and ``3d_info``. Annotations
    parsed from JSON additionally carry the raw ``id`` of the entry.
    """

    # Class ID to name mapping — imported from ..class_config
    CLASS_NAMES = CLASS_NAMES
    # 3D classes — imported from ..class_config
    CLASSES_3D = CLASSES_3D
    VALID_COORD_SYSTEMS = {"camera", "ego"}

    def __init__(self, min_box_size=8, coord_system='camera'):
        """
        Initialize ground truth parser.

        Args:
            min_box_size: float, minimum bbox width or height in pixels.
                Boxes smaller than this will be filtered out.
                Default is 8. Should be calculated based on ROI config:
                - ROI0 (1920->704): 8 * 1920 / 704 ≈ 21.8
                - ROI1 (704->704): 8 * 704 / 704 = 8
            coord_system: str, one of ``VALID_COORD_SYSTEMS``. Selects which
                JSON fields are read (``3d_ori`` vs ``3d_ori_ego`` and the
                matching face fields).

        Raises:
            ValueError: if ``coord_system`` is not supported.
        """
        self.min_box_size = min_box_size
        if coord_system not in self.VALID_COORD_SYSTEMS:
            raise ValueError(f"Unsupported coord_system: {coord_system}")
        self.coord_system = coord_system

    @staticmethod
    def _index_sort_key(key):
        """Sort key for JSON object keys: numeric keys first, in numeric order.

        Returning a tuple keeps numeric and non-numeric keys comparable; a
        bare ``int``-or-``str`` key makes ``sorted`` raise ``TypeError`` as
        soon as a file mixes both kinds. ``isdecimal`` is used instead of
        ``isdigit`` because ``int()`` rejects some ``isdigit`` characters
        (e.g. superscript digits).
        """
        text = str(key)
        if text.isdecimal():
            return (0, int(text), '')
        return (1, 0, text)

    @staticmethod
    def _has_valid_3d(values):
        """Return True if ``values`` has >= 7 items and its x3d is not -1.

        Missing (``None``), too short, or non-numeric data counts as
        "no 3D annotation".
        """
        try:
            return len(values) >= 7 and float(values[0]) != -1
        except (ValueError, TypeError):
            return False

    def parse_line(self, line, img_width, img_height):
        """
        Parse a single line of ground truth annotation (TXT format).

        Args:
            line: str, annotation line (whitespace-separated numbers:
                label, normalized center x/y, normalized width/height, ...)
            img_width: int, image width
            img_height: int, image height

        Returns:
            None if the line has fewer than 6 values or the box is smaller
            than ``min_box_size``; otherwise a dict with keys:
                - label: int
                - bbox_2d: [x1, y1, x2, y2] in pixel coordinates
                - has_3d: bool
                - 3d_info: dict or None

        Raises:
            ValueError: if a token on the line is not a number.
        """
        values = [float(x) for x in line.strip().split()]
        if len(values) < 6:
            return None
        label = int(values[0])

        # Parse 2D bbox (normalized center + width/height to pixel corners)
        x_center_px = values[1] * img_width
        y_center_px = values[2] * img_height
        w_px = values[3] * img_width
        h_px = values[4] * img_height
        x1 = x_center_px - w_px / 2
        y1 = y_center_px - h_px / 2
        x2 = x_center_px + w_px / 2
        y2 = y_center_px + h_px / 2

        # Filter out small objects based on configured minimum size
        if (x2 - x1) < self.min_box_size or (y2 - y1) < self.min_box_size:
            return None

        has_3d = self.is_3d_annotated(values)
        return {
            'label': label,
            'bbox_2d': [x1, y1, x2, y2],
            'has_3d': has_3d,
            '3d_info': self._parse_3d_info(values, label) if has_3d else None,
        }

    def is_3d_annotated(self, values):
        """Check if the annotation contains 3D information.

        A TXT line counts as 3D-annotated when it carries at least 18 values.
        Shorter lines (including the 6-value ``... -1`` form) are 2D-only.
        """
        return len(values) >= 18

    def _parse_3d_info(self, values, label):
        """Parse 3D information from TXT annotation values.

        NOTE(review): values are stored exactly as written in the file; unlike
        the JSON path no yaw conversion is applied and ``self.coord_system``
        is not consulted — confirm TXT GT is only used for camera evaluation.
        """
        info = {
            'center': [values[5], values[6], values[7]],  # x3d_ori, y3d_ori, z3d_ori
            'dimensions': [values[8], values[9], values[10]],  # l3d, h3d, w3d
            'rotation': values[11],  # rot_y
            'faces': None
        }
        # For face_3d_classes, parse face information (only for 50-value lines)
        if label in FACE_3D_CLASSES and len(values) == 50:
            info['faces'] = {
                'front': values[18:26],  # x3d, y3d, z3d, alpha, xc, yc, score, is_occ
                'back': values[26:34],
                'left': values[34:42],
                'right': values[42:50]
            }
        return info

    def get_class_name(self, label_id):
        """Get class name from label ID ("unknown" if the ID is not mapped)."""
        return self.CLASS_NAMES.get(label_id, "unknown")

    def _should_filter_negative_id_gt(self, entry, label):
        """
        Filter JSON GT objects that should not participate in 3D-class evaluation.

        Rule:
            - Only applies to 3D classes: vehicle, pedestrian, bicycle, rider
            - If GT carries an `id` field and id < 0, drop this GT entirely

        Returns:
            bool, True if the entry must be dropped. A missing or
            non-integer ``id`` never triggers filtering.
        """
        if label not in self.CLASSES_3D:
            return False
        object_id = entry.get('id')
        if object_id is None:
            return False
        try:
            return int(object_id) < 0
        except (ValueError, TypeError):
            return False

    def parse_gt_json_entry(self, entry, img_width, img_height):
        """
        Parse a single entry from a GT JSON file.

        GT JSON entry format:
        {
            "type": "0",                        # class id string
            "type_name": "vehicle",
            "roi_id": "1",
            "box2d": ["x1","y1","x2","y2"],     # absolute pixel coords
            "3d_ori": ["x3d","y3d","z3d","l","h","w","rot_y","xc","yc",...,"alpha","flag"],
            "3d_front": ["x3d","y3d","z3d","alpha","xc","yc","score","is_visible"],
            "3d_back": [...],
            "3d_left": [...],
            "3d_right": [...]
        }

        Args:
            entry: dict, single GT JSON entry
            img_width: int, image width (unused for JSON, bbox already in pixels)
            img_height: int, image height (unused for JSON, bbox already in pixels)

        Returns:
            dict or None. None is returned when the class id is missing or
            invalid, the entry is filtered by its negative ``id``, ``box2d``
            is missing/null/too short, or the box is below ``min_box_size``.

        Raises:
            ValueError: in ego mode, when the entry has a valid camera
                ``3d_ori`` but no ``3d_ori_ego`` field; also if a ``box2d``
                coordinate is not numeric.
        """
        raw_type = entry.get('type')
        if raw_type is None or str(raw_type).strip().lower() in ('', 'none', 'null'):
            return None
        try:
            label = int(raw_type)
        except (ValueError, TypeError):
            return None
        if self._should_filter_negative_id_gt(entry, label):
            return None

        # `or []` also covers an explicit JSON null, which would otherwise
        # crash on len(None).
        box2d = entry.get('box2d') or []
        if len(box2d) < 4:
            return None
        x1, y1, x2, y2 = (float(v) for v in box2d[:4])

        # Filter small objects
        if (x2 - x1) < self.min_box_size or (y2 - y1) < self.min_box_size:
            return None

        # Check whether 3D annotation is present and valid
        ori_key = '3d_ori_ego' if self.coord_system == 'ego' else '3d_ori'
        if (self.coord_system == 'ego' and ori_key not in entry
                and self._has_valid_3d(entry.get('3d_ori'))):
            # Camera 3D exists but the ego variant was never exported:
            # evaluating would silently drop the object, so fail loudly.
            raise ValueError(
                "GT JSON is missing ego-coordinate fields (3d_ori_ego). "
                "Please regenerate ground truth with ego fields before running ego-coordinate evaluation."
            )

        # x3d is the first element; -1 indicates no 3D annotation
        has_3d = self._has_valid_3d(entry.get(ori_key))
        return {
            'label': label,
            'bbox_2d': [x1, y1, x2, y2],
            'has_3d': has_3d,
            '3d_info': self._parse_3d_info_from_json(entry, label) if has_3d else None,
            'id': entry.get('id'),
        }

    def _parse_3d_info_from_json(self, entry, label):
        """Parse 3D information from a JSON GT entry.

        Reads the ``*_ego`` fields when ``coord_system`` is ``'ego'``. Faces
        are parsed only for classes in ``FACE_3D_CLASSES`` and only when all
        four face fields are present; each face keeps at most 8 values.
        """
        ori_key = '3d_ori_ego' if self.coord_system == 'ego' else '3d_ori'
        d3_ori = entry[ori_key]
        info = {
            'center': [float(d3_ori[0]), float(d3_ori[1]), float(d3_ori[2])],  # x3d, y3d, z3d
            'dimensions': [float(d3_ori[3]), float(d3_ori[4]), float(d3_ori[5])],  # l, h, w
            'rotation': yaw_to_radians(d3_ori[6], self.coord_system),  # rot_y
            'faces': None,
            'coord_system': self.coord_system,
        }

        face_keys = {'front': '3d_front', 'back': '3d_back', 'left': '3d_left', 'right': '3d_right'}
        if self.coord_system == 'ego':
            face_keys = {name: f"{key}_ego" for name, key in face_keys.items()}
        if label in FACE_3D_CLASSES and all(k in entry for k in face_keys.values()):
            # Slicing already handles lists shorter than 8 elements.
            info['faces'] = {
                face_name: [float(v) for v in entry[json_key][:8]]
                for face_name, json_key in face_keys.items()
            }
        return info

    def parse_gt_json_file(self, file_path, img_width, img_height):
        """
        Parse an entire GT JSON file.

        The JSON root is normally a dict keyed by object index ("0", "1", ...);
        a list root is accepted too. Dict entries are visited with numeric
        keys first (in numeric order), then any other keys in string order.

        Returns:
            list of parsed annotation dicts. An empty list is returned (with
            a printed warning) when the file is missing, is not valid JSON,
            or has an unsupported root type.
        """
        try:
            # JSON interchange text is UTF-8; do not depend on the locale.
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except FileNotFoundError:
            print(f"Warning: File not found: {file_path}")
            return []
        except json.JSONDecodeError as e:
            print(f"Warning: JSON decode error in {file_path}: {e}")
            return []

        if data is None:
            return []
        if isinstance(data, dict):
            items = sorted(data.items(), key=lambda item: self._index_sort_key(item[0]))
        elif isinstance(data, list):
            items = list(enumerate(data))
        else:
            print(f"Warning: unsupported GT JSON root type {type(data).__name__} in {file_path}")
            return []

        annotations = []
        for key, entry in items:
            if not isinstance(entry, dict):
                print(f"Warning: skipping non-dict GT entry at key={key!r} in {file_path}")
                continue
            raw_type = entry.get('type')
            if raw_type is None or str(raw_type).strip().lower() in ('', 'none', 'null'):
                print(f"Warning: skipping entry with invalid type={raw_type!r} "
                      f"(key={key!r}) in {file_path}")
                continue
            parsed = self.parse_gt_json_entry(entry, img_width, img_height)
            if parsed is not None:
                annotations.append(parsed)
        return annotations

    def parse_file(self, file_path, img_width, img_height):
        """
        Parse entire annotation file. Dispatches to JSON or TXT parser based on extension.

        Args:
            file_path: str, path to annotation file (.txt or .json; the
                extension check is case-insensitive)
            img_width: int, image width
            img_height: int, image height

        Returns:
            list of parsed annotations (empty, with a printed warning, if
            the file does not exist)
        """
        if str(file_path).lower().endswith('.json'):
            return self.parse_gt_json_file(file_path, img_width, img_height)

        annotations = []
        try:
            with open(file_path, 'r') as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    parsed = self.parse_line(line, img_width, img_height)
                    if parsed is not None:
                        annotations.append(parsed)
        except FileNotFoundError:
            print(f"Warning: File not found: {file_path}")
            return []
        return annotations
class DetectionParser:
    """Parse detection result files (TXT or JSON).

    Every parsed detection is a dict with the keys ``label``, ``confidence``,
    ``bbox_2d`` (pixel ``[x1, y1, x2, y2]``) and ``3d_info``. Detections
    parsed from JSON additionally carry ``id`` and ``roi_id``.
    """

    # Class name to ID mapping — imported from ..class_config
    CLASS_NAME_TO_ID = CLASS_NAME_TO_ID
    # 3D classes — imported from ..class_config
    CLASSES_3D = CLASSES_3D
    VALID_COORD_SYSTEMS = {"camera", "ego"}

    def __init__(self, min_box_size=0, coord_system='camera'):
        """
        Initialize detection parser.

        Args:
            min_box_size: float, minimum bbox width or height in pixels.
                Detections smaller than this will be filtered out.
                Should match the GT min_box_size to ensure
                symmetric filtering. Default is 0 (no filtering).
            coord_system: str, one of ``VALID_COORD_SYSTEMS``. Selects which
                JSON field is read (``xyzlhwyaw`` vs ``xyzlhwyaw_ego``).

        Raises:
            ValueError: if ``coord_system`` is not supported.
        """
        self.min_box_size = min_box_size
        if coord_system not in self.VALID_COORD_SYSTEMS:
            raise ValueError(f"Unsupported coord_system: {coord_system}")
        self.coord_system = coord_system

    @staticmethod
    def _index_sort_key(key):
        """Sort key for JSON object keys: numeric keys first, in numeric order.

        Returning a tuple keeps numeric and non-numeric keys comparable; a
        bare ``int``-or-``str`` key makes ``sorted`` raise ``TypeError`` as
        soon as a file mixes both kinds. ``isdecimal`` is used instead of
        ``isdigit`` because ``int()`` rejects some ``isdigit`` characters
        (e.g. superscript digits).
        """
        text = str(key)
        if text.isdecimal():
            return (0, int(text), '')
        return (1, 0, text)

    @staticmethod
    def _has_valid_3d(values):
        """Return True if ``values`` has >= 7 items and its x3d is not -1.

        The comparison is numeric, so ``-1``, ``"-1"`` and ``-1.0`` are all
        recognised as the "no 3D" sentinel. Missing (``None``), too short, or
        non-numeric data counts as "no 3D".
        """
        try:
            return len(values) >= 7 and float(values[0]) != -1
        except (ValueError, TypeError):
            return False

    def parse_line(self, line):
        """
        Parse a single line of detection result (TXT format).

        Args:
            line: str, detection line
                (``class_name conf x1 y1 x2 y2 [coord_sys x3d ... face_type]``)

        Returns:
            None if the line has fewer than 6 fields or the box is filtered
            by ``min_box_size``; otherwise a dict with keys:
                - label: int (-1 for an unknown class name)
                - confidence: float
                - bbox_2d: [x1, y1, x2, y2] in pixel coordinates
                - 3d_info: dict or None

        Raises:
            ValueError: if a numeric field cannot be parsed, or if the line
                carries 3D data while the parser is in ego mode.
        """
        parts = line.strip().split()
        if len(parts) < 6:
            return None
        label = self.map_class_name(parts[0])
        confidence = float(parts[1])

        # 2D bbox
        x1, y1, x2, y2 = (float(v) for v in parts[2:6])

        # Filter small detections
        if self.min_box_size > 0 and ((x2 - x1) < self.min_box_size or (y2 - y1) < self.min_box_size):
            return None

        result = {
            'label': label,
            'confidence': confidence,
            'bbox_2d': [x1, y1, x2, y2],
            '3d_info': None
        }
        # Check if this is a 3D class and has 3D info
        if label in self.CLASSES_3D and len(parts) >= 15:
            result['3d_info'] = self._parse_3d_info(parts)
        return result

    def _parse_3d_info(self, parts):
        """Parse 3D information from TXT detection parts.

        Raises:
            ValueError: in ego mode, because the TXT format has no
                ego-coordinate fields.
        """
        if self.coord_system == 'ego':
            raise ValueError("TXT detection format does not support ego-coordinate 3D evaluation.")
        # Format: label conf x1 y1 x2 y2 coord_sys x3d y3d z3d l3d h3d w3d rot_y face_type
        # Index:  0     1    2  3  4  5  6         7   8   9   10  11  12  13    14
        face_type = parts[14] if len(parts) > 14 else 'whole'
        # Normalize rear/tail to back for consistency
        if face_type.lower() in ('rear', 'tail'):
            face_type = 'back'
        return {
            'center': [float(parts[7]), float(parts[8]), float(parts[9])],  # x3d, y3d, z3d
            'dimensions': [float(parts[10]), float(parts[11]), float(parts[12])],  # l3d, h3d, w3d
            'rotation': float(parts[13]),  # rot_y
            'face_type': face_type,
            'coord_system': 'camera',
        }

    def map_class_name(self, name_str):
        """Map class name string to class ID (-1 if unknown or not a string)."""
        if not isinstance(name_str, str):
            return -1
        return self.CLASS_NAME_TO_ID.get(name_str.lower(), -1)

    def parse_det_json_entry(self, entry):
        """
        Parse a single entry from a detection JSON file.

        Det JSON entry format:
        {
            "type": "0",                        # class id string
            "type_name": "vehicle",
            "score": "0.93",
            "roi_id": "0",
            "box2d": ["x1","y1","x2","y2"],     # absolute pixel coords
            "xyzlhwyaw": ["x3d","y3d","z3d","l","h","w","rot_y"],
            "face_cls": "front",                # front/tail/rear/left/right/whole/none
            "cut_cls": "0",
            "cut_cls_name": "nocut"
        }

        The class id comes from ``type``; when that is missing or not an
        integer, ``type_name`` is mapped instead. A missing or invalid
        ``score`` becomes 0.0.

        Returns:
            dict or None (None when ``box2d`` is missing/null/too short or
            the box is filtered by ``min_box_size``)

        Raises:
            ValueError: in ego mode, when a 3D-class entry has valid camera
                ``xyzlhwyaw`` data but no usable ``xyzlhwyaw_ego``; also if a
                ``box2d`` coordinate is not numeric.
        """
        try:
            label = int(entry['type'])
        except (KeyError, ValueError, TypeError):
            label = self.map_class_name(entry.get('type_name') or '')
        try:
            confidence = float(entry['score'])
        except (KeyError, ValueError, TypeError):
            confidence = 0.0

        # `or []` also covers an explicit JSON null, which would otherwise
        # crash on len(None).
        box2d = entry.get('box2d') or []
        if len(box2d) < 4:
            return None
        x1, y1, x2, y2 = (float(v) for v in box2d[:4])

        # Filter small detections
        if self.min_box_size > 0 and ((x2 - x1) < self.min_box_size or (y2 - y1) < self.min_box_size):
            return None

        result = {
            'label': label,
            'confidence': confidence,
            'bbox_2d': [x1, y1, x2, y2],
            '3d_info': None,
            'id': entry.get('track_id', entry.get('id')),
            'roi_id': self._normalize_roi_id(entry.get('roi_id')),
        }
        if label not in self.CLASSES_3D:
            return result

        # Parse 3D info for 3D classes
        xyz_key = 'xyzlhwyaw_ego' if self.coord_system == 'ego' else 'xyzlhwyaw'
        xyzlhwyaw = entry.get(xyz_key) or []
        if (self.coord_system == 'ego' and len(xyzlhwyaw) < 7
                and self._has_valid_3d(entry.get('xyzlhwyaw'))):
            # Camera 3D exists but the ego variant was never exported:
            # evaluating would silently drop the 3D box, so fail loudly.
            raise ValueError(
                "Detection JSON is missing ego-coordinate fields (xyzlhwyaw_ego). "
                "Please export ego-coordinate detection results before running ego-coordinate evaluation."
            )

        if self._has_valid_3d(xyzlhwyaw):
            face_type = str(entry.get('face_cls') or 'whole')
            # Normalize rear/tail to back for consistency
            if face_type.lower() in ('rear', 'tail'):
                face_type = 'back'
            result['3d_info'] = {
                'center': [float(xyzlhwyaw[0]), float(xyzlhwyaw[1]), float(xyzlhwyaw[2])],
                'dimensions': [float(xyzlhwyaw[3]), float(xyzlhwyaw[4]), float(xyzlhwyaw[5])],
                'rotation': yaw_to_radians(xyzlhwyaw[6], self.coord_system),
                'face_type': face_type,
                'coord_system': self.coord_system,
            }
        return result

    @staticmethod
    def _normalize_roi_id(roi_id):
        """Normalize ROI identifiers like 'roi0'/'0' to plain numeric strings.

        Returns None for a missing ID or one that is empty after stripping
        whitespace and the optional ``roi`` prefix.
        """
        if roi_id is None:
            return None
        roi_id_str = str(roi_id).strip().lower()
        if roi_id_str.startswith('roi'):
            roi_id_str = roi_id_str[3:]
        return roi_id_str or None

    def parse_det_json_file(self, file_path):
        """
        Parse an entire detection JSON file.

        The JSON root is normally a dict keyed by object index ("0", "1", ...);
        a list root is accepted too. Dict entries are visited with numeric
        keys first (in numeric order), then any other keys in string order.
        Entries that are not dicts are skipped with a printed warning.

        Returns:
            list of parsed detection dicts. An empty list is returned (with
            a printed warning) when the file is missing, is not valid JSON,
            or has an unsupported root type.
        """
        try:
            # JSON interchange text is UTF-8; do not depend on the locale.
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except FileNotFoundError:
            print(f"Warning: File not found: {file_path}")
            return []
        except json.JSONDecodeError as e:
            print(f"Warning: JSON decode error in {file_path}: {e}")
            return []

        if data is None:
            return []
        if isinstance(data, dict):
            items = sorted(data.items(), key=lambda item: self._index_sort_key(item[0]))
        elif isinstance(data, list):
            items = list(enumerate(data))
        else:
            print(f"Warning: unsupported detection JSON root type {type(data).__name__} in {file_path}")
            return []

        detections = []
        for key, entry in items:
            if not isinstance(entry, dict):
                print(f"Warning: skipping non-dict detection entry at key={key!r} in {file_path}")
                continue
            parsed = self.parse_det_json_entry(entry)
            if parsed is not None:
                detections.append(parsed)
        return detections

    def parse_file(self, file_path):
        """
        Parse entire detection file. Dispatches to JSON or TXT parser based on extension.

        Args:
            file_path: str, path to detection file (.txt or .json; the
                extension check is case-insensitive)

        Returns:
            list of parsed detections (empty, with a printed warning, if the
            file does not exist)
        """
        if str(file_path).lower().endswith('.json'):
            return self.parse_det_json_file(file_path)

        detections = []
        try:
            with open(file_path, 'r') as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    parsed = self.parse_line(line)
                    if parsed is not None:
                        detections.append(parsed)
        except FileNotFoundError:
            print(f"Warning: File not found: {file_path}")
            return []
        return detections