589 lines
21 KiB
Python
Executable File
589 lines
21 KiB
Python
Executable File
"""
Data parser for ground truth and detection results.
Supports both TXT and JSON formats.

TXT GT format: normalized xywh bbox + 47-dim 3D labels (47-dim label format per CLAUDE.md)
JSON GT format: absolute pixel box2d, 3d_ori=[x3d,y3d,z3d,l,h,w,rot_y,xc,yc,...], 3d_front/back/left/right=[x3d,y3d,z3d,alpha,xc,yc,score,is_visible]

TXT Det format: class_name conf x1 y1 x2 y2 coord_sys x3d y3d z3d l h w rot_y face_type
JSON Det format: type/type_name, score, box2d (absolute pixels), xyzlhwyaw=[x3d,y3d,z3d,l,h,w,rot_y], face_cls
"""
|
|
import json
|
|
import numpy as np
|
|
|
|
from ..class_config import CLASS_NAMES, CLASS_NAME_TO_ID, CLASSES_3D, FACE_3D_CLASSES
|
|
|
|
|
|
def yaw_to_radians(yaw_value, coord_system):
    """Return the parsed yaw as a float in radians.

    Args:
        yaw_value: number or numeric string holding the yaw angle.
        coord_system: coordinate-system name. When it equals ``'ego'`` the
            value is passed through ``np.deg2rad``; for any other name the
            value is returned unchanged (only cast to ``float``).

    Returns:
        float, the yaw used by downstream evaluation.

    Raises:
        ValueError / TypeError: if ``yaw_value`` cannot be converted to float.
    """
    angle = float(yaw_value)
    return float(np.deg2rad(angle)) if coord_system == 'ego' else angle
|
|
|
|
|
|
class GroundTruthParser:
    """Parse ground truth annotation files.

    Two on-disk formats are handled: whitespace-separated TXT lines
    (``parse_line``) and JSON objects (``parse_gt_json_entry``).  Both paths
    drop boxes whose width or height is below ``min_box_size``.
    """

    # Class ID to name mapping — imported from eval_tools/class_config.py
    CLASS_NAMES = CLASS_NAMES

    # 3D classes — imported from eval_tools/class_config.py
    CLASSES_3D = CLASSES_3D
    VALID_COORD_SYSTEMS = {"camera", "ego"}

    def __init__(self, min_box_size=8, coord_system='camera'):
        """
        Initialize ground truth parser.

        Args:
            min_box_size: float, minimum bbox width or height in pixels.
                         Boxes smaller than this will be filtered out.
                         Default is 8. Should be calculated based on ROI config:
                         - ROI0 (1920->704): 8 * 1920 / 704 ≈ 21.8
                         - ROI1 (704->704): 8 * 704 / 704 = 8
            coord_system: str, one of ``VALID_COORD_SYSTEMS``. Selects which
                         JSON 3D fields are read (``3d_ori`` vs ``3d_ori_ego``).

        Raises:
            ValueError: if ``coord_system`` is not a supported name.
        """
        self.min_box_size = min_box_size
        if coord_system not in self.VALID_COORD_SYSTEMS:
            raise ValueError(f"Unsupported coord_system: {coord_system}")
        self.coord_system = coord_system

    # ------------------------------------------------------------------
    # Small private helpers
    # ------------------------------------------------------------------

    def _ori_key(self):
        """Return the JSON key holding the 3D box for the active coord system."""
        return '3d_ori_ego' if self.coord_system == 'ego' else '3d_ori'

    @staticmethod
    def _has_valid_3d(values):
        """Return True when ``values`` looks like a usable 3D record.

        A record is usable when it has at least 7 items and its first item
        (x3d) is not the ``-1`` sentinel.  Missing, non-sized or non-numeric
        input yields False.
        """
        try:
            return len(values) >= 7 and float(values[0]) != -1
        except (ValueError, TypeError):
            return False

    @staticmethod
    def _json_key_order(key):
        """Sort key for JSON object keys: numeric keys first, in numeric order.

        Returning a tuple keeps every comparison type-consistent, so a file
        that mixes index keys ("0", "1", ...) with non-numeric keys no longer
        raises ``TypeError`` during sorting.  ``isdecimal`` is used because it
        only accepts strings that ``int()`` can actually convert.
        """
        text = str(key)
        if text.isdecimal():
            return (0, int(text), "")
        return (1, 0, text)

    # ------------------------------------------------------------------
    # TXT format
    # ------------------------------------------------------------------

    def parse_line(self, line, img_width, img_height):
        """
        Parse a single line of ground truth annotation.

        Args:
            line: str, annotation line (whitespace-separated numbers)
            img_width: int, image width
            img_height: int, image height

        Returns:
            None when the line has fewer than 6 values or the box is smaller
            than ``min_box_size``; otherwise a dict with keys:
                - label: int
                - bbox_2d: [x1, y1, x2, y2] in pixel coordinates
                - has_3d: bool
                - 3d_info: dict or None

        Raises:
            ValueError: if any token on the line is not a number.
        """
        values = [float(token) for token in line.strip().split()]
        if len(values) < 6:
            return None

        label = int(values[0])

        # Normalized center/size -> pixel corner coordinates.
        cx_px = values[1] * img_width
        cy_px = values[2] * img_height
        w_px = values[3] * img_width
        h_px = values[4] * img_height

        x1 = cx_px - w_px / 2
        y1 = cy_px - h_px / 2
        x2 = cx_px + w_px / 2
        y2 = cy_px + h_px / 2

        # Filter out small objects based on configured minimum size
        if (x2 - x1) < self.min_box_size or (y2 - y1) < self.min_box_size:
            return None

        has_3d = self.is_3d_annotated(values)
        return {
            'label': label,
            'bbox_2d': [x1, y1, x2, y2],
            'has_3d': has_3d,
            '3d_info': self._parse_3d_info(values, label) if has_3d else None,
        }

    def is_3d_annotated(self, values):
        """Check if the annotation contains 3D information.

        A TXT record counts as 3D-annotated when it has at least 18 values.
        The 6-value form whose last value is -1 (2D-only marker) is shorter
        than that and is therefore reported as not annotated.
        """
        return len(values) >= 18

    def _parse_3d_info(self, values, label):
        """Parse 3D information from TXT annotation values.

        Reads center from values[5:8], dimensions from values[8:11] and the
        rotation from values[11].  The rotation is stored exactly as read.
        Face data is attached only for labels in FACE_3D_CLASSES and only when
        the record has exactly 50 values.
        """
        info = {
            'center': [values[5], values[6], values[7]],       # x3d_ori, y3d_ori, z3d_ori
            'dimensions': [values[8], values[9], values[10]],  # l3d, h3d, w3d
            'rotation': values[11],                            # rot_y
            'faces': None,
        }

        # For face_3d_classes, parse face information (8 values per face)
        if label in FACE_3D_CLASSES and len(values) == 50:
            info['faces'] = {
                'front': values[18:26],  # x3d, y3d, z3d, alpha, xc, yc, score, is_occ
                'back': values[26:34],
                'left': values[34:42],
                'right': values[42:50],
            }

        return info

    def get_class_name(self, label_id):
        """Get class name from label ID; returns "unknown" for unmapped IDs."""
        return self.CLASS_NAMES.get(label_id, "unknown")

    # ------------------------------------------------------------------
    # JSON format
    # ------------------------------------------------------------------

    def _should_filter_negative_id_gt(self, entry, label):
        """
        Filter JSON GT objects that should not participate in 3D-class evaluation.

        Rule:
        - Only applies to 3D classes: vehicle, pedestrian, bicycle, rider
        - If GT carries an `id` field and id < 0, drop this GT entirely

        Returns:
            bool, True when the entry must be dropped.  A missing or
            non-integer ``id`` never causes filtering.
        """
        if label not in self.CLASSES_3D:
            return False

        object_id = entry.get('id')
        if object_id is None:
            return False

        try:
            return int(object_id) < 0
        except (ValueError, TypeError):
            return False

    def parse_gt_json_entry(self, entry, img_width, img_height):
        """
        Parse a single entry from a GT JSON file.

        GT JSON entry format:
        {
            "type": "0",           # class id string
            "type_name": "vehicle",
            "roi_id": "1",
            "box2d": ["x1","y1","x2","y2"],   # absolute pixel coords
            "3d_ori": ["x3d","y3d","z3d","l","h","w","rot_y","xc","yc",...,"alpha","flag"],
            "3d_front": ["x3d","y3d","z3d","alpha","xc","yc","score","is_visible"],
            "3d_back": [...],
            "3d_left": [...],
            "3d_right": [...]
        }

        Args:
            entry: dict, single GT JSON entry
            img_width: int, image width (unused for JSON, bbox already in pixels)
            img_height: int, image height (unused for JSON, bbox already in pixels)

        Returns:
            dict with keys label / bbox_2d / has_3d / 3d_info / id, or None
            when the entry has no usable type, is filtered by its negative id,
            has no 4-value box2d, or its box is below ``min_box_size``.

        Raises:
            ValueError: in ego mode, when the entry carries a valid camera
                ``3d_ori`` record but no ``3d_ori_ego`` field; also when a
                box2d coordinate is not numeric.
        """
        raw_type = entry.get('type')
        if raw_type is None or str(raw_type).strip().lower() in ('', 'none', 'null'):
            return None
        try:
            label = int(raw_type)
        except (ValueError, TypeError):
            return None

        if self._should_filter_negative_id_gt(entry, label):
            return None

        # `or []` also covers an explicit JSON null, which would otherwise
        # make len() raise TypeError.
        box2d = entry.get('box2d') or []
        if len(box2d) < 4:
            return None
        x1, y1, x2, y2 = (float(v) for v in box2d[:4])

        # Filter small objects
        if (x2 - x1) < self.min_box_size or (y2 - y1) < self.min_box_size:
            return None

        # Ego evaluation must not silently fall back to camera-frame data:
        # a GT that has camera 3D but no ego 3D is a data-preparation error.
        ori_key = self._ori_key()
        if (
            self.coord_system == 'ego'
            and ori_key not in entry
            and '3d_ori' in entry
            and self._has_valid_3d(entry['3d_ori'])
        ):
            raise ValueError(
                "GT JSON is missing ego-coordinate fields (3d_ori_ego). "
                "Please regenerate ground truth with ego fields before running ego-coordinate evaluation."
            )

        # x3d is the first item of the record; -1 indicates no 3D annotation
        has_3d = self._has_valid_3d(entry.get(ori_key))
        d3_info = self._parse_3d_info_from_json(entry, label) if has_3d else None

        return {
            'label': label,
            'bbox_2d': [x1, y1, x2, y2],
            'has_3d': has_3d,
            '3d_info': d3_info,
            'id': entry.get('id'),
        }

    def _parse_3d_info_from_json(self, entry, label):
        """Parse 3D information from a JSON GT entry.

        Reads the 3D record selected by the active coord system and converts
        its yaw with ``yaw_to_radians``.  Face records are attached only for
        labels in FACE_3D_CLASSES and only when all four face keys (with the
        ``_ego`` suffix in ego mode) are present; each face keeps at most its
        first 8 values.
        """
        d3_ori = entry[self._ori_key()]
        info = {
            'center': [float(d3_ori[0]), float(d3_ori[1]), float(d3_ori[2])],      # x3d, y3d, z3d
            'dimensions': [float(d3_ori[3]), float(d3_ori[4]), float(d3_ori[5])],  # l, h, w
            'rotation': yaw_to_radians(d3_ori[6], self.coord_system),              # rot_y
            'faces': None,
            'coord_system': self.coord_system,
        }

        # Parse face information for face_3d_classes (vehicle, bus, truck, tanker, unknown)
        face_keys = {'front': '3d_front', 'back': '3d_back', 'left': '3d_left', 'right': '3d_right'}
        if self.coord_system == 'ego':
            face_keys = {name: f"{key}_ego" for name, key in face_keys.items()}

        if label in FACE_3D_CLASSES and all(k in entry for k in face_keys.values()):
            # Slicing to 8 keeps shorter records intact and trims longer ones.
            info['faces'] = {
                face_name: [float(v) for v in entry[json_key][:8]]
                for face_name, json_key in face_keys.items()
            }

        return info

    def parse_gt_json_file(self, file_path, img_width, img_height):
        """
        Parse an entire GT JSON file.

        The JSON root is normally a dict keyed by object index ("0", "1", ...);
        a list root is also accepted.  A missing file, invalid JSON, a null
        root or any other root type yields an empty list (with a printed
        warning where applicable).

        Returns:
            list of parsed annotation dicts
        """
        try:
            with open(file_path, 'r') as f:
                data = json.load(f)
        except FileNotFoundError:
            print(f"Warning: File not found: {file_path}")
            return []
        except json.JSONDecodeError as e:
            print(f"Warning: JSON decode error in {file_path}: {e}")
            return []

        if data is None:
            return []

        if isinstance(data, dict):
            # Numeric keys in numeric order first, any other keys afterwards.
            items = sorted(data.items(), key=lambda item: self._json_key_order(item[0]))
        elif isinstance(data, list):
            items = list(enumerate(data))
        else:
            print(f"Warning: unsupported GT JSON root type {type(data).__name__} in {file_path}")
            return []

        annotations = []
        for key, entry in items:
            if not isinstance(entry, dict):
                print(f"Warning: skipping non-dict GT entry at key={key!r} in {file_path}")
                continue
            # Checked here as well (parse_gt_json_entry repeats it) so the
            # warning can name the offending key and file.
            raw_type = entry.get('type')
            if raw_type is None or str(raw_type).strip().lower() in ('', 'none', 'null'):
                print(f"Warning: skipping entry with invalid type={raw_type!r} "
                      f"(key={key!r}) in {file_path}")
                continue
            parsed = self.parse_gt_json_entry(entry, img_width, img_height)
            if parsed is not None:
                annotations.append(parsed)
        return annotations

    def parse_file(self, file_path, img_width, img_height):
        """
        Parse entire annotation file. Dispatches to JSON or TXT parser based on extension.

        Args:
            file_path: str, path to annotation file (.txt or .json)
            img_width: int, image width
            img_height: int, image height

        Returns:
            list of parsed annotations; empty when the file does not exist.
        """
        if str(file_path).endswith('.json'):
            return self.parse_gt_json_file(file_path, img_width, img_height)

        annotations = []
        try:
            with open(file_path, 'r') as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    parsed = self.parse_line(line, img_width, img_height)
                    if parsed is not None:
                        annotations.append(parsed)
        except FileNotFoundError:
            print(f"Warning: File not found: {file_path}")
            return []

        return annotations
|
|
|
|
|
|
class DetectionParser:
    """Parse detection result files.

    Two on-disk formats are handled: whitespace-separated TXT lines
    (``parse_line``) and JSON objects (``parse_det_json_entry``).
    """

    # Class name to ID mapping — imported from eval_tools/class_config.py
    CLASS_NAME_TO_ID = CLASS_NAME_TO_ID

    # 3D classes — imported from eval_tools/class_config.py
    CLASSES_3D = CLASSES_3D
    VALID_COORD_SYSTEMS = {"camera", "ego"}

    def __init__(self, min_box_size=0, coord_system='camera'):
        """
        Initialize detection parser.

        Args:
            min_box_size: float, minimum bbox width or height in pixels.
                         Detections smaller than this will be filtered out.
                         Should match the GT min_box_size to ensure
                         symmetric filtering. Default is 0 (no filtering).
            coord_system: str, one of ``VALID_COORD_SYSTEMS``. Selects which
                         JSON 3D field is read (``xyzlhwyaw`` vs ``xyzlhwyaw_ego``).

        Raises:
            ValueError: if ``coord_system`` is not a supported name.
        """
        self.min_box_size = min_box_size
        if coord_system not in self.VALID_COORD_SYSTEMS:
            raise ValueError(f"Unsupported coord_system: {coord_system}")
        self.coord_system = coord_system

    # ------------------------------------------------------------------
    # Small private helpers
    # ------------------------------------------------------------------

    def _is_too_small(self, x1, y1, x2, y2):
        """Return True when filtering is enabled and the box is below the minimum size."""
        if self.min_box_size <= 0:
            return False
        return (x2 - x1) < self.min_box_size or (y2 - y1) < self.min_box_size

    @staticmethod
    def _normalize_face_type(face_type):
        """Map the 'rear'/'tail' aliases (case-insensitive) to 'back'; keep anything else."""
        return 'back' if face_type.lower() in ('rear', 'tail') else face_type

    @staticmethod
    def _has_3d_values(values):
        """Return True when ``values`` has >= 7 items and does not start with '-1'.

        NOTE(review): the sentinel is compared as a string, so a value such as
        -1.0 / "-1.0" is NOT treated as "no 3D" here, whereas the GT parser
        compares numerically — confirm which behaviour the exporter needs.
        """
        try:
            return len(values) >= 7 and str(values[0]) != '-1'
        except TypeError:
            return False

    @staticmethod
    def _json_key_order(key):
        """Sort key for JSON object keys: numeric keys first, in numeric order.

        Returning a tuple keeps every comparison type-consistent, so a file
        that mixes index keys ("0", "1", ...) with non-numeric keys does not
        raise ``TypeError`` during sorting.  ``isdecimal`` is used because it
        only accepts strings that ``int()`` can actually convert.
        """
        text = str(key)
        if text.isdecimal():
            return (0, int(text), "")
        return (1, 0, text)

    # ------------------------------------------------------------------
    # TXT format
    # ------------------------------------------------------------------

    def parse_line(self, line):
        """
        Parse a single line of detection result.

        Args:
            line: str, detection line

        Returns:
            None when the line has fewer than 6 fields or the box is filtered
            by ``min_box_size``; otherwise a dict with keys:
                - label: int (-1 when the class name is not mapped)
                - confidence: float
                - bbox_2d: [x1, y1, x2, y2] in pixel coordinates
                - 3d_info: dict or None

        Raises:
            ValueError: if a numeric field cannot be parsed, or if the line
                carries 3D data for a 3D class while the parser is in ego mode.
        """
        parts = line.strip().split()
        if len(parts) < 6:
            return None

        label = self.map_class_name(parts[0])
        confidence = float(parts[1])

        # 2D bbox
        x1, y1, x2, y2 = (float(v) for v in parts[2:6])

        # Filter small detections
        if self._is_too_small(x1, y1, x2, y2):
            return None

        result = {
            'label': label,
            'confidence': confidence,
            'bbox_2d': [x1, y1, x2, y2],
            '3d_info': None,
        }

        # Check if this is a 3D class and has 3D info
        if label in self.CLASSES_3D and len(parts) >= 15:
            result['3d_info'] = self._parse_3d_info(parts)

        return result

    def _parse_3d_info(self, parts):
        """Parse 3D information from TXT detection fields.

        Raises:
            ValueError: when the parser is configured for ego coordinates,
                which the TXT format does not carry.
        """
        if self.coord_system == 'ego':
            raise ValueError("TXT detection format does not support ego-coordinate 3D evaluation.")

        # Format: label conf x1 y1 x2 y2 coord_sys x3d y3d z3d l3d h3d w3d rot_y face_type
        # Index:  0     1    2  3  4  5  6         7   8   9   10  11  12  13    14
        face_type = parts[14] if len(parts) > 14 else 'whole'

        return {
            'center': [float(parts[7]), float(parts[8]), float(parts[9])],         # x3d, y3d, z3d
            'dimensions': [float(parts[10]), float(parts[11]), float(parts[12])],  # l3d, h3d, w3d
            'rotation': float(parts[13]),                                          # rot_y
            # Normalize rear/tail to back for consistency
            'face_type': self._normalize_face_type(face_type),
            'coord_system': 'camera',
        }

    def map_class_name(self, name_str):
        """Map class name string to class ID (case-insensitive); -1 when unknown."""
        return self.CLASS_NAME_TO_ID.get(name_str.lower(), -1)

    # ------------------------------------------------------------------
    # JSON format
    # ------------------------------------------------------------------

    def parse_det_json_entry(self, entry):
        """
        Parse a single entry from a detection JSON file.

        Det JSON entry format:
        {
            "type": "0",           # class id string
            "type_name": "vehicle",
            "score": "0.93",
            "roi_id": "0",
            "box2d": ["x1","y1","x2","y2"],   # absolute pixel coords
            "xyzlhwyaw": ["x3d","y3d","z3d","l","h","w","rot_y"],
            "face_cls": "front",   # front/tail/rear/left/right/whole/none
            "cut_cls": "0",
            "cut_cls_name": "nocut"
        }

        The label comes from ``type`` when it is an integer, otherwise from
        ``type_name``.  A missing or unparsable ``score`` becomes 0.0.

        Returns:
            dict with keys label / confidence / bbox_2d / 3d_info / id /
            roi_id, or None when box2d has fewer than 4 values or the box is
            filtered by ``min_box_size``.

        Raises:
            ValueError: in ego mode, when a 3D-class entry carries camera-frame
                ``xyzlhwyaw`` data but no usable ``xyzlhwyaw_ego``; also when
                a box or 3D value is not numeric.
        """
        try:
            label = int(entry['type'])
        except (KeyError, ValueError, TypeError):
            # Fall back to the name; tolerate an explicit null / non-string.
            label = self.map_class_name(str(entry.get('type_name') or ''))

        try:
            confidence = float(entry['score'])
        except (KeyError, ValueError, TypeError):
            confidence = 0.0

        # `or []` also covers an explicit JSON null.
        box2d = entry.get('box2d') or []
        if len(box2d) < 4:
            return None
        x1, y1, x2, y2 = (float(v) for v in box2d[:4])

        # Filter small detections
        if self._is_too_small(x1, y1, x2, y2):
            return None

        result = {
            'label': label,
            'confidence': confidence,
            'bbox_2d': [x1, y1, x2, y2],
            '3d_info': None,
            'id': entry.get('track_id', entry.get('id')),
            'roi_id': self._normalize_roi_id(entry.get('roi_id')),
        }

        # Parse 3D info for 3D classes
        if label not in self.CLASSES_3D:
            return result

        xyz_key = 'xyzlhwyaw_ego' if self.coord_system == 'ego' else 'xyzlhwyaw'
        xyzlhwyaw = entry.get(xyz_key) or []

        # Ego evaluation must not silently fall back to camera-frame data.
        if self.coord_system == 'ego' and len(xyzlhwyaw) < 7:
            if self._has_3d_values(entry.get('xyzlhwyaw') or []):
                raise ValueError(
                    "Detection JSON is missing ego-coordinate fields (xyzlhwyaw_ego). "
                    "Please export ego-coordinate detection results before running ego-coordinate evaluation."
                )

        if self._has_3d_values(xyzlhwyaw):
            face_type = str(entry.get('face_cls') or 'whole')
            result['3d_info'] = {
                'center': [float(xyzlhwyaw[0]), float(xyzlhwyaw[1]), float(xyzlhwyaw[2])],
                'dimensions': [float(xyzlhwyaw[3]), float(xyzlhwyaw[4]), float(xyzlhwyaw[5])],
                'rotation': yaw_to_radians(xyzlhwyaw[6], self.coord_system),
                'face_type': self._normalize_face_type(face_type),
                'coord_system': self.coord_system,
            }

        return result

    @staticmethod
    def _normalize_roi_id(roi_id):
        """Normalize ROI identifiers like 'roi0'/'0' to plain numeric strings.

        Returns None for a missing id or when nothing is left after stripping
        whitespace and a leading 'roi' prefix.
        """
        if roi_id is None:
            return None

        roi_id_str = str(roi_id).strip().lower()
        if roi_id_str.startswith('roi'):
            roi_id_str = roi_id_str[3:]
        return roi_id_str or None

    def parse_det_json_file(self, file_path):
        """
        Parse an entire detection JSON file.

        The JSON root is normally a dict keyed by object index ("0", "1", ...);
        a list root is also accepted.  A missing file, invalid JSON, a null
        root or any other root type yields an empty list (with a printed
        warning where applicable).  Non-dict entries are skipped with a
        warning.

        Returns:
            list of parsed detection dicts
        """
        try:
            with open(file_path, 'r') as f:
                data = json.load(f)
        except FileNotFoundError:
            print(f"Warning: File not found: {file_path}")
            return []
        except json.JSONDecodeError as e:
            print(f"Warning: JSON decode error in {file_path}: {e}")
            return []

        if data is None:
            return []

        if isinstance(data, dict):
            # Numeric keys in numeric order first, any other keys afterwards.
            items = sorted(data.items(), key=lambda item: self._json_key_order(item[0]))
        elif isinstance(data, list):
            items = list(enumerate(data))
        else:
            print(f"Warning: unsupported detection JSON root type {type(data).__name__} in {file_path}")
            return []

        detections = []
        for key, entry in items:
            if not isinstance(entry, dict):
                print(f"Warning: skipping non-dict detection entry at key={key!r} in {file_path}")
                continue
            parsed = self.parse_det_json_entry(entry)
            if parsed is not None:
                detections.append(parsed)
        return detections

    def parse_file(self, file_path):
        """
        Parse entire detection file. Dispatches to JSON or TXT parser based on extension.

        Args:
            file_path: str, path to detection file (.txt or .json)

        Returns:
            list of parsed detections; empty when the file does not exist.
        """
        if str(file_path).endswith('.json'):
            return self.parse_det_json_file(file_path)

        detections = []
        try:
            with open(file_path, 'r') as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    parsed = self.parse_line(line)
                    if parsed is not None:
                        detections.append(parsed)
        except FileNotFoundError:
            print(f"Warning: File not found: {file_path}")
            return []

        return detections
|