Files
yolov26_3d/tools/data_mining/convert_txt_to_json.py
2026-06-24 09:35:46 +08:00

317 lines
9.8 KiB
Python
Executable File

#!/usr/bin/env python3
"""
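Convert ground truth txt format to JSON format.

Each non-empty input line is "<class_name> <values...>" and must have 6, 7, 19
or 51 whitespace-separated columns.

Usage:
    python convert_txt_to_json.py <input_txt_file> <output_json_file> [--image-width WIDTH] [--image-height HEIGHT] [--data-config YAML]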
"""
import argparse
import json
from pathlib import Path
# from ultralytics.utils import YAML
# DEFAULT_DATA_CONFIG = Path(__file__).resolve().parents[2] / 'ultralytics' / 'cfg' / 'datasets' / 'mono3d_ground.yaml'
# Class name -> numeric class id. Several names deliberately share one id
# (e.g. every truck-like name maps to 6), so the mapping is many-to-one.
DEFAULT_CLASS_MAP: dict[str, int] = {
    # Passenger / light vehicles
    'car': 0,
    'suv': 1,
    'pickup': 2,
    'medium_car': 3,
    'van': 4,
    'bus': 5,
    # Truck-like names all collapse onto id 6
    'truck': 6,
    'tanker': 6,
    'large_truck': 6,
    'construction_vehicle': 6,
    'special_vehicle': 7,
    'unknown': 8,
    'pedestrian': 9,
    # Riders share id 10; their parked vehicles share id 11
    'bicyclist': 10,
    'motorcyclist': 10,
    'bicycle': 11,
    'motorcycle': 11,
    'tricycle': 12,
    'tricyclist': 12,
    'traffic_sign': 13,
    # Sub-object classes
    'wheel': 14,
    'plate': 15,
    'face': 16,
}

# Cut-in class id -> name. Not referenced by any function visible in this file.
cutcls_map: dict[int, str] = {
    0: 'nocut',
    1: 'cutin',
    2: 'cutout',
}

# Placeholder payloads written when a label line carries no 3D information.
# 13 entries mirror the "3d_ori" list; 8 entries mirror one face block.
EMPTY_3D_ORI: list[str] = ["-1.0"] * 13
EMPTY_3D_FACE: list[str] = ["-1.0"] * 8
def load_class_map(data_config_path: str | Path | None = None) -> dict[str, int]:
    """Return a fresh copy of the built-in class-name -> class-id mapping.

    ``data_config_path`` is accepted so callers can keep passing a dataset
    YAML path, but it is not read at the moment: the YAML lookup below is
    disabled, so every call yields an independent copy of
    ``DEFAULT_CLASS_MAP`` that the caller may mutate freely.
    """
    # Disabled YAML lookup, kept for reference:
    # config_path = Path(data_config_path).expanduser().resolve() if data_config_path else DEFAULT_DATA_CONFIG
    # if config_path.exists():
    #     class_map = YAML.load(config_path).get('class_map') or {}
    #     if class_map:
    #         return {str(key): int(value) for key, value in class_map.items()}
    return dict(DEFAULT_CLASS_MAP)
def _stringify(value: float | int) -> str:
"""Convert numeric values to the string form used by evaluator JSON files."""
return str(value)
def _denormalize_box(x_norm: float, y_norm: float, w_norm: float, h_norm: float, img_width: int, img_height: int) -> list[str]:
    """Turn a normalized center-format box into absolute corner strings.

    The inputs are a box center (x, y) and size (w, h) expressed as fractions
    of the image size. The result is ``[x_min, y_min, x_max, y_max]`` in
    pixels, each value converted with ``_stringify``.
    """
    # Scale the center and the size to pixels first.
    center_x = x_norm * img_width
    center_y = y_norm * img_height
    box_w = w_norm * img_width
    box_h = h_norm * img_height

    # Half extents give the offsets from the center to each edge.
    half_w = box_w / 2
    half_h = box_h / 2

    left = _stringify(center_x - half_w)
    top = _stringify(center_y - half_h)
    right = _stringify(center_x + half_w)
    bottom = _stringify(center_y + half_h)
    return [left, top, right, bottom]
def _empty_3d_result(result: dict) -> dict:
    """Fill every 3D field of ``result`` with placeholders and return it.

    ``result`` is modified in place: "3d_ori" receives its own copy of
    ``EMPTY_3D_ORI`` and the four face fields are filled by
    ``_empty_3d_faces``.
    """
    # list(...) makes a private copy so entries never share the module constant.
    result["3d_ori"] = list(EMPTY_3D_ORI)
    return _empty_3d_faces(result)
def _empty_3d_faces(result: dict) -> dict:
    """Fill the four face fields of ``result`` with placeholders and return it.

    ``result`` is modified in place; each face key receives its own copy of
    ``EMPTY_3D_FACE`` so the lists are independent of one another.
    """
    for face_key in ("3d_front", "3d_back", "3d_left", "3d_right"):
        result[face_key] = list(EMPTY_3D_FACE)
    return result
def _build_face(face_values: list[float], img_width: int, img_height: int) -> list[str]:
    """Convert one 8-value face block to its evaluator JSON string list.

    Values at index 4 and 5 are multiplied by the image width and height
    respectively; the other six values are passed through unchanged. Every
    value is converted with ``_stringify``.
    """
    # Only these two positions are scaled to pixels.
    pixel_scale = {4: img_width, 5: img_height}

    encoded = []
    for position in range(8):
        component = face_values[position]
        if position in pixel_scale:
            component = component * pixel_scale[position]
        encoded.append(_stringify(component))
    return encoded
def _extract_occlusion(parts: list[float], ncols: int) -> float:
"""Extract the occlusion attribute from a parsed txt line."""
if ncols in {6, 19, 51}:
return int(parts[-1])
if ncols == 7:
return int(parts[-2])
raise ValueError(f"Unsupported label column count {ncols} for occlusion extraction")
# Total column counts (class name included) that parse_txt_line understands.
_SUPPORTED_COLUMN_COUNTS = frozenset({6, 7, 19, 51})


def _build_3d_ori(parts: list[float], img_width: int, img_height: int) -> list[str]:
    """Convert the 13 values in ``parts[4:17]`` to the "3d_ori" JSON list.

    The 19- and 51-column layouts store this block at the same offsets, so
    both share this helper. The four values at ``parts[11:15]`` are scaled by
    the image size (x by width, y by height); the others are passed through.
    """
    x3d_ori, y3d_ori, z3d_ori = parts[4:7]
    l3d, h3d, w3d = parts[7:10]
    rot_y = parts[10]
    xc_ori, yc_ori = parts[11:13]
    xc_ori_d, yc_ori_d = parts[13:15]
    alpha_ori = parts[15]
    flag = parts[16]
    return [
        _stringify(x3d_ori),
        _stringify(y3d_ori),
        _stringify(z3d_ori),
        _stringify(l3d),
        _stringify(h3d),
        _stringify(w3d),
        _stringify(rot_y),
        _stringify(xc_ori * img_width),
        _stringify(yc_ori * img_height),
        _stringify(xc_ori_d * img_width),
        _stringify(yc_ori_d * img_height),
        _stringify(alpha_ori),
        # Whole-number flags are written as "1" rather than "1.0".
        _stringify(int(flag) if float(flag).is_integer() else flag),
    ]


def parse_txt_line(line, class_map, img_width=1920, img_height=1080):
    """
    Parse a single line from the txt file and convert to JSON object structure.

    A line is "<class_name> <x> <y> <w> <h> ..." where the box is a normalized
    center-format box. Supported layouts by total column count:
      * 6 / 7: 2D box only; all 3D fields are filled with placeholders.
      * 19: 2D box plus the 13-value 3D block; faces are placeholders.
      * 51: 2D box, 3D block and four 8-value face blocks.

    Args:
        line: Single line from txt file
        class_map: Mapping from class name to numeric class id
        img_width: Image width for denormalization
        img_height: Image height for denormalization
    Returns:
        Dictionary with parsed data in JSON format, or None when the line has
        fewer than two tokens, an unknown class name, a non-numeric value, or
        fewer than four numeric values.
    Raises:
        ValueError: If the line is otherwise well-formed but its column count
            is not one of the supported layouts.
    """
    raw = line.strip().split()
    if len(raw) < 2:
        return None
    label_name = raw[0]
    label = class_map.get(label_name)
    if label is None:
        return None
    try:
        parts = [float(token) for token in raw[1:]]
    except ValueError:
        return None
    if len(parts) < 4:
        return None

    # Reject unknown layouts up front so the error names the offending line
    # instead of surfacing from the occlusion helper without context.
    ncols = len(raw)
    if ncols not in _SUPPORTED_COLUMN_COUNTS:
        raise ValueError(f"Unsupported label column count {ncols} for line: {line}")

    x_norm, y_norm, w_norm, h_norm = parts[0:4]
    result = {
        "type": str(label),
        "type_name": label_name,
        "roi_id": "1",
        "occlusion": _stringify(_extract_occlusion(parts, ncols)),
        "box2d": _denormalize_box(x_norm, y_norm, w_norm, h_norm, img_width, img_height),
    }

    if ncols in {6, 7}:
        return _empty_3d_result(result)

    # Both remaining layouts (19 and 51 columns) carry the 3D block.
    result["3d_ori"] = _build_3d_ori(parts, img_width, img_height)
    if ncols == 19:
        return _empty_3d_faces(result)

    # 51 columns: four consecutive 8-value face blocks follow the 3D block.
    result["3d_front"] = _build_face(parts[17:25], img_width, img_height)
    result["3d_back"] = _build_face(parts[25:33], img_width, img_height)
    result["3d_left"] = _build_face(parts[33:41], img_width, img_height)
    result["3d_right"] = _build_face(parts[41:49], img_width, img_height)
    return result
def _resolve_convert_args(class_map_or_img_width, img_width, img_height, data_config_path):
"""Support both legacy convert_txt_to_json(txt, json, w, h) and current class_map-based calls."""
if isinstance(class_map_or_img_width, dict):
return class_map_or_img_width, int(img_width), int(img_height)
if isinstance(class_map_or_img_width, (int, float)) and not isinstance(class_map_or_img_width, bool):
return load_class_map(data_config_path), int(class_map_or_img_width), int(img_width)
if class_map_or_img_width is None:
return load_class_map(data_config_path), int(img_width), int(img_height)
raise TypeError("class_map_or_img_width must be a class_map dict, image width, or None")
def convert_txt_to_json(
    txt_file,
    json_file,
    class_map_or_img_width=None,
    img_width=1920,
    img_height=1080,
    data_config_path: str | Path | None = None,
):
    """
    Convert txt ground truth file to JSON format.

    Each parsed object is stored under the string form of its zero-based line
    index in the input file. Blank lines and lines that ``parse_txt_line``
    rejects are skipped, so the keys may have gaps.

    Args:
        txt_file: Path to input txt file
        json_file: Path to output JSON file; missing parent directories are
            created
        class_map_or_img_width: A class_map dict, or None to load the default
            class map. For the legacy call form
            ``convert_txt_to_json(txt, json, width, height)`` this slot holds
            the image width and ``img_width`` holds the height.
        img_width: Image width for denormalization
        img_height: Image height for denormalization
        data_config_path: Dataset YAML path forwarded to ``load_class_map``
    Raises:
        FileNotFoundError: If ``txt_file`` does not exist.
        TypeError: If ``class_map_or_img_width`` has an unsupported type.
        ValueError: If a line has an unsupported column count.
    """
    txt_path = Path(txt_file)
    json_path = Path(json_file)
    class_map, img_width, img_height = _resolve_convert_args(
        class_map_or_img_width,
        img_width,
        img_height,
        data_config_path,
    )
    if not txt_path.exists():
        raise FileNotFoundError(f"Input file not found: {txt_file}")

    # Stream the file line by line. The encoding is explicit so the result
    # does not depend on the platform's locale.
    json_data = {}
    with open(txt_path, 'r', encoding='utf-8') as f:
        for idx, line in enumerate(f):
            line = line.strip()
            if not line:  # Skip empty lines
                continue
            obj_data = parse_txt_line(line, class_map, img_width, img_height)
            if obj_data:
                json_data[str(idx)] = obj_data

    # All parsing is done before the output is touched, so a bad input line
    # never leaves a truncated JSON file behind.
    json_path.parent.mkdir(parents=True, exist_ok=True)
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(json_data, f, indent=4)
    print(f"Converted {len(json_data)} objects from {txt_file} to {json_file}")
    print(f"Image dimensions used: {img_width}x{img_height}")
def main(argv=None):
    """Command-line entry point: parse arguments and run the conversion.

    Args:
        argv: Argument list to parse. None (the default) makes argparse read
            ``sys.argv[1:]``, so the script behaves as before when run
            directly.
    """
    parser = argparse.ArgumentParser(
        description='Convert ground truth txt format to JSON format'
    )
    parser.add_argument('input_txt', help='Input txt file path')
    parser.add_argument('output_json', help='Output JSON file path')
    parser.add_argument('--image-width', type=int, default=1920,
                        help='Image width for denormalization (default: 1920)')
    parser.add_argument('--image-height', type=int, default=1080,
                        help='Image height for denormalization (default: 1080)')
    parser.add_argument(
        '--data-config',
        type=str,
        default='',  # str(DEFAULT_DATA_CONFIG),
        # The YAML lookup in load_class_map is disabled, so the help text must
        # not promise a default file or any effect.
        help='Dataset YAML path for class_map (currently ignored: the built-in '
             'default class map is always used)',
    )
    args = parser.parse_args(argv)
    convert_txt_to_json(
        args.input_txt,
        args.output_json,
        class_map_or_img_width=None,
        img_width=args.image_width,
        img_height=args.image_height,
        data_config_path=args.data_config,
    )
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()