#!/usr/bin/env python3
"""
Convert ground truth txt format to JSON format.

Usage:
    python convert_txt_to_json.py <input_txt_file> <output_json_file> [--image-width WIDTH] [--image-height HEIGHT] [--data-config PATH]
"""
|
|
|
|
import argparse
|
|
import json
|
|
from pathlib import Path
|
|
|
|
# from ultralytics.utils import YAML
|
|
|
|
# DEFAULT_DATA_CONFIG = Path(__file__).resolve().parents[2] / 'ultralytics' / 'cfg' / 'datasets' / 'mono3d_ground.yaml'
|
|
# Built-in mapping from the class name found at the start of each label line
# to its integer class id. Several names share one id (for example "truck",
# "tanker", "large_truck" and "construction_vehicle" all map to 6).
DEFAULT_CLASS_MAP = {
    "car": 0,
    "suv": 1,
    "pickup": 2,
    "medium_car": 3,
    "van": 4,
    "bus": 5,
    "truck": 6,
    "tanker": 6,
    "large_truck": 6,
    "construction_vehicle": 6,
    "special_vehicle": 7,
    "unknown": 8,
    "pedestrian": 9,
    "bicyclist": 10,
    "motorcyclist": 10,
    "bicycle": 11,
    "motorcycle": 11,
    "tricycle": 12,
    "tricyclist": 12,
    "traffic_sign": 13,
    "wheel": 14,
    "plate": 15,
    "face": 16,
}
|
|
|
|
# Integer cut-class id -> name. Not referenced elsewhere in the visible part
# of this file; kept as a module-level lookup table.
cutcls_map = dict(enumerate(("nocut", "cutin", "cutout")))
|
|
|
|
# Placeholder payloads used when a label line carries no 3D information:
# 13 entries for the "3d_ori" field and 8 entries for each face field.
# Consumers copy these lists before storing them, so the templates stay intact.
EMPTY_3D_ORI = ["-1.0" for _ in range(13)]
EMPTY_3D_FACE = ["-1.0" for _ in range(8)]
|
|
|
|
|
|
def load_class_map(data_config_path: str | Path | None = None) -> dict[str, int]:
    """Return a fresh copy of the built-in class-name -> class-id mapping.

    Loading the mapping from a dataset YAML is currently disabled (see the
    commented-out code below), so ``data_config_path`` is accepted for
    interface compatibility but has no effect.

    Args:
        data_config_path: Dataset YAML path; currently ignored.

    Returns:
        A new dict equal to ``DEFAULT_CLASS_MAP``; mutating it does not affect
        the module-level default.
    """
    # Disabled YAML lookup, kept for reference:
    # config_path = Path(data_config_path).expanduser().resolve() if data_config_path else DEFAULT_DATA_CONFIG
    # if config_path.exists():
    #     class_map = YAML.load(config_path).get('class_map') or {}
    #     if class_map:
    #         return {str(key): int(value) for key, value in class_map.items()}
    return dict(DEFAULT_CLASS_MAP)
|
|
|
|
|
|
def _stringify(value: float | int) -> str:
|
|
"""Convert numeric values to the string form used by evaluator JSON files."""
|
|
return str(value)
|
|
|
|
|
|
def _denormalize_box(x_norm: float, y_norm: float, w_norm: float, h_norm: float, img_width: int, img_height: int) -> list[str]:
    """Turn a normalized centre/size box into absolute corner coordinates.

    Args:
        x_norm, y_norm: Box centre, multiplied by the image width/height.
        w_norm, h_norm: Box size, multiplied by the image width/height.
        img_width, img_height: Image dimensions in pixels.

    Returns:
        ``[x_min, y_min, x_max, y_max]`` in pixels, each as a string.
    """
    centre_x = x_norm * img_width
    centre_y = y_norm * img_height
    half_w = w_norm * img_width / 2
    half_h = h_norm * img_height / 2

    corners = (
        centre_x - half_w,
        centre_y - half_h,
        centre_x + half_w,
        centre_y + half_h,
    )
    return [_stringify(coord) for coord in corners]
|
|
|
|
|
|
def _empty_3d_result(result: dict) -> dict:
    """Fill every 3D field of ``result`` with placeholder values.

    Sets ``"3d_ori"`` to a copy of ``EMPTY_3D_ORI`` and then delegates to
    ``_empty_3d_faces`` for the four face fields. ``result`` is modified in
    place and also returned.
    """
    result["3d_ori"] = list(EMPTY_3D_ORI)
    filled = _empty_3d_faces(result)
    return filled
|
|
|
|
|
|
def _empty_3d_faces(result: dict) -> dict:
    """Fill the four face fields of ``result`` with placeholder values.

    Each of ``"3d_front"``, ``"3d_back"``, ``"3d_left"`` and ``"3d_right"``
    receives its own copy of ``EMPTY_3D_FACE`` so the entries do not share a
    list. ``result`` is modified in place and also returned.
    """
    for face_key in ("3d_front", "3d_back", "3d_left", "3d_right"):
        result[face_key] = list(EMPTY_3D_FACE)
    return result
|
|
|
|
|
|
def _build_face(face_values: list[float], img_width: int, img_height: int) -> list[str]:
    """Serialize one 8-value face block for the evaluator JSON.

    The values at positions 4 and 5 are multiplied by the image width and
    height respectively; the other six values are passed through unchanged.

    Args:
        face_values: At least 8 numeric values for a single face.
        img_width, img_height: Image dimensions in pixels.

    Returns:
        Eight strings, in the same order as the input values.

    Raises:
        IndexError: If ``face_values`` holds fewer than 8 values.
    """
    # Only these two positions are scaled by an image dimension.
    pixel_scale = {4: img_width, 5: img_height}
    return [
        _stringify(face_values[pos] * pixel_scale[pos] if pos in pixel_scale else face_values[pos])
        for pos in range(8)
    ]
|
|
|
|
|
|
def _extract_occlusion(parts: list[float], ncols: int) -> float:
|
|
"""Extract the occlusion attribute from a parsed txt line."""
|
|
if ncols in {6, 19, 51}:
|
|
return int(parts[-1])
|
|
if ncols == 7:
|
|
return int(parts[-2])
|
|
raise ValueError(f"Unsupported label column count {ncols} for occlusion extraction")
|
|
|
|
|
|
def _build_3d_ori(parts: list[float], img_width: int, img_height: int) -> list[str]:
    """Build the 13-value ``"3d_ori"`` list from numeric columns 4-16 of a line.

    The two (x, y) pairs at indices 11-14 are multiplied by the image
    width/height; the flag at index 16 is written as an integer when it has
    no fractional part; all other values are passed through unchanged.
    """
    x3d_ori, y3d_ori, z3d_ori, l3d, h3d, w3d, rot_y = parts[4:11]
    xc_ori, yc_ori, xc_ori_d, yc_ori_d = parts[11:15]
    alpha_ori, flag = parts[15:17]
    values = (
        x3d_ori,
        y3d_ori,
        z3d_ori,
        l3d,
        h3d,
        w3d,
        rot_y,
        xc_ori * img_width,
        yc_ori * img_height,
        xc_ori_d * img_width,
        yc_ori_d * img_height,
        alpha_ori,
        int(flag) if flag.is_integer() else flag,
    )
    return [_stringify(value) for value in values]


def parse_txt_line(line, class_map, img_width=1920, img_height=1080):
    """
    Parse a single line from the txt file and convert to JSON object structure.

    A line is ``<class_name> <x> <y> <w> <h> ...`` with whitespace-separated
    columns. Supported total column counts (class name included):

    * 6 or 7: 2D box only; all 3D fields are filled with placeholders.
    * 19: 2D box plus ``"3d_ori"``; the face fields are placeholders.
    * 51: 2D box, ``"3d_ori"`` and the four 8-value face blocks.

    Args:
        line: Single line from txt file
        class_map: Mapping from class name to integer class id
        img_width: Image width for denormalization
        img_height: Image height for denormalization

    Returns:
        Dictionary with parsed data in JSON format, or ``None`` when the line
        has fewer than two tokens, names a class missing from ``class_map``,
        contains a non-numeric column, or has fewer than four numeric columns.

    Raises:
        ValueError: If the line has an unsupported number of columns.
    """
    raw = line.strip().split()
    if len(raw) < 2:
        return None

    label_name = raw[0]
    label = class_map.get(label_name)
    if label is None:
        return None

    try:
        parts = [float(token) for token in raw[1:]]
    except ValueError:
        return None
    if len(parts) < 4:
        return None

    # Validate the layout up front so the error names the offending line;
    # otherwise the occlusion lookup below would raise first with less context.
    ncols = len(raw)
    if ncols not in {6, 7, 19, 51}:
        raise ValueError(f"Unsupported label column count {ncols} for line: {line}")

    result = {
        "type": str(label),
        "type_name": label_name,
        "roi_id": "1",
        "occlusion": _stringify(_extract_occlusion(parts, ncols)),
        "box2d": _denormalize_box(*parts[:4], img_width, img_height),
    }

    if ncols in {6, 7}:
        return _empty_3d_result(result)

    # Both remaining layouts (19 and 51 columns) share the same "3d_ori" columns.
    result["3d_ori"] = _build_3d_ori(parts, img_width, img_height)
    if ncols == 19:
        return _empty_3d_faces(result)

    # 51-column layout: four consecutive 8-value face blocks follow "3d_ori".
    for face_key, start in (("3d_front", 17), ("3d_back", 25), ("3d_left", 33), ("3d_right", 41)):
        result[face_key] = _build_face(parts[start:start + 8], img_width, img_height)
    return result
|
|
|
|
|
|
def _resolve_convert_args(class_map_or_img_width, img_width, img_height, data_config_path):
|
|
"""Support both legacy convert_txt_to_json(txt, json, w, h) and current class_map-based calls."""
|
|
if isinstance(class_map_or_img_width, dict):
|
|
return class_map_or_img_width, int(img_width), int(img_height)
|
|
|
|
if isinstance(class_map_or_img_width, (int, float)) and not isinstance(class_map_or_img_width, bool):
|
|
return load_class_map(data_config_path), int(class_map_or_img_width), int(img_width)
|
|
|
|
if class_map_or_img_width is None:
|
|
return load_class_map(data_config_path), int(img_width), int(img_height)
|
|
|
|
raise TypeError("class_map_or_img_width must be a class_map dict, image width, or None")
|
|
|
|
|
|
def convert_txt_to_json(
    txt_file,
    json_file,
    class_map_or_img_width=None,
    img_width=1920,
    img_height=1080,
    data_config_path: str | Path | None = None,
):
    """
    Convert txt ground truth file to JSON format.

    Each non-empty line that parses successfully becomes one entry of the
    output object, keyed by its zero-based line index in the input file (as a
    string). Blank lines and lines the parser rejects are skipped, so keys
    may have gaps.

    Args:
        txt_file: Path to input txt file
        json_file: Path to output JSON file
        class_map_or_img_width: class_map dict, ``None`` to use the default
            class map, or (legacy positional form) the image width
        img_width: Image width for denormalization (the image height in the
            legacy positional form)
        img_height: Image height for denormalization
        data_config_path: Forwarded to ``load_class_map`` when no class_map
            dict is given

    Raises:
        FileNotFoundError: If ``txt_file`` does not exist.
    """
    source_path = Path(txt_file)
    target_path = Path(json_file)
    class_map, img_width, img_height = _resolve_convert_args(
        class_map_or_img_width, img_width, img_height, data_config_path
    )

    if not source_path.exists():
        raise FileNotFoundError(f"Input file not found: {txt_file}")

    with source_path.open('r') as src:
        raw_lines = src.readlines()

    # Keys are the original line indices, so skipped lines leave gaps.
    json_data = {
        str(idx): entry
        for idx, text in enumerate(raw_lines)
        if (stripped := text.strip())
        and (entry := parse_txt_line(stripped, class_map, img_width, img_height))
    }

    with target_path.open('w') as dst:
        json.dump(json_data, dst, indent=4)

    print(f"Converted {len(json_data)} objects from {txt_file} to {json_file}")
    print(f"Image dimensions used: {img_width}x{img_height}")
|
|
|
|
|
|
def main():
    """Command-line entry point: parse the arguments and run the conversion."""
    parser = argparse.ArgumentParser(
        description='Convert ground truth txt format to JSON format'
    )
    parser.add_argument('input_txt', help='Input txt file path')
    parser.add_argument('output_json', help='Output JSON file path')
    parser.add_argument('--image-width', type=int, default=1920,
                        help='Image width for denormalization (default: 1920)')
    parser.add_argument('--image-height', type=int, default=1080,
                        help='Image height for denormalization (default: 1080)')
    parser.add_argument(
        '--data-config',
        type=str,
        default='',  # str(DEFAULT_DATA_CONFIG),
        # The previous help text advertised a mono3d_ground.yaml default, but
        # the default is empty and load_class_map() ignores the path for now.
        help='Dataset YAML path for class_map (currently ignored: the built-in class map is always used)',
    )

    args = parser.parse_args()
    convert_txt_to_json(
        args.input_txt,
        args.output_json,
        class_map_or_img_width=None,
        img_width=args.image_width,
        img_height=args.image_height,
        data_config_path=args.data_config,
    )
|
|
|
|
|
|
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
|