"""标注格式转换器:KITTI / CVAT JSON / YOLO / COCO / HSAP quaternion 互转。""" from __future__ import annotations import json import math from pathlib import Path from typing import Any # ═══════════════════════════════════════════════════════ # Quaternion ↔ Euler 辅助 # ═══════════════════════════════════════════════════════ def quat_to_rot_y(qw: float, qx: float, qy: float, qz: float) -> float: """四元数 → 绕 Y 轴旋转角 (KITTI rot_y)。""" # rot_y = atan2(2*(qw*qy + qx*qz), 1 - 2*(qy^2 + qx^2)) sin_y = 2.0 * (qw * qy + qx * qz) cos_y = 1.0 - 2.0 * (qy * qy + qx * qx) return math.atan2(sin_y, cos_y) def rot_y_to_quat(rot_y: float) -> tuple[float, float, float, float]: """绕 Y 轴旋转角 → 四元数 (qw, qx, qy, qz)。""" half = rot_y / 2.0 return (math.cos(half), 0.0, math.sin(half), 0.0) # ═══════════════════════════════════════════════════════ # 原始 quaternion 格式 → KITTI label_2 # ═══════════════════════════════════════════════════════ def quaternion_line_to_kitti(line: str, calib_bbox_fn=None) -> str | None: """将一行 quaternion 格式转为 KITTI label_2 行。 输入格式 (空格分隔): Class x y z w l h qw qx qy qz class_id truncation [bbox_2d_8values] [extra...] 输出格式 (KITTI label_2): Class truncated occluded alpha bbox_x1 bbox_y1 bbox_x2 bbox_y2 h w l x y z rot_y [score] """ parts = line.strip().split() if len(parts) < 13: return None label = parts[0] x = float(parts[1]) y = float(parts[2]) z = float(parts[3]) w = float(parts[4]) l = float(parts[5]) h_dim = float(parts[6]) qw = float(parts[7]) qx = float(parts[8]) qy = float(parts[9]) qz = float(parts[10]) class_id = int(parts[11]) if len(parts) > 11 else 0 truncation = int(parts[12]) if len(parts) > 12 else 0 rot_y = quat_to_rot_y(qw, qx, qy, qz) # KITTI: alpha = rot_y - arctan(center_x / center_z), 简化处理 alpha = rot_y # 2D bbox (如果存在: 后续8个值) bbox_2d = None if len(parts) >= 21: try: bbox_2d = [float(p) for p in parts[13:21]] except ValueError: pass # 截断和遮挡 occluded = 0 # quaternion 格式没有直接对应 # KITTI 位置是 camera coordinate: x(right), y(down), z(forward) # quaternion 格式是 LiDAR coordinate: x(forward), y(left), z(up) # 简化转换:x_kitti = -y_lidar, y_kitti = -z_lidar, z_kitti = x_lidar kitti_x = -y kitti_y = -z kitti_z = x # KITTI 3D 尺寸: height, width, length if bbox_2d and len(bbox_2d) == 8: x1, y1, x2, y2 = bbox_2d[0:4] else: x1 = y1 = x2 = y2 = 0 # Format: Class truncated occluded alpha x1 y1 x2 y2 h w l x y z rot_y return ( f"{label} {truncation} {occluded} {alpha:.6f} " f"{x1:.2f} {y1:.2f} {x2:.2f} {y2:.2f} " f"{h_dim:.6f} {w:.6f} {l:.6f} " f"{kitti_x:.6f} {kitti_y:.6f} {kitti_z:.6f} " f"{rot_y:.6f}" ) # ═══════════════════════════════════════════════════════ # KITTI → 原始 quaternion 格式 # ═══════════════════════════════════════════════════════ def kitti_line_to_quaternion(line: str) -> str | None: """KITTI label_2 → quaternion 格式(回传HSAP)。""" parts = line.strip().split() if len(parts) < 15: return None label = parts[0] alpha = float(parts[3]) bbox = [float(p) for p in parts[4:8]] h_dim = float(parts[8]) w = float(parts[9]) l = float(parts[10]) kx = float(parts[11]) ky = float(parts[12]) kz = float(parts[13]) rot_y = float(parts[14]) # 逆转换 x = kz # LiDAR X = KITTI Z y = -kx # LiDAR Y = -KITTI X z = -ky # LiDAR Z = -KITTI Y qw, qx, qy, qz = rot_y_to_quat(rot_y) # 输出 quaternion 格式 return ( f"{label} {x:.6f} {y:.6f} {z:.6f} {w:.6f} {l:.6f} {h_dim:.6f} " f"{qw:.6f} {qx:.6f} {qy:.6f} {qz:.6f} 0 0 " f"{bbox[0]:.2f} {bbox[1]:.2f} {bbox[2]:.2f} {bbox[3]:.2f} " f"0 0 0 0 0 0 1" ) # ═══════════════════════════════════════════════════════ # CVAT Job API shapes → HSAP / YOLO # ═══════════════════════════════════════════════════════ def cvat_shape_to_result_item( shape: dict[str, Any], label_map: dict[int, str], ) -> dict[str, Any]: """CVAT Job annotations API 单条 shape → HSAP result 条目。""" label = label_map.get(shape.get("label_id"), "unknown") stype = shape.get("type", "") item: dict[str, Any] = { "type": stype, "label": label, "source": "cvat", "cvat_id": shape.get("id"), "frame": shape.get("frame", 0), } if stype == "rectangle": item["points"] = [ shape.get("xtl", 0), shape.get("ytl", 0), shape.get("xbr", 0), shape.get("ybr", 0), ] elif stype == "cuboid": for key in ( "xtl1", "ytl1", "xtr1", "ytr1", "xbl1", "ybl1", "xbr1", "ybr1", "xtl2", "ytl2", "xtr2", "ytr2", "xbl2", "ybl2", "xbr2", "ybr2", ): if key in shape: item[key] = shape[key] if shape.get("points"): item["points"] = shape["points"] elif stype in ("polyline", "polygon", "points"): item["points"] = shape.get("points", []) return item def cvat_job_shapes_to_yolo_lines( shapes: list[dict[str, Any]], label_map: dict[int, str], class_map: dict[str, int], img_width: int, img_height: int, ) -> list[str]: lines: list[str] = [] for shape in shapes: if shape.get("type") != "rectangle": continue label = label_map.get(shape.get("label_id"), "") class_id = class_map.get(label) if class_id is None: # 尝试大小写不敏感匹配 for name, cid in class_map.items(): if name.lower() == label.lower(): class_id = cid break if class_id is None: continue x1, y1, x2, y2 = ( float(shape.get("xtl", 0)), float(shape.get("ytl", 0)), float(shape.get("xbr", 0)), float(shape.get("ybr", 0)), ) if img_width <= 0 or img_height <= 0: continue cx = ((x1 + x2) / 2) / img_width cy = ((y1 + y2) / 2) / img_height bw = (x2 - x1) / img_width bh = (y2 - y1) / img_height lines.append(f"{class_id} {cx:.6f} {cy:.6f} {bw:.6f} {bh:.6f}") return lines def group_cvat_job_shapes_by_frame( job_annotations: dict[str, Any], ) -> dict[int, list[dict[str, Any]]]: grouped: dict[int, list[dict[str, Any]]] = {} for shape in job_annotations.get("shapes") or []: frame = int(shape.get("frame", 0)) grouped.setdefault(frame, []).append(shape) return grouped def cvat_shapes_to_export_regions( shapes: list[dict[str, Any]], label_map: dict[int, str], img_width: int, img_height: int, ) -> list[dict[str, Any]]: """CVAT Job shapes → HSAP 导出链兼容的 result[](原 Label Studio 字段布局)。""" if img_width <= 0 or img_height <= 0: return [] regions: list[dict[str, Any]] = [] for shape in shapes: stype = shape.get("type") or "" label = label_map.get(shape.get("label_id"), "unknown") base = { "id": str(shape.get("id", "")), "original_width": img_width, "original_height": img_height, } if stype == "rectangle": xtl = float(shape.get("xtl", 0)) ytl = float(shape.get("ytl", 0)) xbr = float(shape.get("xbr", 0)) ybr = float(shape.get("ybr", 0)) regions.append({ **base, "type": "rectanglelabels", "value": { "x": xtl / img_width * 100.0, "y": ytl / img_height * 100.0, "width": (xbr - xtl) / img_width * 100.0, "height": (ybr - ytl) / img_height * 100.0, "rotation": 0, "rectanglelabels": [label], }, }) elif stype == "points": pts = shape.get("points") or [] if len(pts) < 2: continue regions.append({ **base, "type": "keypointlabels", "value": { "x": float(pts[0]) / img_width * 100.0, "y": float(pts[1]) / img_height * 100.0, "width": 0.5, "keypointlabels": [label], }, }) elif stype in ("polyline", "polygon"): regions.append({ **base, "type": "polyline", "label": label, "points": list(shape.get("points") or []), }) elif stype == "cuboid": item = cvat_shape_to_result_item(shape, label_map) item["original_width"] = img_width item["original_height"] = img_height regions.append(item) return regions # ═══════════════════════════════════════════════════════ # CVAT JSON → YOLO bbox # ═══════════════════════════════════════════════════════ def cvat_json_to_yolo( cvat_annotations: dict[str, Any], class_map: dict[str, int], img_width: int = 1920, img_height: int = 1080, ) -> dict[str, list[str]]: """CVAT annotations JSON → YOLO 格式文件内容。 返回 {image_name: [yolo_line, ...]} 的字典。 """ result: dict[str, list[str]] = {} for img_ann in cvat_annotations.get("annotations", []): frame = img_ann.get("frame", 0) img_name = _resolve_image_name(cvat_annotations, img_ann) lines: list[str] = [] for shape in img_ann.get("shapes", []): shape_type = shape.get("type", "") label_name = shape.get("label", "") class_id = class_map.get(label_name) if class_id is None: continue if shape_type == "rectangle": # YOLO: class_id cx cy w h (归一化 0-1) x1, y1, x2, y2 = (shape.get(p, 0) for p in ("xtl", "ytl", "xbr", "ybr")) cx = ((x1 + x2) / 2) / img_width cy = ((y1 + y2) / 2) / img_height bw = (x2 - x1) / img_width bh = (y2 - y1) / img_height lines.append(f"{class_id} {cx:.6f} {cy:.6f} {bw:.6f} {bh:.6f}") if lines: result[img_name] = lines return result # ═══════════════════════════════════════════════════════ # CVAT JSON → COCO keypoints # ═══════════════════════════════════════════════════════ def cvat_json_to_coco_keypoints( cvat_annotations: dict[str, Any], keypoint_labels: list[str], image_dir: Path | None = None, ) -> dict[str, Any]: """提取 CVAT 关键点标注 → COCO keypoints 格式。""" images: list[dict[str, Any]] = [] annotations: list[dict[str, Any]] = [] # 构建 keypoint_label → id 映射 kp_map = {name: i for i, name in enumerate(keypoint_labels)} ann_id = 0 for img_idx, img_ann in enumerate(cvat_annotations.get("annotations", [])): img_name = _resolve_image_name(cvat_annotations, img_ann) img_w = img_ann.get("width", 1920) img_h = img_ann.get("height", 1080) img_id = img_idx + 1 images.append({"id": img_id, "file_name": img_name, "width": img_w, "height": img_h}) for shape in img_ann.get("shapes", []): if shape.get("type") != "points": continue points = shape.get("points", []) if not points: continue # points 格式: [[x1,y1], [x2,y2], ...] keypoints_list: list[float] = [] num_keypoints = 0 for kp_label in keypoint_labels: kp_data = next((p for p in points if p.get("label") == kp_label), None) if kp_data: keypoints_list.extend([kp_data.get("x", 0), kp_data.get("y", 0), 2]) # visible num_keypoints += 1 else: keypoints_list.extend([0, 0, 0]) # not labeled annotations.append({ "id": ann_id, "image_id": img_id, "category_id": 1, "keypoints": keypoints_list, "num_keypoints": num_keypoints, "bbox": _keypoint_bbox(keypoints_list, img_w, img_h), }) ann_id += 1 return { "images": images, "annotations": annotations, "categories": [{"id": 1, "name": "person", "keypoints": keypoint_labels, "skeleton": []}], } # ═══════════════════════════════════════════════════════ # CVAT JSON → HSAP Lane polyline # ═══════════════════════════════════════════════════════ def cvat_json_to_lane_polylines( cvat_annotations: dict[str, Any], ) -> dict[str, list[dict[str, Any]]]: """提取 CVAT 折线标注 → HSAP 车道线格式。""" result: dict[str, list[dict[str, Any]]] = {} for img_ann in cvat_annotations.get("annotations", []): img_name = _resolve_image_name(cvat_annotations, img_ann) polylines: list[dict[str, Any]] = [] for shape in img_ann.get("shapes", []): if shape.get("type") not in ("polyline", "polygon"): continue points = shape.get("points", []) if not points: continue attrs = {a.get("name"): a.get("value") for a in (shape.get("attributes") or [])} polylines.append({ "label": shape.get("label", "lane_line"), "attributes": attrs, "points": [[p.get("x", 0), p.get("y", 0)] for p in points], }) if polylines: result[img_name] = polylines return result # ═══════════════════════════════════════════════════════ # 辅助函数 # ═══════════════════════════════════════════════════════ def _resolve_image_name(annotations: dict[str, Any], img_ann: dict[str, Any]) -> str: """从 CVAT annotation JSON 中解析图像文件名。""" frame = img_ann.get("frame", 0) images = annotations.get("images", []) if isinstance(images, list) and frame < len(images): img_info = images[frame] if isinstance(img_info, dict): return img_info.get("file_name", f"frame_{frame}") return img_ann.get("name", f"frame_{frame}") def _keypoint_bbox(kpts: list[float], img_w: int, img_h: int) -> list[float]: """从 keypoints 列表计算 bbox [x, y, w, h]。""" xs = [kpts[i] for i in range(0, len(kpts), 3) if kpts[i + 2] > 0] ys = [kpts[i + 1] for i in range(0, len(kpts), 3) if kpts[i + 2] > 0] if not xs or not ys: return [0, 0, 0, 0] x_min, x_max = min(xs), max(xs) y_min, y_max = min(ys), max(ys) return [x_min, y_min, x_max - x_min, y_max - y_min] # ═══════════════════════════════════════════════════════ # 批量 KITTI 转换 # ═══════════════════════════════════════════════════════ def convert_quaternion_dir_to_kitti(label_dir: Path, output_dir: Path) -> int: """将 quaternion 格式目录批量转换为 KITTI label_2 格式。""" output_dir.mkdir(parents=True, exist_ok=True) count = 0 for txt_file in sorted(label_dir.rglob("*.txt")): kitti_lines: list[str] = [] for line in txt_file.read_text(encoding="utf-8").strip().split("\n"): if not line.strip(): continue kitti_line = quaternion_line_to_kitti(line) if kitti_line: kitti_lines.append(kitti_line) if kitti_lines: out_file = output_dir / txt_file.name out_file.write_text("\n".join(kitti_lines) + "\n", encoding="utf-8") count += 1 return count def convert_cvat_kitti_export_to_hsap(kitti_data: bytes, output_dir: Path) -> int: """将 CVAT KITTI 导出(zip 字节)解压并转为 HSAP quaternion 格式。""" import io import zipfile output_dir.mkdir(parents=True, exist_ok=True) count = 0 with zipfile.ZipFile(io.BytesIO(kitti_data)) as zf: for name in zf.namelist(): if not name.endswith(".txt") or "label_2" not in name: continue content = zf.read(name).decode("utf-8") hsap_lines: list[str] = [] for line in content.strip().split("\n"): if not line.strip(): continue hsap_line = kitti_line_to_quaternion(line) if hsap_line: hsap_lines.append(hsap_line) if hsap_lines: fname = Path(name).name (output_dir / fname).write_text("\n".join(hsap_lines) + "\n", encoding="utf-8") count += 1 return count # ═══════════════════════════════════════════════════════ # ADAS 3D Quaternion JSON → CVAT cuboid XML # ═══════════════════════════════════════════════════════ from xml.etree.ElementTree import Element, SubElement, tostring from xml.dom import minidom from datetime import datetime, timezone def _get_np(): """Lazy numpy import.""" import numpy as np return np def _quat_to_rotation_matrix(qw: float, qx: float, qy: float, qz: float): np = _get_np() return np.array([ [1 - 2*qy**2 - 2*qz**2, 2*qx*qy - 2*qz*qw, 2*qx*qz + 2*qy*qw], [2*qx*qy + 2*qz*qw, 1 - 2*qx**2 - 2*qz**2, 2*qy*qz - 2*qx*qw], [2*qx*qz - 2*qy*qw, 2*qy*qz + 2*qx*qw, 1 - 2*qx**2 - 2*qy**2], ]) def _get_3d_corners(center, w, l, h, qw, qx, qy, qz): """Compute 8 corners in camera coordinates. Object frame: x=forward(±l/2), y=left(±w/2), z=up(±h/2).""" np = _get_np() ox = np.array([-l/2, -l/2, -l/2, -l/2, l/2, l/2, l/2, l/2]) oy = np.array([-w/2, w/2, w/2, -w/2, -w/2, w/2, w/2, -w/2]) oz = np.array([-h/2, -h/2, h/2, h/2, -h/2, -h/2, h/2, h/2]) corners_obj = np.stack([ox, oy, oz], axis=1) R = _quat_to_rotation_matrix(qw, qx, qy, qz) return (R @ corners_obj.T).T + np.array(center) def _project_2d(pts_3d, K): pts = pts_3d @ K.T return pts[:, :2] / pts[:, 2:] def quaternion_json_to_cvat_cuboid_xml( json_dir: str | Path, image_names: list[str], task_id: int | None = None, ) -> str: """将 ADAS 3D quaternion JSON 标注转换为 CVAT cuboid XML。 Args: json_dir: 包含 .json 标注文件的目录 image_names: 图像文件名列表(与 CVAT task 中的 frame 顺序对应) task_id: 可选 CVAT task ID Returns: CVAT for images 1.1 XML 字符串 """ json_dir = Path(json_dir) root = Element("annotations") SubElement(root, "version").text = "1.1" meta = SubElement(root, "meta") te = SubElement(meta, "task") SubElement(te, "id").text = str(task_id or 0) SubElement(te, "name").text = "ADAS 3D" SubElement(te, "size").text = str(len(image_names)) SubElement(te, "mode").text = "annotation" SubElement(te, "overlap").text = "0" now = datetime.now(timezone.utc).isoformat() SubElement(te, "created").text = now SubElement(te, "updated").text = now le = SubElement(te, "labels") for lbl in ["car", "pedestrian", "truck", "bus", "motorcycle", "tricycle", "traffic cone"]: l = SubElement(le, "label"); SubElement(l, "name").text = lbl; SubElement(l, "attributes") se = SubElement(te, "segments"); s = SubElement(se, "segment") SubElement(s, "id").text = "1"; SubElement(s, "start").text = "0" SubElement(s, "stop").text = str(len(image_names) - 1) ow = SubElement(te, "owner"); SubElement(ow, "username").text = "platform"; SubElement(ow, "email").text = "" SubElement(meta, "dumped").text = now total = 0 for fid, img_name in enumerate(image_names): stem = Path(img_name).stem jp = json_dir / f"{stem}.json" if not jp.is_file(): continue ann = json.loads(jp.read_text(encoding="utf-8")) np = _get_np() K = np.array(ann["K"]) img_w, img_h = ann["image_size"] ie = SubElement(root, "image") ie.set("id", str(fid)) ie.set("name", Path(img_name).name) ie.set("width", str(img_w)) ie.set("height", str(img_h)) for det in ann.get("detections", []): w, l, h = det["dimensions_wlh"] c3d = _get_3d_corners(det["center_3d"], w, l, h, *det["quaternion_wxyz"]) if _get_np().any(c3d[:, 2] <= 0): continue c2d = _project_2d(c3d, K) # 4 edge-pairs: (rear, front) × (tl, tr, bl, br) pairs = [(3, 7), (2, 6), (0, 4), (1, 5)] pd = [] for ri, fi in pairs: mid = (c2d[ri] + c2d[fi]) / 2.0 f1_i, f2_i = (fi, ri) if c3d[fi, 2] <= c3d[ri, 2] else (ri, fi) pd.append({"mid": mid, "f1_i": f1_i, "f2_i": f2_i}) pd.sort(key=lambda p: p["mid"][1]) top = sorted(pd[:2], key=lambda p: p["mid"][0]) bot = sorted(pd[2:], key=lambda p: p["mid"][0]) tl, tr = top[0], top[1] bl, br = bot[0], bot[1] cub = SubElement(ie, "cuboid") cub.set("label", det["class_name"]); cub.set("source", "manual"); cub.set("occluded", "0") cub.set("xtl1", f"{c2d[tl['f1_i']][0]:.2f}"); cub.set("ytl1", f"{c2d[tl['f1_i']][1]:.2f}") cub.set("xtr1", f"{c2d[tr['f1_i']][0]:.2f}"); cub.set("ytr1", f"{c2d[tr['f1_i']][1]:.2f}") cub.set("xbl1", f"{c2d[bl['f1_i']][0]:.2f}"); cub.set("ybl1", f"{c2d[bl['f1_i']][1]:.2f}") cub.set("xbr1", f"{c2d[br['f1_i']][0]:.2f}"); cub.set("ybr1", f"{c2d[br['f1_i']][1]:.2f}") cub.set("xtl2", f"{c2d[tl['f2_i']][0]:.2f}"); cub.set("ytl2", f"{c2d[tl['f2_i']][1]:.2f}") cub.set("xtr2", f"{c2d[tr['f2_i']][0]:.2f}"); cub.set("ytr2", f"{c2d[tr['f2_i']][1]:.2f}") cub.set("xbl2", f"{c2d[bl['f2_i']][0]:.2f}"); cub.set("ybl2", f"{c2d[bl['f2_i']][1]:.2f}") cub.set("xbr2", f"{c2d[br['f2_i']][0]:.2f}"); cub.set("ybr2", f"{c2d[br['f2_i']][1]:.2f}") cub.set("z_order", "0") total += 1 xml_str = minidom.parseString(tostring(root, 'utf-8')).toprettyxml(indent=" ") return xml_str