# Ultralytics AGPL-3.0 License - https://ultralytics.com/license
"""3D detection metrics for monocular 3D object detection.

Ported from yolov5-3d/utils/metrics.py. Provides depth error, orientation error, 3D center error, UV error, and
grouped aggregation for matched prediction-GT pairs.
"""

import math

import numpy as np

_TWO_PI = 2.0 * math.pi

# Metrics whose aggregation weight is the "_pos_matched" count of each entry.
_POS_WEIGHTED_KEYS = frozenset({"depth_abs", "depth_rel", "depth_rmse", "center", "uv"})

# Visible-face orientation metrics carry their own valid-pair counters.
_VISIBLE_ORIENT_WEIGHT_KEYS = {
    "direct_orient_visible": "_direct_orient_visible_matched",
    "edge_orient_visible": "_edge_orient_visible_matched",
}


def _as_rows(values, width):
    """Convert ``values`` to a float64 array with one sample per row.

    Inputs with fewer than two dimensions (an empty list, or a single unbatched sample) are reshaped to
    ``(-1, width)`` so that row-wise reductions along ``axis=1`` are always valid. Inputs that are already
    2-D or higher are returned unchanged.
    """
    rows = np.asarray(values, dtype=np.float64)
    if rows.ndim < 2:
        rows = rows.reshape(-1, width)
    return rows


def _finite_row_mask(pred_rows, gt_rows):
    """Return a boolean mask of rows in which every prediction and GT component is finite."""
    return np.all(np.isfinite(pred_rows), axis=1) & np.all(np.isfinite(gt_rows), axis=1)


def _aggregation_weight_key(metric_key):
    """Return the entry key holding the sample count used to weight ``metric_key``."""
    if metric_key in _POS_WEIGHTED_KEYS:
        return "_pos_matched"
    return _VISIBLE_ORIENT_WEIGHT_KEYS.get(metric_key, "matched")


def compute_depth_error(pred_depth, gt_depth, eps=1e-7):
    """Compute depth error metrics between predicted and ground truth depth.

    Pairs are ignored when either value is non-finite or the ground-truth depth is not strictly positive.

    Args:
        pred_depth: Predicted depth values (N,).
        gt_depth: Ground truth depth values (N,).
        eps: Small value to avoid division by zero.

    Returns:
        Dict with abs_error, rel_error, rmse. All three are 0.0 when no valid pair exists.
    """
    pred_depth = np.asarray(pred_depth, dtype=np.float64)
    gt_depth = np.asarray(gt_depth, dtype=np.float64)

    valid = np.isfinite(pred_depth) & np.isfinite(gt_depth) & (gt_depth > 0)
    if not np.any(valid):
        return {"abs_error": 0.0, "rel_error": 0.0, "rmse": 0.0}

    gt = gt_depth[valid]
    residual = pred_depth[valid] - gt
    abs_err = np.abs(residual)
    return {
        "abs_error": float(np.mean(abs_err)),
        "rel_error": float(np.mean(abs_err / (gt + eps))),
        "rmse": float(np.sqrt(np.mean(residual**2))),
    }


def compute_orientation_error(pred_yaw, gt_yaw):
    """Compute mean absolute orientation error in degrees with wrap-around handling.

    Args:
        pred_yaw: Predicted yaw angles in radians (N,).
        gt_yaw: Ground truth yaw angles in radians (N,).

    Returns:
        Mean absolute orientation error in degrees, in [0, 180]. 0.0 when no pair has finite values.
    """
    pred_yaw = np.asarray(pred_yaw, dtype=np.float64)
    gt_yaw = np.asarray(gt_yaw, dtype=np.float64)

    valid = np.isfinite(pred_yaw) & np.isfinite(gt_yaw)
    if not np.any(valid):
        return 0.0

    # Reduce modulo a full turn first: without it, yaws that are not normalized to a single
    # period can differ by more than 2*pi, which would make ``2*pi - diff`` negative.
    diff = np.mod(np.abs(pred_yaw[valid] - gt_yaw[valid]), _TWO_PI)
    diff = np.minimum(diff, _TWO_PI - diff)
    return float(np.mean(np.degrees(diff)))


def count_valid_orientation_pairs(pred_yaw, gt_yaw):
    """Count valid orientation pairs with finite prediction and ground-truth yaw."""
    pred_yaw = np.asarray(pred_yaw, dtype=np.float64)
    gt_yaw = np.asarray(gt_yaw, dtype=np.float64)
    return int(np.sum(np.isfinite(pred_yaw) & np.isfinite(gt_yaw)))


def compute_visible_orientation_metrics(pred_direct_yaw, pred_edge_yaw, gt_yaw):
    """Compute visible-face direct and edge orientation errors against GT yaw.

    Args:
        pred_direct_yaw: Directly predicted yaw angles in radians (N,).
        pred_edge_yaw: Edge-derived yaw angles in radians (N,).
        gt_yaw: Ground truth yaw angles in radians (N,).

    Returns:
        Dict with the two mean errors in degrees plus the underscore-prefixed counts of valid pairs
        behind each error (used as weights by ``aggregate_3d_metric_groups``).
    """
    return {
        "direct_orient_visible": compute_orientation_error(pred_direct_yaw, gt_yaw),
        "edge_orient_visible": compute_orientation_error(pred_edge_yaw, gt_yaw),
        "_direct_orient_visible_matched": count_valid_orientation_pairs(pred_direct_yaw, gt_yaw),
        "_edge_orient_visible_matched": count_valid_orientation_pairs(pred_edge_yaw, gt_yaw),
    }


def compute_3d_center_error(pred_center, gt_center):
    """Compute mean Euclidean distance between predicted and GT 3D centers.

    Args:
        pred_center: Predicted 3D centers (N, 3). An empty sequence or a single (3,) center is accepted.
        gt_center: Ground truth 3D centers (N, 3).

    Returns:
        Mean Euclidean distance in meters. 0.0 when no pair has all-finite coordinates.
    """
    pred_center = _as_rows(pred_center, 3)
    gt_center = _as_rows(gt_center, 3)

    valid = _finite_row_mask(pred_center, gt_center)
    if not np.any(valid):
        return 0.0
    return float(np.mean(np.linalg.norm(pred_center[valid] - gt_center[valid], axis=1)))


def compute_size_error(pred_dims, gt_dims):
    """Compute mean absolute size error for L, H, W dimensions.

    Args:
        pred_dims: Predicted dimensions (N, 3) - [l, h, w]. An empty sequence or a single (3,) row is accepted.
        gt_dims: Ground truth dimensions (N, 3) - [l, h, w].

    Returns:
        Mean absolute size error in meters, averaged over all valid rows and all three dimensions.
        0.0 when no pair has all-finite dimensions.
    """
    pred_dims = _as_rows(pred_dims, 3)
    gt_dims = _as_rows(gt_dims, 3)

    valid = _finite_row_mask(pred_dims, gt_dims)
    if not np.any(valid):
        return 0.0
    return float(np.mean(np.abs(pred_dims[valid] - gt_dims[valid])))


def compute_uv_error(pred_uv, gt_uv):
    """Compute mean per-coordinate L1 pixel error between predicted and GT UV coordinates.

    This matches the training-time UV logging, which averages absolute U and V errors instead of using
    Euclidean point distance.

    Args:
        pred_uv: Predicted [u, v] coordinates (N, 2). An empty sequence or a single (2,) point is accepted.
        gt_uv: Ground truth [u, v] coordinates (N, 2).

    Returns:
        Mean absolute per-coordinate error. 0.0 when no pair has all-finite coordinates.
    """
    pred_uv = _as_rows(pred_uv, 2)
    gt_uv = _as_rows(gt_uv, 2)

    valid = _finite_row_mask(pred_uv, gt_uv)
    if not np.any(valid):
        return 0.0
    return float(np.mean(np.abs(pred_uv[valid] - gt_uv[valid])))


def empty_3d_metrics(include_orient=True, include_size=True, include_uv=True, include_visible_orient=False):
    """Return default 3D metrics used for logging when no matches are available.

    Args:
        include_orient: Whether to add the "orient" key.
        include_size: Whether to add the "size" key.
        include_uv: Whether to add the "uv" key.
        include_visible_orient: Whether to add the "direct_orient_visible" and "edge_orient_visible" keys.

    Returns:
        Dict with every requested metric set to 0.0 and "matched" set to 0.
    """
    metrics = {
        "depth_abs": 0.0,
        "depth_rel": 0.0,
        "depth_rmse": 0.0,
        "center": 0.0,
        "matched": 0,
    }
    if include_uv:
        metrics["uv"] = 0.0
    if include_orient:
        metrics["orient"] = 0.0
    if include_size:
        metrics["size"] = 0.0
    if include_visible_orient:
        metrics["direct_orient_visible"] = 0.0
        metrics["edge_orient_visible"] = 0.0
    return metrics


def aggregate_3d_metric_groups(stats_by_group):
    """Aggregate grouped 3D metrics with matched-count weighting.

    Args:
        stats_by_group: Mapping from group name to a list of per-batch metric dicts. Each dict must contain
            "matched"; keys starting with "_" are treated as auxiliary sample counts and are not reported.

    Returns:
        Dict mapping each group name to one aggregated metric dict. The reported metric keys are taken from
        the first entry of the group. An empty group yields ``empty_3d_metrics`` with orient/size enabled
        only for the "whole" group and visible-orientation keys only for the "face" group.

    Note:
        Every metric, including "depth_rmse", is combined as a weighted arithmetic mean of the per-entry
        values (not a pooled RMSE), rounded to 5 decimals.
    """
    aggregated = {}
    for group, entries in stats_by_group.items():
        if not entries:
            aggregated[group] = empty_3d_metrics(
                include_orient=group == "whole",
                include_size=group == "whole",
                include_visible_orient=group == "face",
            )
            continue

        metric_keys = [k for k in entries[0] if k != "matched" and not k.startswith("_")]
        result = dict.fromkeys(metric_keys, 0.0)
        result["matched"] = 0

        total_matched = sum(entry["matched"] for entry in entries)
        total_pos_matched = sum(entry.get("_pos_matched", entry["matched"]) for entry in entries)
        if total_matched <= 0 and total_pos_matched <= 0:
            aggregated[group] = result
            continue

        for key in metric_keys:
            weight_key = _aggregation_weight_key(key)
            # Entries lacking the dedicated counter fall back to their plain matched count.
            weights = [entry.get(weight_key, entry.get("matched", 0)) for entry in entries]
            total_weight = sum(weights)
            if total_weight <= 0:
                # No valid visible-orientation pair anywhere: report NaN rather than a misleading 0.
                result[key] = float("nan") if key in _VISIBLE_ORIENT_WEIGHT_KEYS else 0.0
                continue
            weighted = sum(entry[key] * weight for entry, weight in zip(entries, weights) if weight > 0)
            result[key] = round(weighted / total_weight, 5)

        result["matched"] = total_matched
        aggregated[group] = result
    return aggregated


def compute_3d_metrics_for_matched(
    pred_3d_attrs,
    gt_3d_attrs,
    include_orient=True,
    include_size=True,
    include_uv=False,
    include_visible_orient=False,
):
    """Compute 3D metrics for pre-matched prediction-GT pairs.

    Args:
        pred_3d_attrs: Dict with keys:
            - center: (N, 3) predicted 3D centers [x, y, z]
            - depth: (N,) predicted z3d
            - yaw: (N,) predicted rotation_y in radians
            - edge_yaw: (N,) predicted visible-face yaw in radians (optional)
            - dims: (N, 3) predicted [l, h, w]
            - uv: (N, 2) predicted [u, v] in pixels (optional)
        gt_3d_attrs: Dict with same keys for ground truth.
        include_orient: Whether to compute orientation error.
        include_size: Whether to compute size error.
        include_uv: Whether to compute UV pixel error.
        include_visible_orient: Whether to compute visible-face direct and edge orientation errors.

    Returns:
        Dict with aggregated metrics and matched count.
    """
    # The number of matched pairs is defined by the length of the predicted depth vector.
    n = len(pred_3d_attrs.get("depth", []))
    if n == 0:
        return empty_3d_metrics(
            include_orient=include_orient,
            include_size=include_size,
            include_uv=include_uv,
            include_visible_orient=include_visible_orient,
        )

    depth_m = compute_depth_error(pred_3d_attrs["depth"], gt_3d_attrs["depth"])
    metrics = {
        "depth_abs": depth_m["abs_error"],
        "depth_rel": depth_m["rel_error"],
        "depth_rmse": depth_m["rmse"],
        "center": compute_3d_center_error(pred_3d_attrs["center"], gt_3d_attrs["center"]),
        "matched": n,
    }
    if include_uv:
        metrics["uv"] = compute_uv_error(pred_3d_attrs["uv"], gt_3d_attrs["uv"])
    if include_orient:
        metrics["orient"] = compute_orientation_error(pred_3d_attrs["yaw"], gt_3d_attrs["yaw"])
    if include_size:
        metrics["size"] = compute_size_error(pred_3d_attrs["dims"], gt_3d_attrs["dims"])
    if include_visible_orient:
        if "edge_yaw" in pred_3d_attrs:
            edge_yaw = pred_3d_attrs["edge_yaw"]
        else:
            # Without edge predictions every pair is invalid, so the edge metric gets zero weight.
            edge_yaw = np.full(n, np.nan, dtype=np.float64)
        metrics.update(compute_visible_orientation_metrics(pred_3d_attrs["yaw"], edge_yaw, gt_3d_attrs["yaw"]))
    return metrics