Files

268 lines
9.6 KiB
Python
Raw Permalink Normal View History

2026-06-24 09:35:46 +08:00
# Ultralytics AGPL-3.0 License - https://ultralytics.com/license
"""3D detection metrics for monocular 3D object detection.
Ported from yolov5-3d/utils/metrics.py. Provides depth error, orientation error,
3D center error, size error, UV error, and grouped aggregation for matched prediction-GT pairs.
"""
import math
import numpy as np
def compute_depth_error(pred_depth, gt_depth, eps=1e-7):
    """Summarise depth errors over valid prediction / ground-truth pairs.

    A pair is valid when both depths are finite and the ground-truth depth is
    strictly positive. Invalid pairs are dropped before averaging.

    Args:
        pred_depth: Predicted depth values (N,).
        gt_depth: Ground truth depth values (N,).
        eps: Small constant added to the ground-truth depth in the denominator
            of the relative error.

    Returns:
        Dict with float entries ``abs_error`` (mean absolute error),
        ``rel_error`` (mean absolute error divided by ground-truth depth) and
        ``rmse`` (root mean squared error). All three are 0.0 when no pair is
        valid.
    """
    predicted = np.asarray(pred_depth, dtype=np.float64)
    target = np.asarray(gt_depth, dtype=np.float64)

    usable = np.isfinite(predicted) & np.isfinite(target) & (target > 0)
    if not np.any(usable):
        return dict.fromkeys(("abs_error", "rel_error", "rmse"), 0.0)

    target = target[usable]
    residual = predicted[usable] - target
    magnitude = np.abs(residual)

    mean_abs = float(np.mean(magnitude))
    mean_rel = float(np.mean(magnitude / (target + eps)))
    root_mean_sq = float(np.sqrt(np.mean(residual**2)))
    return {"abs_error": mean_abs, "rel_error": mean_rel, "rmse": root_mean_sq}
def compute_orientation_error(pred_yaw, gt_yaw):
    """Compute mean absolute orientation error in degrees with wrap-around handling.

    Pairs where either angle is non-finite are ignored. The angular difference
    is folded onto [0, pi] radians, so the result is always in [0, 180]
    degrees, including for yaw values that are not normalised to a single turn.

    Args:
        pred_yaw: Predicted yaw angles in radians (N,).
        gt_yaw: Ground truth yaw angles in radians (N,).

    Returns:
        Mean absolute orientation error in degrees, or 0.0 when no pair is valid.
    """
    pred_yaw = np.asarray(pred_yaw, dtype=np.float64)
    gt_yaw = np.asarray(gt_yaw, dtype=np.float64)
    valid = np.isfinite(pred_yaw) & np.isfinite(gt_yaw)
    if not np.any(valid):
        return 0.0

    full_turn = 2 * math.pi
    # Reduce modulo a full turn first: without this, a raw difference larger
    # than 2*pi makes ``full_turn - diff`` negative and the minimum below
    # would report a negative "error".
    diff = np.mod(np.abs(pred_yaw[valid] - gt_yaw[valid]), full_turn)
    # Take the shorter way around the circle.
    diff = np.minimum(diff, full_turn - diff)
    return float(np.mean(np.degrees(diff)))
def count_valid_orientation_pairs(pred_yaw, gt_yaw):
    """Return how many prediction / ground-truth yaw pairs are both finite.

    Args:
        pred_yaw: Predicted yaw angles (N,).
        gt_yaw: Ground truth yaw angles (N,).

    Returns:
        Number of positions where both values are finite, as a Python int.
    """
    predicted = np.asarray(pred_yaw, dtype=np.float64)
    target = np.asarray(gt_yaw, dtype=np.float64)
    both_finite = np.isfinite(predicted) & np.isfinite(target)
    return int(np.count_nonzero(both_finite))
def compute_visible_orientation_metrics(pred_direct_yaw, pred_edge_yaw, gt_yaw):
    """Score direct and edge yaw predictions against the ground-truth yaw.

    Args:
        pred_direct_yaw: Directly predicted yaw angles (N,).
        pred_edge_yaw: Edge-derived yaw angles (N,).
        gt_yaw: Ground truth yaw angles (N,).

    Returns:
        Dict with the orientation error of each prediction
        (``direct_orient_visible``, ``edge_orient_visible``) and the number of
        valid pairs behind each error under the underscore-prefixed
        ``_direct_orient_visible_matched`` / ``_edge_orient_visible_matched`` keys.
    """
    target = np.asarray(gt_yaw, dtype=np.float64)
    direct = np.asarray(pred_direct_yaw, dtype=np.float64)
    edge = np.asarray(pred_edge_yaw, dtype=np.float64)

    metrics = {}
    metrics["direct_orient_visible"] = compute_orientation_error(direct, target)
    metrics["edge_orient_visible"] = compute_orientation_error(edge, target)
    # Valid-pair counts are reported alongside the errors they belong to.
    metrics["_direct_orient_visible_matched"] = count_valid_orientation_pairs(direct, target)
    metrics["_edge_orient_visible_matched"] = count_valid_orientation_pairs(edge, target)
    return metrics
def compute_3d_center_error(pred_center, gt_center):
    """Compute mean Euclidean distance between predicted and GT 3D centers.

    Rows containing any non-finite coordinate in either input are ignored.
    Empty inputs yield 0.0, and a single 1-D center is treated as one row.

    Args:
        pred_center: Predicted 3D centers (N, 3).
        gt_center: Ground truth 3D centers (N, 3).

    Returns:
        Mean Euclidean distance in meters, or 0.0 when no row is valid.
    """
    pred_center = np.asarray(pred_center, dtype=np.float64)
    gt_center = np.asarray(gt_center, dtype=np.float64)
    # An empty list converts to shape (0,), which has no axis 1 to reduce over.
    if pred_center.size == 0 and gt_center.size == 0:
        return 0.0
    # Promote a lone 1-D center to a single row; 2-D input is left untouched.
    pred_center = np.atleast_2d(pred_center)
    gt_center = np.atleast_2d(gt_center)
    valid = np.all(np.isfinite(pred_center), axis=1) & np.all(np.isfinite(gt_center), axis=1)
    if not np.any(valid):
        return 0.0
    return float(np.mean(np.linalg.norm(pred_center[valid] - gt_center[valid], axis=1)))
def compute_size_error(pred_dims, gt_dims):
    """Compute mean absolute size error for L, H, W dimensions.

    Rows containing any non-finite value in either input are ignored. The mean
    is taken over every dimension of every valid row. Empty inputs yield 0.0,
    and a single 1-D dimension triple is treated as one row.

    Args:
        pred_dims: Predicted dimensions (N, 3) - [l, h, w].
        gt_dims: Ground truth dimensions (N, 3) - [l, h, w].

    Returns:
        Mean absolute size error in meters, or 0.0 when no row is valid.
    """
    pred_dims = np.asarray(pred_dims, dtype=np.float64)
    gt_dims = np.asarray(gt_dims, dtype=np.float64)
    # An empty list converts to shape (0,), which has no axis 1 to reduce over.
    if pred_dims.size == 0 and gt_dims.size == 0:
        return 0.0
    # Promote a lone 1-D triple to a single row; 2-D input is left untouched.
    pred_dims = np.atleast_2d(pred_dims)
    gt_dims = np.atleast_2d(gt_dims)
    valid = np.all(np.isfinite(pred_dims), axis=1) & np.all(np.isfinite(gt_dims), axis=1)
    if not np.any(valid):
        return 0.0
    return float(np.mean(np.abs(pred_dims[valid] - gt_dims[valid])))
def compute_uv_error(pred_uv, gt_uv):
    """Compute mean per-coordinate L1 pixel error between predicted and GT UV coordinates.

    This matches the training-time UV logging, which averages absolute U and V errors
    instead of using Euclidean point distance. Rows containing any non-finite value
    in either input are ignored. Empty inputs yield 0.0, and a single 1-D point is
    treated as one row.

    Args:
        pred_uv: Predicted [u, v] coordinates in pixels (N, 2).
        gt_uv: Ground truth [u, v] coordinates in pixels (N, 2).

    Returns:
        Mean absolute per-coordinate error in pixels, or 0.0 when no row is valid.
    """
    pred_uv = np.asarray(pred_uv, dtype=np.float64)
    gt_uv = np.asarray(gt_uv, dtype=np.float64)
    # An empty list converts to shape (0,), which has no axis 1 to reduce over.
    if pred_uv.size == 0 and gt_uv.size == 0:
        return 0.0
    # Promote a lone 1-D point to a single row; 2-D input is left untouched.
    pred_uv = np.atleast_2d(pred_uv)
    gt_uv = np.atleast_2d(gt_uv)
    valid = np.all(np.isfinite(pred_uv), axis=1) & np.all(np.isfinite(gt_uv), axis=1)
    if not np.any(valid):
        return 0.0
    diff = np.abs(pred_uv[valid] - gt_uv[valid])
    return float(np.mean(diff))
def empty_3d_metrics(include_orient=True, include_size=True, include_uv=True, include_visible_orient=False):
    """Build the all-zero metrics dict logged when there are no matched pairs.

    Args:
        include_orient: Add the ``orient`` entry.
        include_size: Add the ``size`` entry.
        include_uv: Add the ``uv`` entry.
        include_visible_orient: Add the ``direct_orient_visible`` and
            ``edge_orient_visible`` entries.

    Returns:
        Dict with the depth and center metrics set to 0.0, ``matched`` set to 0,
        and each requested optional metric set to 0.0.
    """
    metrics = dict.fromkeys(("depth_abs", "depth_rel", "depth_rmse", "center"), 0.0)
    metrics["matched"] = 0

    # Optional entries, listed in the order they are inserted into the dict.
    optional_entries = (
        (include_uv, ("uv",)),
        (include_orient, ("orient",)),
        (include_size, ("size",)),
        (include_visible_orient, ("direct_orient_visible", "edge_orient_visible")),
    )
    for enabled, names in optional_entries:
        if enabled:
            metrics.update(dict.fromkeys(names, 0.0))
    return metrics
def aggregate_3d_metric_groups(stats_by_group):
    """Combine per-batch 3D metric dicts into one weighted average per group.

    Args:
        stats_by_group: Mapping from group name to a list of metric dicts. Each
            dict carries a ``matched`` count and may carry underscore-prefixed
            weight counts (``_pos_matched``, ``_direct_orient_visible_matched``,
            ``_edge_orient_visible_matched``).

    Returns:
        Mapping from group name to one aggregated metric dict. Metric names are
        taken from the group's first entry, skipping ``matched`` and
        underscore-prefixed keys. Each metric is the weighted mean over entries,
        rounded to 5 decimals; ``matched`` is the summed matched count. Groups
        with no entries get the default empty metrics.
    """
    # Which per-entry count weights each metric; anything not listed uses "matched".
    weight_key_for = dict.fromkeys(("depth_abs", "depth_rel", "depth_rmse", "center", "uv"), "_pos_matched")
    weight_key_for["direct_orient_visible"] = "_direct_orient_visible_matched"
    weight_key_for["edge_orient_visible"] = "_edge_orient_visible_matched"
    # Metrics reported as NaN rather than 0.0 when nothing contributes weight.
    nan_when_unweighted = ("direct_orient_visible", "edge_orient_visible")

    def weight_of(entry, weight_key):
        # Entries lacking the dedicated count fall back to their matched count.
        return entry.get(weight_key, entry.get("matched", 0))

    aggregated = {}
    for group, entries in stats_by_group.items():
        if not entries:
            aggregated[group] = empty_3d_metrics(
                include_orient=group == "whole",
                include_size=group == "whole",
                include_visible_orient=group == "face",
            )
            continue

        result = {name: 0.0 for name in entries[0] if name != "matched" and not name.startswith("_")}
        result["matched"] = 0

        total_matched = sum(item["matched"] for item in entries)
        total_pos_matched = sum(item.get("_pos_matched", item["matched"]) for item in entries)

        # With no matches at all the zero-initialised result is reported as is.
        if total_matched > 0 or total_pos_matched > 0:
            for name in result:
                if name == "matched":
                    continue
                weight_key = weight_key_for.get(name, "matched")
                weights = [weight_of(item, weight_key) for item in entries]
                total_weight = sum(weights)
                if total_weight <= 0:
                    result[name] = float("nan") if name in nan_when_unweighted else 0.0
                    continue
                # Zero-weight entries are skipped, so they need not carry the metric.
                weighted = sum(item[name] * w for item, w in zip(entries, weights) if w > 0)
                result[name] = round(weighted / total_weight, 5)
            result["matched"] = total_matched

        aggregated[group] = result
    return aggregated
def compute_3d_metrics_for_matched(
    pred_3d_attrs,
    gt_3d_attrs,
    include_orient=True,
    include_size=True,
    include_uv=False,
    include_visible_orient=False,
):
    """Evaluate 3D metrics over prediction / ground-truth pairs that are already matched.

    Args:
        pred_3d_attrs: Dict with keys:
            - center: (N, 3) predicted 3D centers [x, y, z]
            - depth: (N,) predicted z3d
            - yaw: (N,) predicted rotation_y in radians
            - edge_yaw: (N,) predicted visible-face yaw in radians (optional)
            - dims: (N, 3) predicted [l, h, w]
            - uv: (N, 2) predicted [u, v] in pixels (optional)
        gt_3d_attrs: Dict with same keys for ground truth.
        include_orient: Whether to compute orientation error.
        include_size: Whether to compute size error.
        include_uv: Whether to compute UV pixel error.
        include_visible_orient: Whether to compute visible-face direct and edge orientation errors.

    Returns:
        Dict with the depth and center metrics, the requested optional metrics,
        and ``matched`` set to the number of predicted depths. When there are no
        predicted depths, the default empty metrics are returned instead.
    """
    num_pairs = len(pred_3d_attrs.get("depth", []))
    if not num_pairs:
        return empty_3d_metrics(
            include_orient=include_orient,
            include_size=include_size,
            include_uv=include_uv,
            include_visible_orient=include_visible_orient,
        )

    depth_stats = compute_depth_error(pred_3d_attrs["depth"], gt_3d_attrs["depth"])
    result = {
        "depth_abs": depth_stats["abs_error"],
        "depth_rel": depth_stats["rel_error"],
        "depth_rmse": depth_stats["rmse"],
        "center": compute_3d_center_error(pred_3d_attrs["center"], gt_3d_attrs["center"]),
        "matched": num_pairs,
    }

    # (enabled, metric name, attribute key, metric function), in output order.
    optional_metrics = (
        (include_uv, "uv", "uv", compute_uv_error),
        (include_orient, "orient", "yaw", compute_orientation_error),
        (include_size, "size", "dims", compute_size_error),
    )
    for enabled, metric_name, attr_key, metric_fn in optional_metrics:
        if enabled:
            result[metric_name] = metric_fn(pred_3d_attrs[attr_key], gt_3d_attrs[attr_key])

    if include_visible_orient:
        # Without an edge yaw prediction, substitute NaNs for it.
        visible = compute_visible_orientation_metrics(
            pred_3d_attrs["yaw"],
            pred_3d_attrs.get("edge_yaw", np.full(num_pairs, np.nan, dtype=np.float64)),
            gt_3d_attrs["yaw"],
        )
        result.update(visible)
    return result