268 lines
9.6 KiB
Python
Executable File
268 lines
9.6 KiB
Python
Executable File
# Ultralytics AGPL-3.0 License - https://ultralytics.com/license
|
|
|
|
"""3D detection metrics for monocular 3D object detection.
|
|
|
|
Ported from yolov5-3d/utils/metrics.py. Provides depth error, orientation error,
|
|
3D center error, UV error, and grouped aggregation for matched prediction-GT pairs.
|
|
"""
|
|
|
|
import math
|
|
|
|
import numpy as np
|
|
|
|
|
|
def compute_depth_error(pred_depth, gt_depth, eps=1e-7):
    """Summarize depth accuracy over the valid prediction/ground-truth pairs.

    A pair contributes only when both values are finite and the ground-truth depth is strictly positive.

    Args:
        pred_depth: Predicted depth values (N,).
        gt_depth: Ground truth depth values (N,).
        eps: Small constant added to the ground-truth depth in the relative-error denominator.

    Returns:
        Dict with ``abs_error`` (mean absolute error), ``rel_error`` (mean absolute error divided by the
        ground-truth depth) and ``rmse``. All three are 0.0 when no pair is valid.
    """
    predicted = np.asarray(pred_depth, dtype=np.float64)
    target = np.asarray(gt_depth, dtype=np.float64)

    finite = np.isfinite(predicted) & np.isfinite(target)
    usable = finite & (target > 0)
    if not usable.any():
        return {"abs_error": 0.0, "rel_error": 0.0, "rmse": 0.0}

    predicted, target = predicted[usable], target[usable]
    residual = predicted - target
    magnitude = np.abs(residual)
    return {
        "abs_error": float(magnitude.mean()),
        "rel_error": float((magnitude / (target + eps)).mean()),
        "rmse": float(np.sqrt((residual**2).mean())),
    }
|
|
|
|
|
|
def compute_orientation_error(pred_yaw, gt_yaw):
    """Compute mean absolute orientation error in degrees with wrap-around handling.

    Only pairs where both yaw values are finite are scored. The angular difference is taken along the
    shorter arc of the circle, so each per-pair error lies in [0, 180] degrees even when the inputs are
    not normalized to a single turn.

    Args:
        pred_yaw: Predicted yaw angles in radians (N,).
        gt_yaw: Ground truth yaw angles in radians (N,).

    Returns:
        Mean absolute orientation error in degrees, or 0.0 when no pair is valid.
    """
    pred_yaw = np.asarray(pred_yaw, dtype=np.float64)
    gt_yaw = np.asarray(gt_yaw, dtype=np.float64)

    valid = np.isfinite(pred_yaw) & np.isfinite(gt_yaw)
    if not np.any(valid):
        return 0.0

    full_turn = 2 * math.pi
    # Reduce the raw difference modulo one full turn before folding. Without this, a difference larger
    # than 2*pi (unnormalized angles) makes `full_turn - diff` negative and yields a negative "error".
    diff = np.mod(np.abs(pred_yaw[valid] - gt_yaw[valid]), full_turn)
    # Fold onto the shorter arc: an offset of d and of (2*pi - d) describe the same misalignment.
    diff = np.minimum(diff, full_turn - diff)
    return float(np.mean(np.degrees(diff)))
|
|
|
|
|
|
def count_valid_orientation_pairs(pred_yaw, gt_yaw):
    """Return how many pairs have both a finite predicted yaw and a finite ground-truth yaw."""
    pred_ok = np.isfinite(np.asarray(pred_yaw, dtype=np.float64))
    gt_ok = np.isfinite(np.asarray(gt_yaw, dtype=np.float64))
    return int(np.count_nonzero(pred_ok & gt_ok))
|
|
|
|
|
|
def compute_visible_orientation_metrics(pred_direct_yaw, pred_edge_yaw, gt_yaw):
    """Score the direct and edge yaw predictions against the ground-truth yaw.

    Args:
        pred_direct_yaw: Directly predicted yaw angles in radians.
        pred_edge_yaw: Edge-derived yaw angles in radians.
        gt_yaw: Ground truth yaw angles in radians.

    Returns:
        Dict with the mean orientation error in degrees for each prediction type
        (``direct_orient_visible``, ``edge_orient_visible``) plus the number of finite pairs behind
        each value under the underscore-prefixed ``_*_matched`` keys.
    """
    target = np.asarray(gt_yaw, dtype=np.float64)
    direct = np.asarray(pred_direct_yaw, dtype=np.float64)
    edge = np.asarray(pred_edge_yaw, dtype=np.float64)

    direct_error = compute_orientation_error(direct, target)
    edge_error = compute_orientation_error(edge, target)
    direct_count = count_valid_orientation_pairs(direct, target)
    edge_count = count_valid_orientation_pairs(edge, target)

    metrics = {}
    metrics["direct_orient_visible"] = direct_error
    metrics["edge_orient_visible"] = edge_error
    metrics["_direct_orient_visible_matched"] = direct_count
    metrics["_edge_orient_visible_matched"] = edge_count
    return metrics
|
|
|
|
|
|
def compute_3d_center_error(pred_center, gt_center):
    """Compute mean Euclidean distance between predicted and GT 3D centers.

    Only rows where every coordinate of both the prediction and the ground truth is finite are scored.

    Args:
        pred_center: Predicted 3D centers (N, 3). A single bare center of shape (3,) is also accepted.
        gt_center: Ground truth 3D centers (N, 3). A single bare center of shape (3,) is also accepted.

    Returns:
        Mean Euclidean distance in meters, or 0.0 when the input is empty or no row is valid.
    """
    pred_center = np.asarray(pred_center, dtype=np.float64)
    gt_center = np.asarray(gt_center, dtype=np.float64)

    # An empty list converts to a 1-D array of shape (0,), on which the axis=1 reductions below would
    # raise. There is nothing to score, so return the same 0.0 used when no row is valid.
    if pred_center.size == 0 or gt_center.size == 0:
        return 0.0

    # Promote a single bare center (3,) to (1, 3) so the row-wise logic applies uniformly.
    pred_center = np.atleast_2d(pred_center)
    gt_center = np.atleast_2d(gt_center)

    valid = np.all(np.isfinite(pred_center), axis=1) & np.all(np.isfinite(gt_center), axis=1)
    if not np.any(valid):
        return 0.0

    return float(np.mean(np.linalg.norm(pred_center[valid] - gt_center[valid], axis=1)))
|
|
|
|
|
|
def compute_size_error(pred_dims, gt_dims):
    """Compute mean absolute size error for L, H, W dimensions.

    Only rows where every dimension of both the prediction and the ground truth is finite are scored.
    The mean is taken over all individual dimension errors of the valid rows.

    Args:
        pred_dims: Predicted dimensions (N, 3) - [l, h, w]. A single bare (3,) entry is also accepted.
        gt_dims: Ground truth dimensions (N, 3) - [l, h, w]. A single bare (3,) entry is also accepted.

    Returns:
        Mean absolute size error in meters, or 0.0 when the input is empty or no row is valid.
    """
    pred_dims = np.asarray(pred_dims, dtype=np.float64)
    gt_dims = np.asarray(gt_dims, dtype=np.float64)

    # An empty list converts to a 1-D array of shape (0,), on which the axis=1 reductions below would
    # raise. There is nothing to score, so return the same 0.0 used when no row is valid.
    if pred_dims.size == 0 or gt_dims.size == 0:
        return 0.0

    # Promote a single bare entry (3,) to (1, 3) so the row-wise logic applies uniformly.
    pred_dims = np.atleast_2d(pred_dims)
    gt_dims = np.atleast_2d(gt_dims)

    valid = np.all(np.isfinite(pred_dims), axis=1) & np.all(np.isfinite(gt_dims), axis=1)
    if not np.any(valid):
        return 0.0

    return float(np.mean(np.abs(pred_dims[valid] - gt_dims[valid])))
|
|
|
|
|
|
def compute_uv_error(pred_uv, gt_uv):
    """Compute mean per-coordinate L1 pixel error between predicted and GT UV coordinates.

    This matches the training-time UV logging, which averages absolute U and V errors
    instead of using Euclidean point distance. Only rows where both coordinates of both
    the prediction and the ground truth are finite are scored.

    Args:
        pred_uv: Predicted [u, v] coordinates (N, 2). A single bare (2,) point is also accepted.
        gt_uv: Ground truth [u, v] coordinates (N, 2). A single bare (2,) point is also accepted.

    Returns:
        Mean absolute per-coordinate error, or 0.0 when the input is empty or no row is valid.
    """
    pred_uv = np.asarray(pred_uv, dtype=np.float64)
    gt_uv = np.asarray(gt_uv, dtype=np.float64)

    # An empty list converts to a 1-D array of shape (0,), on which the axis=1 reductions below would
    # raise. There is nothing to score, so return the same 0.0 used when no row is valid.
    if pred_uv.size == 0 or gt_uv.size == 0:
        return 0.0

    # Promote a single bare point (2,) to (1, 2) so the row-wise logic applies uniformly.
    pred_uv = np.atleast_2d(pred_uv)
    gt_uv = np.atleast_2d(gt_uv)

    valid = np.all(np.isfinite(pred_uv), axis=1) & np.all(np.isfinite(gt_uv), axis=1)
    if not np.any(valid):
        return 0.0

    diff = np.abs(pred_uv[valid] - gt_uv[valid])
    return float(np.mean(diff))
|
|
|
|
|
|
def empty_3d_metrics(include_orient=True, include_size=True, include_uv=True, include_visible_orient=False):
    """Build the all-zero 3D metrics dict logged when there are no matches.

    Args:
        include_orient: Add the ``orient`` entry.
        include_size: Add the ``size`` entry.
        include_uv: Add the ``uv`` entry.
        include_visible_orient: Add the ``direct_orient_visible`` and ``edge_orient_visible`` entries.

    Returns:
        Dict with the depth and center metrics set to 0.0, ``matched`` set to 0, and each requested
        optional metric set to 0.0.
    """
    defaults = dict.fromkeys(("depth_abs", "depth_rel", "depth_rmse", "center"), 0.0)
    defaults["matched"] = 0

    # Optional metrics, listed in the order their keys are appended.
    optional = (
        (include_uv, ("uv",)),
        (include_orient, ("orient",)),
        (include_size, ("size",)),
        (include_visible_orient, ("direct_orient_visible", "edge_orient_visible")),
    )
    for enabled, names in optional:
        if enabled:
            defaults.update(dict.fromkeys(names, 0.0))
    return defaults
|
|
|
|
|
|
def aggregate_3d_metric_groups(stats_by_group):
    """Combine per-entry 3D metrics into one weighted average per group.

    Args:
        stats_by_group: Mapping of group name to a list of metric dicts. Each dict carries a
            ``matched`` count and may carry underscore-prefixed weight counts.

    Returns:
        Mapping of group name to an aggregated metric dict. The metric names come from the group's
        first entry (``matched`` and underscore-prefixed keys excluded), each averaged with its own
        weight and rounded to 5 decimals; ``matched`` is the summed count. A group with no entries
        gets the defaults from ``empty_3d_metrics``.
    """
    # Depth/center/UV metrics are weighted by "_pos_matched"; the visible-orientation metrics by their
    # own valid-pair counts; every other metric by "matched".
    pos_weighted = {"depth_abs", "depth_rel", "depth_rmse", "center", "uv"}
    visible_weights = {
        "direct_orient_visible": "_direct_orient_visible_matched",
        "edge_orient_visible": "_edge_orient_visible_matched",
    }

    results = {}
    for group_name, records in stats_by_group.items():
        if not records:
            results[group_name] = empty_3d_metrics(
                include_orient=group_name == "whole",
                include_size=group_name == "whole",
                include_visible_orient=group_name == "face",
            )
            continue

        summary = {}
        for name in records[0]:
            if name != "matched" and not name.startswith("_"):
                summary[name] = 0.0
        summary["matched"] = 0

        matched_sum = sum(rec["matched"] for rec in records)
        pos_sum = sum(rec.get("_pos_matched", rec["matched"]) for rec in records)
        if matched_sum <= 0 and pos_sum <= 0:
            # Nothing was matched at all: report the zeroed summary as-is.
            results[group_name] = summary
            continue

        for name in summary:
            if name == "matched":
                continue
            if name in pos_weighted:
                weight_key = "_pos_matched"
            else:
                weight_key = visible_weights.get(name, "matched")

            # A missing metric-specific weight falls back to the entry's "matched" count.
            weights = [rec.get(weight_key, rec.get("matched", 0)) for rec in records]
            weight_total = sum(weights)
            if weight_total <= 0:
                # Visible-orientation metrics without any valid pair are reported as NaN, others as 0.
                summary[name] = float("nan") if name in visible_weights else 0.0
                continue

            accum = sum(rec[name] * w for rec, w in zip(records, weights) if w > 0)
            summary[name] = round(accum / weight_total, 5)

        summary["matched"] = matched_sum
        results[group_name] = summary
    return results
|
|
|
|
|
|
def compute_3d_metrics_for_matched(
    pred_3d_attrs,
    gt_3d_attrs,
    include_orient=True,
    include_size=True,
    include_uv=False,
    include_visible_orient=False,
):
    """Evaluate 3D metrics over prediction/ground-truth pairs that are already matched.

    Args:
        pred_3d_attrs: Dict of predicted attributes with keys:
            - center: (N, 3) predicted 3D centers [x, y, z]
            - depth: (N,) predicted z3d
            - yaw: (N,) predicted rotation_y in radians
            - edge_yaw: (N,) predicted visible-face yaw in radians (optional)
            - dims: (N, 3) predicted [l, h, w]
            - uv: (N, 2) predicted [u, v] in pixels (optional)
        gt_3d_attrs: Dict with the same keys holding the ground truth.
        include_orient: Whether to compute orientation error.
        include_size: Whether to compute size error.
        include_uv: Whether to compute UV pixel error.
        include_visible_orient: Whether to compute visible-face direct and edge orientation errors.

    Returns:
        Dict with the depth and center metrics, the requested optional metrics, and ``matched`` set to
        the number of predicted depth values. When there are none, the defaults from
        ``empty_3d_metrics`` are returned instead.
    """
    num_pairs = len(pred_3d_attrs.get("depth", []))
    if num_pairs == 0:
        return empty_3d_metrics(
            include_orient=include_orient,
            include_size=include_size,
            include_uv=include_uv,
            include_visible_orient=include_visible_orient,
        )

    depth_stats = compute_depth_error(pred_3d_attrs["depth"], gt_3d_attrs["depth"])
    result = {
        "depth_abs": depth_stats["abs_error"],
        "depth_rel": depth_stats["rel_error"],
        "depth_rmse": depth_stats["rmse"],
        "center": compute_3d_center_error(pred_3d_attrs["center"], gt_3d_attrs["center"]),
        "matched": num_pairs,
    }

    if include_uv:
        result["uv"] = compute_uv_error(pred_3d_attrs["uv"], gt_3d_attrs["uv"])
    if include_orient:
        result["orient"] = compute_orientation_error(pred_3d_attrs["yaw"], gt_3d_attrs["yaw"])
    if include_size:
        result["size"] = compute_size_error(pred_3d_attrs["dims"], gt_3d_attrs["dims"])
    if include_visible_orient:
        direct_yaw = pred_3d_attrs["yaw"]
        # Without an edge-yaw prediction, use all-NaN values so no edge pair counts as valid.
        edge_yaw = pred_3d_attrs.get("edge_yaw", np.full(num_pairs, np.nan, dtype=np.float64))
        visible = compute_visible_orientation_metrics(direct_yaw, edge_yaw, gt_3d_attrs["yaw"])
        result.update(visible)
    return result
|