# ultralytics/utils/metrics_3d.py

# Ultralytics AGPL-3.0 License - https://ultralytics.com/license

"""3D detection metrics for monocular 3D object detection.

Ported from yolov5-3d/utils/metrics.py. Provides depth error, orientation error,
3D center error, UV error, and grouped aggregation for matched prediction-GT pairs.
"""

import math

import numpy as np


def compute_depth_error(pred_depth, gt_depth, eps=1e-7):
    """Summarize how far predicted depths deviate from ground-truth depths.

    Only pairs where both values are finite and the ground-truth depth is
    strictly positive contribute to the statistics.

    Args:
        pred_depth: Predicted depth values (N,).
        gt_depth: Ground truth depth values (N,).
        eps: Small constant added to the ground-truth depth in the relative-error
            denominator.

    Returns:
        Dict with the keys ``abs_error`` (mean absolute error), ``rel_error``
        (mean absolute error divided by ground-truth depth) and ``rmse``.
        All three are 0.0 when no pair is usable.
    """
    predicted = np.asarray(pred_depth, dtype=np.float64)
    target = np.asarray(gt_depth, dtype=np.float64)

    usable = np.isfinite(predicted) & np.isfinite(target) & (target > 0)
    if not np.any(usable):
        return {"abs_error": 0.0, "rel_error": 0.0, "rmse": 0.0}

    target_kept = target[usable]
    residual = predicted[usable] - target_kept
    magnitude = np.abs(residual)

    mean_abs = float(np.mean(magnitude))
    mean_rel = float(np.mean(magnitude / (target_kept + eps)))
    root_mean_square = float(np.sqrt(np.mean(residual**2)))
    return {"abs_error": mean_abs, "rel_error": mean_rel, "rmse": root_mean_square}


def compute_orientation_error(pred_yaw, gt_yaw):
    """Compute mean absolute orientation error in degrees with wrap-around handling.

    Pairs where either angle is non-finite are ignored. The absolute yaw
    difference is reduced modulo a full turn before being folded onto the
    shorter arc, so every per-pair error lies in [0, 180] degrees even when the
    inputs are not normalized to a single 2*pi interval.

    Args:
        pred_yaw: Predicted yaw angles in radians (N,).
        gt_yaw: Ground truth yaw angles in radians (N,).

    Returns:
        Mean absolute orientation error in degrees, or 0.0 when no pair is valid.
    """
    pred_yaw = np.asarray(pred_yaw, dtype=np.float64)
    gt_yaw = np.asarray(gt_yaw, dtype=np.float64)

    valid = np.isfinite(pred_yaw) & np.isfinite(gt_yaw)
    if not np.any(valid):
        return 0.0

    full_turn = 2 * math.pi
    diff = np.abs(pred_yaw[valid] - gt_yaw[valid])
    # Folding with `full_turn - diff` is only correct for diff <= full_turn;
    # without this reduction a difference above one turn gives a negative error.
    diff = np.mod(diff, full_turn)
    diff = np.minimum(diff, full_turn - diff)
    return float(np.mean(np.degrees(diff)))


def count_valid_orientation_pairs(pred_yaw, gt_yaw):
    """Return how many yaw pairs have a finite prediction and a finite ground truth."""
    pred_is_finite = np.isfinite(np.asarray(pred_yaw, dtype=np.float64))
    gt_is_finite = np.isfinite(np.asarray(gt_yaw, dtype=np.float64))
    return int(np.count_nonzero(pred_is_finite & gt_is_finite))


def compute_visible_orientation_metrics(pred_direct_yaw, pred_edge_yaw, gt_yaw):
    """Score the direct and edge yaw predictions against the ground-truth yaw.

    Args:
        pred_direct_yaw: Directly predicted yaw angles in radians (N,).
        pred_edge_yaw: Edge-derived yaw angles in radians (N,).
        gt_yaw: Ground truth yaw angles in radians (N,).

    Returns:
        Dict holding the two orientation errors (``direct_orient_visible``,
        ``edge_orient_visible``) plus, under underscore-prefixed keys, the number
        of finite prediction/ground-truth pairs behind each of them.
    """
    target = np.asarray(gt_yaw, dtype=np.float64)
    direct = np.asarray(pred_direct_yaw, dtype=np.float64)
    edge = np.asarray(pred_edge_yaw, dtype=np.float64)

    direct_error = compute_orientation_error(direct, target)
    edge_error = compute_orientation_error(edge, target)
    direct_pairs = count_valid_orientation_pairs(direct, target)
    edge_pairs = count_valid_orientation_pairs(edge, target)

    return {
        "direct_orient_visible": direct_error,
        "edge_orient_visible": edge_error,
        "_direct_orient_visible_matched": direct_pairs,
        "_edge_orient_visible_matched": edge_pairs,
    }


def compute_3d_center_error(pred_center, gt_center):
    """Compute mean Euclidean distance between predicted and GT 3D centers.

    Rows containing any non-finite coordinate in either input are ignored.

    Args:
        pred_center: Predicted 3D centers (N, 3).
        gt_center: Ground truth 3D centers (N, 3).

    Returns:
        Mean Euclidean distance in meters, or 0.0 when there is no valid pair
        (including completely empty inputs).
    """
    pred_center = np.asarray(pred_center, dtype=np.float64)
    gt_center = np.asarray(gt_center, dtype=np.float64)

    # An empty sequence such as [] converts to a 1-D array of shape (0,), which
    # has no axis 1 to reduce over; treat "no pairs at all" as the no-match case.
    if pred_center.size == 0 and gt_center.size == 0:
        return 0.0

    valid = np.all(np.isfinite(pred_center), axis=1) & np.all(np.isfinite(gt_center), axis=1)
    if not np.any(valid):
        return 0.0

    return float(np.mean(np.linalg.norm(pred_center[valid] - gt_center[valid], axis=1)))


def compute_size_error(pred_dims, gt_dims):
    """Compute mean absolute size error for L, H, W dimensions.

    Rows containing any non-finite value in either input are ignored; the
    remaining absolute differences are averaged over all rows and dimensions.

    Args:
        pred_dims: Predicted dimensions (N, 3) - [l, h, w].
        gt_dims: Ground truth dimensions (N, 3) - [l, h, w].

    Returns:
        Mean absolute size error in meters, or 0.0 when there is no valid pair
        (including completely empty inputs).
    """
    pred_dims = np.asarray(pred_dims, dtype=np.float64)
    gt_dims = np.asarray(gt_dims, dtype=np.float64)

    # An empty sequence such as [] converts to a 1-D array of shape (0,), which
    # has no axis 1 to reduce over; treat "no pairs at all" as the no-match case.
    if pred_dims.size == 0 and gt_dims.size == 0:
        return 0.0

    valid = np.all(np.isfinite(pred_dims), axis=1) & np.all(np.isfinite(gt_dims), axis=1)
    if not np.any(valid):
        return 0.0

    return float(np.mean(np.abs(pred_dims[valid] - gt_dims[valid])))


def compute_uv_error(pred_uv, gt_uv):
    """Compute mean per-coordinate L1 pixel error between predicted and GT UV coordinates.

    This matches the training-time UV logging, which averages absolute U and V errors
    instead of using Euclidean point distance. Rows containing any non-finite
    value in either input are ignored.

    Args:
        pred_uv: Predicted [u, v] coordinates in pixels (N, 2).
        gt_uv: Ground truth [u, v] coordinates in pixels (N, 2).

    Returns:
        Mean absolute per-coordinate error in pixels, or 0.0 when there is no
        valid pair (including completely empty inputs).
    """
    pred_uv = np.asarray(pred_uv, dtype=np.float64)
    gt_uv = np.asarray(gt_uv, dtype=np.float64)

    # An empty sequence such as [] converts to a 1-D array of shape (0,), which
    # has no axis 1 to reduce over; treat "no pairs at all" as the no-match case.
    if pred_uv.size == 0 and gt_uv.size == 0:
        return 0.0

    valid = np.all(np.isfinite(pred_uv), axis=1) & np.all(np.isfinite(gt_uv), axis=1)
    if not np.any(valid):
        return 0.0

    diff = np.abs(pred_uv[valid] - gt_uv[valid])
    return float(np.mean(diff))


def empty_3d_metrics(include_orient=True, include_size=True, include_uv=True, include_visible_orient=False):
    """Build the zero-valued 3D metrics dict logged when nothing was matched.

    Args:
        include_orient: Add the ``orient`` entry.
        include_size: Add the ``size`` entry.
        include_uv: Add the ``uv`` entry.
        include_visible_orient: Add the ``direct_orient_visible`` and
            ``edge_orient_visible`` entries.

    Returns:
        Dict with the depth and center metrics set to 0.0, ``matched`` set to 0,
        and every requested optional metric set to 0.0.
    """
    defaults = dict.fromkeys(("depth_abs", "depth_rel", "depth_rmse", "center"), 0.0)
    defaults["matched"] = 0

    # Optional entries are appended in this fixed order.
    optional_groups = (
        (include_uv, ("uv",)),
        (include_orient, ("orient",)),
        (include_size, ("size",)),
        (include_visible_orient, ("direct_orient_visible", "edge_orient_visible")),
    )
    for enabled, names in optional_groups:
        if enabled:
            defaults.update(dict.fromkeys(names, 0.0))
    return defaults


def aggregate_3d_metric_groups(stats_by_group):
    """Merge per-batch 3D metric dicts into one weighted average per group.

    Each metric is averaged over a group's entries using a count stored in the
    entry: ``_pos_matched`` for depth/center/uv metrics, the dedicated
    ``_*_visible_matched`` counts for the two visible-orientation metrics, and
    ``matched`` for everything else. An entry lacking the specific count falls
    back to its ``matched`` value.

    Args:
        stats_by_group: Mapping from group name to a list of metric dicts.

    Returns:
        Mapping from group name to a dict of averaged metrics (rounded to five
        decimals) followed by the summed ``matched`` count. The metric names are
        taken from the group's first entry, skipping underscore-prefixed keys.
        Groups without entries receive the default empty metrics.
    """
    weight_key_by_metric = dict.fromkeys(("depth_abs", "depth_rel", "depth_rmse", "center", "uv"), "_pos_matched")
    weight_key_by_metric["direct_orient_visible"] = "_direct_orient_visible_matched"
    weight_key_by_metric["edge_orient_visible"] = "_edge_orient_visible_matched"
    # Metrics reported as NaN (rather than 0.0) when nothing contributes to them.
    nan_when_unweighted = ("direct_orient_visible", "edge_orient_visible")

    results = {}
    for group, entries in stats_by_group.items():
        if not entries:
            results[group] = empty_3d_metrics(
                include_orient=group == "whole",
                include_size=group == "whole",
                include_visible_orient=group == "face",
            )
            continue

        metric_names = [name for name in entries[0] if name != "matched" and not name.startswith("_")]
        summary = dict.fromkeys(metric_names, 0.0)
        summary["matched"] = 0

        matched_sum = sum(entry["matched"] for entry in entries)
        pos_matched_sum = sum(entry.get("_pos_matched", entry["matched"]) for entry in entries)
        if matched_sum <= 0 and pos_matched_sum <= 0:
            # Nothing matched anywhere in this group: keep the zeroed summary.
            results[group] = summary
            continue

        for name in metric_names:
            weight_key = weight_key_by_metric.get(name, "matched")
            weights = [entry.get(weight_key, entry.get("matched", 0)) for entry in entries]
            weight_sum = sum(weights)
            if weight_sum <= 0:
                summary[name] = float("nan") if name in nan_when_unweighted else 0.0
                continue
            # Entries with a non-positive weight are left out of the numerator.
            numerator = sum(entry[name] * weight for entry, weight in zip(entries, weights) if weight > 0)
            summary[name] = round(numerator / weight_sum, 5)

        summary["matched"] = matched_sum
        results[group] = summary
    return results


def compute_3d_metrics_for_matched(
    pred_3d_attrs,
    gt_3d_attrs,
    include_orient=True,
    include_size=True,
    include_uv=False,
    include_visible_orient=False,
):
    """Evaluate 3D metrics over prediction/ground-truth pairs that are already matched.

    Args:
        pred_3d_attrs: Dict of predicted attributes with keys:
            - center: (N, 3) predicted 3D centers [x, y, z]
            - depth: (N,) predicted z3d
            - yaw: (N,) predicted rotation_y in radians
            - edge_yaw: (N,) predicted visible-face yaw in radians (optional)
            - dims: (N, 3) predicted [l, h, w]
            - uv: (N, 2) predicted [u, v] in pixels (optional)
        gt_3d_attrs: Dict with same keys for ground truth.
        include_orient: Whether to compute orientation error.
        include_size: Whether to compute size error.
        include_uv: Whether to compute UV pixel error.
        include_visible_orient: Whether to compute visible-face direct and edge orientation errors.

    Returns:
        Dict with the depth and center metrics, the ``matched`` pair count, and
        one entry per enabled optional metric. The pair count is the length of
        the predicted ``depth`` sequence; when it is zero (or the key is absent)
        the default empty metrics are returned instead.
    """
    num_pairs = len(pred_3d_attrs.get("depth", []))
    if not num_pairs:
        return empty_3d_metrics(
            include_orient=include_orient,
            include_size=include_size,
            include_uv=include_uv,
            include_visible_orient=include_visible_orient,
        )

    depth_stats = compute_depth_error(pred_3d_attrs["depth"], gt_3d_attrs["depth"])
    result = {
        "depth_abs": depth_stats["abs_error"],
        "depth_rel": depth_stats["rel_error"],
        "depth_rmse": depth_stats["rmse"],
        "center": compute_3d_center_error(pred_3d_attrs["center"], gt_3d_attrs["center"]),
        "matched": num_pairs,
    }

    if include_uv:
        result["uv"] = compute_uv_error(pred_3d_attrs["uv"], gt_3d_attrs["uv"])
    if include_orient:
        result["orient"] = compute_orientation_error(pred_3d_attrs["yaw"], gt_3d_attrs["yaw"])
    if include_size:
        result["size"] = compute_size_error(pred_3d_attrs["dims"], gt_3d_attrs["dims"])
    if include_visible_orient:
        direct_yaw = pred_3d_attrs["yaw"]
        # Without an edge-yaw prediction, pass all-NaN so no edge pair counts as valid.
        missing_edge_yaw = np.full(num_pairs, np.nan, dtype=np.float64)
        edge_yaw = pred_3d_attrs.get("edge_yaw", missing_edge_yaw)
        visible = compute_visible_orientation_metrics(direct_yaw, edge_yaw, gt_3d_attrs["yaw"])
        result.update(visible)
    return result
单目3D初始代码 2026-06-24 09:35:46 +08:00			`# Ultralytics AGPL-3.0 License - https://ultralytics.com/license`

			`"""3D detection metrics for monocular 3D object detection.`

			`Ported from yolov5-3d/utils/metrics.py. Provides depth error, orientation error,`
			`3D center error, UV error, and grouped aggregation for matched prediction-GT pairs.`
			`"""`

			`import math`

			`import numpy as np`


			`def compute_depth_error(pred_depth, gt_depth, eps=1e-7):`
			`"""Compute depth error metrics between predicted and ground truth depth.`

			`Args:`
			`pred_depth: Predicted depth values (N,).`
			`gt_depth: Ground truth depth values (N,).`
			`eps: Small value to avoid division by zero.`

			`Returns:`
			`Dict with abs_error, rel_error, rmse.`
			`"""`
			`pred_depth = np.asarray(pred_depth, dtype=np.float64)`
			`gt_depth = np.asarray(gt_depth, dtype=np.float64)`

			`valid = np.isfinite(pred_depth) & np.isfinite(gt_depth) & (gt_depth > 0)`
			`if not np.any(valid):`
			`return {"abs_error": 0.0, "rel_error": 0.0, "rmse": 0.0}`

			`p, g = pred_depth[valid], gt_depth[valid]`
			`abs_err = np.abs(p - g)`
			`return {`
			`"abs_error": float(np.mean(abs_err)),`
			`"rel_error": float(np.mean(abs_err / (g + eps))),`
			`"rmse": float(np.sqrt(np.mean((p - g) ** 2))),`
			`}`


			`def compute_orientation_error(pred_yaw, gt_yaw):`
			`"""Compute mean absolute orientation error in degrees with wrap-around handling.`

			`Args:`
			`pred_yaw: Predicted yaw angles in radians (N,).`
			`gt_yaw: Ground truth yaw angles in radians (N,).`

			`Returns:`
			`Mean absolute orientation error in degrees.`
			`"""`
			`pred_yaw = np.asarray(pred_yaw, dtype=np.float64)`
			`gt_yaw = np.asarray(gt_yaw, dtype=np.float64)`

			`valid = np.isfinite(pred_yaw) & np.isfinite(gt_yaw)`
			`if not np.any(valid):`
			`return 0.0`

			`diff = np.abs(pred_yaw[valid] - gt_yaw[valid])`
			`diff = np.minimum(diff, 2 * math.pi - diff)`
			`return float(np.mean(np.degrees(diff)))`


			`def count_valid_orientation_pairs(pred_yaw, gt_yaw):`
			`"""Count valid orientation pairs with finite prediction and ground-truth yaw."""`
			`pred_yaw = np.asarray(pred_yaw, dtype=np.float64)`
			`gt_yaw = np.asarray(gt_yaw, dtype=np.float64)`
			`return int(np.sum(np.isfinite(pred_yaw) & np.isfinite(gt_yaw)))`


			`def compute_visible_orientation_metrics(pred_direct_yaw, pred_edge_yaw, gt_yaw):`
			`"""Compute visible-face direct and edge orientation errors against GT yaw."""`
			`gt_yaw = np.asarray(gt_yaw, dtype=np.float64)`
			`pred_direct_yaw = np.asarray(pred_direct_yaw, dtype=np.float64)`
			`pred_edge_yaw = np.asarray(pred_edge_yaw, dtype=np.float64)`

			`return {`
			`"direct_orient_visible": compute_orientation_error(pred_direct_yaw, gt_yaw),`
			`"edge_orient_visible": compute_orientation_error(pred_edge_yaw, gt_yaw),`
			`"_direct_orient_visible_matched": count_valid_orientation_pairs(pred_direct_yaw, gt_yaw),`
			`"_edge_orient_visible_matched": count_valid_orientation_pairs(pred_edge_yaw, gt_yaw),`
			`}`


			`def compute_3d_center_error(pred_center, gt_center):`
			`"""Compute mean Euclidean distance between predicted and GT 3D centers.`

			`Args:`
			`pred_center: Predicted 3D centers (N, 3).`
			`gt_center: Ground truth 3D centers (N, 3).`

			`Returns:`
			`Mean Euclidean distance in meters.`
			`"""`
			`pred_center = np.asarray(pred_center, dtype=np.float64)`
			`gt_center = np.asarray(gt_center, dtype=np.float64)`

			`valid = np.all(np.isfinite(pred_center), axis=1) & np.all(np.isfinite(gt_center), axis=1)`
			`if not np.any(valid):`
			`return 0.0`

			`return float(np.mean(np.linalg.norm(pred_center[valid] - gt_center[valid], axis=1)))`


			`def compute_size_error(pred_dims, gt_dims):`
			`"""Compute mean absolute size error for L, H, W dimensions.`

			`Args:`
			`pred_dims: Predicted dimensions (N, 3) - [l, h, w].`
			`gt_dims: Ground truth dimensions (N, 3) - [l, h, w].`

			`Returns:`
			`Mean absolute size error in meters.`
			`"""`
			`pred_dims = np.asarray(pred_dims, dtype=np.float64)`
			`gt_dims = np.asarray(gt_dims, dtype=np.float64)`

			`valid = np.all(np.isfinite(pred_dims), axis=1) & np.all(np.isfinite(gt_dims), axis=1)`
			`if not np.any(valid):`
			`return 0.0`

			`return float(np.mean(np.abs(pred_dims[valid] - gt_dims[valid])))`


			`def compute_uv_error(pred_uv, gt_uv):`
			`"""Compute mean per-coordinate L1 pixel error between predicted and GT UV coordinates.`

			`This matches the training-time UV logging, which averages absolute U and V errors`
			`instead of using Euclidean point distance.`
			`"""`
			`pred_uv = np.asarray(pred_uv, dtype=np.float64)`
			`gt_uv = np.asarray(gt_uv, dtype=np.float64)`

			`valid = np.all(np.isfinite(pred_uv), axis=1) & np.all(np.isfinite(gt_uv), axis=1)`
			`if not np.any(valid):`
			`return 0.0`

			`diff = np.abs(pred_uv[valid] - gt_uv[valid])`
			`return float(np.mean(diff))`


			`def empty_3d_metrics(include_orient=True, include_size=True, include_uv=True, include_visible_orient=False):`
			`"""Return default 3D metrics used for logging when no matches are available."""`
			`metrics = {`
			`"depth_abs": 0.0,`
			`"depth_rel": 0.0,`
			`"depth_rmse": 0.0,`
			`"center": 0.0,`
			`"matched": 0,`
			`}`
			`if include_uv:`
			`metrics["uv"] = 0.0`
			`if include_orient:`
			`metrics["orient"] = 0.0`
			`if include_size:`
			`metrics["size"] = 0.0`
			`if include_visible_orient:`
			`metrics["direct_orient_visible"] = 0.0`
			`metrics["edge_orient_visible"] = 0.0`
			`return metrics`


			`def aggregate_3d_metric_groups(stats_by_group):`
			`"""Aggregate grouped 3D metrics with matched-count weighting."""`
			`aggregated = {}`
			`for group, entries in stats_by_group.items():`
			`if not entries:`
			`aggregated[group] = empty_3d_metrics(`
			`include_orient=group == "whole",`
			`include_size=group == "whole",`
			`include_visible_orient=group == "face",`
			`)`
			`continue`

			`template = {k: 0.0 for k in entries[0] if k != "matched" and not k.startswith("_")}`
			`template["matched"] = 0`
			`total_matched = sum(entry["matched"] for entry in entries)`
			`total_pos_matched = sum(entry.get("_pos_matched", entry["matched"]) for entry in entries)`
			`if total_matched <= 0 and total_pos_matched <= 0:`
			`aggregated[group] = template`
			`continue`

			`for key in template:`
			`if key == "matched":`
			`continue`
			`if key in {"depth_abs", "depth_rel", "depth_rmse", "center", "uv"}:`
			`weight_key = "_pos_matched"`
			`elif key == "direct_orient_visible":`
			`weight_key = "_direct_orient_visible_matched"`
			`elif key == "edge_orient_visible":`
			`weight_key = "_edge_orient_visible_matched"`
			`else:`
			`weight_key = "matched"`
			`total_weight = sum(entry.get(weight_key, entry.get("matched", 0)) for entry in entries)`
			`if total_weight <= 0:`
			`template[key] = float("nan") if key in {"direct_orient_visible", "edge_orient_visible"} else 0.0`
			`continue`
			`weighted = sum(`
			`entry[key] * entry.get(weight_key, entry.get("matched", 0))`
			`for entry in entries`
			`if entry.get(weight_key, entry.get("matched", 0)) > 0`
			`)`
			`template[key] = round(weighted / total_weight, 5)`
			`template["matched"] = total_matched`
			`aggregated[group] = template`
			`return aggregated`


			`def compute_3d_metrics_for_matched(`
			`pred_3d_attrs,`
			`gt_3d_attrs,`
			`include_orient=True,`
			`include_size=True,`
			`include_uv=False,`
			`include_visible_orient=False,`
			`):`
			`"""Compute 3D metrics for pre-matched prediction-GT pairs.`

			`Args:`
			`pred_3d_attrs: Dict with keys:`
			`- center: (N, 3) predicted 3D centers [x, y, z]`
			`- depth: (N,) predicted z3d`
			`- yaw: (N,) predicted rotation_y in radians`
			`- edge_yaw: (N,) predicted visible-face yaw in radians (optional)`
			`- dims: (N, 3) predicted [l, h, w]`
			`- uv: (N, 2) predicted [u, v] in pixels (optional)`
			`gt_3d_attrs: Dict with same keys for ground truth.`
			`include_orient: Whether to compute orientation error.`
			`include_size: Whether to compute size error.`
			`include_uv: Whether to compute UV pixel error.`
			`include_visible_orient: Whether to compute visible-face direct and edge orientation errors.`

			`Returns:`
			`Dict with aggregated metrics and matched count.`
			`"""`
			`n = len(pred_3d_attrs.get("depth", []))`
			`if n == 0:`
			`return empty_3d_metrics(`
			`include_orient=include_orient,`
			`include_size=include_size,`
			`include_uv=include_uv,`
			`include_visible_orient=include_visible_orient,`
			`)`

			`depth_m = compute_depth_error(pred_3d_attrs["depth"], gt_3d_attrs["depth"])`
			`center_m = compute_3d_center_error(pred_3d_attrs["center"], gt_3d_attrs["center"])`
			`metrics = {`
			`"depth_abs": depth_m["abs_error"],`
			`"depth_rel": depth_m["rel_error"],`
			`"depth_rmse": depth_m["rmse"],`
			`"center": center_m,`
			`"matched": n,`
			`}`
			`if include_uv:`
			`metrics["uv"] = compute_uv_error(pred_3d_attrs["uv"], gt_3d_attrs["uv"])`
			`if include_orient:`
			`metrics["orient"] = compute_orientation_error(pred_3d_attrs["yaw"], gt_3d_attrs["yaw"])`
			`if include_size:`
			`metrics["size"] = compute_size_error(pred_3d_attrs["dims"], gt_3d_attrs["dims"])`
			`if include_visible_orient:`
			`metrics.update(`
			`compute_visible_orientation_metrics(`
			`pred_3d_attrs["yaw"],`
			`pred_3d_attrs.get("edge_yaw", np.full(n, np.nan, dtype=np.float64)),`
			`gt_3d_attrs["yaw"],`
			`)`
			`)`
			`return metrics`