Files
yolov26_3d/tests/test_metrics_3d.py
2026-06-24 09:35:46 +08:00

1681 lines
64 KiB
Python
Executable File

# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import random
from types import SimpleNamespace
import numpy as np
import pytest
from ultralytics.data.ground3d_augment import compute_simul_calib
from ultralytics.utils.metrics_3d import aggregate_3d_metric_groups, compute_3d_metrics_for_matched, empty_3d_metrics
from ultralytics.utils.plotting_3d import (
collect_face_bottom_edges,
collect_precomputed_edge_points_2d,
compute_3d_box_corners,
decode_cut_partial_side_edge_from_gt,
decode_edge_yaw_selection_from_prediction,
decode_multi_visible_face_yaw_from_gt,
decode_multi_visible_face_yaw_from_prediction,
decode_visible_face_edge_from_gt,
edge_points_to_yaw,
extract_3d_attrs_from_gt,
extract_3d_attrs_from_prediction,
face_center_from_corners,
get_cut_object_side_face,
project_face_bottom_edge,
project_partial_face_bottom_edge,
project_3d_to_2d,
rebuild_box_corners_for_visualization,
reconstruct_3d_box_from_face,
select_gt_visible_faces,
visible_face_edges_to_yaw,
)
def _make_cut_target(cut_state, cut_side, rot_y=0.75):
    """Build a synthetic 42-slot float32 target with exactly one populated face block.

    ``cut_state == "cut_in"`` populates slots 10:18 (third value 18.0); any other
    value populates slots 18:26 (third value 18.5). ``cut_side == "left"`` sets the
    fifth value of the populated block to 0.45, anything else to 0.55. Every other
    8-slot block at offsets 10/18/26/34 is filled with six ``-1.0`` followed by two
    ``0.0``. Slots that are never written stay NaN.
    """
    populate_front = cut_state == "cut_in"
    populated_offset = 10 if populate_front else 18
    populated_depth = 18.0 if populate_front else 18.5
    populated_u = 0.45 if cut_side == "left" else 0.55

    target = np.full(42, np.nan, dtype=np.float32)
    target[2] = 20.0
    target[3:6] = [4.0, 1.5, 1.8]
    target[6] = rot_y
    target[7:9] = [0.5, 0.6]

    for offset in (10, 18, 26, 34):
        if offset == populated_offset:
            target[offset : offset + 8] = [0.0, 0.0, populated_depth, 0.0, populated_u, 0.55, 0.9, 1.0]
        else:
            # Unpopulated block: six -1.0 placeholders, then two zeros.
            target[offset : offset + 6] = -1.0
            target[offset + 6 : offset + 8] = 0.0
    return target
def _make_consistent_cut_target(cut_state, cut_side, calib, rot_y=None):
    """Build a 42-slot target whose single populated face block is derived from real box geometry.

    The box is centred at x = -8 ("left") or x = +8 (any other ``cut_side``), z = 20,
    with dims [4.0, 1.5, 1.8]. When ``rot_y`` is None it defaults to -0.75 for
    "cut_in" and 0.75 otherwise. "cut_in" keeps face 0 (corner ids 4-7, block
    offset 10); any other state keeps face 1 (corner ids 0-3, block offset 18).

    Returns:
        (target, corners, bbox_xyxy): the target vector, the corners returned by
        ``compute_3d_box_corners``, and a fixed bbox touching the left or right
        side of a 640x480 image.
    """
    image_w, image_h = 640, 480
    is_cut_in = cut_state == "cut_in"

    box_dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    center_x = -8.0 if cut_side == "left" else 8.0
    box_center = np.array([center_x, 0.0, 20.0], dtype=np.float32)
    if rot_y is None:
        rot_y = -0.75 if is_cut_in else 0.75
    rot_y = float(rot_y)

    target = np.full(42, np.nan, dtype=np.float32)
    target[2] = box_center[2]
    target[3:6] = box_dims
    target[6] = rot_y

    corners = compute_3d_box_corners(box_center, box_dims, rot_y, face_type=-1)
    center_uv = project_3d_to_2d(box_center[None, :], calib)[0]
    target[7:9] = [center_uv[0] / image_w, center_uv[1] / image_h]

    if is_cut_in:
        kept_corner_ids, kept_offset = [4, 5, 6, 7], 10
    else:
        kept_corner_ids, kept_offset = [0, 1, 2, 3], 18
    kept_center = corners[kept_corner_ids].mean(axis=0)
    kept_uv = project_3d_to_2d(kept_center[None, :], calib)[0]

    for offset in (10, 18, 26, 34):
        if offset == kept_offset:
            target[offset : offset + 8] = [
                0.0,
                0.0,
                kept_center[2],
                0.0,
                kept_uv[0] / image_w,
                kept_uv[1] / image_h,
                1.0,
                1.0,
            ]
        else:
            # Unpopulated block: six -1.0 placeholders, then two zeros.
            target[offset : offset + 6] = -1.0
            target[offset + 6 : offset + 8] = 0.0

    if cut_side == "right":
        bbox_xyxy = np.array([560.0, 120.0, 639.0, 240.0], dtype=np.float32)
    else:
        bbox_xyxy = np.array([0.0, 120.0, 80.0, 240.0], dtype=np.float32)
    return target, corners, bbox_xyxy
def _make_consistent_visible_face_target(visible_faces, calib, rot_y=0.75, center_3d=None):
    """Build a zero-initialised 42-slot target with geometry-derived blocks for the listed faces.

    Each face type in ``visible_faces`` gets an 8-slot block (offsets 10/18/26/34
    for faces 0/1/2/3) filled from the mean of that face's corners and its
    projection; the seventh value is ``1.0 - 0.01 * face_type``. Blocks of faces
    not listed stay all-zero. The box centre defaults to [0, 0, 20].

    Returns:
        (target, corners): the target vector and the corners returned by
        ``compute_3d_box_corners``.
    """
    image_w, image_h = 640, 480
    yaw = float(rot_y)
    box_dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    if center_3d is None:
        center_3d = np.array([0.0, 0.0, 20.0], dtype=np.float32)
    else:
        center_3d = np.asarray(center_3d, dtype=np.float32)

    # face type -> (block offset in target, corner ids forming that face)
    face_layout = {
        0: (10, [4, 5, 6, 7]),
        1: (18, [0, 1, 2, 3]),
        2: (26, [1, 2, 5, 6]),
        3: (34, [0, 3, 4, 7]),
    }

    target = np.zeros(42, dtype=np.float32)
    target[2] = center_3d[2]
    target[3:6] = box_dims
    target[6] = yaw

    corners = compute_3d_box_corners(center_3d, box_dims, yaw, face_type=-1)
    center_uv = project_3d_to_2d(center_3d[None, :], calib)[0]
    target[7:9] = [center_uv[0] / image_w, center_uv[1] / image_h]

    for face_type, (offset, corner_ids) in face_layout.items():
        if face_type not in visible_faces:
            continue
        face_center = corners[corner_ids].mean(axis=0)
        face_uv = project_3d_to_2d(face_center[None, :], calib)[0]
        target[offset : offset + 8] = [
            0.0,
            0.0,
            float(face_center[2]),
            0.0,
            float(face_uv[0] / image_w),
            float(face_uv[1] / image_h),
            1.0 - 0.01 * face_type,
            1.0,
        ]
    return target, corners
def test_cut_side_face_mapping_falls_back_to_border_side_without_geometry():
    """Without corners, first arguments 1 and 2 both map "left" to face 3 and "right" to face 2."""
    expected_face_by_side = {"left": 3, "right": 2}
    for cut_side, expected_face in expected_face_by_side.items():
        for cut_code in (1, 2):
            assert get_cut_object_side_face(cut_code, cut_side) == expected_face
def test_cut_side_face_mapping_uses_geometry_for_cut_out_boxes():
    """With corners supplied, the selected side face depends on yaw, not only on the cut side."""
    box_dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    # (box centre x, rot_y, cut side, expected face type)
    scenarios = (
        (-8.0, 0.75, "left", 3),
        (-8.0, 2.6, "left", 2),
        (8.0, 0.75, "right", 3),
        (8.0, 2.6, "right", 2),
    )
    for center_x, yaw, side, expected_face in scenarios:
        box_center = np.array([center_x, 0.0, 20.0], dtype=np.float32)
        box_corners = compute_3d_box_corners(box_center, box_dims, yaw, face_type=-1)
        assert get_cut_object_side_face(2, side, corners_3d=box_corners) == expected_face
def test_project_partial_face_bottom_edge_samples_exactly_five_visible_points():
    """The partial face-3 edge yields five samples, all inside 640x480 and non-decreasing in u."""
    intrinsics = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0}
    box_center = np.array([-7.0, 0.0, 20.0], dtype=np.float32)
    box_dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    box_corners = compute_3d_box_corners(box_center, box_dims, 0.75, face_type=-1)

    edge_3d, edge_2d = project_partial_face_bottom_edge(box_corners, 3, intrinsics, 640, 480, num_samples=5)

    assert edge_3d is not None
    assert edge_2d is not None
    assert edge_3d.shape == (5, 3)
    assert edge_2d.shape == (5, 2)
    us = edge_2d[:, 0]
    vs = edge_2d[:, 1]
    assert np.all((us >= 0) & (us <= 639))
    assert np.all((vs >= 0) & (vs <= 479))
    # Small negative tolerance allows for float noise in the left-to-right ordering.
    assert np.all(np.diff(us) >= -1e-5)
def test_decode_cut_partial_side_edge_from_gt_matches_left_cut_in_case():
    """A left cut-in target decodes to a five-point face-3 edge with consistent depths."""
    intrinsics = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0, "depth_scale": 1.0}
    gt_target, _, gt_bbox = _make_consistent_cut_target("cut_in", "left", intrinsics)

    edge = decode_cut_partial_side_edge_from_gt(gt_target, 0, intrinsics, 640, 480, {0}, set(), bbox_xyxy=gt_bbox)

    assert edge is not None
    assert edge["face_type"] == 3
    assert edge["points_2d"].shape == (5, 2)
    us = edge["points_2d"][:, 0]
    assert np.all(np.diff(us) >= -1e-5)
    assert np.allclose(edge["points_3d"][:, 2], edge["depths"])
def test_decode_cut_partial_side_edge_from_gt_reuses_face_box_geometry_for_cut_in():
    """The decoded cut-in edge equals the face-3 partial edge of both the face-rebuilt and the source box."""
    intrinsics = {
        "fx": 500.0,
        "fy": 500.0,
        "cx": 320.0,
        "cy": 240.0,
        "depth_scale": 1.0,
        "distort_coeffs": [0.1, -0.01, 0.001, -0.0001],
    }
    gt_target, source_corners, gt_bbox = _make_consistent_cut_target("cut_in", "left", intrinsics)
    decoded = decode_cut_partial_side_edge_from_gt(gt_target, 0, intrinsics, 640, 480, {0}, set(), bbox_xyxy=gt_bbox)

    # Rebuild the box from the populated face block (slots 10:18) of the target.
    face_block = gt_target[10:18]
    face_uv_px = (float(face_block[4] * 640), float(face_block[5] * 480))
    rebuilt_corners = reconstruct_3d_box_from_face(
        face_uv_px,
        float(face_block[2]),
        gt_target[3:6],
        float(gt_target[6]),
        0,
        intrinsics,
    )
    rebuilt_edge_3d, rebuilt_edge_2d = project_partial_face_bottom_edge(
        rebuilt_corners, 3, intrinsics, 640, 480, num_samples=5
    )
    source_edge_3d, source_edge_2d = project_partial_face_bottom_edge(
        source_corners, 3, intrinsics, 640, 480, num_samples=5
    )

    assert decoded is not None
    assert rebuilt_edge_3d is not None
    assert rebuilt_edge_2d is not None
    assert source_edge_3d is not None
    assert source_edge_2d is not None
    assert np.allclose(decoded["points_3d"], rebuilt_edge_3d, atol=1e-4)
    assert np.allclose(decoded["points_2d"], rebuilt_edge_2d, atol=1e-4)
    assert np.allclose(decoded["points_3d"], source_edge_3d, atol=1e-4)
    assert np.allclose(decoded["points_2d"], source_edge_2d, atol=1e-4)
def test_rebuild_box_corners_for_visualization_preserves_face_anchor():
    """Rebuilding with a new yaw around a given face centre leaves that face centre unchanged."""
    anchor_face = 0
    original_yaw, replacement_yaw = 0.35, -0.6
    box_center = np.array([1.5, 0.2, 18.0], dtype=np.float32)
    box_dims = np.array([4.2, 1.6, 1.9], dtype=np.float32)

    original_corners = compute_3d_box_corners(box_center, box_dims, original_yaw, face_type=-1)
    anchor_center = face_center_from_corners(original_corners, anchor_face)

    rebuilt_corners = rebuild_box_corners_for_visualization(
        original_corners,
        box_dims,
        replacement_yaw,
        visible_face_type=anchor_face,
        face_center_3d=anchor_center,
    )
    assert rebuilt_corners is not None

    rebuilt_anchor_center = face_center_from_corners(rebuilt_corners, anchor_face)
    assert rebuilt_anchor_center is not None
    assert np.allclose(rebuilt_anchor_center, anchor_center, atol=1e-5)
def test_rebuild_box_corners_for_visualization_preserves_box_center_for_whole_boxes():
    """Rebuilding without a face anchor keeps the mean of the corners unchanged."""
    box_center = np.array([-2.0, 0.1, 25.0], dtype=np.float32)
    box_dims = np.array([0.8, 1.7, 0.6], dtype=np.float32)
    original_corners = compute_3d_box_corners(box_center, box_dims, 0.2, face_type=-1)

    rebuilt_corners = rebuild_box_corners_for_visualization(original_corners, box_dims, -1.1)

    assert rebuilt_corners is not None
    original_mean = original_corners.mean(axis=0)
    assert np.allclose(rebuilt_corners.mean(axis=0), original_mean, atol=1e-5)
def test_decode_cut_partial_side_edge_from_gt_matches_other_cut_cases():
    """Each remaining cut state/side/yaw combination decodes to the expected side face with five points."""
    intrinsics = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0, "depth_scale": 1.0}
    # (cut state, cut side, rot_y, expected face type)
    scenarios = (
        ("cut_out", "left", 0.75, 3),
        ("cut_out", "left", 2.6, 2),
        ("cut_in", "right", -0.75, 2),
        ("cut_out", "right", 0.75, 3),
        ("cut_out", "right", 2.6, 2),
    )
    for state, side, yaw, expected_face in scenarios:
        gt_target, _, gt_bbox = _make_consistent_cut_target(state, side, intrinsics, rot_y=yaw)
        edge = decode_cut_partial_side_edge_from_gt(
            gt_target, 0, intrinsics, 640, 480, {0}, set(), bbox_xyxy=gt_bbox
        )
        assert edge is not None
        assert edge["face_type"] == expected_face
        assert edge["points_2d"].shape == (5, 2)
def test_decode_cut_partial_side_edge_from_gt_does_not_guess_cut_side_without_bbox():
    """With ``bbox_xyxy=None`` the decoder returns None."""
    intrinsics = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0, "depth_scale": 1.0}
    gt_target = _make_cut_target("cut_out", "left", rot_y=0.75)
    edge = decode_cut_partial_side_edge_from_gt(gt_target, 0, intrinsics, 640, 480, {0}, set(), bbox_xyxy=None)
    assert edge is None
def test_decode_multi_visible_face_yaw_from_gt_uses_two_edges_for_valid_cut_objects():
    """For a left cut-in target the decoded yaw matches the GT yaw (mod 2*pi) within 1e-3."""
    intrinsics = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0, "depth_scale": 1.0}
    rot_y = -0.75
    gt_target, _, gt_bbox = _make_consistent_cut_target("cut_in", "left", intrinsics, rot_y=rot_y)

    decoded_yaw = decode_multi_visible_face_yaw_from_gt(
        gt_target,
        0,
        intrinsics,
        640,
        480,
        {0},
        set(),
        fallback_face_type=0,
        bbox_xyxy=gt_bbox,
    )

    # Wrap the difference into [-pi, pi) so equivalent angles compare equal.
    wrapped_error = (decoded_yaw - rot_y + np.pi) % (2 * np.pi) - np.pi
    assert np.isclose(wrapped_error, 0.0, atol=1e-3)
def test_decode_multi_visible_face_yaw_from_gt_matches_gt_yaw_across_visible_face_cases():
    """Across two-visible-face and cut targets, the decoded yaw is finite and matches the GT yaw (mod 2*pi)."""
    intrinsics = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0, "depth_scale": 1.0}

    # Fully visible boxes first (no bbox), then cut boxes (with their border bbox).
    scenarios = [
        (_make_consistent_visible_face_target(faces, intrinsics, rot_y=gt_yaw)[0], gt_yaw, None)
        for faces, gt_yaw in (((0, 3), 0.75), ((0, 2), 2.35), ((1, 3), -0.75), ((1, 2), -2.35))
    ]
    cut_configs = (
        ("cut_in", "left", -0.75),
        ("cut_out", "left", 0.75),
        ("cut_out", "left", 2.6),
        ("cut_out", "right", 0.75),
        ("cut_out", "right", 2.6),
    )
    for state, side, gt_yaw in cut_configs:
        cut_target, _, cut_bbox = _make_consistent_cut_target(state, side, intrinsics, rot_y=gt_yaw)
        scenarios.append((cut_target, gt_yaw, cut_bbox))

    for gt_target, rot_y, bbox_xyxy in scenarios:
        yaw = decode_multi_visible_face_yaw_from_gt(
            gt_target,
            0,
            intrinsics,
            640,
            480,
            {0},
            set(),
            fallback_face_type=0,
            bbox_xyxy=bbox_xyxy,
        )
        wrapped_error = (yaw - rot_y + np.pi) % (2 * np.pi) - np.pi
        assert np.isfinite(yaw)
        assert np.isclose(wrapped_error, 0.0, atol=1e-3), (rot_y, yaw, bbox_xyxy)
def test_compute_simul_calib_uses_center_crop_without_augmentation():
    """With ``augment=False`` the crop's top-left corner is (960, 540) minus half the crop size."""
    source_calib = {
        "focal_u": 1000.0,
        "focal_v": 1000.0,
        "cu": 960.0,
        "cv": 540.0,
        "distort_coeffs": [0.0, 0.0, 0.0, 0.0],
    }
    result = compute_simul_calib(
        source_calib, (1920, 1080), (704, 352), 960.0, 540.0, target_fx=537.0, augment=False
    )
    left, top, right, bottom = result["crop_bounds"]
    crop_w = right - left
    crop_h = bottom - top
    assert left == int(960.0 - crop_w / 2)
    assert top == int(540.0 - crop_h / 2)
def test_compute_simul_calib_augmentation_keeps_fixed_crop_center():
    """Over 32 augmented calls the crop size varies while its centre stays at (960, 540)."""
    source_calib = {
        "focal_u": 1000.0,
        "focal_v": 1000.0,
        "cu": 960.0,
        "cv": 540.0,
        "distort_coeffs": [0.0, 0.0, 0.0, 0.0],
    }
    # Seed Python's global RNG so the sequence of augmented crops is reproducible.
    random.seed(0)
    all_bounds = [
        compute_simul_calib(
            source_calib, (1920, 1080), (704, 352), 960.0, 540.0, target_fx=537.0, augment=True
        )["crop_bounds"]
        for _ in range(32)
    ]

    widths = {right - left for left, _, right, _ in all_bounds}
    heights = {bottom - top for _, top, _, bottom in all_bounds}
    centers = {((left + right) / 2.0, (top + bottom) / 2.0) for left, top, right, bottom in all_bounds}

    assert len(widths) > 1
    assert len(heights) > 1
    assert centers == {(960.0, 540.0)}
def test_select_gt_visible_faces_uses_score_threshold():
    """Faces are returned only when their seventh block value reaches ``score_thr``."""
    gt = np.full(42, np.nan, dtype=np.float32)
    gt[2] = 20.0
    gt[3:6] = [4.0, 1.5, 1.8]
    gt[6] = 0.25
    gt[7:9] = [0.5, 0.6]
    # Face blocks at offsets 10, 26 and 34 with seventh values 0.49, 0.50 and 0.85.
    gt[10:18] = [0.0, 0.0, 18.0, 0.0, 0.45, 0.55, 0.49, 1.0]
    gt[26:34] = [0.0, 0.0, 17.0, 0.0, 0.42, 0.53, 0.50, 1.0]
    gt[34:42] = [0.0, 0.0, 16.5, 0.0, 0.52, 0.51, 0.85, 1.0]

    def selected_face_types(threshold):
        return [face_type for face_type, _ in select_gt_visible_faces(gt, score_thr=threshold)]

    assert selected_face_types(0.5) == [2, 3]
    assert selected_face_types(0.8) == [3]
def test_extract_3d_attrs_supports_whole_and_face_modes():
    """GT and prediction extraction return the expected depth and pixel uv in whole-box and face-0 modes."""
    intrinsics = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0, "depth_scale": 2.0}

    # --- ground truth ---
    gt = np.full(42, np.nan, dtype=np.float32)
    gt[2] = 10.0
    gt[3:6] = [4.0, 1.5, 1.8]
    gt[6] = 0.1
    gt[7:9] = [0.5, 0.4]
    gt[10:18] = [0.0, 0.0, 8.0, 0.0, 0.25, 0.35, 0.4, 1.0]
    face_classes = {0}
    complete_classes = {1}
    gt_args = (gt, 0, intrinsics, 640, 480, face_classes, complete_classes)

    gt_whole = extract_3d_attrs_from_gt(*gt_args)
    gt_face = extract_3d_attrs_from_gt(*gt_args, face_type=0)

    # Expected depths are the stored values times depth_scale (2.0).
    assert np.isclose(gt_whole["depth"], 20.0)
    assert np.isclose(gt_whole["uv"][0], 320.0)
    assert np.isclose(gt_whole["uv"][1], 192.0)
    assert np.isclose(gt_face["depth"], 16.0)
    assert np.isclose(gt_face["uv"][0], 160.0)
    assert np.isclose(gt_face["uv"][1], 168.0)
    # The face block's seventh value (0.4) is below the 0.5 threshold.
    assert extract_3d_attrs_from_gt(*gt_args, face_type=0, score_thr=0.5) is None

    # --- prediction ---
    pred = np.zeros(41, dtype=np.float32)
    pred[24] = 22.0
    pred[25:27] = [0.5, -0.25]
    pred[27:30] = [4.2, 1.4, 1.9]
    pred[30] = 3.0
    pred[34:38] = np.sin([0.1, 0.2, 0.3, 0.4])
    pred[0] = 17.0
    pred[1:3] = [-0.5, 0.25]
    anchor = np.array([10.0, 20.0], dtype=np.float32)
    stride = 8.0

    pred_whole = extract_3d_attrs_from_prediction(pred, anchor, stride, intrinsics)
    pred_face = extract_3d_attrs_from_prediction(pred, anchor, stride, intrinsics, face_type=0)

    assert np.isclose(pred_whole["depth"], 22.0)
    assert np.allclose(pred_whole["uv"], [84.0, 158.0])
    assert np.isclose(pred_face["depth"], 17.0)
    assert np.allclose(pred_face["uv"], [76.0, 162.0])
def test_compute_3d_metrics_for_matched_includes_uv():
    """With every optional group enabled, two matched pairs yield positive uv/orient/size/visible-orient metrics."""

    def f32(values):
        return np.array(values, dtype=np.float32)

    predictions = {
        "center": f32([[1.0, 2.0, 10.0], [2.0, 1.0, 12.0]]),
        "depth": f32([10.0, 12.0]),
        "yaw": f32([0.1, 0.2]),
        "edge_yaw": f32([0.12, 0.18]),
        "dims": f32([[4.0, 1.5, 1.8], [4.1, 1.6, 1.9]]),
        "uv": f32([[100.0, 120.0], [140.0, 150.0]]),
    }
    ground_truth = {
        "center": f32([[1.5, 2.5, 10.5], [2.5, 1.5, 12.5]]),
        "depth": f32([11.0, 13.0]),
        "yaw": f32([0.15, 0.25]),
        "edge_yaw": f32([0.15, 0.25]),
        "dims": f32([[4.2, 1.4, 1.7], [4.0, 1.5, 1.8]]),
        "uv": f32([[102.0, 124.0], [137.0, 154.0]]),
    }

    result = compute_3d_metrics_for_matched(
        predictions,
        ground_truth,
        include_orient=True,
        include_size=True,
        include_uv=True,
        include_visible_orient=True,
    )

    assert result["matched"] == 2
    assert result["uv"] > 0
    assert result["orient"] > 0
    assert result["size"] > 0
    assert result["direct_orient_visible"] > 0
    assert result["edge_orient_visible"] > 0
    assert result["_direct_orient_visible_matched"] == 2
    assert result["_edge_orient_visible_matched"] == 2
def test_compute_3d_metrics_for_matched_tracks_visible_orientation_valid_pairs():
    """A NaN predicted edge yaw lowers the edge-orientation pair count to 1 while the direct count stays 2."""

    def f32(values):
        return np.array(values, dtype=np.float32)

    predictions = {
        "center": f32([[1.0, 2.0, 10.0], [2.0, 1.0, 12.0]]),
        "depth": f32([10.0, 12.0]),
        "yaw": f32([0.1, 0.2]),
        # First pair has no usable edge yaw.
        "edge_yaw": f32([np.nan, 0.18]),
        "dims": f32([[4.0, 1.5, 1.8], [4.1, 1.6, 1.9]]),
        "uv": f32([[100.0, 120.0], [140.0, 150.0]]),
    }
    ground_truth = {
        "center": f32([[1.5, 2.5, 10.5], [2.5, 1.5, 12.5]]),
        "depth": f32([11.0, 13.0]),
        "yaw": f32([0.15, 0.25]),
        "edge_yaw": f32([0.15, 0.25]),
        "dims": f32([[4.2, 1.4, 1.7], [4.0, 1.5, 1.8]]),
        "uv": f32([[102.0, 124.0], [137.0, 154.0]]),
    }

    result = compute_3d_metrics_for_matched(
        predictions,
        ground_truth,
        include_orient=True,
        include_size=True,
        include_uv=True,
        include_visible_orient=True,
    )

    assert result["_direct_orient_visible_matched"] == 2
    assert result["_edge_orient_visible_matched"] == 1
    assert result["edge_orient_visible"] > 0
def test_aggregate_3d_metric_groups_keeps_group_defaults():
    """Non-empty groups are averaged weighted by ``matched``; an empty "face" group equals the empty defaults."""
    first_batch = {
        "depth_abs": 2.0,
        "depth_rel": 0.2,
        "depth_rmse": 3.0,
        "center": 1.0,
        "uv": 4.0,
        "orient": 5.0,
        "size": 0.5,
        "matched": 2,
    }
    second_batch = {
        "depth_abs": 4.0,
        "depth_rel": 0.4,
        "depth_rmse": 6.0,
        "center": 3.0,
        "uv": 8.0,
        "orient": 7.0,
        "size": 1.5,
        "matched": 1,
    }
    stats = {"whole": [first_batch, second_batch], "face": []}

    aggregated = aggregate_3d_metric_groups(stats)
    whole = aggregated["whole"]

    assert whole["matched"] == 3
    assert np.isclose(whole["depth_abs"], round((2.0 * 2 + 4.0 * 1) / 3, 5))
    assert np.isclose(whole["uv"], round((4.0 * 2 + 8.0 * 1) / 3, 5))
    expected_face_defaults = empty_3d_metrics(
        include_orient=False, include_size=False, include_uv=True, include_visible_orient=True
    )
    assert aggregated["face"] == expected_face_defaults
def test_aggregate_3d_metric_groups_weights_visible_orientation_by_valid_pairs():
    """Visible-orientation metrics are weighted by their own valid-pair counts, not by ``matched``."""
    # 100 matches but no valid visible-orientation pairs.
    batch_without_orientation = {
        "depth_abs": 1.0,
        "depth_rel": 0.1,
        "depth_rmse": 1.5,
        "center": 2.0,
        "uv": 3.0,
        "size": 0.5,
        "matched": 100,
        "direct_orient_visible": 0.0,
        "edge_orient_visible": 0.0,
        "_direct_orient_visible_matched": 0,
        "_edge_orient_visible_matched": 0,
    }
    # A single match that carries the only valid visible-orientation pair.
    batch_with_orientation = {
        "depth_abs": 2.0,
        "depth_rel": 0.2,
        "depth_rmse": 2.5,
        "center": 4.0,
        "uv": 6.0,
        "size": 1.0,
        "matched": 1,
        "direct_orient_visible": 15.0,
        "edge_orient_visible": 30.0,
        "_direct_orient_visible_matched": 1,
        "_edge_orient_visible_matched": 1,
    }
    stats = {"whole": [], "face": [batch_without_orientation, batch_with_orientation]}

    face = aggregate_3d_metric_groups(stats)["face"]

    assert face["matched"] == 101
    assert np.isclose(face["edge_orient_visible"], 30.0)
    assert np.isclose(face["direct_orient_visible"], 15.0)
def test_visible_face_edge_geometry_decodes_and_orders_left_to_right():
    """A face-0 GT edge decodes to five samples ordered by u, with depths matching z and a finite yaw."""
    intrinsics = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0, "depth_scale": 1.0}
    gt = np.full(42, np.nan, dtype=np.float32)
    gt[2] = 20.0
    gt[3:6] = [4.0, 1.5, 1.8]
    gt[6] = -0.6
    gt[7:9] = [0.5, 0.55]
    gt[10:18] = [0.0, 0.0, 18.0, 0.0, 0.46, 0.55, 0.9, 1.0]

    edge = decode_visible_face_edge_from_gt(gt, 0, intrinsics, 640, 480, {0}, set(), face_type=0)

    assert edge is not None
    assert edge["points_2d"].shape == (5, 2)
    assert edge["points_3d"].shape == (5, 3)
    assert edge["depths"].shape == (5,)
    assert np.all(np.diff(edge["points_2d"][:, 0]) >= -1e-5)
    assert np.allclose(edge["points_3d"][:, 2], edge["depths"])

    edge_yaw = edge_points_to_yaw(edge["points_3d"], edge["face_type"])
    assert np.isfinite(edge_yaw)
def test_decode_visible_face_edge_from_gt_requires_face_visibility_threshold():
    """With ``score_thr=0.3``, face 0 (seventh value 0.8) decodes and face 3 (0.2) returns None."""
    intrinsics = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0, "depth_scale": 1.0}
    gt = np.full(42, np.nan, dtype=np.float32)
    gt[2] = 20.0
    gt[3:6] = [4.0, 1.5, 1.8]
    gt[6] = 0.25
    gt[7:9] = [0.5, 0.6]
    gt[10:18] = [0.0, 0.0, 18.0, 0.0, 0.45, 0.55, 0.8, 1.0]
    gt[34:42] = [0.0, 0.0, 17.0, 0.0, 0.55, 0.52, 0.2, 1.0]

    above_threshold = decode_visible_face_edge_from_gt(
        gt, 0, intrinsics, 640, 480, {0}, set(), face_type=0, score_thr=0.3
    )
    below_threshold = decode_visible_face_edge_from_gt(
        gt, 0, intrinsics, 640, 480, {0}, set(), face_type=3, score_thr=0.3
    )

    assert above_threshold is not None
    assert below_threshold is None
def test_collect_face_bottom_edges_returns_all_requested_faces():
    """Requesting faces [0, 3] returns stacked (2, 5, 3)/(2, 5, 2) edges, each non-decreasing in u."""
    intrinsics = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0}
    box_center = np.array([0.0, 0.0, 20.0], dtype=np.float32)
    box_dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    box_corners = compute_3d_box_corners(box_center, box_dims, -0.4, face_type=-1)

    edges_3d, edges_2d = collect_face_bottom_edges(box_corners, [0, 3], intrinsics, num_samples=5)

    assert edges_3d.shape == (2, 5, 3)
    assert edges_2d.shape == (2, 5, 2)
    for edge_index in (0, 1):
        assert np.all(np.diff(edges_2d[edge_index, :, 0]) >= -1e-5)
def test_collect_precomputed_edge_points_2d_preserves_visible_face_order():
    """Requested faces (3, 0) come first in that order, followed by the remaining valid face 2."""
    # Face i is filled with the constant 10 * (i + 1) so each row is identifiable.
    per_face_points = np.stack(
        [np.full((5, 2), fill, dtype=np.float32) for fill in (10.0, 20.0, 30.0, 40.0)],
        axis=0,
    )
    per_face_valid = np.array([True, False, True, True], dtype=bool)

    stacked = collect_precomputed_edge_points_2d(per_face_points, per_face_valid, visible_face_types=(3, 0))

    assert stacked.shape == (3, 5, 2)
    assert np.all(stacked[0] == 40.0)
    assert np.all(stacked[1] == 10.0)
    assert np.all(stacked[2] == 30.0)
def test_edge_points_to_yaw_matches_face_convention_for_visible_faces():
    """For every face type, each yaw where the face passes the local visibility test recovers the box yaw.

    A sample is checked only when the dot product of the face normal (built by the
    local ``face_normal`` helper) with the edge midpoint in the x/z plane is
    negative. At least one yaw must qualify per face type.
    """
    intrinsics = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0}
    box_center = np.array([0.0, 0.0, 20.0], dtype=np.float32)
    box_dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)

    def face_normal(face_type, rot_y):
        forward = np.array([np.cos(rot_y), -np.sin(rot_y)], dtype=np.float64)
        width_axis = np.array([np.sin(rot_y), np.cos(rot_y)], dtype=np.float64)
        normals = {0: forward, 1: -forward, 2: width_axis}
        # Any face type other than 0, 1 or 2 uses the negated width axis.
        return normals.get(face_type, -width_axis)

    for face_type in range(4):
        checked_samples = 0
        for rot_y in np.linspace(-np.pi, np.pi, 181):
            box_corners = compute_3d_box_corners(box_center, box_dims, float(rot_y), face_type=-1)
            edge_3d, edge_2d = project_face_bottom_edge(box_corners, face_type, intrinsics, num_samples=5)
            assert edge_3d is not None
            assert edge_2d is not None

            midpoint_xz = np.mean(edge_3d[:, [0, 2]], axis=0)
            facing = float(np.dot(face_normal(face_type, rot_y), midpoint_xz))
            if facing >= 0.0:
                continue

            yaw = edge_points_to_yaw(edge_3d, face_type)
            wrapped_error = (yaw - rot_y + np.pi) % (2 * np.pi) - np.pi
            assert np.isclose(wrapped_error, 0.0, atol=1e-5), (face_type, rot_y, yaw)
            checked_samples += 1
        assert checked_samples > 0
def test_visible_face_edges_to_yaw_matches_true_yaw_for_two_visible_gt_faces():
    """Combining the exact bottom edges of faces 0 and 3 reproduces the box yaw within 1e-5."""
    intrinsics = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0}
    box_center = np.array([0.0, 0.0, 20.0], dtype=np.float32)
    box_dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    rot_y = 0.75
    box_corners = compute_3d_box_corners(box_center, box_dims, rot_y, face_type=-1)

    edges_by_face = {}
    for face_type in (0, 3):
        edge_3d, edge_2d = project_face_bottom_edge(box_corners, face_type, intrinsics, num_samples=5)
        assert edge_3d is not None
        assert edge_2d is not None
        edges_by_face[face_type] = edge_3d

    yaw = visible_face_edges_to_yaw(edges_by_face, face_scores={0: 0.9, 3: 0.85})
    wrapped_error = (yaw - rot_y + np.pi) % (2 * np.pi) - np.pi
    assert np.isclose(wrapped_error, 0.0, atol=1e-5), yaw
def test_visible_face_edges_to_yaw_is_more_stable_with_two_noisy_gt_faces():
    """Under x/z noise, the two-face yaw has a mean error below 90% of the mean single-edge error."""
    rng = np.random.default_rng(0)
    intrinsics = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0}
    box_center = np.array([0.0, 0.0, 20.0], dtype=np.float32)
    box_dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    # (box yaw, pair of face types whose edges are combined)
    scenarios = (
        (0.75, (0, 3)),
        (2.35, (0, 2)),
        (-0.75, (1, 3)),
        (-2.35, (1, 2)),
    )

    per_edge_errors = []
    two_edge_errors = []
    for rot_y, face_pair in scenarios:
        box_corners = compute_3d_box_corners(box_center, box_dims, rot_y, face_type=-1)
        clean_edges = {}
        for face_type in face_pair:
            edge_3d, edge_2d = project_face_bottom_edge(box_corners, face_type, intrinsics, num_samples=5)
            assert edge_3d is not None
            assert edge_2d is not None
            clean_edges[face_type] = edge_3d

        for _ in range(200):
            perturbed_edges = {}
            for face_type, clean_points in clean_edges.items():
                # Perturb only the x and z columns; the middle column stays untouched.
                jitter = np.zeros_like(clean_points)
                jitter[:, [0, 2]] = rng.normal(0.0, 0.05, size=(clean_points.shape[0], 2))
                perturbed = clean_points + jitter
                perturbed_edges[face_type] = perturbed.astype(np.float32)

                single_yaw = edge_points_to_yaw(perturbed, face_type)
                single_error = (single_yaw - rot_y + np.pi) % (2 * np.pi) - np.pi
                per_edge_errors.append(abs(single_error))

            pair_scores = {face_pair[0]: 0.9, face_pair[1]: 0.9}
            pair_yaw = visible_face_edges_to_yaw(perturbed_edges, face_scores=pair_scores)
            pair_error = (pair_yaw - rot_y + np.pi) % (2 * np.pi) - np.pi
            two_edge_errors.append(abs(pair_error))

    mean_single = float(np.mean(per_edge_errors))
    mean_combined = float(np.mean(two_edge_errors))
    assert mean_combined < mean_single * 0.9
def test_decode_3d_target_collects_all_visible_face_edges():
    """A target with populated face blocks 0 and 3 decodes to face types (0, 3) and two stacked edges."""
    from ultralytics.utils.plotting_3d import decode_3d_target

    intrinsics = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0, "depth_scale": 1.0}
    gt = np.full(42, np.nan, dtype=np.float32)
    gt[2] = 20.0
    gt[3:6] = [4.0, 1.5, 1.8]
    gt[6] = 0.25
    gt[7:9] = [0.5, 0.6]
    gt[10:18] = [0.0, 0.0, 18.0, 0.0, 0.45, 0.55, 0.8, 1.0]
    gt[34:42] = [0.0, 0.0, 17.0, 0.0, 0.55, 0.52, 0.7, 1.0]

    result = decode_3d_target(gt, 0, intrinsics, 640, 480, {0}, set())

    assert result is not None
    assert result["visible_face_types"] == (0, 3)
    assert result["edge_points_2d"].shape == (2, 5, 2)
    assert result["edge_points_3d"].shape == (2, 5, 3)
def test_decode_multi_visible_face_yaw_from_prediction_uses_two_edges_for_valid_cut_objects():
    """With a full face-0 edge and a partial face-3 edge encoded, the decoded yaw matches within 1e-3."""
    intrinsics = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0}
    center = np.array([-7.0, 0.0, 20.0], dtype=np.float32)
    box_dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    rot_y = -0.75
    anchor = np.array([10.0, 20.0], dtype=np.float32)
    stride = 8.0
    pred_bbox_xyxy = np.array([0.0, 120.0, 80.0, 240.0], dtype=np.float32)

    pred = np.zeros(41, dtype=np.float32)
    pred[24] = center[2]
    pred[25:27] = [center[0] / center[2] * intrinsics["fx"] / stride + (intrinsics["cx"] / stride - anchor[0]), 0.0]
    pred[27:30] = box_dims
    pred[30] = 10.0
    pred[34] = np.sin(rot_y)
    pred[5] = 0.9
    pred[39] = 10.0  # cut_in

    box_corners = compute_3d_box_corners(center, box_dims, rot_y, face_type=-1)
    front_3d, front_2d = project_face_bottom_edge(box_corners, 0, intrinsics, num_samples=5)
    side_3d, side_2d = project_partial_face_bottom_edge(box_corners, 3, intrinsics, 640, 480, num_samples=5)
    assert front_3d is not None and front_2d is not None
    assert side_3d is not None and side_2d is not None

    pred_edge = np.zeros(60, dtype=np.float32)

    def encode_edge(face_type, edge_3d, edge_2d):
        # Each face owns 15 values viewed as 5 x (u offset, v offset, depth); the view writes into pred_edge.
        start = face_type * 15
        rows = pred_edge[start : start + 15].reshape(5, 3)
        rows[:, 0] = edge_2d[:, 0] / stride - anchor[0]
        rows[:, 1] = edge_2d[:, 1] / stride - anchor[1]
        rows[:, 2] = edge_3d[:, 2]

    encode_edge(0, front_3d, front_2d)
    encode_edge(3, side_3d, side_2d)

    yaw = decode_multi_visible_face_yaw_from_prediction(
        pred,
        pred_edge,
        anchor,
        stride,
        intrinsics,
        fallback_face_type=0,
        bbox_xyxy=pred_bbox_xyxy,
        img_w=640,
    )
    wrapped_error = (yaw - rot_y + np.pi) % (2 * np.pi) - np.pi
    assert np.isclose(wrapped_error, 0.0, atol=1e-3)
def test_decode_cut_partial_side_edge_from_prediction_requires_known_cut_side():
    """With ``cut_side=None`` the prediction decoder returns None even though a face-3 edge is encoded."""
    from ultralytics.utils.plotting_3d import decode_cut_partial_side_edge_from_prediction

    intrinsics = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0}
    center = np.array([-7.0, 0.0, 20.0], dtype=np.float32)
    box_dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    rot_y = -0.75
    anchor = np.array([10.0, 20.0], dtype=np.float32)
    stride = 8.0

    pred = np.zeros(41, dtype=np.float32)
    pred[24] = center[2]
    pred[25:27] = [center[0] / center[2] * intrinsics["fx"] / stride + (intrinsics["cx"] / stride - anchor[0]), 0.0]
    pred[27:30] = box_dims
    pred[30] = 10.0
    pred[34] = np.sin(rot_y)
    pred[39] = 10.0  # cut_in

    box_corners = compute_3d_box_corners(center, box_dims, rot_y, face_type=-1)
    side_3d, side_2d = project_partial_face_bottom_edge(box_corners, 3, intrinsics, 640, 480, num_samples=5)
    assert side_3d is not None and side_2d is not None

    # Encode the side edge into the last 15-value block (45:60) as 5 x (u offset, v offset, depth).
    pred_edge = np.zeros(60, dtype=np.float32)
    rows = pred_edge[45:60].reshape(5, 3)
    rows[:, 0] = side_2d[:, 0] / stride - anchor[0]
    rows[:, 1] = side_2d[:, 1] / stride - anchor[1]
    rows[:, 2] = side_3d[:, 2]

    result = decode_cut_partial_side_edge_from_prediction(pred, pred_edge, anchor, stride, 640, cut_side=None)
    assert result is None
def test_decode_cut_partial_side_edge_from_prediction_uses_geometry_for_cut_out_boxes():
    """With corners supplied, cut-out predictions pick the expected side face when both side edges are encoded."""
    from ultralytics.utils.plotting_3d import decode_cut_partial_side_edge_from_prediction

    intrinsics = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0}
    box_dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    anchor = np.array([10.0, 20.0], dtype=np.float32)
    stride = 8.0
    # (box centre, rot_y, cut side, expected face type)
    scenarios = [
        (np.array([-8.0, 0.0, 20.0], dtype=np.float32), 2.6, "left", 2),
        (np.array([8.0, 0.0, 20.0], dtype=np.float32), 0.75, "right", 3),
    ]

    for center, rot_y, cut_side, expected_face in scenarios:
        pred = np.zeros(41, dtype=np.float32)
        pred[24] = center[2]
        pred[25:27] = [
            center[0] / center[2] * intrinsics["fx"] / stride + (intrinsics["cx"] / stride - anchor[0]),
            0.0,
        ]
        pred[27:30] = box_dims
        pred[38 + 2] = 10.0  # cut_out
        if rot_y >= np.pi / 2:
            pred[33] = 10.0
            pred[37] = np.sin(rot_y - np.pi)
        else:
            pred[30] = 10.0
            pred[34] = np.sin(rot_y)

        box_corners = compute_3d_box_corners(center, box_dims, rot_y, face_type=-1)

        # Encode both side faces (2 and 3) so the decoder has to choose between them.
        pred_edge = np.zeros(60, dtype=np.float32)
        for side_face in (2, 3):
            side_3d, side_2d = project_partial_face_bottom_edge(
                box_corners, side_face, intrinsics, 640, 480, num_samples=5
            )
            assert side_3d is not None and side_2d is not None
            start = side_face * 15
            rows = pred_edge[start : start + 15].reshape(5, 3)
            rows[:, 0] = side_2d[:, 0] / stride - anchor[0]
            rows[:, 1] = side_2d[:, 1] / stride - anchor[1]
            rows[:, 2] = side_3d[:, 2]

        result = decode_cut_partial_side_edge_from_prediction(
            pred,
            pred_edge,
            anchor,
            stride,
            640,
            cut_side=cut_side,
            corners_3d=box_corners,
        )
        assert result is not None
        assert result["face_type"] == expected_face
def test_decode_multi_visible_face_yaw_from_prediction_uses_two_visible_faces():
    """With exact edges for faces 0 and 3 encoded, the decoded yaw matches the box yaw within 1e-5."""
    intrinsics = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0}
    center = np.array([0.0, 0.0, 20.0], dtype=np.float32)
    box_dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    rot_y = 0.75
    anchor = np.array([10.0, 20.0], dtype=np.float32)
    stride = 8.0

    pred = np.zeros(41, dtype=np.float32)
    pred[24] = center[2]
    pred[25:27] = [center[0] / center[2] * intrinsics["fx"] / stride + (intrinsics["cx"] / stride - anchor[0]), 0.0]
    pred[27:30] = box_dims
    pred[30] = 10.0
    pred[34] = np.sin(rot_y)
    pred[5] = 0.9
    pred[23] = 0.85

    box_corners = compute_3d_box_corners(center, box_dims, rot_y, face_type=-1)
    pred_edge = np.zeros(60, dtype=np.float32)
    for face_type in (0, 3):
        edge_3d, edge_2d = project_face_bottom_edge(box_corners, face_type, intrinsics, num_samples=5)
        assert edge_3d is not None
        assert edge_2d is not None
        # Each face owns 15 values viewed as 5 x (u offset, v offset, depth).
        start = face_type * 15
        rows = pred_edge[start : start + 15].reshape(5, 3)
        rows[:, 0] = edge_2d[:, 0] / stride - anchor[0]
        rows[:, 1] = edge_2d[:, 1] / stride - anchor[1]
        rows[:, 2] = edge_3d[:, 2]

    yaw = decode_multi_visible_face_yaw_from_prediction(
        pred, pred_edge, anchor, stride, intrinsics, fallback_face_type=0, img_w=640
    )
    wrapped_error = (yaw - rot_y + np.pi) % (2 * np.pi) - np.pi
    assert np.isclose(wrapped_error, 0.0, atol=1e-5), yaw
def test_decode_edge_yaw_selection_from_prediction_limits_near_cut_boxes_to_two_edges():
    """Check that edge-yaw selection keeps exactly faces (0, 3) when three faces are encoded.

    Faces 0, 1 and 3 get scores and bottom-edge points, a large value is written to ``pred[39]`` and
    a 2D box lying inside the 640x480 image is supplied. The selection must be valid, use two faces
    and reproduce ``rot_y`` modulo 2*pi.
    """
    calib = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0}
    stride = 8.0
    rot_y = -0.75
    anchor = np.array([10.0, 20.0], dtype=np.float32)
    center = np.array([-6.0, 0.0, 20.0], dtype=np.float32)
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    bbox_xyxy = np.array([120.0, 120.0, 240.0, 240.0], dtype=np.float32)

    pred = np.zeros(41, dtype=np.float32)
    pred[24] = center[2]
    pred[27:30] = dims
    pred[30] = 10.0
    pred[34] = np.sin(rot_y)
    pred[39] = 10.0  # cut_in, but the box is still fully inside the image

    corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1)

    # The first 24 values are addressed as four 6-value slots, one per face type.
    face_slots = pred[:24].reshape(4, 6)
    for face_id, face_score in ((0, 0.84), (1, 0.93), (3, 0.91)):
        center_3d = face_center_from_corners(corners, face_id)
        center_uv = project_3d_to_2d(center_3d[None, :], calib)[0]
        slot = face_slots[face_id]
        slot[0] = center_3d[2]
        slot[1:3] = [center_uv[0] / stride - anchor[0], center_uv[1] / stride - anchor[1]]
        slot[5] = face_score

    pred_edge = np.zeros(60, dtype=np.float32)
    edge_slots = pred_edge.reshape(4, 5, 3)  # view: one (5 samples x 3 values) slot per face type
    for face_id in (0, 1, 3):
        pts_3d, pts_2d = project_face_bottom_edge(corners, face_id, calib, num_samples=5)
        assert pts_3d is not None
        assert pts_2d is not None
        slot = edge_slots[face_id]
        slot[:, 0] = pts_2d[:, 0] / stride - anchor[0]
        slot[:, 1] = pts_2d[:, 1] / stride - anchor[1]
        slot[:, 2] = pts_3d[:, 2]

    selection = decode_edge_yaw_selection_from_prediction(
        pred,
        pred_edge,
        anchor,
        stride,
        calib,
        bbox_xyxy=bbox_xyxy,
        img_w=640,
        img_h=480,
        max_lateral_dist_m=5.0,
    )

    assert selection["face_types"] == (0, 3)
    assert selection["two_face_eligible"] is True
    assert selection["is_valid"] is True
    assert np.asarray(selection["edge_points_2d"], dtype=np.float32).shape == (2, 5, 2)

    # Wrap the angular error into [-pi, pi) before comparing against zero.
    wrapped_err = (selection["yaw"] - rot_y + np.pi) % (2 * np.pi) - np.pi
    assert np.isclose(wrapped_err, 0.0, atol=1e-5), selection["yaw"]
def test_decode_edge_yaw_selection_from_prediction_picks_higher_score_longitudinal_and_side_faces():
    """Check that selection returns faces (1, 3) when face 1 outscores face 0.

    Faces 0, 1 and 3 are encoded with scores 0.95, 0.99 and 0.80 plus their bottom-edge points; the
    selection must be valid, report ``face_types == (1, 3)`` and reproduce ``rot_y`` modulo 2*pi.
    """
    calib = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0}
    stride = 8.0
    rot_y = 0.75
    anchor = np.array([10.0, 20.0], dtype=np.float32)
    center = np.array([0.0, 0.0, 20.0], dtype=np.float32)
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)

    # Horizontal offset of the projected box center, in stride units relative to the anchor.
    u_offset = center[0] / center[2] * calib["fx"] / stride + (calib["cx"] / stride - anchor[0])

    pred = np.zeros(41, dtype=np.float32)
    pred[24] = center[2]
    pred[25:27] = (u_offset, 0.0)
    pred[27:30] = dims
    pred[30] = 10.0
    pred[34] = np.sin(rot_y)

    corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1)

    face_scores = {0: 0.95, 1: 0.99, 3: 0.80}
    for face_id, face_score in face_scores.items():
        center_3d = face_center_from_corners(corners, face_id)
        center_uv = project_3d_to_2d(center_3d[None, :], calib)[0]
        base = face_id * 6
        pred[base] = center_3d[2]
        pred[base + 1] = center_uv[0] / stride - anchor[0]
        pred[base + 2] = center_uv[1] / stride - anchor[1]
        pred[base + 5] = face_score

    pred_edge = np.zeros(60, dtype=np.float32)
    edge_slots = pred_edge.reshape(4, 5, 3)  # view: one (5 samples x 3 values) slot per face type
    for face_id in (0, 1, 3):
        pts_3d, pts_2d = project_face_bottom_edge(corners, face_id, calib, num_samples=5)
        assert pts_3d is not None and pts_2d is not None
        slot = edge_slots[face_id]
        slot[:, 0] = pts_2d[:, 0] / stride - anchor[0]
        slot[:, 1] = pts_2d[:, 1] / stride - anchor[1]
        slot[:, 2] = pts_3d[:, 2]

    selection = decode_edge_yaw_selection_from_prediction(
        pred,
        pred_edge,
        anchor,
        stride,
        calib,
        img_w=640,
        img_h=480,
        max_lateral_dist_m=5.0,
    )

    assert selection["face_types"] == (1, 3)
    assert selection["is_valid"] is True

    # Wrap the angular error into [-pi, pi) before comparing against zero.
    wrapped_err = (selection["yaw"] - rot_y + np.pi) % (2 * np.pi) - np.pi
    assert np.isclose(wrapped_err, 0.0, atol=1e-5), selection["yaw"]
def test_decode_edge_yaw_selection_from_prediction_uses_face_decode_best_visible_face_as_primary():
    """Check that the higher-scoring face is listed first in the selected face types.

    Faces 0 and 3 are encoded with scores 0.90 and 0.95; the selection must report
    ``face_types == (3, 0)``, be two-face eligible and valid, and reproduce ``rot_y`` modulo 2*pi.
    """
    calib = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0}
    stride = 8.0
    rot_y = 0.75
    anchor = np.array([10.0, 20.0], dtype=np.float32)
    center = np.array([0.0, 0.0, 20.0], dtype=np.float32)
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)

    # Horizontal offset of the projected box center, in stride units relative to the anchor.
    u_offset = center[0] / center[2] * calib["fx"] / stride + (calib["cx"] / stride - anchor[0])

    pred = np.zeros(41, dtype=np.float32)
    pred[24] = center[2]
    pred[25:27] = (u_offset, 0.0)
    pred[27:30] = dims
    pred[30] = 10.0
    pred[34] = np.sin(rot_y)

    corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1)

    pred_edge = np.zeros(60, dtype=np.float32)
    face_slots = pred[:24].reshape(4, 6)  # view: one 6-value slot per face type
    edge_slots = pred_edge.reshape(4, 5, 3)  # view: one (5 samples x 3 values) slot per face type

    # Per-face center depth, anchor-relative center (u, v) and score.
    for face_id, face_score in ((0, 0.90), (3, 0.95)):
        center_3d = face_center_from_corners(corners, face_id)
        center_uv = project_3d_to_2d(center_3d[None, :], calib)[0]
        face_slot = face_slots[face_id]
        face_slot[0] = center_3d[2]
        face_slot[1:3] = [center_uv[0] / stride - anchor[0], center_uv[1] / stride - anchor[1]]
        face_slot[5] = face_score

    # Per-face bottom-edge samples.
    for face_id in (0, 3):
        pts_3d, pts_2d = project_face_bottom_edge(corners, face_id, calib, num_samples=5)
        assert pts_3d is not None and pts_2d is not None
        edge_slot = edge_slots[face_id]
        edge_slot[:, 0] = pts_2d[:, 0] / stride - anchor[0]
        edge_slot[:, 1] = pts_2d[:, 1] / stride - anchor[1]
        edge_slot[:, 2] = pts_3d[:, 2]

    selection = decode_edge_yaw_selection_from_prediction(
        pred,
        pred_edge,
        anchor,
        stride,
        calib,
        img_w=640,
        img_h=480,
        max_lateral_dist_m=30.0,
    )

    assert selection["face_types"] == (3, 0)
    assert selection["two_face_eligible"] is True
    assert selection["is_valid"] is True

    # Wrap the angular error into [-pi, pi) before comparing against zero.
    wrapped_err = (selection["yaw"] - rot_y + np.pi) % (2 * np.pi) - np.pi
    assert np.isclose(wrapped_err, 0.0, atol=1e-5), selection["yaw"]
def test_decode_3d_prediction_uses_highest_score_visible_face_as_anchor():
    """Check that ``decode_3d_prediction`` reports the top-scoring face as ``visible_face_type``.

    Faces 0, 1 and 3 receive scores 0.95, 0.99 and 0.80. The decoded result must list all three as
    visible and pick face 1 (the highest score) as the primary face.
    """
    # Imported locally, as the other decode_3d_prediction tests in this file do, so this test does not
    # rely on the name being available at module level.
    from ultralytics.utils.plotting_3d import decode_3d_prediction

    calib = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0}
    center = np.array([0.0, 0.0, 20.0], dtype=np.float32)
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    rot_y = 0.75
    anchor = np.array([10.0, 20.0], dtype=np.float32)
    stride = 8.0

    pred = np.zeros(41, dtype=np.float32)
    pred[24] = center[2]
    pred[25:27] = [center[0] / center[2] * calib["fx"] / stride + (calib["cx"] / stride - anchor[0]), 0.0]
    pred[27:30] = dims
    pred[30] = 10.0
    pred[34] = np.sin(rot_y)

    corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1)

    # Each face type owns a 6-value slot: center depth, anchor-relative center (u, v), ..., score.
    for face_type, score in ((0, 0.95), (1, 0.99), (3, 0.80)):
        face_center = face_center_from_corners(corners, face_type)
        face_uv = project_3d_to_2d(face_center[None, :], calib)[0]
        off = face_type * 6
        pred[off] = face_center[2]
        pred[off + 1 : off + 3] = [face_uv[0] / stride - anchor[0], face_uv[1] / stride - anchor[1]]
        pred[off + 5] = score

    decoded = decode_3d_prediction(
        pred,
        anchor,
        stride,
        calib,
        640,
        480,
        {0},
        set(),
        0,
        pred_edge_60=None,
    )

    assert decoded is not None
    assert decoded["visible_face_types"] == (0, 1, 3)
    assert decoded["visible_face_type"] == 1
def test_decode_3d_prediction_keeps_top1_face_even_below_visibility_threshold():
    """Check that the only scored face is still reported when its score is very low.

    Only face 3 is encoded, with a score of 0.03. The decoded result must still name face 3 as the
    primary face and as the sole entry of ``visible_face_types``.
    """
    # Imported locally, as the other decode_3d_prediction tests in this file do, so this test does not
    # rely on the name being available at module level.
    from ultralytics.utils.plotting_3d import decode_3d_prediction

    calib = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0}
    center = np.array([0.0, 0.0, 20.0], dtype=np.float32)
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    rot_y = 0.75
    anchor = np.array([10.0, 20.0], dtype=np.float32)
    stride = 8.0

    pred = np.zeros(41, dtype=np.float32)
    pred[24] = center[2]
    pred[25:27] = [center[0] / center[2] * calib["fx"] / stride + (calib["cx"] / stride - anchor[0]), 0.0]
    pred[27:30] = dims
    pred[30] = 10.0
    pred[34] = np.sin(rot_y)

    corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1)

    # Fill the slot starting at index 18 (face 3 * 6 values): depth, center (u, v) and a tiny score.
    face_center = face_center_from_corners(corners, 3)
    face_uv = project_3d_to_2d(face_center[None, :], calib)[0]
    pred[18] = face_center[2]
    pred[19:21] = [face_uv[0] / stride - anchor[0], face_uv[1] / stride - anchor[1]]
    pred[23] = 0.03

    decoded = decode_3d_prediction(
        pred,
        anchor,
        stride,
        calib,
        640,
        480,
        {0},
        set(),
        0,
        pred_edge_60=None,
    )

    assert decoded is not None
    assert decoded["visible_face_type"] == 3
    assert decoded["visible_face_types"] == (3,)
def test_decode_edge_yaw_selection_from_prediction_uses_face_decode_primary_face_before_strict_companion_gate():
    """Check the single-face outcome when both encoded faces carry a low score of 0.09.

    Edges for faces 0 and 3 are encoded, but the selection must report only face 0, be neither
    two-face eligible nor valid, and still return a yaw equal to ``rot_y`` modulo 2*pi.
    """
    calib = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0}
    stride = 8.0
    rot_y = 0.75
    anchor = np.array([10.0, 20.0], dtype=np.float32)
    center = np.array([0.0, 0.0, 20.0], dtype=np.float32)
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)

    # Horizontal offset of the projected box center, in stride units relative to the anchor.
    u_offset = center[0] / center[2] * calib["fx"] / stride + (calib["cx"] / stride - anchor[0])

    pred = np.zeros(41, dtype=np.float32)
    pred[5] = 0.09
    pred[23] = 0.09
    pred[24] = center[2]
    pred[25:27] = (u_offset, 0.0)
    pred[27:30] = dims
    pred[30] = 10.0
    pred[34] = np.sin(rot_y)

    corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1)

    pred_edge = np.zeros(60, dtype=np.float32)
    edge_slots = pred_edge.reshape(4, 5, 3)  # view: one (5 samples x 3 values) slot per face type
    for face_id in (0, 3):
        pts_3d, pts_2d = project_face_bottom_edge(corners, face_id, calib, num_samples=5)
        assert pts_3d is not None and pts_2d is not None
        slot = edge_slots[face_id]
        slot[:, 0] = pts_2d[:, 0] / stride - anchor[0]
        slot[:, 1] = pts_2d[:, 1] / stride - anchor[1]
        slot[:, 2] = pts_3d[:, 2]

    selection = decode_edge_yaw_selection_from_prediction(
        pred, pred_edge, anchor, stride, calib, img_w=640, img_h=480
    )

    assert selection["face_types"] == (0,)
    assert selection["two_face_eligible"] is False
    assert selection["is_valid"] is False

    # Wrap the angular error into [-pi, pi) before comparing against zero.
    wrapped_err = (selection["yaw"] - rot_y + np.pi) % (2 * np.pi) - np.pi
    assert np.isclose(wrapped_err, 0.0, atol=1e-5), selection["yaw"]
def test_decode_edge_yaw_selection_from_prediction_rejects_short_cut_side_visibility():
    """Check that a side edge covering only 40% of the box length invalidates the selection.

    The face-0 edge is encoded in full, while the face-3 edge is replaced by five points spanning
    ``0.4 * dims[0]`` along the direction of the partial side edge. The selection must report a
    visible length ratio below 0.5, flag the ratio as not OK and be invalid.
    """
    calib = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0}
    stride = 8.0
    rot_y = -0.75
    anchor = np.array([10.0, 20.0], dtype=np.float32)
    center = np.array([-4.0, 0.0, 20.0], dtype=np.float32)
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    bbox_xyxy = np.array([0.0, 120.0, 80.0, 240.0], dtype=np.float32)

    # Horizontal offset of the projected box center, in stride units relative to the anchor.
    u_offset = center[0] / center[2] * calib["fx"] / stride + (calib["cx"] / stride - anchor[0])

    pred = np.zeros(41, dtype=np.float32)
    pred[5] = 0.95
    pred[24] = center[2]
    pred[25:27] = (u_offset, 0.0)
    pred[27:30] = dims
    pred[30] = 10.0
    pred[34] = np.sin(rot_y)
    pred[39] = 10.0  # cut_in

    corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1)

    front_points_3d, front_points_2d = project_face_bottom_edge(corners, 0, calib, num_samples=5)
    side_points_3d, _ = project_partial_face_bottom_edge(corners, 3, calib, 640, 480, num_samples=5)
    assert front_points_3d is not None and front_points_2d is not None
    assert side_points_3d is not None

    # Rebuild the side edge as a shortened segment starting at its first sample.
    short_len = float(dims[0] * 0.4)
    side_span = side_points_3d[-1] - side_points_3d[0]
    unit_dir = side_span / np.linalg.norm(side_span)
    steps = np.linspace(0.0, short_len, 5, dtype=np.float32)
    short_side_points_3d = side_points_3d[0] + steps[:, None] * unit_dir[None, :]
    short_side_points_2d = project_3d_to_2d(short_side_points_3d, calib)
    assert np.all(np.isfinite(short_side_points_2d))

    pred_edge = np.zeros(60, dtype=np.float32)
    edge_slots = pred_edge.reshape(4, 5, 3)  # view: one (5 samples x 3 values) slot per face type
    encoded_edges = (
        (0, front_points_3d, front_points_2d),
        (3, short_side_points_3d, short_side_points_2d),
    )
    for face_id, pts_3d, pts_2d in encoded_edges:
        slot = edge_slots[face_id]
        slot[:, 0] = pts_2d[:, 0] / stride - anchor[0]
        slot[:, 1] = pts_2d[:, 1] / stride - anchor[1]
        slot[:, 2] = pts_3d[:, 2]

    selection = decode_edge_yaw_selection_from_prediction(
        pred,
        pred_edge,
        anchor,
        stride,
        calib,
        bbox_xyxy=bbox_xyxy,
        img_w=640,
        img_h=480,
        max_lateral_dist_m=5.0,
    )

    assert selection["cut_side_visible_length_ratio"] is not None
    assert selection["cut_side_visible_length_ratio"] < 0.5
    assert selection["cut_side_visible_ratio_ok"] is False
    assert selection["is_valid"] is False
def test_decode_3d_prediction_ignores_cut_logits_for_non_border_box():
    """Check that a large value in ``pred[39]`` does not change the result for an interior 2D box.

    Faces 0 and 3 are encoded with scores and bottom edges, and the supplied 2D box lies inside the
    640x480 image. The decoded result must still expose both faces and a (2, 5, 2) edge array.
    """
    from ultralytics.utils.plotting_3d import decode_3d_prediction

    calib = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0}
    stride = 8.0
    rot_y = -0.75
    anchor = np.array([10.0, 20.0], dtype=np.float32)
    center = np.array([-6.0, 0.0, 20.0], dtype=np.float32)
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    bbox_xyxy = np.array([120.0, 120.0, 240.0, 240.0], dtype=np.float32)

    # Horizontal offset of the projected box center, in stride units relative to the anchor.
    u_offset = center[0] / center[2] * calib["fx"] / stride + (calib["cx"] / stride - anchor[0])

    pred = np.zeros(41, dtype=np.float32)
    # Slots for faces 0 and 3 share the box-center depth and offset; only the scores differ.
    for base, face_score in ((0, 0.9), (18, 0.8)):
        pred[base] = center[2]
        pred[base + 1 : base + 3] = (u_offset, 0.0)
        pred[base + 5] = face_score
    pred[24] = center[2]
    pred[27:30] = dims
    pred[30] = 10.0
    pred[34] = np.sin(rot_y)
    pred[39] = 10.0  # raw cut_in logit, but the box is not clipped at the border

    corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1)

    pred_edge = np.zeros(60, dtype=np.float32)
    edge_slots = pred_edge.reshape(4, 5, 3)  # view: one (5 samples x 3 values) slot per face type
    for face_id in (0, 3):
        pts_3d, pts_2d = project_face_bottom_edge(corners, face_id, calib, num_samples=5)
        assert pts_3d is not None
        assert pts_2d is not None
        slot = edge_slots[face_id]
        slot[:, 0] = pts_2d[:, 0] / stride - anchor[0]
        slot[:, 1] = pts_2d[:, 1] / stride - anchor[1]
        slot[:, 2] = pts_3d[:, 2]

    decoded = decode_3d_prediction(
        pred,
        anchor,
        stride,
        calib,
        640,
        480,
        {0},
        set(),
        0,
        pred_edge_60=pred_edge,
        bbox_xyxy=bbox_xyxy,
    )

    assert decoded is not None
    assert decoded["visible_face_types"] == (0, 3)
    assert decoded["edge_points_2d"].shape == (2, 5, 2)
def test_decode_3d_prediction_collects_all_visible_face_edges():
    """Check that decoded edge arrays contain one entry per visible face.

    Faces 0 and 3 carry scores 0.9 and 0.85 plus bottom-edge points. The result must list both faces
    and return edge arrays of shape (2, 5, 2) in 2D and (2, 5, 3) in 3D.
    """
    from ultralytics.utils.plotting_3d import decode_3d_prediction

    calib = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0}
    stride = 8.0
    rot_y = 0.75
    anchor = np.array([10.0, 20.0], dtype=np.float32)
    center = np.array([0.0, 0.0, 20.0], dtype=np.float32)
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)

    # Horizontal offset of the projected box center, in stride units relative to the anchor.
    u_offset = center[0] / center[2] * calib["fx"] / stride + (calib["cx"] / stride - anchor[0])

    pred = np.zeros(41, dtype=np.float32)
    pred[0] = center[2]
    pred[1:3] = (u_offset, 0.0)
    pred[5] = 0.9
    pred[23] = 0.85
    pred[24] = center[2]
    pred[27:30] = dims
    pred[30] = 10.0
    pred[34] = np.sin(rot_y)

    corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1)

    pred_edge = np.zeros(60, dtype=np.float32)
    edge_slots = pred_edge.reshape(4, 5, 3)  # view: one (5 samples x 3 values) slot per face type
    for face_id in (0, 3):
        pts_3d, pts_2d = project_face_bottom_edge(corners, face_id, calib, num_samples=5)
        assert pts_3d is not None
        assert pts_2d is not None
        slot = edge_slots[face_id]
        slot[:, 0] = pts_2d[:, 0] / stride - anchor[0]
        slot[:, 1] = pts_2d[:, 1] / stride - anchor[1]
        slot[:, 2] = pts_3d[:, 2]

    border_bbox = np.array([0.0, 120.0, 80.0, 240.0], dtype=np.float32)
    decoded = decode_3d_prediction(
        pred,
        anchor,
        stride,
        calib,
        640,
        480,
        {0},
        set(),
        0,
        pred_edge_60=pred_edge,
        bbox_xyxy=border_bbox,
    )

    assert decoded is not None
    assert decoded["visible_face_types"] == (0, 3)
    assert decoded["edge_points_2d"].shape == (2, 5, 2)
    assert decoded["edge_points_3d"].shape == (2, 5, 3)
def test_decode_3d_prediction_does_not_duplicate_partial_side_face_when_already_visible():
    """Check that face 3 appears once in the decoded output for a box touching the left border.

    Faces 0 and 3 are both scored and edge-encoded, ``pred[39]`` holds a large value and the 2D box
    starts at x = 0. The result must still contain exactly two faces and two edge entries.
    """
    from ultralytics.utils.plotting_3d import decode_3d_prediction

    calib = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0}
    stride = 8.0
    rot_y = -0.75
    anchor = np.array([10.0, 20.0], dtype=np.float32)
    center = np.array([-7.0, 0.0, 20.0], dtype=np.float32)
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    pred_bbox_xyxy = np.array([0.0, 120.0, 80.0, 240.0], dtype=np.float32)

    # Horizontal offset of the projected box center, in stride units relative to the anchor.
    u_offset = center[0] / center[2] * calib["fx"] / stride + (calib["cx"] / stride - anchor[0])

    pred = np.zeros(41, dtype=np.float32)
    # Slots for faces 0 and 3 share the box-center depth and offset; only the scores differ.
    for base, face_score in ((0, 0.9), (18, 0.8)):
        pred[base] = center[2]
        pred[base + 1 : base + 3] = (u_offset, 0.0)
        pred[base + 5] = face_score
    pred[24] = center[2]
    pred[27:30] = dims
    pred[30] = 10.0
    pred[34] = np.sin(rot_y)
    pred[39] = 10.0  # cut_in

    corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1)

    pred_edge = np.zeros(60, dtype=np.float32)
    edge_slots = pred_edge.reshape(4, 5, 3)  # view: one (5 samples x 3 values) slot per face type
    for face_id in (0, 3):
        pts_3d, pts_2d = project_face_bottom_edge(corners, face_id, calib, num_samples=5)
        assert pts_3d is not None
        assert pts_2d is not None
        slot = edge_slots[face_id]
        slot[:, 0] = pts_2d[:, 0] / stride - anchor[0]
        slot[:, 1] = pts_2d[:, 1] / stride - anchor[1]
        slot[:, 2] = pts_3d[:, 2]

    decoded = decode_3d_prediction(
        pred,
        anchor,
        stride,
        calib,
        640,
        480,
        {0},
        set(),
        0,
        pred_edge_60=pred_edge,
        bbox_xyxy=pred_bbox_xyxy,
    )

    assert decoded is not None
    assert decoded["visible_face_types"] == (0, 3)
    assert decoded["edge_points_2d"].shape == (2, 5, 2)
    assert decoded["edge_points_3d"].shape == (2, 5, 3)
def test_face_metric_store_counts_visible_yaw_objectwise():
    """Check that visible-yaw metrics are counted per supplied yaw sample, not per matched row.

    Two prediction/GT rows are appended to the face metric stores, but only one visible-yaw sample
    is passed to the aggregation. The stored metrics must report ``matched == 2`` while both
    visible-orientation counters are 1, with errors of 0.05 rad (direct) and 0.03 rad (edge)
    expressed in degrees.
    """
    # Imported locally, as test_validator_desc_and_row_alignment_include_vyaw does, so this test does
    # not rely on the name being available at module level.
    from ultralytics.models.yolo.detect.train import Ground3DDetectionValidator

    def _attr_row(center, depth, yaw, edge_yaw, uv):
        """Build one metric-attribute row; every row shares the same box dimensions."""
        return {
            "center": np.array(center, dtype=np.float32),
            "depth": depth,
            "yaw": yaw,
            "edge_yaw": edge_yaw,
            "dims": np.array([4.0, 1.5, 1.8], dtype=np.float32),
            "uv": np.array(uv, dtype=np.float32),
        }

    # __new__ bypasses __init__, so only the attributes this test needs are set by hand.
    validator = Ground3DDetectionValidator.__new__(Ground3DDetectionValidator)
    validator.stats_3d = {"whole": [], "face": []}
    face_pred = validator._metric_store()
    face_gt = validator._metric_store()

    pred_rows = [
        _attr_row([1.0, 2.0, 10.0], 10.0, 0.1, 0.12, [100.0, 120.0]),
        _attr_row([1.1, 2.1, 10.1], 10.1, 0.1, 0.13, [104.0, 124.0]),
    ]
    gt_rows = [
        _attr_row([1.0, 2.0, 10.0], 10.0, 0.15, 0.15, [101.0, 121.0]),
        _attr_row([1.0, 2.0, 10.0], 10.0, 0.15, 0.15, [105.0, 125.0]),
    ]
    for pred_row, gt_row in zip(pred_rows, gt_rows):
        validator._append_metric_attr(face_pred, pred_row)
        validator._append_metric_attr(face_gt, gt_row)

    validator._aggregate_face_metric_store(
        face_pred,
        face_gt,
        pred_visible_yaw=[0.1],
        pred_edge_visible_yaw=[0.12],
        gt_visible_target_yaw=[0.15],
    )

    metrics = validator.stats_3d["face"][0]
    assert metrics["matched"] == 2
    assert metrics["_direct_orient_visible_matched"] == 1
    assert metrics["_edge_orient_visible_matched"] == 1
    assert np.isclose(metrics["direct_orient_visible"], np.degrees(0.05))
    assert np.isclose(metrics["edge_orient_visible"], np.degrees(0.03))
def test_validator_desc_and_row_alignment_include_vyaw():
    """Check that the validator header mentions ``Vyaw`` and that a 3D summary row fits the loss columns.

    The validator is created without running ``__init__`` and given just enough state to call
    ``get_desc()``. A hand-built ``all-3d`` row must have one more cell than there are loss names.
    """
    from ultralytics.models.yolo.detect.train import Ground3DDetectionValidator

    validator = Ground3DDetectionValidator.__new__(Ground3DDetectionValidator)
    validator.metrics_3d_results = {
        "whole": {"matched": 1, "depth_abs": 0.1, "uv": 0.2, "size": 0.3, "orient": 0.4},
        "face": {
            "matched": 2,
            "depth_abs": 0.5,
            "uv": 0.6,
            "size": 0.7,
            "edge_orient_visible": 0.8,
        },
    }
    validator.metrics = SimpleNamespace(
        keys=("P", "R", "mAP50", "mAP50-95"),
        nt_per_class=np.array([1]),
        mean_results=lambda: (0, 0, 0, 0),
        nt_per_image=np.array([1]),
        ap_class_index=np.array([0]),
        stats=[],
    )
    validator.seen = 1
    validator.training = False
    validator.args = SimpleNamespace(verbose=False)
    validator.nc = 1
    validator.names = {0: "car"}
    validator.trainer = SimpleNamespace(
        loss_names=(
            "box",
            "cls",
            "dfl",
            "z3d",
            "uv",
            "size",
            "ycls",
            "ydeg",
            "ccls",
            "fz",
            "fuv",
            "fsize",
            "fcls",
            "euv",
            "ez",
        )
    )

    assert "Vyaw" in validator.get_desc()

    whole = validator.metrics_3d_results["whole"]
    face = validator.metrics_3d_results["face"]
    row_values = [
        "",
        "all-3d",
        f"{whole['matched']}",
        f"{face['matched']}",
        "-",
        f"{whole['depth_abs']:.3g}",
        f"{whole['uv']:.3g}",
        f"{whole['size']:.3g}",
        "-",
        f"{whole['orient']:.3g}",
        "-",
        f"{face['depth_abs']:.3g}",
        f"{face['uv']:.3g}",
        f"{face['size']:.3g}",
        "-",
        f"{face['edge_orient_visible']:.3g}",
    ]
    expected_cells = 1 + len(validator.trainer.loss_names)
    assert len(row_values) == expected_cells
def test_3d_loss_logs_skip_virtual_samples_when_roi_metrics_only_enabled():
    """Check that logged 3D loss items stay at zero when only the ``roi`` sample matches its target.

    The batch holds one ``roi`` and one ``virtual`` sample. The ``roi`` predictions equal their
    targets, while the ``virtual`` sample's ``preds_3d[1, 0]`` (110) differs from its label value
    (100). With ``roi_metrics_only`` set, ``log_items[6]`` and ``log_items[0]`` must be 0.
    """
    import torch

    from ultralytics.utils.loss import v8Detection3DLoss

    # object.__new__ skips __init__; only the attributes set below are available on the loss object.
    loss = object.__new__(v8Detection3DLoss)
    loss.hyp = type("Hyp", (), {"roi_metrics_only": True})()
    loss.face_3d_classes = {0}
    loss.complete_3d_classes = set()
    loss.norm_scales_3d = {}
    loss.device = torch.device("cpu")
    loss.l1_loss = torch.nn.L1Loss(reduction="sum")
    loss.l1_loss_none = torch.nn.L1Loss(reduction="none")
    loss.bce_yaw = torch.nn.BCEWithLogitsLoss(reduction="sum")
    loss.ce_cut = torch.nn.CrossEntropyLoss(reduction="sum")
    loss.edge_loss_gain = 0.1

    preds_3d = torch.zeros((2, 41, 1), dtype=torch.float32)
    preds_3d[0, 0, 0] = 10.0
    preds_3d[0, 24, 0] = 20.0
    preds_3d[1, 0, 0] = 110.0
    preds_3d[1, 24, 0] = 120.0
    preds = {"preds_3d": preds_3d, "preds_edge": torch.zeros((2, 60, 1), dtype=torch.float32)}

    labels_3d = torch.full((2, 42), float("nan"), dtype=torch.float32)
    for row, (box_depth, face_depth) in enumerate(((20.0, 10.0), (120.0, 100.0))):
        labels_3d[row, 2] = box_depth
        labels_3d[row, 3:6] = torch.tensor([4.0, 1.5, 1.8])
        labels_3d[row, 6] = 0.0
        labels_3d[row, 10:18] = torch.tensor([0.0, 0.0, face_depth, 0.0, 0.5, 0.5, 0.8, 1.0])
    batch = {
        "labels_3d": labels_3d,
        "batch_idx": torch.tensor([0, 1], dtype=torch.int64),
        "cls": torch.tensor([[0.0], [0.0]], dtype=torch.float32),
        "camera_mode": ("roi", "virtual"),
        "calib": ({"depth_scale": 1.0}, {"depth_scale": 1.0}),
    }

    fg_mask = torch.tensor([[True], [True]])
    target_gt_idx = torch.tensor([[0], [0]], dtype=torch.int64)
    anchor_points = torch.tensor([[0.0, 0.0]], dtype=torch.float32)
    stride_tensor = torch.tensor([[1.0]], dtype=torch.float32)
    imgsz = torch.tensor([1.0, 1.0], dtype=torch.float32)

    _, log_items = loss._compute_3d_loss(
        preds, batch, fg_mask, target_gt_idx, anchor_points, stride_tensor, imgsz
    )

    zero = torch.tensor(0.0)
    assert torch.isclose(log_items[6], zero)
    assert torch.isclose(log_items[0], zero)
def test_edge_depth_targets_are_normalized_back_to_model_space():
    """Check that metric depths [20, 24, 28] with a scale tensor of 2.0 come back as [10, 12, 14]."""
    import torch

    from ultralytics.utils.loss import _normalize_edge_depth_targets_to_model_space

    scale = torch.tensor(2.0)
    metric_depths = torch.tensor([[20.0, 24.0, 28.0]], dtype=torch.float32)
    expected = torch.tensor([[10.0, 12.0, 14.0]], dtype=torch.float32)

    result = _normalize_edge_depth_targets_to_model_space(metric_depths, scale)

    assert torch.allclose(result, expected)
def test_3d_loss_gather_assigned_targets_flattens_local_indices():
    """Check that per-image GT indices are mapped to rows of the flat label tensors.

    With offsets [0, 2, 4] and two GTs per image, local indices [[0], [1]] for ``image_idx=1`` must
    resolve to flat rows 2 and 3, and the gathered labels and classes must be those rows.
    """
    import torch

    from ultralytics.utils.loss import v8Detection3DLoss

    num_gt, label_width = 4, 42
    labels_3d = torch.arange(num_gt * label_width, dtype=torch.float32).reshape(num_gt, label_width)
    cls_all = torch.tensor([0.0, 1.0, 2.0, 3.0], dtype=torch.float32)
    gt_offsets = torch.tensor([0, 2, 4], dtype=torch.long)
    gt_counts = torch.tensor([2, 2], dtype=torch.long)
    local_idx = torch.tensor([[0], [1]], dtype=torch.int64)

    flat_idx, gathered_labels, gathered_cls = v8Detection3DLoss._gather_assigned_3d_targets(
        labels_3d, cls_all, gt_offsets, gt_counts, local_idx, image_idx=1
    )

    assert flat_idx.shape == (2,)
    assert flat_idx.tolist() == [2, 3]
    assert torch.equal(gathered_labels, labels_3d[2:4])
    assert torch.equal(gathered_cls, cls_all[2:4])
def test_3d_loss_gather_assigned_targets_rejects_out_of_range_local_indices():
    """Check that a local GT index beyond the image's GT count raises ``RuntimeError``.

    Image 1 owns two GTs (``gt_counts[1] == 2``), so local index 2 is out of range.
    """
    import torch

    from ultralytics.utils.loss import v8Detection3DLoss

    labels_3d = torch.zeros((4, 42), dtype=torch.float32)
    cls_all = torch.zeros(4, dtype=torch.float32)
    gt_offsets = torch.tensor([0, 2, 4], dtype=torch.long)
    gt_counts = torch.tensor([2, 2], dtype=torch.long)
    bad_local_idx = torch.tensor([0, 2], dtype=torch.int64)

    with pytest.raises(RuntimeError, match="Assigned GT index out of range"):
        v8Detection3DLoss._gather_assigned_3d_targets(
            labels_3d, cls_all, gt_offsets, gt_counts, bad_local_idx, image_idx=1
        )
def test_3d_loss_edge_branch_keeps_gt_offsets_for_next_image():
    """Check that ``_compute_3d_loss`` returns finite, correctly shaped outputs for a two-image batch.

    Both images are ``roi`` samples with one GT each and full camera calibration. The returned
    tensors must have shapes (9,) and (12,) and contain only finite values.
    """
    import torch

    from ultralytics.utils.loss import v8Detection3DLoss

    # object.__new__ skips __init__; only the attributes set below are available on the loss object.
    loss = object.__new__(v8Detection3DLoss)
    loss.hyp = type("Hyp", (), {"roi_metrics_only": False})()
    loss.face_3d_classes = {0}
    loss.complete_3d_classes = set()
    loss.norm_scales_3d = {}
    loss.device = torch.device("cpu")
    loss.l1_loss = torch.nn.L1Loss(reduction="sum")
    loss.l1_loss_none = torch.nn.L1Loss(reduction="none")
    loss.bce_yaw = torch.nn.BCEWithLogitsLoss(reduction="sum")
    loss.ce_cut = torch.nn.CrossEntropyLoss(reduction="sum")
    loss.edge_loss_gain = 0.1

    preds = {
        "preds_3d": torch.zeros((2, 41, 1), dtype=torch.float32),
        "preds_edge": torch.zeros((2, 60, 1), dtype=torch.float32),
    }

    labels_3d = torch.full((2, 42), float("nan"), dtype=torch.float32)
    for row, depth in enumerate((20.0, 22.0)):
        labels_3d[row, 2] = depth
        labels_3d[row, 3:6] = torch.tensor([4.0, 1.5, 1.8])
        labels_3d[row, 6] = -0.6
        labels_3d[row, 7:9] = torch.tensor([0.5, 0.55])
        labels_3d[row, 10:18] = torch.tensor([0.0, 0.0, depth - 2.0, 0.0, 0.46, 0.55, 0.9, 1.0])

    batch = {
        "labels_3d": labels_3d,
        "batch_idx": torch.tensor([0, 1], dtype=torch.int64),
        "cls": torch.tensor([[0.0], [0.0]], dtype=torch.float32),
        "camera_mode": ("roi", "roi"),
        # One independent calibration dict per image.
        "calib": tuple(
            {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0, "depth_scale": 1.0} for _ in range(2)
        ),
    }

    fg_mask = torch.tensor([[True], [True]])
    target_gt_idx = torch.tensor([[0], [0]], dtype=torch.int64)
    anchor_points = torch.tensor([[0.0, 0.0]], dtype=torch.float32)
    stride_tensor = torch.tensor([[8.0]], dtype=torch.float32)
    imgsz = torch.tensor([480.0, 640.0], dtype=torch.float32)

    opt_items, log_items = loss._compute_3d_loss(
        preds, batch, fg_mask, target_gt_idx, anchor_points, stride_tensor, imgsz
    )

    assert opt_items.shape == (9,)
    assert log_items.shape == (12,)
    assert torch.isfinite(opt_items).all()
    assert torch.isfinite(log_items).all()