# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

import random
from types import SimpleNamespace

import numpy as np
import pytest

from ultralytics.data.ground3d_augment import compute_simul_calib
from ultralytics.utils.metrics_3d import aggregate_3d_metric_groups, compute_3d_metrics_for_matched, empty_3d_metrics
from ultralytics.utils.plotting_3d import (
    collect_face_bottom_edges,
    collect_precomputed_edge_points_2d,
    compute_3d_box_corners,
    decode_cut_partial_side_edge_from_gt,
    decode_edge_yaw_selection_from_prediction,
    decode_multi_visible_face_yaw_from_gt,
    decode_multi_visible_face_yaw_from_prediction,
    decode_visible_face_edge_from_gt,
    edge_points_to_yaw,
    extract_3d_attrs_from_gt,
    extract_3d_attrs_from_prediction,
    face_center_from_corners,
    get_cut_object_side_face,
    project_face_bottom_edge,
    project_partial_face_bottom_edge,
    project_3d_to_2d,
    rebuild_box_corners_for_visualization,
    reconstruct_3d_box_from_face,
    select_gt_visible_faces,
    visible_face_edges_to_yaw,
)


def _pinhole_calib(**extra):
    """Return a fresh copy of the pinhole calibration shared by these tests, extended with any extra keys."""
    calib = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0}
    calib.update(extra)
    return calib


def _wrapped_angle_diff(angle, reference):
    """Return ``angle - reference`` wrapped into the half-open interval [-pi, pi)."""
    return (angle - reference + np.pi) % (2 * np.pi) - np.pi


def _write_edge_block(pred_edge, face_type, points_3d, points_2d, anchor, stride):
    """Write one face's five edge samples into ``pred_edge`` as (u / stride - anchor_x, v / stride - anchor_y, z)."""
    off = face_type * 15
    # Reshaping a contiguous slice yields a view, so the assignments below modify ``pred_edge`` in place.
    face_block = pred_edge[off : off + 15].reshape(5, 3)
    face_block[:, 0] = points_2d[:, 0] / stride - anchor[0]
    face_block[:, 1] = points_2d[:, 1] / stride - anchor[1]
    face_block[:, 2] = points_3d[:, 2]


def _write_face_prediction(pred, corners, face_type, score, calib, anchor, stride):
    """Fill the 6-wide block of ``pred`` for ``face_type`` with the face centre depth, anchor-relative uv and score."""
    face_center = face_center_from_corners(corners, face_type)
    face_uv = project_3d_to_2d(face_center[None, :], calib)[0]
    off = face_type * 6
    pred[off] = face_center[2]
    pred[off + 1 : off + 3] = [face_uv[0] / stride - anchor[0], face_uv[1] / stride - anchor[1]]
    pred[off + 5] = score


def _make_whole_box_pred(center, dims, rot_y, calib, anchor, stride):
    """Build a 41-value prediction vector carrying the whole-box depth, horizontal offset, dims and yaw entries."""
    pred = np.zeros(41, dtype=np.float32)
    pred[24] = center[2]
    pred[25:27] = [center[0] / center[2] * calib["fx"] / stride + (calib["cx"] / stride - anchor[0]), 0.0]
    pred[27:30] = dims
    # NOTE(review): indices 30 and 34 look like an orientation-bin logit and its sine residual — confirm against
    # the prediction layout used by plotting_3d.
    pred[30] = 10.0
    pred[34] = np.sin(rot_y)
    return pred


def _make_metric_pair(pred_edge_yaw):
    """Return the (pred, gt) attribute dicts used by the matched-metric tests; only ``pred['edge_yaw']`` varies."""
    pred = {
        "center": np.array([[1.0, 2.0, 10.0], [2.0, 1.0, 12.0]], dtype=np.float32),
        "depth": np.array([10.0, 12.0], dtype=np.float32),
        "yaw": np.array([0.1, 0.2], dtype=np.float32),
        "edge_yaw": np.array(pred_edge_yaw, dtype=np.float32),
        "dims": np.array([[4.0, 1.5, 1.8], [4.1, 1.6, 1.9]], dtype=np.float32),
        "uv": np.array([[100.0, 120.0], [140.0, 150.0]], dtype=np.float32),
    }
    gt = {
        "center": np.array([[1.5, 2.5, 10.5], [2.5, 1.5, 12.5]], dtype=np.float32),
        "depth": np.array([11.0, 13.0], dtype=np.float32),
        "yaw": np.array([0.15, 0.25], dtype=np.float32),
        "edge_yaw": np.array([0.15, 0.25], dtype=np.float32),
        "dims": np.array([[4.2, 1.4, 1.7], [4.0, 1.5, 1.8]], dtype=np.float32),
        "uv": np.array([[102.0, 124.0], [137.0, 154.0]], dtype=np.float32),
    }
    return pred, gt


def _make_cut_target(cut_state, cut_side, rot_y=0.75):
    """Build a 42-value GT target with a single populated face block (offset 10 for cut_in, 18 otherwise)."""
    target = np.full(42, np.nan, dtype=np.float32)
    target[2] = 20.0
    target[3:6] = [4.0, 1.5, 1.8]
    target[6] = rot_y
    target[7:9] = [0.5, 0.6]
    face_u = 0.45 if cut_side == "left" else 0.55
    if cut_state == "cut_in":
        target[10:18] = [0.0, 0.0, 18.0, 0.0, face_u, 0.55, 0.9, 1.0]
        target[18:24] = -1.0
        target[24:26] = 0.0
    else:
        target[18:26] = [0.0, 0.0, 18.5, 0.0, face_u, 0.55, 0.9, 1.0]
        target[10:16] = -1.0
        target[16:18] = 0.0
    # The two remaining face blocks are marked absent in both cases.
    target[26:32] = -1.0
    target[32:34] = 0.0
    target[34:40] = -1.0
    target[40:42] = 0.0
    return target


def _make_consistent_cut_target(cut_state, cut_side, calib, rot_y=None):
    """Build a cut-object GT target whose face block is projected from real box corners.

    Returns:
        (tuple): The 42-value target, the box corners from ``compute_3d_box_corners`` and a bbox hugging the
            image border on ``cut_side``.
    """
    img_w, img_h = 640, 480
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    center_x = -8.0 if cut_side == "left" else 8.0
    center_3d = np.array([center_x, 0.0, 20.0], dtype=np.float32)
    if rot_y is None:
        rot_y = -0.75 if cut_state == "cut_in" else 0.75
    rot_y = float(rot_y)

    target = np.full(42, np.nan, dtype=np.float32)
    target[2] = center_3d[2]
    target[3:6] = dims
    target[6] = rot_y
    corners = compute_3d_box_corners(center_3d, dims, rot_y, face_type=-1)
    whole_uv = project_3d_to_2d(center_3d[None, :], calib)[0]
    target[7:9] = [whole_uv[0] / img_w, whole_uv[1] / img_h]

    keep_face = 0 if cut_state == "cut_in" else 1
    face_corner_ids = (4, 5, 6, 7) if keep_face == 0 else (0, 1, 2, 3)
    face_center_3d = corners[list(face_corner_ids)].mean(axis=0)
    face_uv = project_3d_to_2d(face_center_3d[None, :], calib)[0]
    face_offset = 10 if keep_face == 0 else 18
    target[face_offset : face_offset + 8] = [
        0.0,
        0.0,
        face_center_3d[2],
        0.0,
        face_uv[0] / img_w,
        face_uv[1] / img_h,
        1.0,
        1.0,
    ]
    # Every face block other than the kept one is marked absent.
    for off in (10, 18, 26, 34):
        if off == face_offset:
            continue
        target[off : off + 6] = -1.0
        target[off + 6 : off + 8] = 0.0

    bbox_xyxy = np.array([0.0, 120.0, 80.0, 240.0], dtype=np.float32)
    if cut_side == "right":
        bbox_xyxy = np.array([560.0, 120.0, 639.0, 240.0], dtype=np.float32)
    return target, corners, bbox_xyxy


def _make_consistent_visible_face_target(visible_faces, calib, rot_y=0.75, center_3d=None):
    """Build a GT target whose face blocks for ``visible_faces`` are projected from real box corners.

    Returns:
        (tuple): The 42-value target (zeros where no face is written) and the box corners.
    """
    img_w, img_h = 640, 480
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    if center_3d is None:
        center_3d = np.array([0.0, 0.0, 20.0], dtype=np.float32)
    else:
        center_3d = np.asarray(center_3d, dtype=np.float32)
    face_offsets = {0: 10, 1: 18, 2: 26, 3: 34}
    face_corner_ids = {0: (4, 5, 6, 7), 1: (0, 1, 2, 3), 2: (1, 2, 5, 6), 3: (0, 3, 4, 7)}

    target = np.zeros(42, dtype=np.float32)
    target[2] = center_3d[2]
    target[3:6] = dims
    target[6] = float(rot_y)
    corners = compute_3d_box_corners(center_3d, dims, float(rot_y), face_type=-1)
    whole_uv = project_3d_to_2d(center_3d[None, :], calib)[0]
    target[7:9] = [whole_uv[0] / img_w, whole_uv[1] / img_h]

    for face_type in range(4):
        off = face_offsets[face_type]
        if face_type not in visible_faces:
            continue
        face_center_3d = corners[list(face_corner_ids[face_type])].mean(axis=0)
        face_uv = project_3d_to_2d(face_center_3d[None, :], calib)[0]
        target[off : off + 8] = [
            0.0,
            0.0,
            float(face_center_3d[2]),
            0.0,
            float(face_uv[0] / img_w),
            float(face_uv[1] / img_h),
            1.0 - 0.01 * face_type,  # slightly different score per face
            1.0,
        ]
    return target, corners


def test_cut_side_face_mapping_falls_back_to_border_side_without_geometry():
    """Without corners, the side face is chosen purely from the cut side (left -> 3, right -> 2)."""
    assert get_cut_object_side_face(1, "left") == 3
    assert get_cut_object_side_face(2, "left") == 3
    assert get_cut_object_side_face(1, "right") == 2
    assert get_cut_object_side_face(2, "right") == 2


def test_cut_side_face_mapping_uses_geometry_for_cut_out_boxes():
    """With corners supplied, the side face of a cut-out box depends on the box yaw."""
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    cases = [
        (np.array([-8.0, 0.0, 20.0], dtype=np.float32), 0.75, "left", 3),
        (np.array([-8.0, 0.0, 20.0], dtype=np.float32), 2.6, "left", 2),
        (np.array([8.0, 0.0, 20.0], dtype=np.float32), 0.75, "right", 3),
        (np.array([8.0, 0.0, 20.0], dtype=np.float32), 2.6, "right", 2),
    ]
    for center, rot_y, cut_side, face_type in cases:
        corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1)
        assert get_cut_object_side_face(2, cut_side, corners_3d=corners) == face_type


def test_project_partial_face_bottom_edge_samples_exactly_five_visible_points():
    """The partial edge yields five in-image samples ordered left to right."""
    calib = _pinhole_calib()
    center = np.array([-7.0, 0.0, 20.0], dtype=np.float32)
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    corners = compute_3d_box_corners(center, dims, 0.75, face_type=-1)
    points_3d, points_2d = project_partial_face_bottom_edge(corners, 3, calib, 640, 480, num_samples=5)
    assert points_3d is not None
    assert points_2d is not None
    assert points_3d.shape == (5, 3)
    assert points_2d.shape == (5, 2)
    assert np.all((points_2d[:, 0] >= 0) & (points_2d[:, 0] <= 639))
    assert np.all((points_2d[:, 1] >= 0) & (points_2d[:, 1] <= 479))
    assert np.all(np.diff(points_2d[:, 0]) >= -1e-5)


def test_decode_cut_partial_side_edge_from_gt_matches_left_cut_in_case():
    """A left cut-in target decodes to side face 3 with five ordered points and matching depths."""
    calib = _pinhole_calib(depth_scale=1.0)
    target, _, bbox_xyxy = _make_consistent_cut_target("cut_in", "left", calib)
    decoded = decode_cut_partial_side_edge_from_gt(target, 0, calib, 640, 480, {0}, set(), bbox_xyxy=bbox_xyxy)
    assert decoded is not None
    assert decoded["face_type"] == 3
    assert decoded["points_2d"].shape == (5, 2)
    assert np.all(np.diff(decoded["points_2d"][:, 0]) >= -1e-5)
    assert np.allclose(decoded["points_3d"][:, 2], decoded["depths"])


def test_decode_cut_partial_side_edge_from_gt_reuses_face_box_geometry_for_cut_in():
    """The decoded side edge equals the edge of the box rebuilt from the face block and of the original box."""
    calib = _pinhole_calib(depth_scale=1.0, distort_coeffs=[0.1, -0.01, 0.001, -0.0001])
    target, box_corners, bbox_xyxy = _make_consistent_cut_target("cut_in", "left", calib)
    decoded = decode_cut_partial_side_edge_from_gt(target, 0, calib, 640, 480, {0}, set(), bbox_xyxy=bbox_xyxy)
    face = target[10:18]
    expected_corners = reconstruct_3d_box_from_face(
        (float(face[4] * 640), float(face[5] * 480)),
        float(face[2]),
        target[3:6],
        float(target[6]),
        0,
        calib,
    )
    expected_3d, expected_2d = project_partial_face_bottom_edge(expected_corners, 3, calib, 640, 480, num_samples=5)
    box_edge_3d, box_edge_2d = project_partial_face_bottom_edge(box_corners, 3, calib, 640, 480, num_samples=5)
    assert decoded is not None
    assert expected_3d is not None
    assert expected_2d is not None
    assert box_edge_3d is not None
    assert box_edge_2d is not None
    assert np.allclose(decoded["points_3d"], expected_3d, atol=1e-4)
    assert np.allclose(decoded["points_2d"], expected_2d, atol=1e-4)
    assert np.allclose(decoded["points_3d"], box_edge_3d, atol=1e-4)
    assert np.allclose(decoded["points_2d"], box_edge_2d, atol=1e-4)


def test_rebuild_box_corners_for_visualization_preserves_face_anchor():
    """Rebuilding with a new yaw around a face anchor keeps that face's centre fixed."""
    center_3d = np.array([1.5, 0.2, 18.0], dtype=np.float32)
    dims = np.array([4.2, 1.6, 1.9], dtype=np.float32)
    base_yaw = 0.35
    new_yaw = -0.6
    visible_face_type = 0
    base_corners = compute_3d_box_corners(center_3d, dims, base_yaw, face_type=-1)
    face_center = face_center_from_corners(base_corners, visible_face_type)
    rebuilt = rebuild_box_corners_for_visualization(
        base_corners,
        dims,
        new_yaw,
        visible_face_type=visible_face_type,
        face_center_3d=face_center,
    )
    assert rebuilt is not None
    rebuilt_face_center = face_center_from_corners(rebuilt, visible_face_type)
    assert rebuilt_face_center is not None
    assert np.allclose(rebuilt_face_center, face_center, atol=1e-5)


def test_rebuild_box_corners_for_visualization_preserves_box_center_for_whole_boxes():
    """Rebuilding without a face anchor keeps the mean of the corners fixed."""
    center_3d = np.array([-2.0, 0.1, 25.0], dtype=np.float32)
    dims = np.array([0.8, 1.7, 0.6], dtype=np.float32)
    base_corners = compute_3d_box_corners(center_3d, dims, 0.2, face_type=-1)
    rebuilt = rebuild_box_corners_for_visualization(base_corners, dims, -1.1)
    assert rebuilt is not None
    assert np.allclose(rebuilt.mean(axis=0), base_corners.mean(axis=0), atol=1e-5)


def test_decode_cut_partial_side_edge_from_gt_matches_other_cut_cases():
    """Each remaining cut state / side / yaw combination decodes to the expected side face."""
    calib = _pinhole_calib(depth_scale=1.0)
    cases = [
        ("cut_out", "left", 0.75, 3),
        ("cut_out", "left", 2.6, 2),
        ("cut_in", "right", -0.75, 2),
        ("cut_out", "right", 0.75, 3),
        ("cut_out", "right", 2.6, 2),
    ]
    for cut_state, cut_side, rot_y, face_type in cases:
        target, _, bbox_xyxy = _make_consistent_cut_target(cut_state, cut_side, calib, rot_y=rot_y)
        decoded = decode_cut_partial_side_edge_from_gt(target, 0, calib, 640, 480, {0}, set(), bbox_xyxy=bbox_xyxy)
        assert decoded is not None
        assert decoded["face_type"] == face_type
        assert decoded["points_2d"].shape == (5, 2)


def test_decode_cut_partial_side_edge_from_gt_does_not_guess_cut_side_without_bbox():
    """Decoding returns None when no bbox is given to determine the cut side."""
    calib = _pinhole_calib(depth_scale=1.0)
    target = _make_cut_target("cut_out", "left", rot_y=0.75)
    decoded = decode_cut_partial_side_edge_from_gt(target, 0, calib, 640, 480, {0}, set(), bbox_xyxy=None)
    assert decoded is None


def test_decode_multi_visible_face_yaw_from_gt_uses_two_edges_for_valid_cut_objects():
    """The GT multi-face yaw of a left cut-in object matches the target yaw."""
    calib = _pinhole_calib(depth_scale=1.0)
    rot_y = -0.75
    target, _, bbox_xyxy = _make_consistent_cut_target("cut_in", "left", calib, rot_y=rot_y)
    yaw = decode_multi_visible_face_yaw_from_gt(
        target,
        0,
        calib,
        640,
        480,
        {0},
        set(),
        fallback_face_type=0,
        bbox_xyxy=bbox_xyxy,
    )
    diff = _wrapped_angle_diff(yaw, rot_y)
    assert np.isclose(diff, 0.0, atol=1e-3)


def test_decode_multi_visible_face_yaw_from_gt_matches_gt_yaw_across_visible_face_cases():
    """The GT multi-face yaw matches the target yaw for two-face and cut-object targets alike."""
    calib = _pinhole_calib(depth_scale=1.0)
    cases = []
    for visible_faces, rot_y in (((0, 3), 0.75), ((0, 2), 2.35), ((1, 3), -0.75), ((1, 2), -2.35)):
        target, _ = _make_consistent_visible_face_target(visible_faces, calib, rot_y=rot_y)
        cases.append((target, rot_y, None))
    for cut_state, cut_side, rot_y in (
        ("cut_in", "left", -0.75),
        ("cut_out", "left", 0.75),
        ("cut_out", "left", 2.6),
        ("cut_out", "right", 0.75),
        ("cut_out", "right", 2.6),
    ):
        target, _, bbox_xyxy = _make_consistent_cut_target(cut_state, cut_side, calib, rot_y=rot_y)
        cases.append((target, rot_y, bbox_xyxy))

    for target, rot_y, bbox_xyxy in cases:
        yaw = decode_multi_visible_face_yaw_from_gt(
            target,
            0,
            calib,
            640,
            480,
            {0},
            set(),
            fallback_face_type=0,
            bbox_xyxy=bbox_xyxy,
        )
        diff = _wrapped_angle_diff(yaw, rot_y)
        assert np.isfinite(yaw)
        assert np.isclose(diff, 0.0, atol=1e-3), (rot_y, yaw, bbox_xyxy)


def test_compute_simul_calib_uses_center_crop_without_augmentation():
    """Without augmentation the crop's top-left corner is derived from the (960, 540) centre."""
    calib = {
        "focal_u": 1000.0,
        "focal_v": 1000.0,
        "cu": 960.0,
        "cv": 540.0,
        "distort_coeffs": [0.0, 0.0, 0.0, 0.0],
    }
    simul_calib = compute_simul_calib(calib, (1920, 1080), (704, 352), 960.0, 540.0, target_fx=537.0, augment=False)
    crop_x1, crop_y1, crop_x2, crop_y2 = simul_calib["crop_bounds"]
    crop_w = crop_x2 - crop_x1
    crop_h = crop_y2 - crop_y1
    assert crop_x1 == int(960.0 - crop_w / 2)
    assert crop_y1 == int(540.0 - crop_h / 2)


def test_compute_simul_calib_augmentation_keeps_fixed_crop_center():
    """With augmentation the crop size varies across calls while the crop centre stays at (960, 540)."""
    calib = {
        "focal_u": 1000.0,
        "focal_v": 1000.0,
        "cu": 960.0,
        "cv": 540.0,
        "distort_coeffs": [0.0, 0.0, 0.0, 0.0],
    }
    centers = set()
    widths = set()
    heights = set()
    # Seed for reproducibility, but restore the global RNG state afterwards so this test does not change the
    # random sequence seen by tests that run after it.
    rng_state = random.getstate()
    random.seed(0)
    try:
        for _ in range(32):
            simul_calib = compute_simul_calib(
                calib, (1920, 1080), (704, 352), 960.0, 540.0, target_fx=537.0, augment=True
            )
            crop_x1, crop_y1, crop_x2, crop_y2 = simul_calib["crop_bounds"]
            widths.add(crop_x2 - crop_x1)
            heights.add(crop_y2 - crop_y1)
            centers.add(((crop_x1 + crop_x2) / 2.0, (crop_y1 + crop_y2) / 2.0))
    finally:
        random.setstate(rng_state)
    assert len(widths) > 1
    assert len(heights) > 1
    assert centers == {(960.0, 540.0)}


def test_select_gt_visible_faces_uses_score_threshold():
    """Only faces whose score reaches ``score_thr`` are selected (0.49 is rejected and 0.50 accepted at 0.5)."""
    target = np.full(42, np.nan, dtype=np.float32)
    target[2] = 20.0
    target[3:6] = [4.0, 1.5, 1.8]
    target[6] = 0.25
    target[7:9] = [0.5, 0.6]
    target[10:18] = [0.0, 0.0, 18.0, 0.0, 0.45, 0.55, 0.49, 1.0]
    target[26:34] = [0.0, 0.0, 17.0, 0.0, 0.42, 0.53, 0.50, 1.0]
    target[34:42] = [0.0, 0.0, 16.5, 0.0, 0.52, 0.51, 0.85, 1.0]
    selected = select_gt_visible_faces(target, score_thr=0.5)
    assert [face_type for face_type, _ in selected] == [2, 3]
    selected = select_gt_visible_faces(target, score_thr=0.8)
    assert [face_type for face_type, _ in selected] == [3]


def test_extract_3d_attrs_supports_whole_and_face_modes():
    """Depth and uv are extracted for both the whole box and a single face, from GT and from predictions."""
    calib = _pinhole_calib(depth_scale=2.0)
    target = np.full(42, np.nan, dtype=np.float32)
    target[2] = 10.0
    target[3:6] = [4.0, 1.5, 1.8]
    target[6] = 0.1
    target[7:9] = [0.5, 0.4]
    target[10:18] = [0.0, 0.0, 8.0, 0.0, 0.25, 0.35, 0.4, 1.0]
    face_classes = {0}
    complete_classes = {1}
    whole = extract_3d_attrs_from_gt(target, 0, calib, 640, 480, face_classes, complete_classes)
    face = extract_3d_attrs_from_gt(target, 0, calib, 640, 480, face_classes, complete_classes, face_type=0)
    assert np.isclose(whole["depth"], 20.0)
    assert np.isclose(whole["uv"][0], 320.0)
    assert np.isclose(whole["uv"][1], 192.0)
    assert np.isclose(face["depth"], 16.0)
    assert np.isclose(face["uv"][0], 160.0)
    assert np.isclose(face["uv"][1], 168.0)
    # The face score is 0.4, so a 0.5 threshold rejects the face.
    assert (
        extract_3d_attrs_from_gt(
            target, 0, calib, 640, 480, face_classes, complete_classes, face_type=0, score_thr=0.5
        )
        is None
    )

    pred = np.zeros(41, dtype=np.float32)
    pred[24] = 22.0
    pred[25:27] = [0.5, -0.25]
    pred[27:30] = [4.2, 1.4, 1.9]
    pred[30] = 3.0
    pred[34:38] = np.sin([0.1, 0.2, 0.3, 0.4])
    pred[0] = 17.0
    pred[1:3] = [-0.5, 0.25]
    anchor = np.array([10.0, 20.0], dtype=np.float32)
    stride = 8.0
    pred_whole = extract_3d_attrs_from_prediction(pred, anchor, stride, calib)
    pred_face = extract_3d_attrs_from_prediction(pred, anchor, stride, calib, face_type=0)
    assert np.isclose(pred_whole["depth"], 22.0)
    assert np.allclose(pred_whole["uv"], [84.0, 158.0])
    assert np.isclose(pred_face["depth"], 17.0)
    assert np.allclose(pred_face["uv"], [76.0, 162.0])


def test_compute_3d_metrics_for_matched_includes_uv():
    """All optional metric groups are reported and both pairs count towards the visible-orientation metrics."""
    pred, gt = _make_metric_pair([0.12, 0.18])
    metrics = compute_3d_metrics_for_matched(
        pred,
        gt,
        include_orient=True,
        include_size=True,
        include_uv=True,
        include_visible_orient=True,
    )
    assert metrics["matched"] == 2
    assert metrics["uv"] > 0
    assert metrics["orient"] > 0
    assert metrics["size"] > 0
    assert metrics["direct_orient_visible"] > 0
    assert metrics["edge_orient_visible"] > 0
    assert metrics["_direct_orient_visible_matched"] == 2
    assert metrics["_edge_orient_visible_matched"] == 2


def test_compute_3d_metrics_for_matched_tracks_visible_orientation_valid_pairs():
    """A NaN predicted edge yaw is excluded from the edge-orientation pair count."""
    pred, gt = _make_metric_pair([np.nan, 0.18])
    metrics = compute_3d_metrics_for_matched(
        pred,
        gt,
        include_orient=True,
        include_size=True,
        include_uv=True,
        include_visible_orient=True,
    )
    assert metrics["_direct_orient_visible_matched"] == 2
    assert metrics["_edge_orient_visible_matched"] == 1
    assert metrics["edge_orient_visible"] > 0


def test_aggregate_3d_metric_groups_keeps_group_defaults():
    """Groups are averaged weighted by ``matched``; an empty group yields the empty-metrics default."""
    stats = {
        "whole": [
            {
                "depth_abs": 2.0,
                "depth_rel": 0.2,
                "depth_rmse": 3.0,
                "center": 1.0,
                "uv": 4.0,
                "orient": 5.0,
                "size": 0.5,
                "matched": 2,
            },
            {
                "depth_abs": 4.0,
                "depth_rel": 0.4,
                "depth_rmse": 6.0,
                "center": 3.0,
                "uv": 8.0,
                "orient": 7.0,
                "size": 1.5,
                "matched": 1,
            },
        ],
        "face": [],
    }
    aggregated = aggregate_3d_metric_groups(stats)
    assert aggregated["whole"]["matched"] == 3
    assert np.isclose(aggregated["whole"]["depth_abs"], round((2.0 * 2 + 4.0 * 1) / 3, 5))
    assert np.isclose(aggregated["whole"]["uv"], round((4.0 * 2 + 8.0 * 1) / 3, 5))
    assert aggregated["face"] == empty_3d_metrics(
        include_orient=False, include_size=False, include_uv=True, include_visible_orient=True
    )


def test_aggregate_3d_metric_groups_weights_visible_orientation_by_valid_pairs():
    """Visible-orientation metrics are weighted by their own valid-pair counts, not by ``matched``."""
    stats = {
        "whole": [],
        "face": [
            {
                "depth_abs": 1.0,
                "depth_rel": 0.1,
                "depth_rmse": 1.5,
                "center": 2.0,
                "uv": 3.0,
                "size": 0.5,
                "matched": 100,
                "direct_orient_visible": 0.0,
                "edge_orient_visible": 0.0,
                "_direct_orient_visible_matched": 0,
                "_edge_orient_visible_matched": 0,
            },
            {
                "depth_abs": 2.0,
                "depth_rel": 0.2,
                "depth_rmse": 2.5,
                "center": 4.0,
                "uv": 6.0,
                "size": 1.0,
                "matched": 1,
                "direct_orient_visible": 15.0,
                "edge_orient_visible": 30.0,
                "_direct_orient_visible_matched": 1,
                "_edge_orient_visible_matched": 1,
            },
        ],
    }
    aggregated = aggregate_3d_metric_groups(stats)
    assert aggregated["face"]["matched"] == 101
    assert np.isclose(aggregated["face"]["edge_orient_visible"], 30.0)
    assert np.isclose(aggregated["face"]["direct_orient_visible"], 15.0)


def test_visible_face_edge_geometry_decodes_and_orders_left_to_right():
    """A visible face decodes to five ordered edge points whose z equals the reported depths."""
    calib = _pinhole_calib(depth_scale=1.0)
    target = np.full(42, np.nan, dtype=np.float32)
    target[2] = 20.0
    target[3:6] = [4.0, 1.5, 1.8]
    target[6] = -0.6
    target[7:9] = [0.5, 0.55]
    target[10:18] = [0.0, 0.0, 18.0, 0.0, 0.46, 0.55, 0.9, 1.0]
    decoded = decode_visible_face_edge_from_gt(target, 0, calib, 640, 480, {0}, set(), face_type=0)
    assert decoded is not None
    assert decoded["points_2d"].shape == (5, 2)
    assert decoded["points_3d"].shape == (5, 3)
    assert decoded["depths"].shape == (5,)
    assert np.all(np.diff(decoded["points_2d"][:, 0]) >= -1e-5)
    assert np.allclose(decoded["points_3d"][:, 2], decoded["depths"])
    yaw = edge_points_to_yaw(decoded["points_3d"], decoded["face_type"])
    assert np.isfinite(yaw)


def test_decode_visible_face_edge_from_gt_requires_face_visibility_threshold():
    """A face scoring below ``score_thr`` is not decoded."""
    calib = _pinhole_calib(depth_scale=1.0)
    target = np.full(42, np.nan, dtype=np.float32)
    target[2] = 20.0
    target[3:6] = [4.0, 1.5, 1.8]
    target[6] = 0.25
    target[7:9] = [0.5, 0.6]
    target[10:18] = [0.0, 0.0, 18.0, 0.0, 0.45, 0.55, 0.8, 1.0]
    target[34:42] = [0.0, 0.0, 17.0, 0.0, 0.55, 0.52, 0.2, 1.0]
    visible = decode_visible_face_edge_from_gt(target, 0, calib, 640, 480, {0}, set(), face_type=0, score_thr=0.3)
    hidden = decode_visible_face_edge_from_gt(target, 0, calib, 640, 480, {0}, set(), face_type=3, score_thr=0.3)
    assert visible is not None
    assert hidden is None


def test_collect_face_bottom_edges_returns_all_requested_faces():
    """One ordered five-point edge is returned per requested face."""
    calib = _pinhole_calib()
    center = np.array([0.0, 0.0, 20.0], dtype=np.float32)
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    corners = compute_3d_box_corners(center, dims, -0.4, face_type=-1)
    edge_points_3d, edge_points_2d = collect_face_bottom_edges(corners, [0, 3], calib, num_samples=5)
    assert edge_points_3d.shape == (2, 5, 3)
    assert edge_points_2d.shape == (2, 5, 2)
    assert np.all(np.diff(edge_points_2d[0, :, 0]) >= -1e-5)
    assert np.all(np.diff(edge_points_2d[1, :, 0]) >= -1e-5)


def test_collect_precomputed_edge_points_2d_preserves_visible_face_order():
    """Requested faces come first in the requested order, followed by the remaining valid faces."""
    edge_faces_points_2d = np.stack(
        [
            np.full((5, 2), 10.0, dtype=np.float32),
            np.full((5, 2), 20.0, dtype=np.float32),
            np.full((5, 2), 30.0, dtype=np.float32),
            np.full((5, 2), 40.0, dtype=np.float32),
        ],
        axis=0,
    )
    edge_faces_valid = np.array([True, False, True, True], dtype=bool)
    stacked = collect_precomputed_edge_points_2d(edge_faces_points_2d, edge_faces_valid, visible_face_types=(3, 0))
    assert stacked.shape == (3, 5, 2)
    assert np.all(stacked[0] == 40.0)
    assert np.all(stacked[1] == 10.0)
    assert np.all(stacked[2] == 30.0)


def test_edge_points_to_yaw_matches_face_convention_for_visible_faces():
    """For every camera-facing face, the yaw recovered from its bottom edge equals the box yaw."""
    calib = _pinhole_calib()
    center = np.array([0.0, 0.0, 20.0], dtype=np.float32)
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)

    def face_normal(face_type, rot_y):
        """Return the (x, z) vector this test uses as the outward direction of ``face_type`` at ``rot_y``."""
        forward = np.array([np.cos(rot_y), -np.sin(rot_y)], dtype=np.float64)
        if face_type == 0:
            return forward
        if face_type == 1:
            return -forward
        width_axis = np.array([np.sin(rot_y), np.cos(rot_y)], dtype=np.float64)
        return width_axis if face_type == 2 else -width_axis

    for face_type in range(4):
        matched_visible = 0
        for rot_y in np.linspace(-np.pi, np.pi, 181):
            corners = compute_3d_box_corners(center, dims, float(rot_y), face_type=-1)
            points_3d, points_2d = project_face_bottom_edge(corners, face_type, calib, num_samples=5)
            assert points_3d is not None
            assert points_2d is not None
            midpoint = np.mean(points_3d[:, [0, 2]], axis=0)
            # Skip faces whose normal does not point back towards the camera origin.
            if float(np.dot(face_normal(face_type, rot_y), midpoint)) >= 0.0:
                continue
            yaw = edge_points_to_yaw(points_3d, face_type)
            diff = _wrapped_angle_diff(yaw, rot_y)
            assert np.isclose(diff, 0.0, atol=1e-5), (face_type, rot_y, yaw)
            matched_visible += 1
        assert matched_visible > 0


def test_visible_face_edges_to_yaw_matches_true_yaw_for_two_visible_gt_faces():
    """Combining the exact edges of faces 0 and 3 recovers the box yaw."""
    calib = _pinhole_calib()
    center = np.array([0.0, 0.0, 20.0], dtype=np.float32)
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    rot_y = 0.75
    corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1)
    face_edges = {}
    for face_type in (0, 3):
        points_3d, points_2d = project_face_bottom_edge(corners, face_type, calib, num_samples=5)
        assert points_3d is not None
        assert points_2d is not None
        face_edges[face_type] = points_3d
    yaw = visible_face_edges_to_yaw(face_edges, face_scores={0: 0.9, 3: 0.85})
    diff = _wrapped_angle_diff(yaw, rot_y)
    assert np.isclose(diff, 0.0, atol=1e-5), yaw


def test_visible_face_edges_to_yaw_is_more_stable_with_two_noisy_gt_faces():
    """Under noise, the two-face yaw has at least 10% lower mean error than single-face yaws."""
    rng = np.random.default_rng(0)
    calib = _pinhole_calib()
    center = np.array([0.0, 0.0, 20.0], dtype=np.float32)
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    single_errors = []
    combined_errors = []
    side_cases = [
        (0.75, (0, 3)),
        (2.35, (0, 2)),
        (-0.75, (1, 3)),
        (-2.35, (1, 2)),
    ]
    for rot_y, visible_faces in side_cases:
        corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1)
        base_edges = {}
        for face_type in visible_faces:
            points_3d, points_2d = project_face_bottom_edge(corners, face_type, calib, num_samples=5)
            assert points_3d is not None
            assert points_2d is not None
            base_edges[face_type] = points_3d

        for _ in range(200):
            noisy_edges = {}
            for face_type, points_3d in base_edges.items():
                # Perturb only x and z.
                noise = np.zeros_like(points_3d)
                noise[:, [0, 2]] = rng.normal(0.0, 0.05, size=(points_3d.shape[0], 2))
                noisy = points_3d + noise
                noisy_edges[face_type] = noisy.astype(np.float32)
                yaw = edge_points_to_yaw(noisy, face_type)
                diff = _wrapped_angle_diff(yaw, rot_y)
                single_errors.append(abs(diff))
            combined_yaw = visible_face_edges_to_yaw(
                noisy_edges, face_scores={visible_faces[0]: 0.9, visible_faces[1]: 0.9}
            )
            combined_diff = _wrapped_angle_diff(combined_yaw, rot_y)
            combined_errors.append(abs(combined_diff))

    mean_single = float(np.mean(single_errors))
    mean_combined = float(np.mean(combined_errors))
    assert mean_combined < mean_single * 0.9


def test_decode_3d_target_collects_all_visible_face_edges():
    """Decoding a target with two scored faces returns edges for both, in face order (0, 3)."""
    from ultralytics.utils.plotting_3d import decode_3d_target

    calib = _pinhole_calib(depth_scale=1.0)
    target = np.full(42, np.nan, dtype=np.float32)
    target[2] = 20.0
    target[3:6] = [4.0, 1.5, 1.8]
    target[6] = 0.25
    target[7:9] = [0.5, 0.6]
    target[10:18] = [0.0, 0.0, 18.0, 0.0, 0.45, 0.55, 0.8, 1.0]
    target[34:42] = [0.0, 0.0, 17.0, 0.0, 0.55, 0.52, 0.7, 1.0]
    decoded = decode_3d_target(target, 0, calib, 640, 480, {0}, set())
    assert decoded is not None
    assert decoded["visible_face_types"] == (0, 3)
    assert decoded["edge_points_2d"].shape == (2, 5, 2)
    assert decoded["edge_points_3d"].shape == (2, 5, 3)


def test_decode_multi_visible_face_yaw_from_prediction_uses_two_edges_for_valid_cut_objects():
    """For a predicted cut-in box at the left border, the decoded yaw matches the box yaw."""
    calib = _pinhole_calib()
    center = np.array([-7.0, 0.0, 20.0], dtype=np.float32)
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    rot_y = -0.75
    anchor = np.array([10.0, 20.0], dtype=np.float32)
    stride = 8.0
    pred_bbox_xyxy = np.array([0.0, 120.0, 80.0, 240.0], dtype=np.float32)
    pred = _make_whole_box_pred(center, dims, rot_y, calib, anchor, stride)
    pred[5] = 0.9
    pred[39] = 10.0  # cut_in

    corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1)
    pred_edge = np.zeros(60, dtype=np.float32)
    front_points_3d, front_points_2d = project_face_bottom_edge(corners, 0, calib, num_samples=5)
    side_points_3d, side_points_2d = project_partial_face_bottom_edge(corners, 3, calib, 640, 480, num_samples=5)
    assert front_points_3d is not None and front_points_2d is not None
    assert side_points_3d is not None and side_points_2d is not None
    for face_type, points_3d, points_2d in ((0, front_points_3d, front_points_2d), (3, side_points_3d, side_points_2d)):
        _write_edge_block(pred_edge, face_type, points_3d, points_2d, anchor, stride)

    yaw = decode_multi_visible_face_yaw_from_prediction(
        pred,
        pred_edge,
        anchor,
        stride,
        calib,
        fallback_face_type=0,
        bbox_xyxy=pred_bbox_xyxy,
        img_w=640,
    )
    diff = _wrapped_angle_diff(yaw, rot_y)
    assert np.isclose(diff, 0.0, atol=1e-3)


def test_decode_cut_partial_side_edge_from_prediction_requires_known_cut_side():
    """Decoding returns None when ``cut_side`` is None, even with a valid side edge present."""
    from ultralytics.utils.plotting_3d import decode_cut_partial_side_edge_from_prediction

    calib = _pinhole_calib()
    center = np.array([-7.0, 0.0, 20.0], dtype=np.float32)
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    rot_y = -0.75
    anchor = np.array([10.0, 20.0], dtype=np.float32)
    stride = 8.0
    pred = _make_whole_box_pred(center, dims, rot_y, calib, anchor, stride)
    pred[39] = 10.0  # cut_in

    corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1)
    pred_edge = np.zeros(60, dtype=np.float32)
    side_points_3d, side_points_2d = project_partial_face_bottom_edge(corners, 3, calib, 640, 480, num_samples=5)
    assert side_points_3d is not None and side_points_2d is not None
    _write_edge_block(pred_edge, 3, side_points_3d, side_points_2d, anchor, stride)

    decoded = decode_cut_partial_side_edge_from_prediction(pred, pred_edge, anchor, stride, 640, cut_side=None)
    assert decoded is None


def test_decode_cut_partial_side_edge_from_prediction_uses_geometry_for_cut_out_boxes():
    """With corners supplied, the side face chosen for a predicted cut-out box depends on the box yaw."""
    from ultralytics.utils.plotting_3d import decode_cut_partial_side_edge_from_prediction

    calib = _pinhole_calib()
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    anchor = np.array([10.0, 20.0], dtype=np.float32)
    stride = 8.0
    cases = [
        (np.array([-8.0, 0.0, 20.0], dtype=np.float32), 2.6, "left", 2),
        (np.array([8.0, 0.0, 20.0], dtype=np.float32), 0.75, "right", 3),
    ]
    for center, rot_y, cut_side, face_type in cases:
        pred = np.zeros(41, dtype=np.float32)
        pred[24] = center[2]
        pred[25:27] = [center[0] / center[2] * calib["fx"] / stride + (calib["cx"] / stride - anchor[0]), 0.0]
        pred[27:30] = dims
        pred[38 + 2] = 10.0  # cut_out
        if rot_y >= np.pi / 2:
            pred[33] = 10.0
            pred[37] = np.sin(rot_y - np.pi)
        else:
            pred[30] = 10.0
            pred[34] = np.sin(rot_y)

        corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1)
        pred_edge = np.zeros(60, dtype=np.float32)
        for side_face_type in (2, 3):
            side_points_3d, side_points_2d = project_partial_face_bottom_edge(
                corners, side_face_type, calib, 640, 480, num_samples=5
            )
            assert side_points_3d is not None and side_points_2d is not None
            _write_edge_block(pred_edge, side_face_type, side_points_3d, side_points_2d, anchor, stride)

        decoded = decode_cut_partial_side_edge_from_prediction(
            pred,
            pred_edge,
            anchor,
            stride,
            640,
            cut_side=cut_side,
            corners_3d=corners,
        )
        assert decoded is not None
        assert decoded["face_type"] == face_type


def test_decode_multi_visible_face_yaw_from_prediction_uses_two_visible_faces():
    """With two scored faces and exact edges, the decoded yaw matches the box yaw."""
    calib = _pinhole_calib()
    center = np.array([0.0, 0.0, 20.0], dtype=np.float32)
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    rot_y = 0.75
    anchor = np.array([10.0, 20.0], dtype=np.float32)
    stride = 8.0
    pred = _make_whole_box_pred(center, dims, rot_y, calib, anchor, stride)
    pred[5] = 0.9
    pred[23] = 0.85

    corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1)
    pred_edge = np.zeros(60, dtype=np.float32)
    for face_type in (0, 3):
        points_3d, points_2d = project_face_bottom_edge(corners, face_type, calib, num_samples=5)
        assert points_3d is not None
        assert points_2d is not None
        _write_edge_block(pred_edge, face_type, points_3d, points_2d, anchor, stride)

    yaw = decode_multi_visible_face_yaw_from_prediction(
        pred, pred_edge, anchor, stride, calib, fallback_face_type=0, img_w=640
    )
    diff = _wrapped_angle_diff(yaw, rot_y)
    assert np.isclose(diff, 0.0, atol=1e-5), yaw


def test_decode_edge_yaw_selection_from_prediction_limits_near_cut_boxes_to_two_edges():
    """With three scored faces, the selection keeps exactly two, (0, 3), and recovers the box yaw."""
    calib = _pinhole_calib()
    center = np.array([-6.0, 0.0, 20.0], dtype=np.float32)
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    rot_y = -0.75
    anchor = np.array([10.0, 20.0], dtype=np.float32)
    stride = 8.0
    bbox_xyxy = np.array([120.0, 120.0, 240.0, 240.0], dtype=np.float32)
    pred = np.zeros(41, dtype=np.float32)
    pred[24] = center[2]
    pred[27:30] = dims
    pred[30] = 10.0
    pred[34] = np.sin(rot_y)
    pred[39] = 10.0  # cut_in, but the box is still fully inside the image

    corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1)
    for face_type, score in ((0, 0.84), (1, 0.93), (3, 0.91)):
        _write_face_prediction(pred, corners, face_type, score, calib, anchor, stride)

    pred_edge = np.zeros(60, dtype=np.float32)
    for face_type in (0, 1, 3):
        points_3d, points_2d = project_face_bottom_edge(corners, face_type, calib, num_samples=5)
        assert points_3d is not None
        assert points_2d is not None
        _write_edge_block(pred_edge, face_type, points_3d, points_2d, anchor, stride)

    selection = decode_edge_yaw_selection_from_prediction(
        pred,
        pred_edge,
        anchor,
        stride,
        calib,
        bbox_xyxy=bbox_xyxy,
        img_w=640,
        img_h=480,
        max_lateral_dist_m=5.0,
    )
    assert selection["face_types"] == (0, 3)
    assert selection["two_face_eligible"] is True
    assert selection["is_valid"] is True
    assert np.asarray(selection["edge_points_2d"], dtype=np.float32).shape == (2, 5, 2)
    diff = _wrapped_angle_diff(selection["yaw"], rot_y)
    assert np.isclose(diff, 0.0, atol=1e-5), selection["yaw"]


def test_decode_edge_yaw_selection_from_prediction_picks_higher_score_longitudinal_and_side_faces():
    """Face 1 (score 0.99) is chosen over face 0 (0.95), paired with side face 3."""
    calib = _pinhole_calib()
    center = np.array([0.0, 0.0, 20.0], dtype=np.float32)
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    rot_y = 0.75
    anchor = np.array([10.0, 20.0], dtype=np.float32)
    stride = 8.0
    pred = _make_whole_box_pred(center, dims, rot_y, calib, anchor, stride)

    corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1)
    for face_type, score in ((0, 0.95), (1, 0.99), (3, 0.80)):
        _write_face_prediction(pred, corners, face_type, score, calib, anchor, stride)

    pred_edge = np.zeros(60, dtype=np.float32)
    for face_type in (0, 1, 3):
        points_3d, points_2d = project_face_bottom_edge(corners, face_type, calib, num_samples=5)
        assert points_3d is not None and points_2d is not None
        _write_edge_block(pred_edge, face_type, points_3d, points_2d, anchor, stride)

    selection = decode_edge_yaw_selection_from_prediction(
        pred,
        pred_edge,
        anchor,
        stride,
        calib,
        img_w=640,
        img_h=480,
        max_lateral_dist_m=5.0,
    )
    assert selection["face_types"] == (1, 3)
    assert selection["is_valid"] is True
    diff = _wrapped_angle_diff(selection["yaw"], rot_y)
    assert np.isclose(diff, 0.0, atol=1e-5), selection["yaw"]


def test_decode_edge_yaw_selection_from_prediction_uses_face_decode_best_visible_face_as_primary():
    """The higher-scoring face 3 (0.95) is listed before face 0 (0.90) in the selection."""
    calib = _pinhole_calib()
    center = np.array([0.0, 0.0, 20.0], dtype=np.float32)
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    rot_y = 0.75
    anchor = np.array([10.0, 20.0], dtype=np.float32)
    stride = 8.0
    pred = _make_whole_box_pred(center, dims, rot_y, calib, anchor, stride)

    corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1)
    for face_type, score in ((0, 0.90), (3, 0.95)):
        _write_face_prediction(pred, corners, face_type, score, calib, anchor, stride)

    pred_edge = np.zeros(60, dtype=np.float32)
    for face_type in (0, 3):
        points_3d, points_2d = project_face_bottom_edge(corners, face_type, calib, num_samples=5)
        assert points_3d is not None and points_2d is not None
        _write_edge_block(pred_edge, face_type, points_3d, points_2d, anchor, stride)

    selection = decode_edge_yaw_selection_from_prediction(
        pred,
        pred_edge,
        anchor,
        stride,
        calib,
        img_w=640,
        img_h=480,
        max_lateral_dist_m=30.0,
    )
    assert selection["face_types"] == (3, 0)
    assert selection["two_face_eligible"] is True
    assert selection["is_valid"] is True
    diff = _wrapped_angle_diff(selection["yaw"], rot_y)
    assert np.isclose(diff, 0.0, atol=1e-5), selection["yaw"]


def test_decode_3d_prediction_uses_highest_score_visible_face_as_anchor():
    # NOTE(review): this test runs past the end of the visible source, which stops at a dangling `assert`. Only the
    # visible statements are reproduced here, unchanged; restore the remaining assertions from the full file.
    # NOTE(review): `decode_3d_prediction` is not imported anywhere in the visible part of this file — confirm it
    # is brought into scope elsewhere, otherwise this test raises NameError.
    calib = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0}
    center = np.array([0.0, 0.0, 20.0], dtype=np.float32)
    dims = np.array([4.0, 1.5, 1.8], dtype=np.float32)
    rot_y = 0.75
    anchor = np.array([10.0, 20.0], dtype=np.float32)
    stride = 8.0
    pred = np.zeros(41, dtype=np.float32)
    pred[24] = center[2]
    pred[25:27] = [center[0] / center[2] * calib["fx"] / stride + (calib["cx"] / stride - anchor[0]), 0.0]
    pred[27:30] = dims
    pred[30] = 10.0
    pred[34] = np.sin(rot_y)
    corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1)
    for face_type, score in ((0, 0.95), (1, 0.99), (3, 0.80)):
        face_center = face_center_from_corners(corners, face_type)
        face_uv = project_3d_to_2d(face_center[None, :], calib)[0]
        off = face_type * 6
        pred[off] = face_center[2]
        pred[off + 1 : off + 3] = [face_uv[0] / stride - anchor[0], face_uv[1] / stride - anchor[1]]
        pred[off + 5] = score
    decoded = decode_3d_prediction(
        pred,
        anchor,
        stride,
        calib,
        640,
        480,
        {0},
        set(),
        0,
        pred_edge_60=None,
    )
    assert decoded is not None
decoded["visible_face_types"] == (0, 1, 3) assert decoded["visible_face_type"] == 1 def test_decode_3d_prediction_keeps_top1_face_even_below_visibility_threshold(): calib = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0} center = np.array([0.0, 0.0, 20.0], dtype=np.float32) dims = np.array([4.0, 1.5, 1.8], dtype=np.float32) rot_y = 0.75 anchor = np.array([10.0, 20.0], dtype=np.float32) stride = 8.0 pred = np.zeros(41, dtype=np.float32) pred[24] = center[2] pred[25:27] = [center[0] / center[2] * calib["fx"] / stride + (calib["cx"] / stride - anchor[0]), 0.0] pred[27:30] = dims pred[30] = 10.0 pred[34] = np.sin(rot_y) corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1) face_center = face_center_from_corners(corners, 3) face_uv = project_3d_to_2d(face_center[None, :], calib)[0] pred[18] = face_center[2] pred[19:21] = [face_uv[0] / stride - anchor[0], face_uv[1] / stride - anchor[1]] pred[23] = 0.03 decoded = decode_3d_prediction( pred, anchor, stride, calib, 640, 480, {0}, set(), 0, pred_edge_60=None, ) assert decoded is not None assert decoded["visible_face_type"] == 3 assert decoded["visible_face_types"] == (3,) def test_decode_edge_yaw_selection_from_prediction_uses_face_decode_primary_face_before_strict_companion_gate(): calib = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0} center = np.array([0.0, 0.0, 20.0], dtype=np.float32) dims = np.array([4.0, 1.5, 1.8], dtype=np.float32) rot_y = 0.75 anchor = np.array([10.0, 20.0], dtype=np.float32) stride = 8.0 pred = np.zeros(41, dtype=np.float32) pred[24] = center[2] pred[25:27] = [center[0] / center[2] * calib["fx"] / stride + (calib["cx"] / stride - anchor[0]), 0.0] pred[27:30] = dims pred[30] = 10.0 pred[34] = np.sin(rot_y) pred[5] = 0.09 pred[23] = 0.09 corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1) pred_edge = np.zeros(60, dtype=np.float32) for face_type in (0, 3): points_3d, points_2d = project_face_bottom_edge(corners, face_type, calib, num_samples=5) assert 
points_3d is not None and points_2d is not None off = face_type * 15 face_block = pred_edge[off : off + 15].reshape(5, 3) face_block[:, 0] = points_2d[:, 0] / stride - anchor[0] face_block[:, 1] = points_2d[:, 1] / stride - anchor[1] face_block[:, 2] = points_3d[:, 2] selection = decode_edge_yaw_selection_from_prediction(pred, pred_edge, anchor, stride, calib, img_w=640, img_h=480) assert selection["face_types"] == (0,) assert selection["two_face_eligible"] is False assert selection["is_valid"] is False diff = (selection["yaw"] - rot_y + np.pi) % (2 * np.pi) - np.pi assert np.isclose(diff, 0.0, atol=1e-5), selection["yaw"] def test_decode_edge_yaw_selection_from_prediction_rejects_short_cut_side_visibility(): calib = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0} center = np.array([-4.0, 0.0, 20.0], dtype=np.float32) dims = np.array([4.0, 1.5, 1.8], dtype=np.float32) rot_y = -0.75 anchor = np.array([10.0, 20.0], dtype=np.float32) stride = 8.0 bbox_xyxy = np.array([0.0, 120.0, 80.0, 240.0], dtype=np.float32) pred = np.zeros(41, dtype=np.float32) pred[24] = center[2] pred[25:27] = [center[0] / center[2] * calib["fx"] / stride + (calib["cx"] / stride - anchor[0]), 0.0] pred[27:30] = dims pred[30] = 10.0 pred[34] = np.sin(rot_y) pred[5] = 0.95 pred[39] = 10.0 # cut_in corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1) pred_edge = np.zeros(60, dtype=np.float32) front_points_3d, front_points_2d = project_face_bottom_edge(corners, 0, calib, num_samples=5) side_points_3d, _ = project_partial_face_bottom_edge(corners, 3, calib, 640, 480, num_samples=5) assert front_points_3d is not None and front_points_2d is not None assert side_points_3d is not None short_len = float(dims[0] * 0.4) side_dir = side_points_3d[-1] - side_points_3d[0] side_dir = side_dir / np.linalg.norm(side_dir) short_side_points_3d = side_points_3d[0] + np.linspace(0.0, short_len, 5, dtype=np.float32)[:, None] * side_dir[None, :] short_side_points_2d = 
project_3d_to_2d(short_side_points_3d, calib) assert np.all(np.isfinite(short_side_points_2d)) for face_type, points_3d, points_2d in ((0, front_points_3d, front_points_2d), (3, short_side_points_3d, short_side_points_2d)): off = face_type * 15 face_block = pred_edge[off : off + 15].reshape(5, 3) face_block[:, 0] = points_2d[:, 0] / stride - anchor[0] face_block[:, 1] = points_2d[:, 1] / stride - anchor[1] face_block[:, 2] = points_3d[:, 2] selection = decode_edge_yaw_selection_from_prediction( pred, pred_edge, anchor, stride, calib, bbox_xyxy=bbox_xyxy, img_w=640, img_h=480, max_lateral_dist_m=5.0, ) assert selection["cut_side_visible_length_ratio"] is not None assert selection["cut_side_visible_length_ratio"] < 0.5 assert selection["cut_side_visible_ratio_ok"] is False assert selection["is_valid"] is False def test_decode_3d_prediction_ignores_cut_logits_for_non_border_box(): from ultralytics.utils.plotting_3d import decode_3d_prediction calib = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0} center = np.array([-6.0, 0.0, 20.0], dtype=np.float32) dims = np.array([4.0, 1.5, 1.8], dtype=np.float32) rot_y = -0.75 anchor = np.array([10.0, 20.0], dtype=np.float32) stride = 8.0 bbox_xyxy = np.array([120.0, 120.0, 240.0, 240.0], dtype=np.float32) pred = np.zeros(41, dtype=np.float32) pred[24] = center[2] pred[27:30] = dims pred[30] = 10.0 pred[34] = np.sin(rot_y) pred[0] = center[2] pred[1:3] = [center[0] / center[2] * calib["fx"] / stride + (calib["cx"] / stride - anchor[0]), 0.0] pred[18] = center[2] pred[19:21] = [center[0] / center[2] * calib["fx"] / stride + (calib["cx"] / stride - anchor[0]), 0.0] pred[5] = 0.9 pred[23] = 0.8 pred[39] = 10.0 # raw cut_in logit, but the box is not clipped at the border corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1) pred_edge = np.zeros(60, dtype=np.float32) for face_type in (0, 3): points_3d, points_2d = project_face_bottom_edge(corners, face_type, calib, num_samples=5) assert points_3d is not None assert 
points_2d is not None off = face_type * 15 face_block = pred_edge[off : off + 15].reshape(5, 3) face_block[:, 0] = points_2d[:, 0] / stride - anchor[0] face_block[:, 1] = points_2d[:, 1] / stride - anchor[1] face_block[:, 2] = points_3d[:, 2] decoded = decode_3d_prediction( pred, anchor, stride, calib, 640, 480, {0}, set(), 0, pred_edge_60=pred_edge, bbox_xyxy=bbox_xyxy, ) assert decoded is not None assert decoded["visible_face_types"] == (0, 3) assert decoded["edge_points_2d"].shape == (2, 5, 2) def test_decode_3d_prediction_collects_all_visible_face_edges(): from ultralytics.utils.plotting_3d import decode_3d_prediction calib = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0} center = np.array([0.0, 0.0, 20.0], dtype=np.float32) dims = np.array([4.0, 1.5, 1.8], dtype=np.float32) rot_y = 0.75 anchor = np.array([10.0, 20.0], dtype=np.float32) stride = 8.0 pred = np.zeros(41, dtype=np.float32) pred[24] = center[2] pred[27:30] = dims pred[30] = 10.0 pred[34] = np.sin(rot_y) pred[0] = center[2] pred[1:3] = [center[0] / center[2] * calib["fx"] / stride + (calib["cx"] / stride - anchor[0]), 0.0] pred[5] = 0.9 pred[23] = 0.85 corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1) pred_edge = np.zeros(60, dtype=np.float32) for face_type in (0, 3): points_3d, points_2d = project_face_bottom_edge(corners, face_type, calib, num_samples=5) assert points_3d is not None assert points_2d is not None off = face_type * 15 face_block = pred_edge[off : off + 15].reshape(5, 3) face_block[:, 0] = points_2d[:, 0] / stride - anchor[0] face_block[:, 1] = points_2d[:, 1] / stride - anchor[1] face_block[:, 2] = points_3d[:, 2] decoded = decode_3d_prediction( pred, anchor, stride, calib, 640, 480, {0}, set(), 0, pred_edge_60=pred_edge, bbox_xyxy=np.array([0.0, 120.0, 80.0, 240.0], dtype=np.float32), ) assert decoded is not None assert decoded["visible_face_types"] == (0, 3) assert decoded["edge_points_2d"].shape == (2, 5, 2) assert decoded["edge_points_3d"].shape == (2, 5, 
3) def test_decode_3d_prediction_does_not_duplicate_partial_side_face_when_already_visible(): from ultralytics.utils.plotting_3d import decode_3d_prediction calib = {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0} center = np.array([-7.0, 0.0, 20.0], dtype=np.float32) dims = np.array([4.0, 1.5, 1.8], dtype=np.float32) rot_y = -0.75 anchor = np.array([10.0, 20.0], dtype=np.float32) stride = 8.0 pred_bbox_xyxy = np.array([0.0, 120.0, 80.0, 240.0], dtype=np.float32) pred = np.zeros(41, dtype=np.float32) pred[24] = center[2] pred[27:30] = dims pred[30] = 10.0 pred[34] = np.sin(rot_y) pred[0] = center[2] pred[1:3] = [center[0] / center[2] * calib["fx"] / stride + (calib["cx"] / stride - anchor[0]), 0.0] pred[18] = center[2] pred[19:21] = [center[0] / center[2] * calib["fx"] / stride + (calib["cx"] / stride - anchor[0]), 0.0] pred[5] = 0.9 pred[23] = 0.8 pred[39] = 10.0 # cut_in corners = compute_3d_box_corners(center, dims, rot_y, face_type=-1) pred_edge = np.zeros(60, dtype=np.float32) for face_type in (0, 3): points_3d, points_2d = project_face_bottom_edge(corners, face_type, calib, num_samples=5) assert points_3d is not None assert points_2d is not None off = face_type * 15 face_block = pred_edge[off : off + 15].reshape(5, 3) face_block[:, 0] = points_2d[:, 0] / stride - anchor[0] face_block[:, 1] = points_2d[:, 1] / stride - anchor[1] face_block[:, 2] = points_3d[:, 2] decoded = decode_3d_prediction( pred, anchor, stride, calib, 640, 480, {0}, set(), 0, pred_edge_60=pred_edge, bbox_xyxy=pred_bbox_xyxy, ) assert decoded is not None assert decoded["visible_face_types"] == (0, 3) assert decoded["edge_points_2d"].shape == (2, 5, 2) assert decoded["edge_points_3d"].shape == (2, 5, 3) def test_face_metric_store_counts_visible_yaw_objectwise(): validator = Ground3DDetectionValidator.__new__(Ground3DDetectionValidator) validator.stats_3d = {"whole": [], "face": []} face_pred = validator._metric_store() face_gt = validator._metric_store() pred_rows = [ { "center": 
np.array([1.0, 2.0, 10.0], dtype=np.float32), "depth": 10.0, "yaw": 0.1, "edge_yaw": 0.12, "dims": np.array([4.0, 1.5, 1.8], dtype=np.float32), "uv": np.array([100.0, 120.0], dtype=np.float32), }, { "center": np.array([1.1, 2.1, 10.1], dtype=np.float32), "depth": 10.1, "yaw": 0.1, "edge_yaw": 0.13, "dims": np.array([4.0, 1.5, 1.8], dtype=np.float32), "uv": np.array([104.0, 124.0], dtype=np.float32), }, ] gt_rows = [ { "center": np.array([1.0, 2.0, 10.0], dtype=np.float32), "depth": 10.0, "yaw": 0.15, "edge_yaw": 0.15, "dims": np.array([4.0, 1.5, 1.8], dtype=np.float32), "uv": np.array([101.0, 121.0], dtype=np.float32), }, { "center": np.array([1.0, 2.0, 10.0], dtype=np.float32), "depth": 10.0, "yaw": 0.15, "edge_yaw": 0.15, "dims": np.array([4.0, 1.5, 1.8], dtype=np.float32), "uv": np.array([105.0, 125.0], dtype=np.float32), }, ] for pred_row, gt_row in zip(pred_rows, gt_rows): validator._append_metric_attr(face_pred, pred_row) validator._append_metric_attr(face_gt, gt_row) validator._aggregate_face_metric_store( face_pred, face_gt, pred_visible_yaw=[0.1], pred_edge_visible_yaw=[0.12], gt_visible_target_yaw=[0.15], ) metrics = validator.stats_3d["face"][0] assert metrics["matched"] == 2 assert metrics["_direct_orient_visible_matched"] == 1 assert metrics["_edge_orient_visible_matched"] == 1 assert np.isclose(metrics["direct_orient_visible"], np.degrees(0.05)) assert np.isclose(metrics["edge_orient_visible"], np.degrees(0.03)) def test_validator_desc_and_row_alignment_include_vyaw(): from ultralytics.models.yolo.detect.train import Ground3DDetectionValidator validator = Ground3DDetectionValidator.__new__(Ground3DDetectionValidator) validator.metrics_3d_results = { "whole": {"matched": 1, "depth_abs": 0.1, "uv": 0.2, "size": 0.3, "orient": 0.4}, "face": { "matched": 2, "depth_abs": 0.5, "uv": 0.6, "size": 0.7, "edge_orient_visible": 0.8, }, } validator.metrics = SimpleNamespace(keys=("P", "R", "mAP50", "mAP50-95"), nt_per_class=np.array([1]), mean_results=lambda: (0, 
0, 0, 0), nt_per_image=np.array([1]), ap_class_index=np.array([0]), stats=[]) validator.seen = 1 validator.training = False validator.args = SimpleNamespace(verbose=False) validator.nc = 1 validator.names = {0: "car"} validator.trainer = SimpleNamespace( loss_names=("box", "cls", "dfl", "z3d", "uv", "size", "ycls", "ydeg", "ccls", "fz", "fuv", "fsize", "fcls", "euv", "ez") ) desc = validator.get_desc() assert "Vyaw" in desc row_values = [ "", "all-3d", f"{validator.metrics_3d_results['whole']['matched']}", f"{validator.metrics_3d_results['face']['matched']}", "-", f"{validator.metrics_3d_results['whole']['depth_abs']:.3g}", f"{validator.metrics_3d_results['whole']['uv']:.3g}", f"{validator.metrics_3d_results['whole']['size']:.3g}", "-", f"{validator.metrics_3d_results['whole']['orient']:.3g}", "-", f"{validator.metrics_3d_results['face']['depth_abs']:.3g}", f"{validator.metrics_3d_results['face']['uv']:.3g}", f"{validator.metrics_3d_results['face']['size']:.3g}", "-", f"{validator.metrics_3d_results['face']['edge_orient_visible']:.3g}", ] assert len(row_values) == 1 + len(validator.trainer.loss_names) def test_3d_loss_logs_skip_virtual_samples_when_roi_metrics_only_enabled(): import torch from ultralytics.utils.loss import v8Detection3DLoss loss = object.__new__(v8Detection3DLoss) loss.hyp = type("Hyp", (), {"roi_metrics_only": True})() loss.face_3d_classes = {0} loss.complete_3d_classes = set() loss.norm_scales_3d = {} loss.device = torch.device("cpu") loss.l1_loss = torch.nn.L1Loss(reduction="sum") loss.l1_loss_none = torch.nn.L1Loss(reduction="none") loss.bce_yaw = torch.nn.BCEWithLogitsLoss(reduction="sum") loss.ce_cut = torch.nn.CrossEntropyLoss(reduction="sum") loss.edge_loss_gain = 0.1 preds = {"preds_3d": torch.zeros((2, 41, 1), dtype=torch.float32), "preds_edge": torch.zeros((2, 60, 1), dtype=torch.float32)} preds["preds_3d"][0, 0, 0] = 10.0 preds["preds_3d"][0, 24, 0] = 20.0 preds["preds_3d"][1, 0, 0] = 110.0 preds["preds_3d"][1, 24, 0] = 120.0 batch = { 
"labels_3d": torch.full((2, 42), float("nan"), dtype=torch.float32), "batch_idx": torch.tensor([0, 1], dtype=torch.int64), "cls": torch.tensor([[0.0], [0.0]], dtype=torch.float32), "camera_mode": ("roi", "virtual"), "calib": ({"depth_scale": 1.0}, {"depth_scale": 1.0}), } batch["labels_3d"][0, 2] = 20.0 batch["labels_3d"][0, 3:6] = torch.tensor([4.0, 1.5, 1.8]) batch["labels_3d"][0, 6] = 0.0 batch["labels_3d"][0, 10:18] = torch.tensor([0.0, 0.0, 10.0, 0.0, 0.5, 0.5, 0.8, 1.0]) batch["labels_3d"][1, 2] = 120.0 batch["labels_3d"][1, 3:6] = torch.tensor([4.0, 1.5, 1.8]) batch["labels_3d"][1, 6] = 0.0 batch["labels_3d"][1, 10:18] = torch.tensor([0.0, 0.0, 100.0, 0.0, 0.5, 0.5, 0.8, 1.0]) fg_mask = torch.tensor([[True], [True]]) target_gt_idx = torch.tensor([[0], [0]], dtype=torch.int64) anchor_points = torch.tensor([[0.0, 0.0]], dtype=torch.float32) stride_tensor = torch.tensor([[1.0]], dtype=torch.float32) imgsz = torch.tensor([1.0, 1.0], dtype=torch.float32) _, log_items = loss._compute_3d_loss(preds, batch, fg_mask, target_gt_idx, anchor_points, stride_tensor, imgsz) assert torch.isclose(log_items[6], torch.tensor(0.0)) assert torch.isclose(log_items[0], torch.tensor(0.0)) def test_edge_depth_targets_are_normalized_back_to_model_space(): import torch from ultralytics.utils.loss import _normalize_edge_depth_targets_to_model_space depths_metric = torch.tensor([[20.0, 24.0, 28.0]], dtype=torch.float32) normalized = _normalize_edge_depth_targets_to_model_space(depths_metric, torch.tensor(2.0)) assert torch.allclose(normalized, torch.tensor([[10.0, 12.0, 14.0]], dtype=torch.float32)) def test_3d_loss_gather_assigned_targets_flattens_local_indices(): import torch from ultralytics.utils.loss import v8Detection3DLoss labels_3d = torch.arange(4 * 42, dtype=torch.float32).reshape(4, 42) cls_all = torch.tensor([0.0, 1.0, 2.0, 3.0], dtype=torch.float32) gt_offsets = torch.tensor([0, 2, 4], dtype=torch.long) gt_counts = torch.tensor([2, 2], dtype=torch.long) gt_idx_i = 
torch.tensor([[0], [1]], dtype=torch.int64) gt_indices_i, gt_3d_i, cls_i = v8Detection3DLoss._gather_assigned_3d_targets( labels_3d, cls_all, gt_offsets, gt_counts, gt_idx_i, image_idx=1 ) assert gt_indices_i.shape == (2,) assert gt_indices_i.tolist() == [2, 3] assert torch.equal(gt_3d_i, labels_3d[2:4]) assert torch.equal(cls_i, cls_all[2:4]) def test_3d_loss_gather_assigned_targets_rejects_out_of_range_local_indices(): import torch from ultralytics.utils.loss import v8Detection3DLoss labels_3d = torch.zeros((4, 42), dtype=torch.float32) cls_all = torch.zeros(4, dtype=torch.float32) gt_offsets = torch.tensor([0, 2, 4], dtype=torch.long) gt_counts = torch.tensor([2, 2], dtype=torch.long) gt_idx_i = torch.tensor([0, 2], dtype=torch.int64) with pytest.raises(RuntimeError, match="Assigned GT index out of range"): v8Detection3DLoss._gather_assigned_3d_targets(labels_3d, cls_all, gt_offsets, gt_counts, gt_idx_i, image_idx=1) def test_3d_loss_edge_branch_keeps_gt_offsets_for_next_image(): import torch from ultralytics.utils.loss import v8Detection3DLoss loss = object.__new__(v8Detection3DLoss) loss.hyp = type("Hyp", (), {"roi_metrics_only": False})() loss.face_3d_classes = {0} loss.complete_3d_classes = set() loss.norm_scales_3d = {} loss.device = torch.device("cpu") loss.l1_loss = torch.nn.L1Loss(reduction="sum") loss.l1_loss_none = torch.nn.L1Loss(reduction="none") loss.bce_yaw = torch.nn.BCEWithLogitsLoss(reduction="sum") loss.ce_cut = torch.nn.CrossEntropyLoss(reduction="sum") loss.edge_loss_gain = 0.1 preds = { "preds_3d": torch.zeros((2, 41, 1), dtype=torch.float32), "preds_edge": torch.zeros((2, 60, 1), dtype=torch.float32), } batch = { "labels_3d": torch.full((2, 42), float("nan"), dtype=torch.float32), "batch_idx": torch.tensor([0, 1], dtype=torch.int64), "cls": torch.tensor([[0.0], [0.0]], dtype=torch.float32), "camera_mode": ("roi", "roi"), "calib": ( {"fx": 500.0, "fy": 500.0, "cx": 320.0, "cy": 240.0, "depth_scale": 1.0}, {"fx": 500.0, "fy": 500.0, "cx": 
320.0, "cy": 240.0, "depth_scale": 1.0}, ), } for row, depth in enumerate((20.0, 22.0)): batch["labels_3d"][row, 2] = depth batch["labels_3d"][row, 3:6] = torch.tensor([4.0, 1.5, 1.8]) batch["labels_3d"][row, 6] = -0.6 batch["labels_3d"][row, 7:9] = torch.tensor([0.5, 0.55]) batch["labels_3d"][row, 10:18] = torch.tensor([0.0, 0.0, depth - 2.0, 0.0, 0.46, 0.55, 0.9, 1.0]) fg_mask = torch.tensor([[True], [True]]) target_gt_idx = torch.tensor([[0], [0]], dtype=torch.int64) anchor_points = torch.tensor([[0.0, 0.0]], dtype=torch.float32) stride_tensor = torch.tensor([[8.0]], dtype=torch.float32) imgsz = torch.tensor([480.0, 640.0], dtype=torch.float32) opt_items, log_items = loss._compute_3d_loss(preds, batch, fg_mask, target_gt_idx, anchor_points, stride_tensor, imgsz) assert opt_items.shape == (9,) assert log_items.shape == (12,) assert torch.isfinite(opt_items).all() assert torch.isfinite(log_items).all()