yolov26_3d/ultralytics/utils/plotting_3d.py

# Ultralytics AGPL-3.0 License - https://ultralytics.com/license

"""3D detection visualization utilities.

Provides functions for decoding 3D predictions, projecting 3D boxes to 2D,
and drawing 3D wireframe boxes on images. Ported from yolov5-3d/utils/plots.py.
"""

import cv2
import numpy as np


def _default_face_visibility_score_thresh():
    """Return the configured visible-face threshold, even when this module is imported standalone."""
    try:
        from ultralytics.utils import DEFAULT_CFG

        return float(getattr(DEFAULT_CFG, "face_visibility_score_thresh", 0.05))
    except Exception:
        return 0.05


# Yaw offsets in radians (0, +pi/2, -pi/2, pi); presumably one per yaw bin — TODO confirm against the yaw decoder.
YAW_BIN_OFFSETS = (0.0, np.pi / 2, -np.pi / 2, np.pi)
# Start index of each face's 8-value block inside a 42-value GT target, indexed by face type
# (0=front, 1=rear, 2=left, 3=right as documented on compute_3d_box_corners).
FACE_OFFSETS_42 = (10, 18, 26, 34)
# Start index of each face's block inside a 41-value prediction vector; this module reads
# [off] as depth, [off+1:off+3] as UV offsets and [off+5] as the face score.
FACE_OFFSETS_41 = (0, 6, 12, 18)
# Start index of each face's 15-value block (5 samples x 3 values) inside the 60-value edge prediction.
FACE_EDGE_OFFSETS_60 = (0, 15, 30, 45)
# Face type -> indices of the four box corners (as ordered by compute_3d_box_corners) forming that face.
FACE_CORNERS = {0: (4, 5, 6, 7), 1: (0, 1, 2, 3), 2: (1, 2, 5, 6), 3: (0, 3, 4, 7)}
# Face type -> (start, end) corner indices of the edge sampled as that face's bottom edge.
FACE_BOTTOM_EDGE_CORNERS = {0: (6, 7), 1: (2, 3), 2: (2, 6), 3: (3, 7)}
# Default minimum visibility score for a face to be treated as visible (0.05 unless configured).
FACE_VISIBILITY_SCORE_THRESH = _default_face_visibility_score_thresh()
# Edge-yaw keeps the face-based visible-face threshold for the primary face, but uses a stricter gate for the
# optional second face in the two-face bucket.
EDGE_YAW_VALID_VISIBILITY_SCORE_THRESH = 0.1
# NOTE(review): the two EDGE_YAW_* limits below are not used in the visible part of this file;
# their exact semantics should be confirmed at the use sites.
EDGE_YAW_CUT_SIDE_MIN_VISIBLE_LENGTH_RATIO = 0.5
EDGE_YAW_MAX_LATERAL_DIST_M = 30.0
# Cut-object states: NORMAL = not cut; IN / OUT are inferred from which faces were invalidated
# (see get_gt_cut_state) or from the predicted cut logits (see get_pred_cut_state).
CUT_STATE_NORMAL = 0
CUT_STATE_IN = 1
CUT_STATE_OUT = 2
# One color triple per face type; presumably BGR for OpenCV drawing — TODO confirm.
FACE_COLORS = ((0, 0, 255), (255, 0, 0), (0, 255, 0), (0, 255, 255))


def rotation_3d_in_axis(points, angles, axis=1):
    """Rotate 3D points about one coordinate axis.

    The points are right-multiplied by the rotation matrix (``points @ R``).

    Args:
        points: (N, 3) array of 3D points.
        angles: Rotation angle in radians (scalar).
        axis: Axis to rotate about: 0=X, 1=Y, 2=Z.

    Returns:
        Rotated points (N, 3).

    Raises:
        ValueError: If ``axis`` is not 0, 1 or 2.
    """
    sin_a = np.sin(angles)
    cos_a = np.cos(angles)
    one = np.ones_like(cos_a)
    zero = np.zeros_like(cos_a)

    # Row layout of the rotation matrix for each supported axis.
    # Axis 1 is the camera Y axis (X=right, Y=down, Z=forward).
    layouts = {
        1: ((cos_a, zero, -sin_a), (zero, one, zero), (sin_a, zero, cos_a)),
        2: ((cos_a, sin_a, zero), (-sin_a, cos_a, zero), (zero, zero, one)),
        0: ((one, zero, zero), (zero, cos_a, sin_a), (zero, -sin_a, cos_a)),
    }
    rows = layouts.get(axis)
    if rows is None:
        raise ValueError(f"axis should be in [0, 1, 2], got {axis}")

    rot_mat = np.stack([np.stack(row) for row in rows])
    return np.dot(points, rot_mat)


def compute_3d_box_corners(center_3d, dimensions, rotation, face_type=-1):
    """Build the 8 corners of a rotated 3D box in camera coordinates.

    For ``face_type`` in 0..3 the given ``center_3d`` is the center of that
    face rather than the box center; any other value anchors the box at its
    own center.

    Args:
        center_3d: (x, y, z) anchor position in camera coordinates.
        dimensions: (length, height, width) of the box.
        rotation: rot_y (rotation around the y-axis in radians).
        face_type: -1=box center, 0=front, 1=rear, 2=left, 3=right.

    Returns:
        corners: (8, 3) array of corner coordinates.
    """
    length, height, width = dimensions

    # Unit-cube corners in the fixed order the rest of this module indexes into.
    unit_corners = np.array(
        [
            [0, 0, 0],
            [0, 0, 1],
            [0, 1, 1],
            [0, 1, 0],
            [1, 0, 0],
            [1, 0, 1],
            [1, 1, 1],
            [1, 1, 0],
        ],
        dtype=np.float64,
    )

    # Position of the anchor inside the unit cube for each face type.
    anchor_by_face = {0: [1, 0.5, 0.5], 1: [0, 0.5, 0.5], 2: [0.5, 0.5, 1], 3: [0.5, 0.5, 0]}
    unit_corners -= anchor_by_face.get(face_type, [0.5, 0.5, 0.5])

    # Scale to metric size, rotate about Y, then translate to the anchor.
    scaled = np.array([length, height, width]).reshape(1, 3) * unit_corners.reshape(8, 3)
    corners = rotation_3d_in_axis(scaled, rotation, axis=1)
    corners += np.array(center_3d).reshape(1, 3)
    return corners


def apply_fisheye_distortion(x, y, distort_coeffs):
    """Apply Kannala-Brandt fisheye distortion to one normalized camera point.

    Args:
        x: Normalized x coordinate (X / Z), scalar.
        y: Normalized y coordinate (Y / Z), scalar.
        distort_coeffs: Sequence whose first four entries are (k1, k2, k3, k4).
            ``None`` or fewer than four entries disables distortion.

    Returns:
        tuple: Distorted (x, y); the input is returned unchanged when no
        model is available or the point lies (numerically) on the optical axis.
    """
    has_model = distort_coeffs is not None and len(distort_coeffs) >= 4
    if not has_model:
        return x, y

    k1, k2, k3, k4 = distort_coeffs[:4]
    radius = np.sqrt(x * x + y * y)
    if radius < 1e-8:
        # Avoid dividing by a vanishing radius; distortion is negligible here.
        return x, y

    theta = np.arctan(radius)
    t2 = theta * theta
    t4 = t2 * t2
    t6 = t4 * t2
    t8 = t4 * t4
    distorted_theta = theta * (1 + k1 * t2 + k2 * t4 + k3 * t6 + k4 * t8)
    gain = distorted_theta / radius
    return x * gain, y * gain


def remove_fisheye_distortion(xd, yd, distort_coeffs, max_iter=20):
    """Invert Kannala-Brandt fisheye distortion for one normalized point.

    Solves ``theta * (1 + k1*theta^2 + k2*theta^4 + k3*theta^6 + k4*theta^8) = r_d``
    for ``theta`` with Newton's method, then maps back to the pinhole radius
    ``tan(theta)``.

    Args:
        xd: Distorted normalized x coordinate, scalar.
        yd: Distorted normalized y coordinate, scalar.
        distort_coeffs: Sequence whose first four entries are (k1, k2, k3, k4).
            ``None`` or fewer than four entries disables undistortion.
        max_iter: Maximum number of Newton iterations.

    Returns:
        tuple: Undistorted (x, y); the input is returned unchanged when no
        model is available or the point lies (numerically) on the optical axis.
    """
    has_model = distort_coeffs is not None and len(distort_coeffs) >= 4
    if not has_model:
        return xd, yd

    k1, k2, k3, k4 = distort_coeffs[:4]
    distorted_radius = np.sqrt(xd * xd + yd * yd)
    if distorted_radius < 1e-8:
        return xd, yd

    # In this model the distorted radius equals the distorted angle.
    target = distorted_radius
    # First-order initial guess that accounts for k1 only.
    theta = target / (1 + k1 * (target * target))

    for _ in range(max_iter):
        t2 = theta * theta
        t4 = t2 * t2
        t6 = t4 * t2
        t8 = t4 * t4
        residual = theta * (1 + k1 * t2 + k2 * t4 + k3 * t6 + k4 * t8) - target
        slope = 1 + 3 * k1 * t2 + 5 * k2 * t4 + 7 * k3 * t6 + 9 * k4 * t8
        updated = theta - residual / slope
        converged = abs(updated - theta) < 1e-8
        theta = updated
        if converged:
            break

    undistorted_radius = np.tan(theta)
    gain = undistorted_radius / distorted_radius
    return xd * gain, yd * gain


def project_3d_to_2d_with_distortion(points_3d, calib):
    """Project 3D camera-frame points to pixels through the fisheye model.

    Args:
        points_3d: Iterable of (x, y, z) points in camera coordinates.
        calib: Mapping with ``fx``, ``fy``, ``cx``, ``cy`` and optionally
            ``distort_coeffs``.

    Returns:
        (N, 2) array of pixel coordinates; rows for points with ``z <= 0.1``
        are left as NaN.
    """
    fx, fy = calib["fx"], calib["fy"]
    cx, cy = calib["cx"], calib["cy"]
    coeffs = calib.get("distort_coeffs", [])

    pixels = np.full((len(points_3d), 2), np.nan)
    for row, point in enumerate(points_3d):
        x, y, z = point
        if not (z > 0.1):
            # Too close to, or behind, the camera plane: keep the NaN marker.
            continue
        x_dist, y_dist = apply_fisheye_distortion(x / z, y / z, coeffs)
        pixels[row] = [fx * x_dist + cx, fy * y_dist + cy]
    return pixels


def project_3d_to_2d_with_calib(points_3d, calib):
    """Project 3D camera-frame points to pixels with a pinhole model.

    Args:
        points_3d: Iterable of (x, y, z) points in camera coordinates.
        calib: Mapping with ``fx``, ``fy``, ``cx``, ``cy``.

    Returns:
        (N, 2) array of pixel coordinates; rows for points with ``z <= 0.1``
        are left as NaN.
    """
    fx, fy = calib["fx"], calib["fy"]
    cx, cy = calib["cx"], calib["cy"]

    pixels = np.full((len(points_3d), 2), np.nan)
    for row, point in enumerate(points_3d):
        x, y, z = point
        if not (z > 0.1):
            # Too close to, or behind, the camera plane: keep the NaN marker.
            continue
        u = fx * x / z + cx
        v = fy * y / z + cy
        pixels[row] = [u, v]
    return pixels


def project_3d_to_2d(points_3d, calib):
    """Project 3D points to pixels, choosing the model from the calibration.

    Args:
        points_3d: Sequence of (x, y, z) points in camera coordinates.
        calib: Calibration mapping, or ``None``.

    Returns:
        (N, 2) array of pixel coordinates. All-NaN when ``calib`` is ``None``.
        The fisheye projection is used when ``distort_coeffs`` holds at least
        four values, otherwise the pinhole projection.
    """
    if calib is None:
        return np.full((len(points_3d), 2), np.nan)

    coeffs = calib.get("distort_coeffs")
    use_fisheye = coeffs is not None and len(coeffs) >= 4
    projector = project_3d_to_2d_with_distortion if use_fisheye else project_3d_to_2d_with_calib
    return projector(points_3d, calib)


def sample_3d_edge(p1, p2, num_samples=10):
    """Return ``num_samples`` points evenly spaced from ``p1`` to ``p2`` (both included).

    Args:
        p1: Start point as a NumPy array.
        p2: End point as a NumPy array.
        num_samples: Number of samples along the edge.

    Returns:
        (num_samples, D) array of interpolated points.
    """
    fractions = np.linspace(0, 1, num_samples)[:, np.newaxis]
    span = p2 - p1
    return p1 + fractions * span


def _point_inside_image(point_2d, img_w, img_h):
    """Return whether a projected point lies inside the image bounds."""
    x, y = float(point_2d[0]), float(point_2d[1])
    return np.isfinite(x) and np.isfinite(y) and 0.0 <= x <= img_w - 1 and 0.0 <= y <= img_h - 1


def _solve_edge_image_boundary_t(p0_2d, p1_2d, img_w, img_h):
    """Return the parametric interval whose projected segment lies inside the image."""
    p0 = np.asarray(p0_2d, dtype=np.float64)
    p1 = np.asarray(p1_2d, dtype=np.float64)
    if not np.isfinite(p0).all() or not np.isfinite(p1).all():
        return None

    dx, dy = p1 - p0
    t_min, t_max = 0.0, 1.0
    for p, q in ((-dx, p0[0]), (dx, (img_w - 1) - p0[0]), (-dy, p0[1]), (dy, (img_h - 1) - p0[1])):
        if abs(p) < 1e-12:
            if q < 0:
                return None
            continue
        t = q / p
        if p < 0:
            t_min = max(t_min, t)
        else:
            t_max = min(t_max, t)
        if t_min > t_max:
            return None
    return t_min, t_max


def _project_edge_point_at_t(p1, p2, t, calib):
    """Evaluate the 3D edge ``p1 -> p2`` at parameter ``t`` and project it.

    Returns:
        tuple: ``(point_3d, point_2d)`` — the interpolated 3D point and its
        projection as produced by ``project_3d_to_2d``.
    """
    start = np.asarray(p1, dtype=np.float64)
    end = np.asarray(p2, dtype=np.float64)
    point_3d = start + float(t) * (end - start)
    # The projector expects a batch, so add and then strip a leading axis.
    point_2d = project_3d_to_2d(point_3d[None, :], calib)[0]
    return point_3d, point_2d


def _refine_visible_edge_boundary(p1, p2, calib, img_w, img_h, t_out, t_in, steps=12):
    """Bisect one visible/hidden transition on a projected 3D edge.

    The search keeps one parameter known to project outside the image and one
    known to project inside, and halves the gap ``steps`` times. Tracking the
    two bounds by role (outside / inside) rather than by numeric order makes
    the update correct for both the leading boundary (``t_out < t_in``) and
    the trailing boundary (``t_out > t_in``).

    Args:
        p1: 3D start point of the edge.
        p2: 3D end point of the edge.
        calib: Calibration mapping forwarded to the projector.
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        t_out: Edge parameter whose projection lies outside the image.
        t_in: Edge parameter whose projection lies inside the image.
        steps: Number of bisection iterations.

    Returns:
        float: The refined parameter on the visible side of the transition
        (equal to ``t_in`` when no midpoint projects inside the image).
    """
    outside_t, inside_t = float(t_out), float(t_in)
    for _ in range(steps):
        mid = 0.5 * (outside_t + inside_t)
        _, point_2d = _project_edge_point_at_t(p1, p2, mid, calib)
        if _point_inside_image(point_2d, img_w, img_h):
            # Midpoint is visible: tighten the inside bound toward the border.
            inside_t = mid
        else:
            outside_t = mid
    return inside_t


def sample_partial_3d_edge(p1, p2, calib, img_w, img_h, num_samples=5, dense_samples=129):
    """Sample exactly ``num_samples`` points from the visible part of a projected 3D edge.

    The edge is first probed at ``dense_samples`` evenly spaced parameters.
    The longest contiguous run of in-image probes is kept, each end that does
    not coincide with an edge endpoint is refined by bisection, and the
    resulting interval is resampled uniformly.

    Args:
        p1: 3D start point of the edge.
        p2: 3D end point of the edge.
        calib: Calibration mapping forwarded to the projector.
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        num_samples: Number of points to return.
        dense_samples: Number of probes used to locate the visible run.

    Returns:
        tuple: ``(points_3d, points_2d)`` sorted by ascending image ``u``, or
        ``(None, None)`` when nothing is visible, the visible interval is
        degenerate, or any resampled point fails to project inside the image.
    """
    segment = np.asarray([p1, p2], dtype=np.float64)
    origin = segment[0:1]
    direction = segment[1:2] - segment[0:1]

    def points_at(params):
        # Evaluate the edge at a vector of parameters -> (len(params), 3).
        return origin + params[:, None] * direction

    def inside_flags(points_2d):
        return [_point_inside_image(point, img_w, img_h) for point in points_2d]

    probe_t = np.linspace(0.0, 1.0, dense_samples, dtype=np.float64)
    probe_2d = project_3d_to_2d(points_at(probe_t), calib)
    probe_inside = np.array(inside_flags(probe_2d), dtype=bool)
    if not probe_inside.any():
        return None, None

    # Split the visible probe indices into contiguous runs and keep the longest.
    inside_idx = np.flatnonzero(probe_inside)
    run_breaks = np.where(np.diff(inside_idx) > 1)[0] + 1
    longest_run = max(np.split(inside_idx, run_breaks), key=len)
    run_first, run_last = int(longest_run[0]), int(longest_run[-1])

    t_start = probe_t[run_first]
    if run_first > 0:
        t_start = _refine_visible_edge_boundary(
            segment[0], segment[1], calib, img_w, img_h, probe_t[run_first - 1], probe_t[run_first]
        )

    t_end = probe_t[run_last]
    if run_last < len(probe_t) - 1:
        t_end = _refine_visible_edge_boundary(
            segment[0], segment[1], calib, img_w, img_h, probe_t[run_last + 1], probe_t[run_last]
        )

    if t_end - t_start < 1e-6:
        return None, None

    final_t = np.linspace(t_start, t_end, num_samples, dtype=np.float64)
    final_3d = points_at(final_t)
    final_2d = project_3d_to_2d(final_3d, calib)
    if np.isnan(final_2d).any():
        return None, None
    if not np.all(inside_flags(final_2d)):
        return None, None

    # Report samples left-to-right in the image.
    left_to_right = np.argsort(final_2d[:, 0], kind="stable")
    return final_3d[left_to_right], final_2d[left_to_right]


def project_3d_box_edges_with_distortion(corners_3d, calib, samples_per_edge=10):
    """Sample every wireframe edge in 3D and project the samples through the fisheye model.

    Args:
        corners_3d: Box corners, indexable by corner id 0..7.
        calib: Calibration mapping forwarded to the distortion-aware projector.
        samples_per_edge: Number of samples taken along each edge.

    Returns:
        dict: Edge name -> (samples_per_edge, 2) array of projected points.
        Includes the four "back" edges, four "connect" edges, four "front"
        edges and the two diagonals of the front face.
    """
    edge_table = (
        ("back_0", 4, 5),
        ("back_1", 5, 6),
        ("back_2", 6, 7),
        ("back_3", 7, 4),
        ("connect_0", 0, 4),
        ("connect_1", 1, 5),
        ("connect_2", 2, 6),
        ("connect_3", 3, 7),
        ("front_0", 0, 1),
        ("front_1", 1, 2),
        ("front_2", 2, 3),
        ("front_3", 3, 0),
        ("front_x1", 0, 2),
        ("front_x2", 1, 3),
    )
    return {
        name: project_3d_to_2d_with_distortion(
            sample_3d_edge(corners_3d[start], corners_3d[end], samples_per_edge), calib
        )
        for name, start, end in edge_table
    }


def plot_box3d_on_img_with_distortion(
    img, edge_points_2d, color_front=(0, 0, 255), color_back=(255, 0, 0), color_side=(255, 255, 0), thickness=1
):
    """Draw a 3D box as polylines built from pre-projected edge samples.

    Args:
        img: Image drawn on in place.
        edge_points_2d: Mapping of edge name -> (N, 2) array of projected points.
        color_front: Color for edges whose name marks them as front edges.
        color_back: Color for edges whose name marks them as back edges.
        color_side: Color for every other edge.
        thickness: Line thickness passed to OpenCV.

    Returns:
        The same ``img`` object. Edges containing any NaN sample are skipped.
    """
    front_names = {"front_0", "front_1", "front_2", "front_3", "front_x1", "front_x2"}
    back_names = {"back_0", "back_1", "back_2", "back_3", "back_x1", "back_x2"}

    for name, samples in edge_points_2d.items():
        if np.isnan(samples).any():
            continue
        if name in front_names:
            color = color_front
        elif name in back_names:
            color = color_back
        else:
            color = color_side
        polyline = samples.astype(np.int32)
        cv2.polylines(img, [polyline], isClosed=False, color=color, thickness=thickness, lineType=cv2.LINE_AA)
    return img


def plot_box3d_on_img(img, corners_2d, color_front=(0, 0, 255), color_back=(255, 0, 0), color_side=(255, 255, 0), thickness=1):
    """Draw a 3D wireframe box from projected 2D corners.

    Edges with a non-finite endpoint are skipped. The projectors in this
    module mark points they cannot project with NaN, and casting NaN to an
    integer pixel coordinate is undefined, so such edges cannot be drawn
    meaningfully. This matches ``plot_box3d_on_img_with_distortion``, which
    skips NaN edges as well.

    Args:
        img: Image drawn on in place.
        corners_2d: (8, 2) array-like of projected corner coordinates.
        color_front: Color for the front face edges and its two diagonals.
        color_back: Color for the back face edges.
        color_side: Color for the edges connecting front and back faces.
        thickness: Line thickness passed to OpenCV.

    Returns:
        The same ``img`` object.
    """
    line_indices = (
        (4, 5), (5, 6), (6, 7), (7, 4),
        (0, 4), (1, 5), (2, 6), (3, 7),
        (0, 1), (1, 2), (2, 3), (3, 0), (0, 2), (1, 3),
    )
    front_edges = {(0, 1), (1, 2), (2, 3), (3, 0), (0, 2), (1, 3)}
    back_edges = {(4, 5), (5, 6), (6, 7), (7, 4)}

    corners = np.asarray(corners_2d, dtype=np.float64)
    drawable = np.isfinite(corners).all(axis=1)
    for i, j in line_indices:
        if not (drawable[i] and drawable[j]):
            continue
        # int() truncates toward zero like the previous int32 cast, and yields
        # plain Python ints for the OpenCV point arguments.
        start = (int(corners[i, 0]), int(corners[i, 1]))
        end = (int(corners[j, 0]), int(corners[j, 1]))
        color = color_front if (i, j) in front_edges else color_back if (i, j) in back_edges else color_side
        cv2.line(img, start, end, color, thickness, cv2.LINE_AA)
    return img


def back_project_2d_to_3d(uv, depth, calib):
    """Lift a pixel with known depth to a 3D point in camera coordinates.

    Args:
        uv: Pixel coordinates (u, v).
        depth: Depth (camera z) of the point; must be positive.
        calib: Mapping with ``fx``, ``fy``, ``cx``, ``cy`` and optionally
            ``distort_coeffs``; fisheye undistortion is applied when at least
            four coefficients are present.

    Returns:
        float64 array ``[x, y, z]``, or ``None`` when ``calib`` is ``None`` or
        ``depth`` is not positive.
    """
    if calib is None or depth <= 0:
        return None

    fx, fy = calib["fx"], calib["fy"]
    cx, cy = calib["cx"], calib["cy"]
    u, v = uv
    x_norm = (u - cx) / fx
    y_norm = (v - cy) / fy

    coeffs = calib.get("distort_coeffs", [])
    if coeffs is not None and len(coeffs) >= 4:
        x_norm, y_norm = remove_fisheye_distortion(x_norm, y_norm, coeffs)

    return np.array([x_norm * depth, y_norm * depth, depth], dtype=np.float64)


def reconstruct_3d_box_from_face(face_uv, face_z, dims, rot_y, face_type, calib):
    """Rebuild the 8 box corners from the pixel position and depth of one face center.

    Args:
        face_uv: Pixel coordinates (u, v) of the face center.
        face_z: Depth of the face center; must be positive.
        dims: Box dimensions (length, height, width).
        rot_y: Rotation around the y-axis in radians.
        face_type: Face the center belongs to (0=front, 1=rear, 2=left, 3=right).
        calib: Calibration mapping, or ``None``.

    Returns:
        (8, 3) corner array, or ``None`` when calibration is missing, the depth
        is not positive, back-projection fails, or a dimension/yaw is NaN.
    """
    if calib is None or face_z <= 0:
        return None

    face_center_3d = back_project_2d_to_3d(face_uv, face_z, calib)
    if face_center_3d is None:
        return None

    length, height, width = dims
    for value in (length, height, width, rot_y):
        if np.isnan(value):
            return None

    return compute_3d_box_corners(face_center_3d, dims, rot_y, face_type)


def reconstruct_3d_box_from_whole(uv, z3d, dims, rot_y, calib):
    """Rebuild the 8 box corners from the pixel position and depth of the box center.

    Args:
        uv: Pixel coordinates (u, v) of the box center.
        z3d: Depth of the box center; must be positive.
        dims: Box dimensions (length, height, width).
        rot_y: Rotation around the y-axis in radians.
        calib: Calibration mapping, or ``None``.

    Returns:
        (8, 3) corner array, or ``None`` when calibration is missing, the depth
        is not positive, back-projection fails, or a dimension/yaw is NaN.
    """
    if calib is None or z3d <= 0:
        return None

    box_center_3d = back_project_2d_to_3d(uv, z3d, calib)
    if box_center_3d is None:
        return None

    length, height, width = dims
    for value in (length, height, width, rot_y):
        if np.isnan(value):
            return None

    return compute_3d_box_corners(box_center_3d, dims, rot_y, face_type=-1)


def get_face_bottom_edge_points(corners_3d, face_type, num_samples=5):
    """Sample 3D points along the bottom edge of one face.

    Args:
        corners_3d: Box corners indexable by corner id, or ``None``.
        face_type: Face whose bottom edge is sampled.
        num_samples: Number of samples along the edge.

    Returns:
        (num_samples, 3) array of points, or ``None`` when the corners are
        missing or the face type has no bottom-edge definition.
    """
    if corners_3d is None:
        return None
    corner_pair = FACE_BOTTOM_EDGE_CORNERS.get(face_type)
    if corner_pair is None:
        return None
    first_corner, second_corner = corner_pair
    return sample_3d_edge(corners_3d[first_corner], corners_3d[second_corner], num_samples=num_samples)


def project_face_bottom_edge(corners_3d, face_type, calib, num_samples=5):
    """Sample a face's bottom edge in 3D and project the samples to the image.

    Args:
        corners_3d: Box corners indexable by corner id, or ``None``.
        face_type: Face whose bottom edge is sampled.
        calib: Calibration mapping forwarded to the projector.
        num_samples: Number of samples along the edge.

    Returns:
        tuple: ``(points_3d, points_2d)`` sorted by ascending image ``u``.
        ``(None, None)`` when the edge cannot be sampled; ``(points_3d, None)``
        (unsorted) when any projected sample is NaN.
    """
    edge_3d = get_face_bottom_edge_points(corners_3d, face_type, num_samples=num_samples)
    if edge_3d is None:
        return None, None

    edge_2d = project_3d_to_2d(edge_3d, calib)
    if np.isnan(edge_2d).any():
        return edge_3d, None

    left_to_right = np.argsort(edge_2d[:, 0], kind="stable")
    return edge_3d[left_to_right], edge_2d[left_to_right]


def project_partial_face_bottom_edge(corners_3d, face_type, calib, img_w, img_h, num_samples=5):
    """Project exactly ``num_samples`` points from the in-image part of a face's bottom edge.

    Returns:
        tuple: ``(points_3d, points_2d)`` from ``sample_partial_3d_edge``, or
        ``(None, None)`` when the corners are missing or the face type has no
        bottom-edge definition.
    """
    if corners_3d is None:
        return None, None
    corner_pair = FACE_BOTTOM_EDGE_CORNERS.get(face_type)
    if corner_pair is None:
        return None, None
    first_corner, second_corner = corner_pair
    return sample_partial_3d_edge(
        corners_3d[first_corner], corners_3d[second_corner], calib, img_w, img_h, num_samples=num_samples
    )


def collect_face_bottom_edges(corners_3d, face_types, calib, num_samples=5):
    """Project bottom-edge samples for every requested face that projects cleanly.

    Args:
        corners_3d: Box corners indexable by corner id, or ``None``.
        face_types: Iterable of face types to process.
        calib: Calibration mapping forwarded to the projector.
        num_samples: Number of samples per edge.

    Returns:
        tuple: ``(points_3d, points_2d)`` as float32. A single usable face
        yields 2D arrays (one edge); several yield arrays stacked along a new
        leading axis. ``(None, None)`` when no face is usable.
    """
    if corners_3d is None:
        return None, None

    batches_3d = []
    batches_2d = []
    for face_type in face_types:
        edge_3d, edge_2d = project_face_bottom_edge(corners_3d, face_type, calib, num_samples=num_samples)
        if edge_3d is not None and edge_2d is not None:
            batches_3d.append(edge_3d.astype(np.float32, copy=False))
            batches_2d.append(edge_2d.astype(np.float32, copy=False))

    if not batches_2d:
        return None, None
    if len(batches_2d) == 1:
        return batches_3d[0], batches_2d[0]
    return np.stack(batches_3d, axis=0), np.stack(batches_2d, axis=0)


def _edge_batches_to_list(edge_points):
    """Normalize edge sample arrays to a list of `(5, D)` arrays."""
    if edge_points is None:
        return []
    arr = np.asarray(edge_points, dtype=np.float32)
    if arr.ndim == 2:
        return [arr]
    return [arr[i] for i in range(arr.shape[0])]


def _stack_edge_batches(edge_batches):
    """Convert a list of edge sample arrays back to the legacy stacked representation."""
    if not edge_batches:
        return None
    if len(edge_batches) == 1:
        return edge_batches[0]
    return np.stack(edge_batches, axis=0)


def _append_edge_batch(edge_points_3d, edge_points_2d, decoded_edge):
    """Append one decoded edge sample set to stacked edge arrays."""
    if decoded_edge is None:
        return edge_points_3d, edge_points_2d
    edge3d_list = _edge_batches_to_list(edge_points_3d)
    edge2d_list = _edge_batches_to_list(edge_points_2d)
    edge3d_list.append(np.asarray(decoded_edge["points_3d"], dtype=np.float32))
    edge2d_list.append(np.asarray(decoded_edge["points_2d"], dtype=np.float32))
    return _stack_edge_batches(edge3d_list), _stack_edge_batches(edge2d_list)


def collect_precomputed_edge_points_2d(edge_faces_points_2d, edge_faces_valid=None, visible_face_types=()):
    """Convert one object's precomputed face-edge tensors into drawable polyline batches.

    Args:
        edge_faces_points_2d: Array-like with three dimensions whose first axis
            indexes the face type, or ``None``.
        edge_faces_valid: Optional per-face validity flags. Shorter inputs are
            padded with ``False``, longer ones truncated. ``None`` marks every
            face valid.
        visible_face_types: Face types to list first, in the given order. May
            be any iterable, including a NumPy array; ``None`` or an empty
            value means no preference.

    Returns:
        The valid faces' point arrays (float32): preferred faces first, then
        the remaining valid faces in index order. A single face is returned as
        one 2D array, several as a stacked 3D array. ``None`` when the input
        is missing, malformed, or no face is valid.
    """
    if edge_faces_points_2d is None:
        return None

    points = np.asarray(edge_faces_points_2d, dtype=np.float32)
    if points.ndim != 3 or points.shape[0] == 0:
        return None
    num_faces = points.shape[0]

    if edge_faces_valid is None:
        valid = np.ones(num_faces, dtype=bool)
    else:
        valid = np.asarray(edge_faces_valid, dtype=bool).reshape(-1)
        if valid.shape[0] < num_faces:
            valid = np.pad(valid, (0, num_faces - valid.shape[0]), constant_values=False)
        else:
            valid = valid[:num_faces]

    # Normalise the preference list without a truthiness test on array-likes:
    # `array or ()` raises ValueError for arrays holding more than one element.
    if visible_face_types is None:
        preferred = ()
    elif hasattr(visible_face_types, "__array__"):
        preferred = np.asarray(visible_face_types).reshape(-1)
    else:
        preferred = visible_face_types or ()

    face_order = []
    for face_type in preferred:
        face_type = int(face_type)
        if 0 <= face_type < num_faces and valid[face_type] and face_type not in face_order:
            face_order.append(face_type)
    for face_type in np.flatnonzero(valid):
        face_type = int(face_type)
        if face_type not in face_order:
            face_order.append(face_type)

    if not face_order:
        return None
    return _stack_edge_batches([points[face_type].astype(np.float32, copy=False) for face_type in face_order])


def decode_visible_face_edge_from_prediction(pred_edge_60, face_type, anchor_xy, stride):
    """Decode one face's block of the auxiliary edge prediction into pixel points and depths.

    The face's 15 values are read as 5 samples of (x offset, y offset, depth);
    offsets are added to the anchor position and multiplied by ``stride``.

    Args:
        pred_edge_60: Edge prediction vector, or ``None``.
        face_type: Face type in 0..3.
        anchor_xy: Anchor position (x, y) that the offsets are relative to.
        stride: Multiplier converting anchor-relative values to pixels.

    Returns:
        dict with ``points_2d`` (5, 2), ``depths`` (5,) and ``face_type``, with
        samples sorted by ascending image ``u``; ``None`` for a missing
        prediction or an out-of-range face type.
    """
    if pred_edge_60 is None or face_type not in range(4):
        return None

    start = FACE_EDGE_OFFSETS_60[face_type]
    samples = np.asarray(pred_edge_60[start : start + 15], dtype=np.float32).reshape(5, 3)

    points_2d = np.empty((5, 2), dtype=np.float32)
    for axis in (0, 1):
        points_2d[:, axis] = (anchor_xy[axis] + samples[:, axis]) * stride

    left_to_right = np.argsort(points_2d[:, 0], kind="stable")
    return {
        "points_2d": points_2d[left_to_right],
        "depths": samples[left_to_right, 2].astype(np.float32),
        "face_type": face_type,
    }


def _is_gt_face_cut(target_42, face_type):
    """Return whether a GT face was invalidated by crop handling."""
    if face_type not in range(4):
        return False
    off = FACE_OFFSETS_42[face_type]
    face = target_42[off : off + 8]
    return np.all(face[:6] == -1) and face[7] <= 0


def get_gt_cut_state(target_42):
    """Derive the cut-object state from which GT faces were invalidated.

    Returns:
        ``CUT_STATE_IN`` when the rear and both side faces are cut,
        ``CUT_STATE_OUT`` when the front and both side faces are cut, and
        ``CUT_STATE_NORMAL`` otherwise (including a missing or short target).
    """
    if target_42 is None or len(target_42) < 42:
        return CUT_STATE_NORMAL

    front_cut, rear_cut, left_cut, right_cut = (_is_gt_face_cut(target_42, face_type) for face_type in range(4))
    both_sides_cut = left_cut and right_cut
    if rear_cut and both_sides_cut:
        return CUT_STATE_IN
    if front_cut and both_sides_cut:
        return CUT_STATE_OUT
    return CUT_STATE_NORMAL


def get_gt_cut_side(target_42, img_w, img_h, tol=1e-4, score_thr=FACE_VISIBILITY_SCORE_THRESH):
    """Infer whether a cut GT object is clipped on the left or right image border.

    A reference ``u`` coordinate is taken from the side faces (types 2 and 3)
    that have a non-negative, non-NaN ``u`` (their mean when both qualify).
    If neither qualifies, the ``u`` of the best-scoring visible face is used.
    The object is "left"/"right" when that coordinate sits on the matching
    image border within ``tol``.

    Args:
        target_42: GT target vector holding four 8-value face blocks.
        img_w: Image width in pixels (face ``u`` values are scaled by it).
        img_h: Image height in pixels.
        tol: Tolerance in pixels for touching a border.
        score_thr: Minimum score for a face to count as visible.

    Returns:
        "left", "right", or ``None`` when no face is visible or the reference
        coordinate touches neither border.
    """
    # (pixel u, score) of every face flagged visible with valid coordinates.
    visible = []
    for start in FACE_OFFSETS_42:
        block = target_42[start : start + 8]
        score = block[6]
        if block[7] != 1 or np.isnan(score) or score < score_thr:
            continue
        u_norm, v_norm = block[4], block[5]
        if np.isnan(u_norm) or np.isnan(v_norm) or u_norm < 0 or v_norm < 0:
            continue
        visible.append((u_norm * img_w, float(score)))

    if not visible:
        return None

    edge_u, _ = max(visible, key=lambda entry: entry[1])

    # Side faces override the fallback whenever they carry a usable u.
    side_us = []
    for side_face in (2, 3):
        start = FACE_OFFSETS_42[side_face]
        u_norm = target_42[start : start + 8][4]
        if np.isnan(u_norm) or u_norm < 0:
            continue
        side_us.append(u_norm * img_w)
    if len(side_us) == 1:
        edge_u = side_us[0]
    elif side_us:
        edge_u = float(np.mean(side_us))

    if edge_u <= tol:
        return "left"
    if edge_u >= img_w - 1 - tol:
        return "right"
    return None


def get_cut_side_from_bbox_xyxy(bbox_xyxy, img_w, tol=1.0):
    """Infer from a 2D box which vertical image border it is clipped by.

    Args:
        bbox_xyxy: Box as (x1, y1, x2, y2) in pixels, or ``None``.
        img_w: Image width in pixels.
        tol: Tolerance in pixels for touching a border.

    Returns:
        "left" or "right" when the box touches exactly one of those borders;
        ``None`` when it touches neither or both, or when no box is given.
    """
    if bbox_xyxy is None:
        return None

    left, _, right, _ = np.asarray(bbox_xyxy, dtype=np.float64)
    right_limit = img_w - 1 - tol
    touches_left = left <= tol and right > tol
    touches_right = right >= right_limit and left < right_limit
    if touches_left == touches_right:
        return None
    return "left" if touches_left else "right"


def _get_camera_facing_side_face_from_corners(corners_3d):
    """Return the side face whose outward normal points most toward the camera."""
    if corners_3d is None:
        return None

    corners = np.asarray(corners_3d, dtype=np.float64)
    if corners.shape != (8, 3) or not np.isfinite(corners).all():
        return None

    box_center = corners.mean(axis=0)
    best_face_type, best_score = None, -np.inf
    for face_type in (2, 3):
        face_points = corners[list(FACE_CORNERS[face_type])]
        face_center = face_points.mean(axis=0)
        view_dir = -face_center
        view_norm = float(np.linalg.norm(view_dir))
        if view_norm < 1e-8:
            continue

        edge_a = face_points[1] - face_points[0]
        edge_b = face_points[2] - face_points[1]
        normal = np.cross(edge_a, edge_b)
        normal_norm = float(np.linalg.norm(normal))
        if normal_norm < 1e-8:
            continue

        if np.dot(normal, face_center - box_center) < 0:
            normal = -normal
        score = float(np.dot(normal / normal_norm, view_dir / view_norm))
        if score > best_score:
            best_face_type, best_score = face_type, score

    return best_face_type


def get_cut_object_side_face(face_type_or_state, cut_side=None, corners_3d=None):
    """Resolve the partially visible side face for a cut object.

    Box geometry is preferred when available, so the chosen side can follow
    the yaw. Without usable corners, the historical image-border heuristic is
    used: clipped on the left -> face 3, clipped on the right -> face 2.

    Args:
        face_type_or_state: Cut state; anything other than ``CUT_STATE_IN`` /
            ``CUT_STATE_OUT`` yields ``None``.
        cut_side: "left", "right", or anything else when unknown.
        corners_3d: Optional reconstructed box corners.

    Returns:
        2, 3, or ``None`` when the side face cannot be resolved.
    """
    if face_type_or_state not in {CUT_STATE_IN, CUT_STATE_OUT}:
        return None

    from_geometry = _get_camera_facing_side_face_from_corners(corners_3d)
    if from_geometry in (2, 3):
        return from_geometry

    return {"left": 3, "right": 2}.get(cut_side)


def get_cut_object_side_face_from_yaw(cut_state, yaw):
    """Infer the partially visible side face from the cut state and whole-box yaw.

    Returns:
        For ``CUT_STATE_IN``: 3 when ``sin(yaw) > 0``, else 2.
        For ``CUT_STATE_OUT``: 2 when ``sin(yaw) < 0``, else 3.
        ``None`` for any other state.
    """
    if cut_state not in (CUT_STATE_IN, CUT_STATE_OUT):
        return None

    yaw_sin = np.sin(float(yaw))
    if cut_state == CUT_STATE_IN:
        return 3 if yaw_sin > 0 else 2
    return 2 if yaw_sin < 0 else 3


def get_pred_cut_state(pred_41):
    """Return the predicted cut state: the arg-max over the three cut logits at ``pred_41[38:41]``."""
    logits = np.asarray(pred_41[38:41], dtype=np.float32)
    return int(logits.argmax())


def get_pred_cut_primary_face(cut_state):
    """Return the mandated longitudinal visible face for a cut prediction.

    Returns:
        0 (front) for ``CUT_STATE_IN``, 1 (rear) for ``CUT_STATE_OUT``,
        ``None`` for any other state.
    """
    return 0 if cut_state == CUT_STATE_IN else 1 if cut_state == CUT_STATE_OUT else None


def _reconstruct_pred_corners_for_cut_edge(pred_41, anchor_xy, stride, calib, cut_state=None):
    """Reconstruct predicted box corners for cut-edge side-face selection."""
    if calib is None:
        return None

    cut_state = get_pred_cut_state(pred_41) if cut_state is None else int(cut_state)
    dims = np.asarray(pred_41[27:30], dtype=np.float32)
    rot_y = _decode_yaw_from_prediction(pred_41)
    if np.any(np.isnan(dims)) or not np.isfinite(rot_y):
        return None

    primary_face = get_pred_cut_primary_face(cut_state)
    if primary_face is not None:
        off = FACE_OFFSETS_41[primary_face]
        z_face = float(pred_41[off])
        uv_face_offset = np.asarray(pred_41[off + 1 : off + 3], dtype=np.float32)
        if np.isfinite(z_face) and z_face > 0 and np.isfinite(uv_face_offset).all():
            u_face = float((anchor_xy[0] + uv_face_offset[0]) * stride)
            v_face = float((anchor_xy[1] + uv_face_offset[1]) * stride)
            corners = reconstruct_3d_box_from_face((u_face, v_face), z_face, dims, rot_y, primary_face, calib)
            if corners is not None:
                return corners

    z_whole = float(pred_41[24])
    uv_whole_offset = np.asarray(pred_41[25:27], dtype=np.float32)
    if not np.isfinite(z_whole) or z_whole <= 0 or not np.isfinite(uv_whole_offset).all():
        return None

    u_whole = float((anchor_xy[0] + uv_whole_offset[0]) * stride)
    v_whole = float((anchor_xy[1] + uv_whole_offset[1]) * stride)
    return reconstruct_3d_box_from_whole((u_whole, v_whole), z_whole, dims, rot_y, calib)


def _resolve_pred_cut_state_for_decode(pred_41, bbox_xyxy=None, img_w=None):
    """Keep a predicted cut state only when the 2D box is clipped at a vertical image border.

    Args:
        pred_41: Prediction vector carrying the cut logits.
        bbox_xyxy: Optional 2D box (x1, y1, x2, y2) in pixels.
        img_w: Optional image width in pixels.

    Returns:
        tuple: ``(cut_state, cut_side)``. A non-normal state is downgraded to
        ``(CUT_STATE_NORMAL, None)`` when the box/width are missing or the box
        does not touch exactly one of the left/right borders.
    """
    predicted_state = get_pred_cut_state(pred_41)
    if predicted_state == CUT_STATE_NORMAL:
        return predicted_state, None

    have_box = bbox_xyxy is not None and img_w is not None
    cut_side = get_cut_side_from_bbox_xyxy(bbox_xyxy, img_w) if have_box else None
    if cut_side in {"left", "right"}:
        return predicted_state, cut_side
    return CUT_STATE_NORMAL, None


def _select_best_pred_face_score(pred_41):
    """Find the predicted face with the highest finite score, ignoring any threshold.

    Returns:
        tuple: ``(face_type, score)`` of the best face (the earliest face wins
        ties), or ``None`` when no face has a finite score.
    """
    scored_faces = []
    for face_type, start in enumerate(FACE_OFFSETS_41):
        score = float(pred_41[start + 5])
        if np.isfinite(score):
            scored_faces.append((int(face_type), score))

    if not scored_faces:
        return None
    return max(scored_faces, key=lambda entry: entry[1])


def select_pred_visible_faces_for_decode(pred_41, score_thr=FACE_VISIBILITY_SCORE_THRESH, bbox_xyxy=None, img_w=None):
    """Return the ``(face_type, score)`` pairs used for decoding and drawing.

    Cut objects are restricted to their mandated face:
    - cut_in  -> front face only
    - cut_out -> rear face only
    Normal objects keep the thresholded visible-face list, and the top-scoring
    face is appended when it is not already listed (even below the threshold).
    The partial side edge is handled separately by the cut-edge decoder.

    Args:
        pred_41: Prediction vector.
        score_thr: Visibility score threshold for normal objects.
        bbox_xyxy: Optional 2D box used to confirm a predicted cut state.
        img_w: Optional image width used together with ``bbox_xyxy``.

    Returns:
        list of ``(face_type, score)`` tuples.
    """
    cut_state, _ = _resolve_pred_cut_state_for_decode(pred_41, bbox_xyxy=bbox_xyxy, img_w=img_w)
    forced_face = get_pred_cut_primary_face(cut_state)
    if forced_face is not None:
        forced_score = float(pred_41[FACE_OFFSETS_41[forced_face] + 5])
        return [(forced_face, forced_score)]

    faces = list(select_pred_visible_faces(pred_41, score_thr=score_thr))
    top = _select_best_pred_face_score(pred_41)
    if top is None:
        return faces

    top_type, top_score = top
    already_listed = any(int(face_type) == int(top_type) for face_type, _ in faces)
    if not already_listed:
        faces.append((int(top_type), float(top_score)))
    return faces


def decode_cut_partial_side_edge_from_prediction(
    pred_41, pred_edge_60, anchor_xy, stride, img_w, cut_side=None, calib=None, corners_3d=None
):
    """Decode the partially visible side bottom edge for a cut prediction.

    Args:
        pred_41: Prediction vector carrying the cut logits.
        pred_edge_60: Auxiliary edge prediction vector, or ``None``.
        anchor_xy: Anchor position (x, y) that offsets are relative to.
        stride: Multiplier converting anchor-relative values to pixels.
        img_w: Image width in pixels (not used by this function).
        cut_side: "left"/"right" fallback hint for choosing the side face.
        calib: Optional calibration, used to reconstruct corners when none are given.
        corners_3d: Optional reconstructed box corners.

    Returns:
        The decoded edge dict extended with ``cut_state``, ``cut_side`` and
        ``is_partial=True``; ``None`` when there is no edge prediction, the
        object is not predicted as cut, or no side face can be resolved.
    """
    if pred_edge_60 is None:
        return None

    cut_state = get_pred_cut_state(pred_41)
    if cut_state == CUT_STATE_NORMAL:
        return None

    if corners_3d is None and calib is not None:
        corners_3d = _reconstruct_pred_corners_for_cut_edge(pred_41, anchor_xy, stride, calib, cut_state=cut_state)

    side_face = get_cut_object_side_face(cut_state, cut_side, corners_3d=corners_3d)
    if side_face is None:
        return None

    edge = decode_visible_face_edge_from_prediction(pred_edge_60, side_face, anchor_xy, stride)
    if edge is None:
        return None

    edge.update(cut_state=cut_state, cut_side=cut_side, is_partial=True)
    return edge


def _resolve_gt_cut_partial_side_face(target_42, img_w, img_h, bbox_xyxy=None, score_thr=FACE_VISIBILITY_SCORE_THRESH):
    """Resolve the cut state and clipped image side of a GT object.

    The 2D box is consulted first; the face-based heuristic is used only when
    the box does not identify a side.

    Returns:
        tuple: ``(cut_state, cut_side)``; ``cut_side`` is ``None`` for normal
        objects or when no side can be determined.
    """
    cut_state = get_gt_cut_state(target_42)
    if cut_state == CUT_STATE_NORMAL:
        return cut_state, None

    side_from_box = get_cut_side_from_bbox_xyxy(bbox_xyxy, img_w)
    if side_from_box is not None:
        return cut_state, side_from_box
    return cut_state, get_gt_cut_side(target_42, img_w, img_h, score_thr=score_thr)


def _reconstruct_gt_corners_for_cut_edge(
    target_42, cls_id, calib, img_w, img_h, face_3d_classes, complete_3d_classes, score_thr=FACE_VISIBILITY_SCORE_THRESH
):
    """Reconstruct GT box corners using the same geometry source as box visualization when possible.

    Face-based classes are rebuilt from their best-scoring visible face when
    one is usable. Otherwise, classes listed in either class collection are
    rebuilt from the whole-box center.

    Args:
        target_42: GT target array; dimensions are read from ``[3:6]``, yaw
            from ``[6]``, whole-box depth from ``[2]`` and its normalized UV
            from ``[7:9]``.
        cls_id: Class id of the object.
        calib: Calibration mapping (``depth_scale`` defaults to 1.0), or ``None``.
        img_w: Image width in pixels, used to scale normalized ``u``.
        img_h: Image height in pixels, used to scale normalized ``v``.
        face_3d_classes: Class ids annotated with per-face geometry.
        complete_3d_classes: Class ids annotated with whole-box geometry.
        score_thr: Minimum score for a face to count as visible.

    Returns:
        (8, 3) corner array, or ``None`` when reconstruction is not possible.
    """
    if calib is None:
        return None

    depth_scale = calib.get("depth_scale", 1.0)
    dims = target_42[3:6].astype(np.float32)
    rot_y = float(target_42[6])
    if np.any(np.isnan(dims)) or not np.isfinite(rot_y):
        return None

    is_face_class = cls_id in face_3d_classes
    if is_face_class:
        candidates = select_gt_visible_faces(target_42, score_thr=score_thr)
        if candidates:
            best_type, best_face = max(candidates, key=lambda entry: float(entry[1][6]))
            face_u = float(best_face[4] * img_w)
            face_v = float(best_face[5] * img_h)
            face_depth = float(best_face[2] * depth_scale)
            face_usable = (
                np.isfinite(face_u) and np.isfinite(face_v) and np.isfinite(face_depth) and face_depth > 0
            )
            if face_usable:
                corners = reconstruct_3d_box_from_face((face_u, face_v), face_depth, dims, rot_y, best_type, calib)
                if corners is not None:
                    return corners

    if not is_face_class and cls_id not in complete_3d_classes:
        return None

    # Fall back to the whole-box center.
    whole_depth = float(target_42[2])
    whole_uv = target_42[7:9]
    if np.any(np.isnan(whole_uv)) or not np.isfinite(whole_depth) or whole_depth <= 0:
        return None

    whole_pixel = (float(whole_uv[0] * img_w), float(whole_uv[1] * img_h))
    return reconstruct_3d_box_from_whole(whole_pixel, float(whole_depth * depth_scale), dims, rot_y, calib)


def decode_cut_partial_side_edge_from_gt(
    target_42,
    cls_id,
    calib,
    img_w,
    img_h,
    face_3d_classes,
    complete_3d_classes,
    bbox_xyxy=None,
    corners_3d=None,
    score_thr=FACE_VISIBILITY_SCORE_THRESH,
):
    """Decode the in-image part of the side bottom edge of a cut GT object.

    Args:
        target_42: GT label vector.
        cls_id: Class id; only classes in ``face_3d_classes`` are handled.
        calib: Calibration mapping forwarded to the projection helpers.
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        face_3d_classes: Classes decoded from visible faces.
        complete_3d_classes: Classes decoded from the whole-box anchor.
        bbox_xyxy: Optional 2D box used to determine the cut side.
        corners_3d: Optional precomputed box corners; reconstructed from the label when omitted.
        score_thr: Visibility score threshold.

    Returns:
        Dict with ``points_3d``, ``points_2d``, ``depths``, ``face_type``, ``cut_state``, ``cut_side``
        and ``is_partial=True``, or ``None`` when no partial side edge applies.
    """
    if cls_id not in face_3d_classes:
        return None

    state, side = _resolve_gt_cut_partial_side_face(
        target_42, img_w, img_h, bbox_xyxy=bbox_xyxy, score_thr=score_thr
    )
    if side not in {"left", "right"}:
        return None

    box_corners = corners_3d
    if box_corners is None:
        box_corners = _reconstruct_gt_corners_for_cut_edge(
            target_42, cls_id, calib, img_w, img_h, face_3d_classes, complete_3d_classes, score_thr=score_thr
        )
        if box_corners is None:
            return None

    face = get_cut_object_side_face(state, side, corners_3d=box_corners)
    if face is None:
        return None
    if not _is_gt_face_cut(target_42, face):
        return None

    edge_3d, edge_2d = project_partial_face_bottom_edge(box_corners, face, calib, img_w, img_h, num_samples=5)
    if edge_3d is None or edge_2d is None:
        return None

    result = {
        "points_3d": edge_3d.astype(np.float32),
        "points_2d": edge_2d.astype(np.float32),
        "depths": edge_3d[:, 2].astype(np.float32),
        "face_type": face,
        "cut_state": state,
        "cut_side": side,
        "is_partial": True,
    }
    return result


def decode_visible_face_edge_from_gt(
    target_42,
    cls_id,
    calib,
    img_w,
    img_h,
    face_3d_classes,
    complete_3d_classes,
    face_type=None,
    score_thr=FACE_VISIBILITY_SCORE_THRESH,
    bbox_xyxy=None,
):
    """Decode bottom-edge samples of one GT visible face.

    A partial side edge of a cut object takes precedence when it exists and matches the requested
    face (or no face was requested). Otherwise the edge is projected from the decoded GT box.

    Args:
        target_42: GT label vector.
        cls_id: Class id; only classes in ``face_3d_classes`` are handled.
        calib: Calibration mapping forwarded to the decode/projection helpers.
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        face_3d_classes: Classes decoded from visible faces.
        complete_3d_classes: Classes decoded from the whole-box anchor.
        face_type: Face to decode (0..3); ``None`` selects the decoded target's ``visible_face_type``.
        score_thr: Visibility score threshold.
        bbox_xyxy: Optional 2D box used for the cut-side lookup.

    Returns:
        The partial-edge dict, or a dict with ``points_3d``, ``points_2d``, ``depths`` and
        ``face_type``; ``None`` when nothing can be decoded.
    """
    if cls_id not in face_3d_classes:
        return None

    any_face = face_type is None

    cut_edge = decode_cut_partial_side_edge_from_gt(
        target_42,
        cls_id,
        calib,
        img_w,
        img_h,
        face_3d_classes,
        complete_3d_classes,
        bbox_xyxy=bbox_xyxy,
        score_thr=score_thr,
    )
    if cut_edge is not None and (any_face or face_type == cut_edge["face_type"]):
        return cut_edge

    decoded = decode_3d_target(
        target_42, cls_id, calib, img_w, img_h, face_3d_classes, complete_3d_classes, score_thr=score_thr
    )
    if decoded is None:
        return None
    box_corners = decoded.get("corners_3d")
    if box_corners is None:
        return None

    gt_visible = tuple(int(visible_type) for visible_type, _ in select_gt_visible_faces(target_42, score_thr=score_thr))
    chosen_face = decoded.get("visible_face_type") if any_face else face_type
    if chosen_face not in range(4):
        return None
    # An explicitly requested face must actually be among the GT visible faces.
    if not any_face and chosen_face not in gt_visible:
        return None

    edge_3d, edge_2d = project_face_bottom_edge(box_corners, chosen_face, calib, num_samples=5)
    if edge_3d is None or edge_2d is None:
        return None

    result = {
        "points_3d": edge_3d.astype(np.float32),
        "points_2d": edge_2d.astype(np.float32),
        "depths": edge_3d[:, 2].astype(np.float32),
        "face_type": chosen_face,
    }
    return result


def _decoded_edge_to_points_3d(decoded_edge, calib):
    """Lift every (pixel, depth) sample of a decoded edge into camera-space 3D.

    Args:
        decoded_edge: Mapping with parallel ``points_2d`` and ``depths`` sequences, or ``None``.
        calib: Calibration object forwarded to ``back_project_2d_to_3d``.

    Returns:
        float32 array of the back-projected samples, or ``None`` if the edge is missing or any
        single sample cannot be back-projected.
    """
    if decoded_edge is None:
        return None

    lifted = []
    for pixel, sample_depth in zip(decoded_edge["points_2d"], decoded_edge["depths"]):
        xyz = back_project_2d_to_3d(tuple(pixel), float(sample_depth), calib)
        if xyz is None:
            # One failed sample invalidates the whole edge.
            return None
        lifted.append(xyz)
    return np.asarray(lifted, dtype=np.float32)


def _decoded_edge_points_are_drawable(points_2d, img_w=None, img_h=None, min_endpoint_dist_px=2.0):
    """Tell whether decoded 2D edge samples form a segment worth drawing.

    Args:
        points_2d: Sequence convertible to an (N, 2) array with N >= 2, or ``None``.
        img_w: Optional image width; the in-image test runs only when both sizes are given.
        img_h: Optional image height.
        min_endpoint_dist_px: Minimum pixel distance required between the first and last sample.

    Returns:
        ``True`` when the samples are finite, well-shaped, (optionally) all inside the image and the
        endpoints are at least ``min_endpoint_dist_px`` apart; ``False`` otherwise.
    """
    if points_2d is None:
        return False

    samples = np.asarray(points_2d, dtype=np.float32)
    is_point_list = samples.ndim == 2 and samples.shape[0] >= 2 and samples.shape[1] == 2
    if not is_point_list or not np.isfinite(samples).all():
        return False

    if img_w is not None and img_h is not None:
        inside = [_point_inside_image(sample, img_w, img_h) for sample in samples]
        if not np.all(inside):
            return False

    span_px = float(np.linalg.norm(samples[-1] - samples[0]))
    return span_px >= float(min_endpoint_dist_px)


def _edge_segment_length_3d(points_3d):
    """Measure the ground-plane (x/z) distance between the first and last sample of an edge.

    The Y component is ignored on purpose so that vertical noise in the decoded samples does not
    inflate the recovered size.

    Args:
        points_3d: Sequence convertible to an (N, 3) array with N >= 2, or ``None``.

    Returns:
        BEV length as a float, or ``None`` when the input is missing, mis-shaped or non-finite.
    """
    if points_3d is None:
        return None

    samples = np.asarray(points_3d, dtype=np.float32)
    well_shaped = samples.ndim == 2 and samples.shape[0] >= 2 and samples.shape[1] == 3
    if not well_shaped or not np.isfinite(samples).all():
        return None

    span = samples[-1] - samples[0]
    return float(np.linalg.norm(span[[0, 2]]))


def _prediction_lateral_distance_m_from_center(center):
    """Return ``|x|`` of a predicted anchor center, i.e. its absolute lateral offset.

    Args:
        center: Scalar or sequence whose first flattened element is the lateral (x) coordinate.

    Returns:
        Absolute value of the first element as a float, or ``None`` when the input is ``None``,
        empty, or its first element is not finite.
    """
    if center is None:
        return None

    flat = np.asarray(center, dtype=np.float32).reshape(-1)
    if flat.shape[0] >= 1 and np.isfinite(flat[0]):
        return float(abs(flat[0]))
    return None


def edge_points_to_yaw(points_3d, face_type):
    """Derive the whole-box yaw from the bottom edge of a single visible face.

    The edge direction is taken from the first to the last finite sample in BEV (x, z). For
    front/rear faces (0, 1) the heading is perpendicular to the edge; for side faces (2, 3) it is
    parallel to it. The remaining 180-degree ambiguity is resolved by picking the heading whose
    outward normal for ``face_type`` has the smallest dot product with the edge midpoint, i.e.
    points most toward the camera origin.

    Args:
        points_3d: Sequence of at least two 3D samples (x, y, z).
        face_type: Face index in 0..3.

    Returns:
        Yaw in radians wrapped to [-pi, pi), or NaN when the input is unusable or degenerate.
    """
    nan = float("nan")
    if points_3d is None or len(points_3d) < 2 or face_type not in range(4):
        return nan

    samples = np.asarray(points_3d, dtype=np.float64)
    samples = samples[np.isfinite(samples).all(axis=1)]
    if len(samples) < 2:
        return nan

    direction = np.array(
        [samples[-1, 0] - samples[0, 0], samples[-1, 2] - samples[0, 2]],
        dtype=np.float64,
    )
    length = float(np.linalg.norm(direction))
    if length < 1e-8:
        return nan
    direction /= length

    edge_mid = np.mean(samples[:, [0, 2]], axis=0)

    if face_type in (0, 1):
        # Front/rear edge runs across the box, so the heading is the edge rotated by 90 degrees.
        across = np.array([direction[1], -direction[0]], dtype=np.float64)
        headings = (across, -across)
    else:
        headings = (direction, -direction)

    def _toward_camera_score(heading):
        """Dot product of the face's outward normal with the edge midpoint (lower = faces camera)."""
        if face_type == 0:
            normal = heading
        elif face_type == 1:
            normal = -heading
        else:
            sideways = np.array([-heading[1], heading[0]], dtype=np.float64)
            normal = sideways if face_type == 2 else -sideways
        return float(np.dot(normal, edge_mid))

    # Samples arrive ordered left-to-right in the image, so the sign of the edge direction carries
    # no heading information on its own; the visibility of the requested face disambiguates it.
    heading = min(headings, key=_toward_camera_score)
    angle = np.arctan2(-heading[1], heading[0])
    return float((angle + np.pi) % (2 * np.pi) - np.pi)


def visible_face_edges_to_yaw(face_edges_3d, face_scores=None):
    """Estimate the whole-box yaw from the bottom edges of one or more visible faces.

    When both a front/rear edge and a side edge are present, the best-scoring pair is handed to
    ``_estimate_two_edge_yaw_from_candidates`` first. If that yields no finite yaw, each edge is
    converted to a yaw on its own and the results are combined with a score-weighted circular mean.

    Args:
        face_edges_3d: Mapping ``face_type -> points_3d`` or an iterable of such pairs.
        face_scores: Optional per-face weights, either a mapping (``.get``) or an indexable
            sequence. Missing, non-finite or non-positive weights count as 1.0.

    Returns:
        Yaw in radians wrapped to [-pi, pi), or NaN when no edge yields a finite yaw.
    """
    if face_edges_3d is None:
        return float("nan")

    def _score_of(face_type):
        """Look up the weight of one face, defaulting to 1.0 for unusable values."""
        score = 1.0
        if face_scores is not None:
            if hasattr(face_scores, "get"):
                score = face_scores.get(face_type, 1.0)
            else:
                score = face_scores[face_type]
        if np.isfinite(score) and score > 0:
            return score
        return 1.0

    pairs = list(face_edges_3d.items() if hasattr(face_edges_3d, "items") else face_edges_3d)

    entries = []
    for face_type, points_3d in pairs:
        score = _score_of(face_type)
        entries.append(
            {
                "face_type": int(face_type),
                "points_3d": np.asarray(points_3d, dtype=np.float32),
                "score": float(score),
            }
        )

    # Preferred path: one front/rear edge plus one side edge.
    front_rear = [entry for entry in entries if entry["face_type"] in (0, 1)]
    sides = [entry for entry in entries if entry["face_type"] in (2, 3)]
    if front_rear and sides:
        best_front_rear = max(front_rear, key=lambda entry: entry["score"])
        best_side = max(sides, key=lambda entry: entry["score"])
        two_edge_yaw = _estimate_two_edge_yaw_from_candidates(best_front_rear, best_side)
        if np.isfinite(two_edge_yaw):
            return two_edge_yaw

    # Fallback: independent per-edge yaws, reusing the weights resolved above.
    yaws, weights = [], []
    for (face_type, points_3d), entry in zip(pairs, entries):
        single_yaw = edge_points_to_yaw(points_3d, face_type)
        if np.isfinite(single_yaw):
            yaws.append(float(single_yaw))
            weights.append(entry["score"])

    if not yaws:
        return float("nan")
    if len(yaws) == 1:
        return float(yaws[0])

    # Weighted circular mean in the (cos, -sin) heading representation.
    headings = np.stack([np.cos(yaws), -np.sin(yaws)], axis=1)
    mean_heading = np.sum(headings * np.asarray(weights, dtype=np.float64)[:, None], axis=0)
    magnitude = float(np.linalg.norm(mean_heading))
    if magnitude < 1e-8:
        # Headings cancel out; fall back to the single most trusted edge.
        return float(yaws[int(np.argmax(weights))])

    mean_heading /= magnitude
    angle = np.arctan2(-mean_heading[1], mean_heading[0])
    return float((angle + np.pi) % (2 * np.pi) - np.pi)


def _bev_edge_points(points_3d):
    """Project decoded edge samples onto the BEV plane, keeping only fully finite rows.

    Args:
        points_3d: Sequence convertible to an (N, 3) array with N >= 2.

    Returns:
        (M, 2) float64 array of (x, z) coordinates with M >= 2, or ``None`` when the input is
        mis-shaped or fewer than two finite samples remain.
    """
    samples = np.asarray(points_3d, dtype=np.float64)
    well_shaped = samples.ndim == 2 and samples.shape[0] >= 2 and samples.shape[1] == 3
    if not well_shaped:
        return None

    finite_rows = samples[np.isfinite(samples).all(axis=1)]
    if len(finite_rows) < 2:
        return None
    return finite_rows[:, [0, 2]]


def _fit_bev_edge_axis(points_3d):
    """Fit the dominant BEV line direction of a decoded edge via SVD.

    Args:
        points_3d: Edge samples accepted by ``_bev_edge_points``.

    Returns:
        Tuple ``(axis, midpoint)`` where ``axis`` is a unit (x, z) direction (sign arbitrary) and
        ``midpoint`` is the mean BEV sample. ``(None, None)`` is returned when the samples are
        unusable, the SVD fails, or the samples are (numerically) coincident so that no direction
        exists.
    """
    bev_points = _bev_edge_points(points_3d)
    if bev_points is None:
        return None, None
    midpoint = np.mean(bev_points, axis=0)
    centered = bev_points - midpoint
    try:
        _, singular_values, vh = np.linalg.svd(centered, full_matrices=False)
    except np.linalg.LinAlgError:
        return None, None

    # For coincident samples the centered matrix is all zeros. SVD still returns a unit-length
    # ``vh[0]`` in that case, so the norm check below cannot detect it; the leading singular value
    # (the spread along the fitted axis) is what reveals that the direction is meaningless.
    if singular_values.size == 0 or not np.isfinite(singular_values[0]) or float(singular_values[0]) < 1e-8:
        return None, None

    axis = np.asarray(vh[0], dtype=np.float64)
    norm = float(np.linalg.norm(axis))
    if norm < 1e-8:
        return None, None
    return axis / norm, midpoint


def _estimate_two_edge_yaw_from_candidates(
    longitudinal_candidate,
    side_candidate,
    reference_yaw=None,
):
    """Estimate yaw from a front/rear edge and a side edge, with the heading parallel to the side edge.

    The heading axis is the fitted BEV direction of the side edge. Its sign is chosen so that the
    outward normals of both faces point, in sum, most toward the camera origin. A finite
    ``reference_yaw`` then overrides the sign if the chosen heading opposes it.

    Args:
        longitudinal_candidate: Dict with ``face_type`` in (0, 1) and ``points_3d``.
        side_candidate: Dict with ``face_type`` in (2, 3) and ``points_3d``.
        reference_yaw: Optional heading used to settle the final sign.

    Returns:
        Yaw in radians wrapped to [-pi, pi), or NaN when the candidates are missing, mislabeled or
        cannot be fitted.
    """
    nan = float("nan")
    if longitudinal_candidate is None or side_candidate is None:
        return nan

    front_rear_type = int(longitudinal_candidate["face_type"])
    if front_rear_type not in (0, 1):
        return nan
    lateral_type = int(side_candidate["face_type"])
    if lateral_type not in (2, 3):
        return nan

    heading_axis, side_mid = _fit_bev_edge_axis(side_candidate["points_3d"])
    # Only the midpoint of the front/rear edge is needed; its fitted direction is not used.
    _, front_rear_mid = _fit_bev_edge_axis(longitudinal_candidate["points_3d"])
    if heading_axis is None or front_rear_mid is None or side_mid is None:
        return nan

    def _outward_normal(heading, face):
        """Outward BEV normal of ``face`` for a box heading along ``heading``."""
        if face == 0:
            return heading
        if face == 1:
            return -heading
        sideways = np.array([-heading[1], heading[0]], dtype=np.float64)
        return sideways if face == 2 else -sideways

    def _camera_facing_cost(heading):
        """Sum of normal-midpoint dot products; lower means both faces look toward the camera."""
        front_rear_term = float(np.dot(_outward_normal(heading, front_rear_type), front_rear_mid))
        side_term = float(np.dot(_outward_normal(heading, lateral_type), side_mid))
        return front_rear_term + side_term

    heading = min((heading_axis, -heading_axis), key=_camera_facing_cost)

    if reference_yaw is not None and np.isfinite(reference_yaw):
        ref_angle = float(reference_yaw)
        ref_heading = np.array([np.cos(ref_angle), -np.sin(ref_angle)], dtype=np.float64)
        if float(np.dot(heading, ref_heading)) < 0.0:
            heading = -heading

    angle = np.arctan2(-heading[1], heading[0])
    return float((angle + np.pi) % (2 * np.pi) - np.pi)


def _resolve_two_face_candidate_roles(candidates, yaw):
    """Decide which of the first two decoded edges is the front/rear edge and which is the side edge.

    Each edge's fitted BEV axis is compared with the box axes implied by ``yaw``: the longitudinal
    (front/rear) edge should run along the box's right axis and the side edge along its forward
    axis. Ties in geometric fit are broken by agreement with the edges' own ``face_type`` labels.

    Args:
        candidates: Sequence of edge dicts with ``points_3d`` (and optionally ``face_type``); only
            the first two are considered.
        yaw: Box yaw in radians.

    Returns:
        Dict with ``forward_bev``, ``right_bev``, ``longitudinal`` and ``side`` entries, or ``None``
        when fewer than two candidates exist, ``yaw`` is not finite, or an edge cannot be fitted.
    """
    if candidates is None or len(candidates) < 2 or not np.isfinite(float(yaw)):
        return None

    angle = float(yaw)
    forward_bev = np.array([np.cos(angle), -np.sin(angle)], dtype=np.float64)
    right_bev = np.array([np.sin(angle), np.cos(angle)], dtype=np.float64)

    infos = []
    for position, edge in enumerate(candidates[:2]):
        axis, midpoint = _fit_bev_edge_axis(edge["points_3d"])
        if axis is None or midpoint is None:
            return None
        info = {
            "index": int(position),
            "candidate": edge,
            "axis": axis,
            "midpoint": midpoint,
            "forward_align": abs(float(np.dot(axis, forward_bev))),
            "right_align": abs(float(np.dot(axis, right_bev))),
        }
        infos.append(info)

    def _mislabel(info, expected_types):
        """0 when the edge's own label agrees with the role it is being assigned, else 1."""
        return 0 if int(info["candidate"].get("face_type", -1)) in expected_types else 1

    def _assignment_cost(long_idx, side_idx):
        """Lexicographic cost: geometric misfit, then label disagreement, then negated alignment."""
        long_info, side_info = infos[long_idx], infos[side_idx]
        return (
            (1.0 - long_info["right_align"]) + (1.0 - side_info["forward_align"]),
            _mislabel(long_info, (0, 1)) + _mislabel(side_info, (2, 3)),
            -(long_info["right_align"] + side_info["forward_align"]),
        )

    long_idx, side_idx = min(((0, 1), (1, 0)), key=lambda pair: _assignment_cost(*pair))
    return {
        "forward_bev": forward_bev,
        "right_bev": right_bev,
        "longitudinal": infos[long_idx],
        "side": infos[side_idx],
    }


def _resolve_two_face_center_from_geometry(longitudinal_info, side_info, length_m, width_m):
    """Recover the BEV box center from a front/rear edge midpoint and a side edge midpoint.

    Every admissible (front|rear, left|right-side) labeling implies one center from each edge; the
    labeling whose two implied centers disagree least is kept. The final center takes its
    longitudinal coordinate from the front/rear edge and its lateral coordinate from the side edge.

    Args:
        longitudinal_info: Dict with ``forward_bev``, ``right_bev``, ``midpoint`` and ``candidate``
            (whose ``face_type`` of 0 or 1, when present, fixes the front/rear labeling).
        side_info: Dict with ``midpoint``.
        length_m: Box length along the forward axis.
        width_m: Box width along the right axis.

    Returns:
        Dict with ``center_bev``, ``longitudinal_face_type``, ``side_face_type``,
        ``center_from_longitudinal`` and ``center_from_side``; ``None`` when an input is missing or
        a midpoint is not finite.
    """
    if longitudinal_info is None or side_info is None:
        return None

    forward_bev = np.asarray(longitudinal_info["forward_bev"], dtype=np.float64)
    right_bev = np.asarray(longitudinal_info["right_bev"], dtype=np.float64)
    long_mid = np.asarray(longitudinal_info["midpoint"], dtype=np.float64)
    side_mid = np.asarray(side_info["midpoint"], dtype=np.float64)
    if not (np.isfinite(long_mid).all() and np.isfinite(side_mid).all()):
        return None

    # (sign of the face along the forward axis, face type): front = +forward, rear = -forward.
    every_long_option = ((1.0, 0), (-1.0, 1))
    labelled_type = int(longitudinal_info["candidate"].get("face_type", -1))
    long_options = tuple(option for option in every_long_option if option[1] == labelled_type) or every_long_option
    side_options = ((1.0, 2), (-1.0, 3))

    hypotheses = []
    for long_sign, long_face in long_options:
        center_a = long_mid - long_sign * forward_bev * (float(length_m) * 0.5)
        for side_sign, side_face in side_options:
            center_b = side_mid - side_sign * right_bev * (float(width_m) * 0.5)
            gap = float(np.linalg.norm(center_a - center_b))
            hypotheses.append((gap, center_a, center_b, long_face, side_face))

    # ``min`` keeps the first of equally good hypotheses, matching enumeration order.
    _, center_a, center_b, long_face, side_face = min(hypotheses, key=lambda hypothesis: hypothesis[0])

    along = float(np.dot(center_a, forward_bev))
    across = float(np.dot(center_b, right_bev))
    return {
        "center_bev": along * forward_bev + across * right_bev,
        "longitudinal_face_type": int(long_face),
        "side_face_type": int(side_face),
        "center_from_longitudinal": center_a,
        "center_from_side": center_b,
    }


def _estimate_single_edge_yaw_with_cut_primary_face(candidate, cut_state, reference_yaw=None):
    """Estimate yaw from a single edge of a cut object, using the cut state to pick the heading sign.

    Args:
        candidate: Dict with ``face_type`` and ``points_3d``, or ``None``.
        cut_state: Cut state; anything other than ``CUT_STATE_IN`` / ``CUT_STATE_OUT`` yields NaN.
        reference_yaw: Optional heading; when finite, the result is the pi-equivalent yaw closest
            to it and the cut-state sign logic for front/rear edges is skipped.

    Returns:
        Yaw in radians, or NaN when the candidate is missing, the object is not cut, the face type
        is outside 0..3, or the edge cannot be fitted.
    """
    nan = float("nan")
    if candidate is None or cut_state not in (CUT_STATE_IN, CUT_STATE_OUT):
        return nan

    def _has_reference():
        return reference_yaw is not None and np.isfinite(reference_yaw)

    edge_face = int(candidate["face_type"])

    if edge_face in (0, 1):
        edge_yaw = edge_points_to_yaw(candidate["points_3d"], edge_face)
        if _has_reference():
            return _align_yaw_to_reference_pi_periodic(edge_yaw, reference_yaw)
        anchored_face = get_pred_cut_primary_face(cut_state)
        if anchored_face in (0, 1) and int(anchored_face) != edge_face:
            # The cut state names the opposite longitudinal face: turn the heading by pi.
            return float((float(edge_yaw) + 2 * np.pi) % (2 * np.pi) - np.pi)
        return float(edge_yaw)

    if edge_face not in (2, 3):
        return nan

    fitted_axis, _ = _fit_bev_edge_axis(candidate["points_3d"])
    bev_samples = _bev_edge_points(candidate["points_3d"])
    if fitted_axis is None or bev_samples is None:
        return nan
    edge_mid = np.mean(bev_samples, axis=0)

    # A side edge is parallel to the heading, leaving two opposite yaw options.
    options = [
        float((np.arctan2(-heading[1], heading[0]) + np.pi) % (2 * np.pi) - np.pi)
        for heading in (fitted_axis, -fitted_axis)
    ]

    anchored_face = get_pred_cut_primary_face(cut_state)
    if anchored_face in (0, 1):
        wants_positive = int(anchored_face) == 0
        consistent = []
        for option in options:
            heading = np.array([np.cos(float(option)), -np.sin(float(option))], dtype=np.float64)
            along = float(np.dot(heading, edge_mid))
            agrees = along > 0.0 if wants_positive else along < 0.0
            if agrees:
                consistent.append(float(option))
        # Keep both options when neither agrees with the cut-state primary face.
        options = consistent or options

    chosen = float(options[0])
    if _has_reference():
        return _align_yaw_to_reference_pi_periodic(chosen, reference_yaw)
    return chosen


def extract_face_regressed_size_priors_from_prediction(pred_41):
    """Collect the per-face regressed size pairs of one denormalized 41-dim prediction.

    For each face offset in ``FACE_OFFSETS_41`` the two values at ``offset + 3`` and ``offset + 4``
    are read. Front/rear faces (0, 1) expose them as ``height``/``width``; side faces (2, 3) expose
    them as ``length``/``height``. Absolute values are stored.

    Args:
        pred_41: Prediction vector; flattened before indexing.

    Returns:
        Dict ``face_type -> {name: size}``; faces whose pair is incomplete or non-finite are omitted.
    """
    flat = np.asarray(pred_41, dtype=np.float32).reshape(-1)
    priors = {}
    for face_type, start in enumerate(FACE_OFFSETS_41):
        pair = np.asarray(flat[start + 3 : start + 5], dtype=np.float32).reshape(-1)
        if pair.shape == (2,) and np.isfinite(pair).all():
            first_name, second_name = ("height", "width") if face_type in (0, 1) else ("length", "height")
            priors[int(face_type)] = {
                first_name: float(abs(pair[0])),
                second_name: float(abs(pair[1])),
            }
    return priors


def _select_edge_or_regressed_size(measured_size_m, regressed_size_m, min_fraction=0.85, max_fraction=1.35):
    """Pick the edge-measured size when it is plausible relative to the regressed size.

    Args:
        measured_size_m: Size measured from the decoded edge, or ``None``.
        regressed_size_m: Regressed size used as the plausibility reference and fallback.
        min_fraction: Smallest accepted ``measured / regressed`` ratio.
        max_fraction: Largest accepted ``measured / regressed`` ratio.

    Returns:
        Tuple ``(size, source)`` with ``source`` being ``"edge"`` or ``"regressed"``; ``(None, None)``
        when the regressed size itself is non-finite or not larger than 1e-6.
    """
    reference = float(abs(regressed_size_m))
    if not np.isfinite(reference) or reference <= 1e-6:
        return None, None

    fallback = (reference, "regressed")
    if measured_size_m is None:
        return fallback

    measured = float(abs(measured_size_m))
    if not np.isfinite(measured) or measured <= 1e-6:
        return fallback

    ratio = measured / reference
    if float(min_fraction) <= ratio <= float(max_fraction):
        return measured, "edge"
    return fallback


def reconstruct_edge_based_box_from_selection(edge_selection, box_center_y_m, regressed_dims, face_regressed_dims_by_type=None):
    """Reconstruct a full 3D box from one or two selected visible-face bottom edges.

    Two-face mode:
    - side edge provides yaw/length and lateral anchor
    - front/rear edge provides width and longitudinal anchor

    One-face mode:
    - front/rear edge provides yaw/width and the visible-face longitudinal+lateral anchor
    - side edge provides yaw/length and the visible-face longitudinal+lateral anchor

    The selected edge geometry stays the anchor. Height and the missing dimensions in one-face mode
    come from the regressed branch.

    Args:
        edge_selection: Mapping read via ``.get`` for ``yaw``, ``face_types``, ``edge_points_3d`` and
            the optional ``face_is_partial`` flags (missing flags default to ``False``).
        box_center_y_m: Fallback center height, consulted only if the edge samples provide no usable
            Y values.
        regressed_dims: Three values read as (length, height, width); absolute values are used.
        face_regressed_dims_by_type: Optional mapping ``face_type -> {"length"/"width": value}`` whose
            entries may replace the regressed length/width as the size prior of a non-partial edge.

    Returns:
        Dict with ``center``, ``dims`` (length, height, width), ``yaw``, ``corners_3d``, ``mode``
        (``"two-face"``, ``"front-rear"`` or ``"side"``), ``side_length_m``, ``width_m``,
        ``length_source``, ``width_source``, ``face_types``, ``longitudinal_face_type`` and
        ``side_face_type``; ``None`` when the selection or dimensions are unusable.
    """
    if edge_selection is None:
        return None

    yaw = float(edge_selection.get("yaw", float("nan")))
    if not np.isfinite(yaw):
        return None

    dims_reg = np.asarray(regressed_dims, dtype=np.float32).reshape(-1)
    if dims_reg.shape != (3,) or not np.isfinite(dims_reg).all():
        return None
    reg_length = float(abs(dims_reg[0]))
    box_height = float(abs(dims_reg[1]))
    reg_width = float(abs(dims_reg[2]))
    if reg_length <= 1e-6 or box_height <= 1e-6 or reg_width <= 1e-6:
        return None

    face_types = tuple(int(face_type) for face_type in (edge_selection.get("face_types") or ()))
    # NOTE(review): presumably normalizes the stored edge batches into one entry per face — confirm
    # against `_edge_batches_to_list`.
    edge_batches = _edge_batches_to_list(edge_selection.get("edge_points_3d"))
    if len(face_types) != len(edge_batches):
        return None

    # Pad missing partial flags with False so every face has one.
    face_is_partial = tuple(bool(flag) for flag in (edge_selection.get("face_is_partial") or ()))
    if len(face_is_partial) < len(face_types):
        face_is_partial = face_is_partial + (False,) * (len(face_types) - len(face_is_partial))

    # Any malformed or non-finite edge invalidates the whole reconstruction.
    candidates = []
    for face_type, points_3d, is_partial in zip(face_types, edge_batches, face_is_partial):
        pts = np.asarray(points_3d, dtype=np.float32)
        if pts.ndim != 2 or pts.shape[0] < 2 or pts.shape[1] != 3 or not np.isfinite(pts).all():
            return None
        candidates.append({"face_type": int(face_type), "points_3d": pts, "is_partial": bool(is_partial)})

    # Box axes in camera space implied by yaw (Y component is zero: ground-plane directions).
    forward = np.array([np.cos(yaw), 0.0, -np.sin(yaw)], dtype=np.float64)
    right = np.array([np.sin(yaw), 0.0, np.cos(yaw)], dtype=np.float64)
    center_x = None
    center_z = None
    length_m = None
    width_m = None
    length_source = None
    width_source = None
    mode = None
    resolved_face_types = list(face_types)
    resolved_longitudinal_face_type = None
    resolved_side_face_type = None

    face_regressed_dims_by_type = face_regressed_dims_by_type or {}

    def _face_size_prior(candidate, key, fallback, max_ratio=1.25):
        # Per-face regressed size for `key`, used only for non-partial edges and only when it stays
        # within `max_ratio` of the whole-box regressed value; otherwise the fallback is returned.
        if candidate is None or bool(candidate.get("is_partial")):
            return float(fallback)
        prior = face_regressed_dims_by_type.get(int(candidate["face_type"]), {})
        value = prior.get(key)
        if value is None or not np.isfinite(float(value)) or float(value) <= 1e-6:
            return float(fallback)
        prior_value = float(value)
        fallback_value = float(abs(fallback))
        if fallback_value <= 1e-6:
            return prior_value
        ratio = max(prior_value / fallback_value, fallback_value / prior_value)
        if ratio > float(max_ratio):
            return fallback_value
        return prior_value

    # Two-face mode is attempted whenever at least two edges are available; it is abandoned (falling
    # back to one-face mode below) when the roles cannot be resolved.
    role_resolution = _resolve_two_face_candidate_roles(candidates, yaw) if len(candidates) >= 2 else None
    if role_resolution is not None:
        longitudinal_info = {
            **role_resolution["longitudinal"],
            "forward_bev": role_resolution["forward_bev"],
            "right_bev": role_resolution["right_bev"],
        }
        side_info = {
            **role_resolution["side"],
            "forward_bev": role_resolution["forward_bev"],
            "right_bev": role_resolution["right_bev"],
        }
        longitudinal_candidate = longitudinal_info["candidate"]
        side_candidate = side_info["candidate"]
        # Partial edges are not measured: their visible extent is not the full face size.
        side_length_m = None if bool(side_candidate.get("is_partial")) else _edge_segment_length_3d(side_candidate["points_3d"])
        width_from_long_m = (
            None if bool(longitudinal_candidate.get("is_partial")) else _edge_segment_length_3d(longitudinal_candidate["points_3d"])
        )

        length_m, length_source = _select_edge_or_regressed_size(
            side_length_m,
            _face_size_prior(side_candidate, "length", reg_length),
        )
        width_m, width_source = _select_edge_or_regressed_size(
            width_from_long_m,
            _face_size_prior(longitudinal_candidate, "width", reg_width),
        )
        if length_m is None or width_m is None:
            return None

        center_resolution = _resolve_two_face_center_from_geometry(longitudinal_info, side_info, length_m, width_m)
        if center_resolution is None:
            return None
        center_bev = np.asarray(center_resolution["center_bev"], dtype=np.float64)
        if center_bev.shape != (2,) or not np.isfinite(center_bev).all():
            return None
        center_x = float(center_bev[0])
        center_z = float(center_bev[1])
        resolved_longitudinal_face_type = int(center_resolution["longitudinal_face_type"])
        resolved_side_face_type = int(center_resolution["side_face_type"])
        # Overwrite the input labels with the geometry-resolved face types at their original positions.
        resolved_face_types[int(longitudinal_info["index"])] = resolved_longitudinal_face_type
        resolved_face_types[int(side_info["index"])] = resolved_side_face_type
        mode = "two-face"
    else:
        # One-face mode: take the first edge labeled front/rear and the first labeled side.
        longitudinal_candidate = next((candidate for candidate in candidates if candidate["face_type"] in (0, 1)), None)
        side_candidate = next((candidate for candidate in candidates if candidate["face_type"] in (2, 3)), None)

    if mode == "two-face":
        pass
    elif longitudinal_candidate is not None:
        # Front/rear edge only (takes precedence over a side edge when both are present).
        long_mid = np.mean(np.asarray(longitudinal_candidate["points_3d"], dtype=np.float64), axis=0)
        width_from_long_m = (
            None if bool(longitudinal_candidate.get("is_partial")) else _edge_segment_length_3d(longitudinal_candidate["points_3d"])
        )
        if not np.isfinite(long_mid).all():
            return None
        width_m, width_source = _select_edge_or_regressed_size(
            width_from_long_m,
            _face_size_prior(longitudinal_candidate, "width", reg_width),
        )
        if width_m is None:
            return None
        # Front face (0) sits half a length ahead of the center, rear face (1) half a length behind.
        longitudinal_sign = 1.0 if int(longitudinal_candidate["face_type"]) == 0 else -1.0
        center_from_longitudinal = long_mid - longitudinal_sign * forward * (float(reg_length) * 0.5)
        center_x = float(center_from_longitudinal[0])
        center_z = float(center_from_longitudinal[2])
        length_m = float(reg_length)
        width_source = width_source or "regressed"
        length_source = "regressed"
        resolved_longitudinal_face_type = int(longitudinal_candidate["face_type"])
        mode = "front-rear"
    elif side_candidate is not None:
        # Side edge only.
        side_mid = np.mean(np.asarray(side_candidate["points_3d"], dtype=np.float64), axis=0)
        side_length_m = None if bool(side_candidate.get("is_partial")) else _edge_segment_length_3d(side_candidate["points_3d"])
        if not np.isfinite(side_mid).all():
            return None
        length_m, length_source = _select_edge_or_regressed_size(
            side_length_m,
            _face_size_prior(side_candidate, "length", reg_length),
        )
        if length_m is None:
            return None
        # Face 2 sits half a width along +right from the center, face 3 along -right.
        side_sign = 1.0 if int(side_candidate["face_type"]) == 2 else -1.0
        center_from_side = side_mid - side_sign * right * (float(reg_width) * 0.5)
        center_x = float(center_from_side[0])
        center_z = float(center_from_side[2])
        width_m = float(reg_width)
        width_source = "regressed"
        resolved_side_face_type = int(side_candidate["face_type"])
        mode = "side"
    else:
        return None

    # The edges are bottom edges, so the center lies half a box height above their mean Y
    # (subtracting moves up under the Y-down camera convention noted in `rotation_3d_in_axis`).
    all_y = np.concatenate([candidate["points_3d"][:, 1] for candidate in candidates], axis=0)
    if all_y.size == 0 or not np.isfinite(all_y).all():
        if box_center_y_m is None or not np.isfinite(float(box_center_y_m)):
            return None
        center_y = float(box_center_y_m)
    else:
        center_y = float(np.mean(all_y) - box_height * 0.5)

    center = np.array(
        [
            float(center_x),
            float(center_y),
            float(center_z),
        ],
        dtype=np.float32,
    )
    if not np.isfinite(center).all():
        return None

    dims = np.array([float(length_m), float(box_height), float(width_m)], dtype=np.float32)
    corners_3d = compute_3d_box_corners(center, dims, float(yaw), face_type=-1)
    return {
        "center": center,
        "dims": dims,
        "yaw": float(yaw),
        "corners_3d": corners_3d.astype(np.float32),
        "mode": mode,
        "side_length_m": float(length_m),
        "width_m": float(width_m),
        "length_source": length_source,
        "width_source": width_source,
        "face_types": tuple(int(face_type) for face_type in resolved_face_types),
        "longitudinal_face_type": resolved_longitudinal_face_type,
        "side_face_type": resolved_side_face_type,
    }


def reconstruct_two_face_box_from_edge_selection(edge_selection, box_height_m):
    """Two-face-only compatibility wrapper around ``reconstruct_edge_based_box_from_selection``.

    Args:
        edge_selection: Edge selection forwarded unchanged to the generalized reconstruction.
        box_height_m: Box height; length and width are passed as 1.0 placeholders.

    Returns:
        The reconstructed box dict when it was produced in ``"two-face"`` mode, otherwise ``None``.
    """
    # NOTE(review): the 1.0 m length/width placeholders also act as the reference for the sanity
    # band in `_select_edge_or_regressed_size`, so edge-measured sizes outside that band are
    # replaced by 1.0 — confirm this is intended for this legacy entry point.
    placeholder_dims = np.array([1.0, float(box_height_m), 1.0], dtype=np.float32)
    box = reconstruct_edge_based_box_from_selection(
        edge_selection,
        box_center_y_m=None,
        regressed_dims=placeholder_dims,
    )
    if box is not None and box.get("mode") == "two-face":
        return box
    return None


def classify_edge_yaw_prediction_bucket(face_types, is_valid):
    """Name the edge-yaw bucket of one prediction from its selected face types.

    Args:
        face_types: Iterable of selected face types (0/1 = front/rear, 2/3 = sides), or ``None``.
        is_valid: Whether the two-face combination is considered valid.

    Returns:
        ``"two-face"`` when valid with both a front/rear and a side face, ``"side only"`` when only
        side faces are present, ``"front_rear_only"`` whenever a front/rear face is present
        otherwise, and ``None`` when there are no recognized faces.
    """
    kinds = {int(face_type) for face_type in (face_types or ())}
    has_longitudinal = bool(kinds & {0, 1})
    has_side = bool(kinds & {2, 3})
    valid = bool(is_valid)

    if not has_longitudinal:
        return "side only" if has_side else None
    if valid and has_side:
        return "two-face"
    # Also covers an invalid front/rear + side combination.
    return "front_rear_only"


def _align_yaw_to_reference_pi_periodic(yaw, reference_yaw):
    """Return whichever of ``yaw`` and ``yaw + pi`` (both wrapped) lies closer to ``reference_yaw``.

    Args:
        yaw: Candidate yaw in radians.
        reference_yaw: Reference heading in radians.

    Returns:
        The closer pi-equivalent yaw wrapped to [-pi, pi); ``float(yaw)`` unchanged when either
        input is not finite. Ties keep the wrapped ``yaw`` itself.
    """
    if not (np.isfinite(yaw) and np.isfinite(reference_yaw)):
        return float(yaw)

    two_pi = 2 * np.pi
    reference = float(reference_yaw)

    def _gap(angle):
        """Absolute wrapped angular difference to the reference."""
        return abs(float((angle - reference + np.pi) % two_pi - np.pi))

    same = float((float(yaw) + np.pi) % two_pi - np.pi)
    # Adding a full 2*pi before the modulo and then subtracting pi yields the yaw turned by pi.
    flipped = float((float(yaw) + 2 * np.pi) % two_pi - np.pi)
    return flipped if _gap(flipped) < _gap(same) else same


def _draw_edge_points(img, edge_points_2d=None, edge_color=(0, 255, 0), thickness=1):
    """Draw sampled bottom-edge points and the polylines connecting them.

    Args:
        img: Image drawn on in place.
        edge_points_2d: One edge as an (N, 2) array or a batch of edges as (B, N, 2); ``None`` or
            unusable input leaves the image untouched.
        edge_color: Color passed to the OpenCV drawing calls.
        thickness: Polyline thickness; point markers use radius ``max(1, thickness + 1)``.

    Returns:
        The same ``img`` object.
    """
    if edge_points_2d is None:
        return img

    pts = np.asarray(edge_points_2d, dtype=np.float32)
    # Reject NaN *and* +/-inf: infinite coordinates cannot be rounded to meaningful pixel indices.
    if pts.size == 0 or not np.isfinite(pts).all():
        return img
    if pts.ndim == 2:
        pts = pts[None, ...]
    # Each sample must be a 2D pixel coordinate; anything else cannot be drawn.
    if pts.ndim != 3 or pts.shape[1] == 0 or pts.shape[2] != 2:
        return img

    radius = max(1, thickness + 1)
    for poly in pts:
        pts_i = np.round(poly).astype(np.int32)
        cv2.polylines(img, [pts_i], isClosed=False, color=edge_color, thickness=thickness, lineType=cv2.LINE_AA)
        for pt in pts_i:
            # Plain Python ints keep the center argument valid regardless of the OpenCV build.
            cv2.circle(img, (int(pt[0]), int(pt[1])), radius, edge_color, -1, cv2.LINE_AA)
    return img


def decode_3d_target(
    target_42,
    cls_id,
    calib,
    img_w,
    img_h,
    face_3d_classes,
    complete_3d_classes,
    score_thr=FACE_VISIBILITY_SCORE_THRESH,
    bbox_xyxy=None,
):
    """Turn one 42-dim ground-truth label into 3D box geometry for visualization.

    Args:
        target_42: GT label vector. Index 2 is the whole-box depth, 3:6 the dimensions, 6 the yaw and 7:9 the
            normalized whole-box center; the four 8-value face records start at ``FACE_OFFSETS_42``.
        cls_id: Class id of the object.
        calib: Calibration dict (may be falsy); ``depth_scale`` is read from it when present.
        img_w: Image width used to de-normalize u coordinates.
        img_h: Image height used to de-normalize v coordinates.
        face_3d_classes: Class ids decoded from their best visible face.
        complete_3d_classes: Class ids decoded from the whole-box center.
        score_thr: Minimum visibility score for a face to count as visible.
        bbox_xyxy: Optional 2D box forwarded to the cut partial-edge decoder.

    Returns:
        Dict with corners, anchor face info, visible face types, edge points and class id, or None when the
        label has no valid depth, no usable face, an unsupported class, or reconstruction fails.
    """
    label = target_42
    whole_depth = label[2]
    if np.isnan(whole_depth) or whole_depth <= 0:
        return None

    depth_scale = 1.0
    if calib:
        depth_scale = calib.get("depth_scale", 1.0)
    box_dims = label[3:6]
    yaw = label[6]

    if cls_id not in face_3d_classes:
        if cls_id not in complete_3d_classes:
            return None
        # Whole-box classes: rebuild the box from the normalized box center and the whole-box depth.
        center_px = (label[7] * img_w, label[8] * img_h)
        box = reconstruct_3d_box_from_whole(center_px, whole_depth * depth_scale, box_dims, yaw, calib)
        if box is None:
            return None
        return {
            "corners_3d": box,
            "face_center_2d": None,
            "face_color": None,
            "visible_face_type": None,
            "visible_face_types": (),
            "edge_points_2d": None,
            "edge_points_3d": None,
            "cls": cls_id,
        }

    # Face-based classes: collect every visible face and remember the first one with the highest score.
    anchor_type, anchor_score, anchor_face = -1, -1.0, None
    shown_faces = []
    for face_idx, start in enumerate(FACE_OFFSETS_42):
        record = label[start : start + 8]
        visible_flag, vis_score = record[7], record[6]
        if visible_flag != 1 or np.isnan(vis_score) or vis_score < score_thr:
            continue
        face_depth = record[2]
        if np.isnan(face_depth) or face_depth <= 0:
            continue
        shown_faces.append(face_idx)
        if vis_score > anchor_score:
            anchor_type, anchor_score, anchor_face = face_idx, float(vis_score), record

    if anchor_type < 0:
        return None

    u_px = anchor_face[4] * img_w
    v_px = anchor_face[5] * img_h
    box = reconstruct_3d_box_from_face(
        (u_px, v_px), anchor_face[2] * depth_scale, box_dims, yaw, anchor_type, calib
    )
    if box is None:
        return None

    edges_3d, edges_2d = collect_face_bottom_edges(box, shown_faces, calib, num_samples=5)
    cut_edge = decode_cut_partial_side_edge_from_gt(
        target_42,
        cls_id,
        calib,
        img_w,
        img_h,
        face_3d_classes,
        complete_3d_classes,
        bbox_xyxy=bbox_xyxy,
        corners_3d=box,
        score_thr=score_thr,
    )
    if cut_edge is not None:
        edges_3d, edges_2d = _append_edge_batch(edges_3d, edges_2d, cut_edge)
        # Add the partial edge's face type while keeping the list order and uniqueness.
        shown_faces = list(dict.fromkeys([*shown_faces, cut_edge["face_type"]]))

    return {
        "corners_3d": box,
        "face_center_2d": (u_px, v_px),
        "face_color": FACE_COLORS[anchor_type],
        "visible_face_type": anchor_type,
        "visible_face_types": tuple(shown_faces),
        "edge_points_2d": edges_2d,
        "edge_points_3d": edges_3d,
        "cls": cls_id,
    }


def decode_3d_prediction(
    pred_41,
    anchor_xy,
    stride,
    calib,
    img_w,
    img_h,
    face_3d_classes,
    complete_3d_classes,
    cls_id,
    pred_edge_60=None,
    score_thr=FACE_VISIBILITY_SCORE_THRESH,
    bbox_xyxy=None,
):
    """Turn one 41-dim denormalized prediction into 3D box geometry for visualization.

    Args:
        pred_41: Prediction vector. Index 24 is the whole-box depth, 25:27 the whole-box uv offset (grid units)
            and 27:30 the dimensions; yaw is decoded by ``_decode_yaw_from_prediction``.
        anchor_xy: Anchor position in grid coordinates.
        stride: Stride that converts grid coordinates to pixels.
        calib: Calibration dict forwarded to the projection helpers.
        img_w: Image width forwarded to the cut-state and visible-face helpers.
        img_h: Image height (not used by this function).
        face_3d_classes: Class ids decoded from a predicted face anchor.
        complete_3d_classes: Class ids decoded from the whole-box center.
        cls_id: Class id of the prediction.
        pred_edge_60: Optional edge prediction; when given, decoded edges replace the geometric ones.
        score_thr: Visibility threshold forwarded to the visible-face selection.
        bbox_xyxy: Optional 2D box forwarded to the cut-state and visible-face helpers.

    Returns:
        Dict with corners, anchor face info, visible face types, edge points and class id, or None when no
        anchor face is usable, the class is unsupported, or reconstruction fails.
    """
    vec = pred_41
    yaw = _decode_yaw_from_prediction(vec)
    whole_depth = vec[24]
    whole_offset = vec[25:27]
    whole_dims = vec[27:30]
    whole_u = (anchor_xy[0] + whole_offset[0]) * stride
    whole_v = (anchor_xy[1] + whole_offset[1]) * stride

    if cls_id not in face_3d_classes:
        if cls_id not in complete_3d_classes:
            return None
        box = reconstruct_3d_box_from_whole((whole_u, whole_v), whole_depth, whole_dims, yaw, calib)
        if box is None:
            return None
        return {
            "corners_3d": box,
            "face_center_2d": None,
            "face_color": None,
            "visible_face_type": None,
            "visible_face_types": (),
            "edge_points_2d": None,
            "edge_points_3d": None,
            "cls": cls_id,
        }

    _, cut_side = _resolve_pred_cut_state_for_decode(vec, bbox_xyxy=bbox_xyxy, img_w=img_w)
    shown_faces = select_pred_visible_faces_for_decode(vec, score_thr=score_thr, bbox_xyxy=bbox_xyxy, img_w=img_w)
    anchor = select_best_score_pred_face_anchor(vec, anchor_xy, stride, calib, shown_faces)
    if anchor is None:
        return None

    anchor_type = int(anchor["face_type"])
    anchor_center = np.asarray(anchor["center_3d"], dtype=np.float32)
    if not (anchor_center.shape == (3,) and np.isfinite(anchor_center).all()):
        return None
    box = compute_3d_box_corners(anchor_center, whole_dims, yaw, anchor_type)

    # Default edges come from the reconstructed box geometry.
    edges_3d, edges_2d = collect_face_bottom_edges(box, [kind for kind, _ in shown_faces], calib, num_samples=5)

    def _lift_edge(edge):
        # Back-project every sampled edge pixel; the whole edge is rejected if any sample fails.
        lifted = [
            back_project_2d_to_3d(tuple(pixel), depth, calib)
            for pixel, depth in zip(edge["points_2d"], edge["depths"])
        ]
        return None if any(sample is None for sample in lifted) else lifted

    if pred_edge_60 is not None:
        batches_2d, batches_3d = [], []
        for kind, _ in shown_faces:
            edge = decode_visible_face_edge_from_prediction(pred_edge_60, kind, anchor_xy, stride)
            lifted = None if edge is None else _lift_edge(edge)
            if lifted is None:
                continue
            batches_2d.append(edge["points_2d"].astype(np.float32, copy=False))
            batches_3d.append(np.asarray(lifted, dtype=np.float32))
        if batches_2d:
            # Predicted edges, when any decoded, override the geometric ones.
            edges_2d = _stack_edge_batches(batches_2d)
            edges_3d = _stack_edge_batches(batches_3d)

        cut_edge = decode_cut_partial_side_edge_from_prediction(
            vec,
            pred_edge_60,
            anchor_xy,
            stride,
            img_w,
            cut_side=cut_side,
            corners_3d=box,
        )
        lifted_cut = None if cut_edge is None else _lift_edge(cut_edge)
        if lifted_cut is not None:
            cut_edge = dict(cut_edge)
            cut_edge["points_3d"] = np.asarray(lifted_cut, dtype=np.float32)
            # Only add the partial side edge when that face is not already among the visible faces.
            if cut_edge["face_type"] not in {kind for kind, _ in shown_faces}:
                edges_3d, edges_2d = _append_edge_batch(edges_3d, edges_2d, cut_edge)
                shown_faces = [*shown_faces, (cut_edge["face_type"], 1.0)]

    return {
        "corners_3d": box,
        "face_center_2d": tuple(np.asarray(anchor["center_2d"], dtype=np.float32).tolist()),
        "face_color": FACE_COLORS[anchor_type],
        "visible_face_type": anchor_type,
        "visible_face_types": tuple(kind for kind, _ in shown_faces),
        "edge_points_2d": edges_2d,
        "edge_points_3d": edges_3d,
        "cls": cls_id,
    }


def draw_3d_box(
    img,
    corners_3d,
    calib,
    face_center_2d=None,
    face_color=None,
    edge_points_2d=None,
    edge_color=(0, 255, 0),
    thickness=1,
):
    """Project a 3D box onto an image and draw its wireframe plus optional markers.

    Args:
        img: Image to draw on (modified in place).
        corners_3d: Array of 8 box corners; rows are reordered to [4, 5, 6, 7, 0, 1, 2, 3] before drawing.
        calib: Calibration dict or None; at least 4 ``distort_coeffs`` select the distortion-aware path.
        face_center_2d: Optional pixel position of the anchor face center.
        face_color: Color of the face-center marker; the marker needs both this and ``face_center_2d``.
        edge_points_2d: Optional sampled bottom-edge points passed to ``_draw_edge_points``.
        edge_color: Color of the edge points.
        thickness: Line thickness.

    Returns:
        The same ``img``. If the undistorted projection contains NaN, the image is returned before anything
        is drawn.
    """
    reordered = corners_3d[[4, 5, 6, 7, 0, 1, 2, 3]]
    palette = {"color_front": (0, 0, 255), "color_back": (255, 0, 0), "color_side": (255, 255, 0)}

    coeffs = [] if calib is None else calib.get("distort_coeffs", [])
    use_distortion = coeffs is not None and len(coeffs) >= 4
    if use_distortion:
        sampled_edges = project_3d_box_edges_with_distortion(reordered, calib, samples_per_edge=15)
        plot_box3d_on_img_with_distortion(img, sampled_edges, **palette, thickness=thickness)
    else:
        projected = project_3d_to_2d(reordered, calib)
        if np.any(np.isnan(projected)):
            return img
        plot_box3d_on_img(img, projected, **palette, thickness=thickness)

    if not (face_center_2d is None or face_color is None):
        marker = (int(face_center_2d[0]), int(face_center_2d[1]))
        cv2.circle(img, marker, 2, face_color, -1, cv2.LINE_AA)

    _draw_edge_points(img, edge_points_2d=edge_points_2d, edge_color=edge_color, thickness=thickness)
    return img


def plot_3d_boxes_on_image(img_tensor, decoded_results, calib=None, label_text=None, scale_factor=2):
    """Draw decoded 3D boxes on an image tensor.

    Args:
        img_tensor: (3, H, W) or (N, 3, H, W) tensor normalized [0, 1] BGR. For a batched tensor only the
            first image is used.
        decoded_results: List of dicts from decode_3d_target/decode_3d_prediction. ``None`` entries and
            entries without ``corners_3d`` are skipped; ``None`` for the whole list draws no boxes.
        calib: Dict with fx, fy, cx, cy (optionally distort_coeffs and depth_scale). The intrinsics are
            multiplied by ``scale_factor``. When None, a placeholder pinhole model derived from the upscaled
            image size is used.
        label_text: Optional text overlay (e.g., "3D GT" or "3D Pred").
        scale_factor: Upscale factor for clearer visualization; may be fractional.

    Returns:
        (round(H*scale), round(W*scale), 3) RGB uint8 numpy image. The image is always returned, even when
        no box was drawn.
    """
    if img_tensor.ndim == 4:
        img_tensor = img_tensor[0]

    # detach() so tensors that track gradients can still be converted to NumPy.
    im = img_tensor.detach().cpu().numpy().transpose(1, 2, 0)
    # Clip before the uint8 cast so values slightly outside [0, 1] saturate instead of wrapping around.
    im = np.ascontiguousarray(np.clip(im * 255, 0, 255), dtype=np.uint8)
    h, w = im.shape[:2]

    # OpenCV needs integer sizes and thickness; rounding keeps integer scale factors unchanged.
    h_new = max(1, int(round(h * scale_factor)))
    w_new = max(1, int(round(w * scale_factor)))
    line_thickness = max(1, int(round(scale_factor)))
    im = cv2.resize(im, (w_new, h_new), interpolation=cv2.INTER_LINEAR)

    # Scale calibration
    if calib is not None:
        calib_s = {
            "fx": calib["fx"] * scale_factor,
            "fy": calib["fy"] * scale_factor,
            "cx": calib["cx"] * scale_factor,
            "cy": calib["cy"] * scale_factor,
            "distort_coeffs": calib.get("distort_coeffs", []),
            "depth_scale": calib.get("depth_scale", 1.0),
        }
    else:
        calib_s = {"fx": w_new * 1.2, "fy": w_new * 1.2, "cx": w_new / 2, "cy": h_new / 2, "distort_coeffs": []}

    for d in decoded_results or ():
        if d is None or d.get("corners_3d") is None:
            continue
        # 2D annotations were decoded in original-image pixels, so move them into the upscaled frame.
        fc = d.get("face_center_2d")
        if fc is not None:
            fc = (fc[0] * scale_factor, fc[1] * scale_factor)
        edge_points_2d = d.get("edge_points_2d")
        if edge_points_2d is not None:
            edge_points_2d = np.asarray(edge_points_2d, dtype=np.float32) * scale_factor
        draw_3d_box(
            im,
            d["corners_3d"],
            calib_s,
            fc,
            d.get("face_color"),
            edge_points_2d=edge_points_2d,
            thickness=line_thickness,
        )

    if label_text:
        cv2.putText(im, label_text, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 255), 3, cv2.LINE_AA)

    return cv2.cvtColor(im, cv2.COLOR_BGR2RGB)


def decode_3d_prediction_batch(preds_3d_sel, anchors, strides, cls_ids, calib, img_w, img_h,
                                face_3d_classes, complete_3d_classes):
    """Decode a set of 3D predictions one by one for visualization.

    Args:
        preds_3d_sel: (k, 41) numpy array — denormalized 3D predictions.
        anchors: (2, k) numpy array — anchor xy in grid coords.
        strides: (k,) numpy array — stride per anchor.
        cls_ids: (k,) numpy array — class IDs.
        calib: Dict with fx, fy, cx, cy.
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        face_3d_classes: Set of class IDs with face annotations.
        complete_3d_classes: Set of class IDs with whole-box 3D only.

    Returns:
        List with one entry per prediction: the dict produced by decode_3d_prediction, or None when that
        prediction could not be decoded.
    """
    return [
        decode_3d_prediction(
            pred_row,
            anchors[:, idx],
            float(strides[idx]),
            calib,
            img_w,
            img_h,
            face_3d_classes,
            complete_3d_classes,
            int(cls_ids[idx]),
        )
        for idx, pred_row in enumerate(preds_3d_sel)
    ]


def decode_pred_face_anchor(pred_41, anchor_xy, stride, calib, face_type):
    """Decode the predicted center of one face so it can anchor a visualization box.

    Args:
        pred_41: Prediction vector; the face record starts at ``FACE_OFFSETS_41[face_type]`` and holds the
            face depth followed by its uv offset (grid units).
        anchor_xy: Anchor position in grid coordinates.
        stride: Stride that converts grid coordinates to pixels.
        calib: Calibration passed to ``back_project_2d_to_3d``.
        face_type: Face id; must be one of 0-3.

    Returns:
        Dict with ``face_type``, ``center_3d`` (float32, shape (3,)) and ``center_2d`` (float32 pixel uv), or
        None when the face type is invalid, the depth is non-finite or non-positive, the offsets are
        non-finite, or back-projection fails.
    """
    if face_type not in range(4):
        return None

    start = FACE_OFFSETS_41[int(face_type)]
    depth = float(pred_41[start])
    uv_delta = np.asarray(pred_41[start + 1 : start + 3], dtype=np.float32)
    if not np.isfinite(depth) or depth <= 0 or not np.isfinite(uv_delta).all():
        return None

    pixel_u = float((anchor_xy[0] + uv_delta[0]) * stride)
    pixel_v = float((anchor_xy[1] + uv_delta[1]) * stride)
    lifted = back_project_2d_to_3d((pixel_u, pixel_v), depth, calib)
    if lifted is None:
        return None

    center = np.asarray(lifted, dtype=np.float32)
    if not (center.shape == (3,) and np.isfinite(center).all()):
        return None
    return {
        "face_type": int(face_type),
        "center_3d": center,
        "center_2d": np.array([pixel_u, pixel_v], dtype=np.float32),
    }


def select_best_score_pred_face_anchor(
    pred_41,
    anchor_xy,
    stride,
    calib,
    visible_faces,
):
    """Decode the anchor for the visible face with the highest score.

    Args:
        pred_41: Prediction vector forwarded to ``decode_pred_face_anchor``.
        anchor_xy: Anchor position in grid coordinates.
        stride: Stride that converts grid coordinates to pixels.
        calib: Calibration forwarded to ``decode_pred_face_anchor``.
        visible_faces: Sequence of ``(face_type, score)`` pairs; face types outside 0-3 are ignored.

    Returns:
        The dict from ``decode_pred_face_anchor`` for the best face, or None when there is no valid face or
        its anchor cannot be decoded.
    """
    if not visible_faces:
        return None

    winner_type, winner_score = -1, None
    for raw_type, raw_score in visible_faces:
        kind = int(raw_type)
        if kind not in range(4):
            continue
        value = float(raw_score)
        # The first valid face is always taken; later faces must strictly beat the running best.
        if winner_score is None or value > winner_score:
            winner_type, winner_score = kind, value

    if winner_type not in range(4):
        return None
    return decode_pred_face_anchor(pred_41, anchor_xy, stride, calib, winner_type)


def _decode_yaw_from_prediction(pred_41):
    """Decode whole-box yaw from the bin logits and sine residuals of a 41-dim prediction.

    The winning bin is the argmax of ``pred_41[30:34]``. Its residual from ``pred_41[34:38]`` is clipped to
    [-1, 1], passed through arcsin, and added to the matching entry of ``YAW_BIN_OFFSETS``.
    """
    winning_bin = int(np.argmax(pred_41[30:34]))
    # Clip so arcsin stays defined even if a residual lies slightly outside [-1, 1].
    sin_residuals = np.clip(pred_41[34:38], -1.0, 1.0)
    residual_angle = np.arcsin(sin_residuals[winning_bin])
    return residual_angle + YAW_BIN_OFFSETS[winning_bin]


def decode_visible_face_yaw_from_prediction(pred_41, pred_edge_60, anchor_xy, stride, face_type, calib):
    """Estimate yaw from the predicted bottom edge of a single face.

    Args:
        pred_41: Prediction vector (not used by this function).
        pred_edge_60: Edge prediction; None yields NaN.
        anchor_xy: Anchor position in grid coordinates.
        stride: Stride that converts grid coordinates to pixels.
        face_type: Face id; values outside 0-3 yield NaN.
        calib: Calibration used to lift the decoded edge to 3D.

    Returns:
        Yaw from ``edge_points_to_yaw``, or NaN when the edge is unavailable or cannot be lifted to 3D.
    """
    not_a_number = float("nan")
    if pred_edge_60 is None or face_type not in range(4):
        return not_a_number

    edge = decode_visible_face_edge_from_prediction(pred_edge_60, face_type, anchor_xy, stride)
    edge_3d = _decoded_edge_to_points_3d(edge, calib)
    return not_a_number if edge_3d is None else edge_points_to_yaw(edge_3d, face_type)


def decode_visible_face_yaw_from_gt(
    target_42,
    cls_id,
    calib,
    img_w,
    img_h,
    face_3d_classes,
    complete_3d_classes,
    face_type,
    score_thr=FACE_VISIBILITY_SCORE_THRESH,
    bbox_xyxy=None,
):
    """Estimate yaw from the ground-truth bottom edge of a single face.

    All arguments are forwarded to ``decode_visible_face_edge_from_gt``.

    Returns:
        Yaw from ``edge_points_to_yaw`` applied to the decoded edge, or NaN when no edge could be decoded.
    """
    edge = decode_visible_face_edge_from_gt(
        target_42,
        cls_id,
        calib,
        img_w,
        img_h,
        face_3d_classes,
        complete_3d_classes,
        face_type=face_type,
        score_thr=score_thr,
        bbox_xyxy=bbox_xyxy,
    )
    return float("nan") if edge is None else edge_points_to_yaw(edge["points_3d"], edge["face_type"])


def decode_edge_yaw_selection_from_prediction(
    pred_41,
    pred_edge_60,
    anchor_xy,
    stride,
    calib,
    score_thr=EDGE_YAW_VALID_VISIBILITY_SCORE_THRESH,
    bbox_xyxy=None,
    img_w=None,
    img_h=None,
    max_lateral_dist_m=None,
    cut_side_min_visible_length_ratio=EDGE_YAW_CUT_SIDE_MIN_VISIBLE_LENGTH_RATIO,
    max_faces=2,
):
    """Select the face-edge geometry used for prediction-time edge-yaw re-estimation.

    The selection intentionally uses a face-based primary face plus an optional strict two-face companion:
    - choose the first face exactly as face-based reconstruction would choose its visible-face anchor
    - then choose at most one companion face from the opposite face family using the stricter threshold
    - for cut states, the cut classification chooses the longitudinal face first
    - for true border-cut objects, prefer the decoded partial side edge over a full side edge

    Args:
        pred_41: Denormalized prediction vector. This function reads index 25 (whole-box u offset, only to
            infer an image width) and index 27 (its absolute value is used as the box length); everything
            else is consumed by helpers.
        pred_edge_60: Edge prediction; when None the empty result is returned immediately.
        anchor_xy: Anchor position in grid coordinates.
        stride: Stride that converts grid coordinates to pixels.
        calib: Calibration forwarded to the anchor/edge decoding helpers.
        score_thr: Visibility threshold for companion-face candidates. The primary face always uses
            ``FACE_VISIBILITY_SCORE_THRESH``.
        bbox_xyxy: Optional 2D box; forwarded to helpers and used to infer the image width when ``img_w`` is
            None.
        img_w: Optional image width; inferred when None.
        img_h: Optional image height; passed as-is (possibly None) to the drawability check.
        max_lateral_dist_m: Optional limit on the anchor face's lateral distance; None disables the gate.
        cut_side_min_visible_length_ratio: Minimum ratio of visible cut-side edge length to box length; the
            ratio must be strictly greater.
        max_faces: Maximum number of faces to select; a companion is only searched while fewer are selected.

    Returns:
        Dict with keys ``yaw``, ``face_types``, ``face_is_partial``, ``edge_points_2d``, ``edge_points_3d``,
        ``two_face_eligible``, ``lateral_distance_m``, ``lateral_ok``, ``cut_side_visible_length_m``,
        ``cut_side_visible_length_ratio``, ``cut_side_visible_ratio_ok`` and ``is_valid``. ``is_valid`` is
        True only when one longitudinal and one side face were selected, the yaw is finite and the lateral
        gate passed.
    """
    # Template for "nothing selected"; lateral_ok defaults to True only when no lateral limit is configured.
    empty = {
        "yaw": float("nan"),
        "face_types": (),
        "face_is_partial": (),
        "edge_points_2d": None,
        "edge_points_3d": None,
        "two_face_eligible": False,
        "lateral_distance_m": None,
        "lateral_ok": False if max_lateral_dist_m is not None else True,
        "cut_side_visible_length_m": None,
        "cut_side_visible_length_ratio": None,
        "cut_side_visible_ratio_ok": None,
        "is_valid": False,
    }
    if pred_edge_60 is None:
        return empty

    # When the caller gives no image width, infer one: element 2 of bbox_xyxy if a box is given, otherwise
    # twice the decoded whole-box center u. Both are clamped to at least 1.0.
    inferred_img_w = float(img_w) if img_w is not None else None
    inferred_img_h = float(img_h) if img_h is not None else None
    if inferred_img_w is None:
        if bbox_xyxy is not None:
            inferred_img_w = max(float(np.asarray(bbox_xyxy, dtype=np.float64)[2]), 1.0)
        else:
            inferred_img_w = max(float((anchor_xy[0] + pred_41[25]) * stride) * 2.0, 1.0)

    # Primary-face selection reuses the decode-time visible-face list at the face-based threshold
    # (FACE_VISIBILITY_SCORE_THRESH), independent of score_thr.
    decode_visible_faces = list(
        select_pred_visible_faces_for_decode(
            pred_41,
            score_thr=FACE_VISIBILITY_SCORE_THRESH,
            bbox_xyxy=bbox_xyxy,
            img_w=inferred_img_w,
        )
    )
    anchor_face = select_best_score_pred_face_anchor(pred_41, anchor_xy, stride, calib, decode_visible_faces)
    # Lateral gate: passes when no limit is configured, or when the anchor's lateral distance is known and
    # strictly below the limit.
    lateral_distance_m = None if anchor_face is None else _prediction_lateral_distance_m_from_center(anchor_face.get("center_3d"))
    lateral_ok = bool(
        max_lateral_dist_m is None or (lateral_distance_m is not None and lateral_distance_m < float(max_lateral_dist_m))
    )
    # Highest-scoring decode-visible face among types 0-3; -1 when there is none.
    primary_candidate_face_type = max(
        ((int(face_type), float(score)) for face_type, score in decode_visible_faces if int(face_type) in range(4)),
        key=lambda item: item[1],
        default=(-1, float("-inf")),
    )[0]

    # Companion candidates use score_thr. If the cut state names a longitudinal primary face and both
    # longitudinal faces (0 and 1) passed the threshold, only the named one is kept.
    raw_cut_state = get_pred_cut_state(pred_41)
    primary_face = get_pred_cut_primary_face(raw_cut_state)
    visible_faces = list(select_pred_visible_faces(pred_41, score_thr=score_thr))
    longitudinal_faces = {face_type for face_type, _ in visible_faces if face_type in (0, 1)}
    if primary_face in longitudinal_faces and len(longitudinal_faces) > 1:
        visible_faces = [(face_type, score) for face_type, score in visible_faces if face_type not in (0, 1) or face_type == primary_face]

    # Decode one face's predicted edge into a candidate dict (2D + 3D points), or None when the edge is
    # missing, not drawable, or cannot be lifted to 3D.
    def _decode_face_candidate(face_type, score, require_in_image=True):
        if face_type not in range(4):
            return None
        decoded = decode_visible_face_edge_from_prediction(pred_edge_60, face_type, anchor_xy, stride)
        if decoded is None:
            return None
        if require_in_image:
            drawable = _decoded_edge_points_are_drawable(decoded["points_2d"], inferred_img_w, inferred_img_h)
        else:
            # The primary edge should follow face-based anchor selection even when one sample lands just
            # outside the image. Companions stay fully in-image so the strict two-face case remains stable.
            drawable = _decoded_edge_points_are_drawable(decoded["points_2d"])
        if not drawable:
            return None
        points_3d = _decoded_edge_to_points_3d(decoded, calib)
        if points_3d is None:
            return None
        return {
            "face_type": int(face_type),
            "score": float(score),
            "is_partial": False,
            "points_2d": np.asarray(decoded["points_2d"], dtype=np.float32),
            "points_3d": np.asarray(points_3d, dtype=np.float32),
        }

    # Companion pool keyed by face type (in-image check enabled).
    face_candidates = {}
    for face_type, score in visible_faces:
        candidate = _decode_face_candidate(face_type, score)
        if candidate is not None:
            face_candidates[int(face_type)] = candidate

    # Decode the primary face with the relaxed drawability check; on success remove that face type from the
    # companion pool so it cannot be selected twice.
    primary_candidate = None
    if primary_candidate_face_type in range(4):
        primary_score = next(
            (float(score) for face_type, score in decode_visible_faces if int(face_type) == int(primary_candidate_face_type)),
            float("-inf"),
        )
        primary_candidate = _decode_face_candidate(
            int(primary_candidate_face_type),
            primary_score,
            require_in_image=False,
        )
        if primary_candidate is not None:
            face_candidates.pop(int(primary_candidate_face_type), None)

    # For a non-normal resolved cut state, try to decode the partially visible side edge and measure how
    # much of the box length (|pred_41[27]|) it covers.
    resolved_cut_state, cut_side = _resolve_pred_cut_state_for_decode(pred_41, bbox_xyxy=bbox_xyxy, img_w=inferred_img_w)
    partial_candidate = None
    cut_side_visible_length_m = None
    cut_side_visible_length_ratio = None
    cut_side_visible_ratio_ok = None
    if resolved_cut_state != CUT_STATE_NORMAL:
        # NOTE: cut_corners is only bound inside this branch; the later use is guarded by the same condition.
        cut_corners = _reconstruct_pred_corners_for_cut_edge(pred_41, anchor_xy, stride, calib, cut_state=resolved_cut_state)
        partial_edge = decode_cut_partial_side_edge_from_prediction(
            pred_41,
            pred_edge_60,
            anchor_xy,
            stride,
            img_w=inferred_img_w,
            cut_side=cut_side,
            corners_3d=cut_corners,
        )
        if partial_edge is not None and not _decoded_edge_points_are_drawable(
            partial_edge["points_2d"], inferred_img_w, inferred_img_h
        ):
            partial_edge = None
        partial_points_3d = _decoded_edge_to_points_3d(partial_edge, calib)
        cut_side_visible_length_m = _edge_segment_length_3d(partial_points_3d)
        box_length_m = float(abs(pred_41[27])) if np.isfinite(pred_41[27]) else None
        if cut_side_visible_length_m is not None and box_length_m is not None and box_length_m > 1e-6:
            cut_side_visible_length_ratio = float(cut_side_visible_length_m / box_length_m)
            cut_side_visible_ratio_ok = bool(cut_side_visible_length_ratio > float(cut_side_min_visible_length_ratio))
        else:
            # Unknown visible length or degenerate box length counts as failing the ratio gate.
            cut_side_visible_ratio_ok = False
        if partial_edge is not None and partial_points_3d is not None:
            partial_face_type = int(partial_edge["face_type"])
            # Reuse the full-edge candidate's score for this face if one exists, otherwise 1.0.
            partial_score = face_candidates.get(partial_face_type, {}).get("score", 1.0)
            partial_candidate = {
                "face_type": partial_face_type,
                "score": float(partial_score),
                "is_partial": True,
                "points_2d": np.asarray(partial_edge["points_2d"], dtype=np.float32),
                "points_3d": np.asarray(partial_points_3d, dtype=np.float32),
            }
            # The partial edge replaces any full-edge candidate of the same face type.
            face_candidates.pop(partial_face_type, None)

    # A partial side edge that fails the visible-length ratio gate is discarded.
    if resolved_cut_state != CUT_STATE_NORMAL and not cut_side_visible_ratio_ok:
        partial_candidate = None

    selected_candidates = []

    # Highest score wins; equal scores are broken in favor of the lower face type.
    def _best_candidate(candidates):
        if not candidates:
            return None
        return max(candidates, key=lambda item: (float(item["score"]), -int(item["face_type"])))

    # For a raw (unresolved) cut state, determine which side face the cut geometry expects, reusing the
    # corners reconstructed above when available.
    cut_expected_side_face = None
    if raw_cut_state != CUT_STATE_NORMAL:
        cut_corners_for_side = (
            cut_corners
            if resolved_cut_state != CUT_STATE_NORMAL and cut_corners is not None
            else _reconstruct_pred_corners_for_cut_edge(pred_41, anchor_xy, stride, calib, cut_state=raw_cut_state)
        )
        cut_expected_side_face = get_cut_object_side_face(raw_cut_state, corners_3d=cut_corners_for_side)

    if primary_candidate is not None:
        selected_candidates.append(primary_candidate)

    if len(selected_candidates) < int(max_faces):
        secondary_candidate = None
        if primary_candidate is not None and int(primary_candidate["face_type"]) in (0, 1):
            # Longitudinal primary -> side companion. Preference order: partial side edge, then the
            # cut-expected side face, then the best remaining side face. The last two are only allowed when
            # the resolved state is normal or the ratio gate passed.
            secondary_candidate = partial_candidate
            if secondary_candidate is None and cut_expected_side_face in (2, 3) and (resolved_cut_state == CUT_STATE_NORMAL or cut_side_visible_ratio_ok):
                secondary_candidate = face_candidates.pop(int(cut_expected_side_face), None)
            if secondary_candidate is None and (resolved_cut_state == CUT_STATE_NORMAL or cut_side_visible_ratio_ok):
                secondary_candidate = _best_candidate([candidate for candidate in face_candidates.values() if candidate["face_type"] in (2, 3)])
                if secondary_candidate is not None:
                    face_candidates.pop(int(secondary_candidate["face_type"]), None)
        elif primary_candidate is not None and int(primary_candidate["face_type"]) in (2, 3):
            # Side primary -> longitudinal companion. Preference order: the cut-designated primary face,
            # then the best remaining longitudinal face.
            longitudinal_candidate = None
            if primary_face is not None:
                longitudinal_candidate = face_candidates.pop(int(primary_face), None)
            if longitudinal_candidate is None:
                longitudinal_candidate = _best_candidate([candidate for candidate in face_candidates.values() if candidate["face_type"] in (0, 1)])
                if longitudinal_candidate is not None:
                    face_candidates.pop(int(longitudinal_candidate["face_type"]), None)
            secondary_candidate = longitudinal_candidate
        if secondary_candidate is not None:
            selected_candidates.append(secondary_candidate)

    # Nothing selected: return the empty template but keep the cut-side diagnostics computed above.
    if not selected_candidates:
        return {
            **empty,
            "cut_side_visible_length_m": cut_side_visible_length_m,
            "cut_side_visible_length_ratio": cut_side_visible_length_ratio,
            "cut_side_visible_ratio_ok": cut_side_visible_ratio_ok,
        }

    edge_points_3d = _stack_edge_batches([candidate["points_3d"] for candidate in selected_candidates])
    edge_points_2d = _stack_edge_batches([candidate["points_2d"] for candidate in selected_candidates])
    face_types = tuple(int(candidate["face_type"]) for candidate in selected_candidates)
    face_is_partial = tuple(bool(candidate.get("is_partial", False)) for candidate in selected_candidates)

    if len(selected_candidates) >= 2:
        # Two faces: estimate yaw from the longitudinal/side pair, using the decoded whole-box yaw as
        # reference; fall back to the generic multi-edge estimator when that result is not finite.
        longitudinal_selected = next((candidate for candidate in selected_candidates if candidate["face_type"] in (0, 1)), None)
        side_selected = next((candidate for candidate in selected_candidates if candidate["face_type"] in (2, 3)), None)
        yaw = _estimate_two_edge_yaw_from_candidates(
            longitudinal_selected,
            side_selected,
            reference_yaw=_decode_yaw_from_prediction(pred_41),
        )
        if not np.isfinite(yaw):
            yaw = visible_face_edges_to_yaw(
                {candidate["face_type"]: candidate["points_3d"] for candidate in selected_candidates},
                face_scores={candidate["face_type"]: candidate["score"] for candidate in selected_candidates},
            )
    else:
        # Single face: cut-in/cut-out objects first try the cut-aware estimator, then the plain single-edge
        # estimator; normal objects use the plain estimator directly.
        only_candidate = selected_candidates[0]
        if raw_cut_state in (CUT_STATE_IN, CUT_STATE_OUT):
            yaw = _estimate_single_edge_yaw_with_cut_primary_face(
                only_candidate,
                cut_state=raw_cut_state,
                reference_yaw=_decode_yaw_from_prediction(pred_41),
            )
            if not np.isfinite(yaw):
                yaw = edge_points_to_yaw(only_candidate["points_3d"], only_candidate["face_type"])
        else:
            yaw = edge_points_to_yaw(only_candidate["points_3d"], only_candidate["face_type"])

    # Valid only for a true two-face selection (one longitudinal + one side face) with finite yaw that also
    # passed the lateral gate.
    has_longitudinal = any(candidate["face_type"] in (0, 1) for candidate in selected_candidates)
    has_side = any(candidate["face_type"] in (2, 3) for candidate in selected_candidates)
    two_face_eligible = len(selected_candidates) >= 2 and has_longitudinal and has_side
    is_valid = bool(two_face_eligible and np.isfinite(yaw) and lateral_ok)
    return {
        "yaw": float(yaw),
        "face_types": face_types,
        "face_is_partial": face_is_partial,
        "edge_points_2d": edge_points_2d,
        "edge_points_3d": edge_points_3d,
        "two_face_eligible": bool(two_face_eligible),
        "lateral_distance_m": lateral_distance_m,
        "lateral_ok": lateral_ok,
        "cut_side_visible_length_m": cut_side_visible_length_m,
        "cut_side_visible_length_ratio": cut_side_visible_length_ratio,
        "cut_side_visible_ratio_ok": cut_side_visible_ratio_ok,
        "is_valid": bool(is_valid),
    }


def decode_multi_visible_face_yaw_from_prediction(
    pred_41,
    pred_edge_60,
    anchor_xy,
    stride,
    calib,
    fallback_face_type=None,
    score_thr=FACE_VISIBILITY_SCORE_THRESH,
    bbox_xyxy=None,
    img_w=None,
):
    """Decode visible-face yaw using the same direct two-edge logic as prediction-time edge-yaw selection.

    Resolution order:
    1. the two-face yaw from ``decode_edge_yaw_selection_from_prediction`` when it is eligible and finite
    2. the single-face yaw of ``fallback_face_type`` when that is a valid face id (0-3)
    3. the multi-edge estimate over all decode-visible faces whose edges can be lifted to 3D

    Args:
        pred_41: Denormalized prediction vector; index 25 is read only to infer an image width.
        pred_edge_60: Edge prediction, or None.
        anchor_xy: Anchor position in grid coordinates.
        stride: Stride that converts grid coordinates to pixels.
        calib: Calibration forwarded to the edge decoding helpers.
        fallback_face_type: Optional face id used for the single-face fallback.
        score_thr: Visibility threshold forwarded to the selection and visible-face helpers.
        bbox_xyxy: Optional 2D box; also used to infer the image width when ``img_w`` is None.
        img_w: Optional image width; inferred when None.

    Returns:
        The decoded yaw, or NaN when no estimate is available.
    """
    use_fallback = fallback_face_type in range(4)

    if pred_edge_60 is None:
        return (
            decode_visible_face_yaw_from_prediction(pred_41, pred_edge_60, anchor_xy, stride, fallback_face_type, calib)
            if use_fallback
            else float("nan")
        )

    # Infer an image width when none is given: element 2 of bbox_xyxy if available, otherwise twice the
    # decoded whole-box center u. Both are clamped to at least 1.0.
    inferred_img_w = float(img_w) if img_w is not None else None
    if inferred_img_w is None:
        if bbox_xyxy is not None:
            inferred_img_w = max(float(np.asarray(bbox_xyxy, dtype=np.float64)[2]), 1.0)
        else:
            inferred_img_w = max(float((anchor_xy[0] + pred_41[25]) * stride) * 2.0, 1.0)

    selection = decode_edge_yaw_selection_from_prediction(
        pred_41,
        pred_edge_60,
        anchor_xy,
        stride,
        calib,
        score_thr=score_thr,
        bbox_xyxy=bbox_xyxy,
        img_w=inferred_img_w,
    )
    if selection.get("two_face_eligible") and np.isfinite(selection.get("yaw", float("nan"))):
        return float(selection["yaw"])

    # A valid fallback face takes precedence over the multi-edge estimate, so return it before decoding
    # every visible edge; that work would otherwise be thrown away.
    if use_fallback:
        return decode_visible_face_yaw_from_prediction(pred_41, pred_edge_60, anchor_xy, stride, fallback_face_type, calib)

    face_edges_3d, face_scores = {}, {}
    for face_type, score in select_pred_visible_faces_for_decode(
        pred_41, score_thr=score_thr, bbox_xyxy=bbox_xyxy, img_w=inferred_img_w
    ):
        decoded = decode_visible_face_edge_from_prediction(pred_edge_60, face_type, anchor_xy, stride)
        points_3d = _decoded_edge_to_points_3d(decoded, calib)
        if points_3d is None:
            continue
        face_edges_3d[face_type] = points_3d
        face_scores[face_type] = float(score)

    return visible_face_edges_to_yaw(face_edges_3d, face_scores=face_scores)


def decode_multi_visible_face_yaw_from_gt(
    target_42,
    cls_id,
    calib,
    img_w,
    img_h,
    face_3d_classes,
    complete_3d_classes,
    fallback_face_type=None,
    score_thr=FACE_VISIBILITY_SCORE_THRESH,
    bbox_xyxy=None,
):
    """Estimate visible-face yaw from ground-truth edge geometry.

    Resolution order:
    1. the multi-edge estimate when at least two face edges are available and the result is finite
    2. the single-face yaw of ``fallback_face_type`` when that is a valid face id (0-3)
    3. the multi-edge estimate over whatever edges are available

    Args:
        target_42: GT label vector.
        cls_id: Class id of the object.
        calib: Calibration forwarded to the edge decoders.
        img_w: Image width forwarded to the edge decoders.
        img_h: Image height forwarded to the edge decoders.
        face_3d_classes: Class ids with face annotations.
        complete_3d_classes: Class ids with whole-box 3D only.
        fallback_face_type: Optional face id for the single-face fallback.
        score_thr: Visibility threshold for GT faces.
        bbox_xyxy: Optional 2D box forwarded to the edge decoders.

    Returns:
        The yaw produced by the first applicable step above.
    """
    shared_args = (target_42, cls_id, calib, img_w, img_h, face_3d_classes, complete_3d_classes)

    edges_by_face, score_by_face = {}, {}
    for kind, record in select_gt_visible_faces(target_42, score_thr=score_thr):
        edge = decode_visible_face_edge_from_gt(
            *shared_args, face_type=kind, score_thr=score_thr, bbox_xyxy=bbox_xyxy
        )
        if edge is None:
            continue
        edges_by_face[edge["face_type"]] = edge["points_3d"]
        score_by_face[edge["face_type"]] = float(record[6])

    cut_edge = decode_cut_partial_side_edge_from_gt(*shared_args, bbox_xyxy=bbox_xyxy, score_thr=score_thr)
    if cut_edge is not None:
        cut_face = cut_edge["face_type"]
        # The partial side edge overrides any full edge of the same face and gets a score of at least 1.0.
        edges_by_face[cut_face] = cut_edge["points_3d"]
        score_by_face[cut_face] = max(score_by_face.get(cut_face, 0.0), 1.0)

    if len(edges_by_face) >= 2:
        multi_yaw = visible_face_edges_to_yaw(edges_by_face, face_scores=score_by_face)
        if np.isfinite(multi_yaw):
            return multi_yaw

    if fallback_face_type not in range(4):
        return visible_face_edges_to_yaw(edges_by_face, face_scores=score_by_face)
    return decode_visible_face_yaw_from_gt(
        *shared_args, fallback_face_type, score_thr=score_thr, bbox_xyxy=bbox_xyxy
    )


def _back_project_metric_point(u, v, z, calib):
    """Build a float32 ``[x, y, z]`` point from pixel coordinates and a depth.

    ``x`` and ``y`` come from ``back_project_2d_to_3d``. They are NaN when there is no calibration, when ``z``
    is not positive, or when the back-projection returns None. ``z`` is always passed through unchanged.
    """
    projected = None
    if calib is not None and z > 0:
        projected = back_project_2d_to_3d((u, v), z, calib)

    if projected is None:
        x3d = y3d = float("nan")
    else:
        x3d, y3d = projected[0], projected[1]
    return np.array([x3d, y3d, z], dtype=np.float32)


def select_gt_visible_faces(target_42, score_thr=FACE_VISIBILITY_SCORE_THRESH):
    """Collect the GT faces that qualify for face-based metrics.

    Each face occupies 8 consecutive values starting at its ``FACE_OFFSETS_42`` entry. A face qualifies when
    field 7 equals 1, field 6 (the score) is not NaN and not below ``score_thr``, and field 2 is a positive,
    non-NaN value.

    Returns:
        List of ``(face_type, face_slice)`` tuples in face-index order.
    """
    eligible = []
    for face_idx, start in enumerate(FACE_OFFSETS_42):
        face_block = target_42[start : start + 8]
        visible_flag, vis_score = face_block[7], face_block[6]
        score_ok = visible_flag == 1 and not np.isnan(vis_score) and not vis_score < score_thr
        # Field 2 is only inspected once the visibility/score gate has passed.
        if score_ok and not np.isnan(face_block[2]) and face_block[2] > 0:
            eligible.append((face_idx, face_block))
    return eligible


def select_pred_visible_faces(pred_41, score_thr=FACE_VISIBILITY_SCORE_THRESH):
    """Collect predicted faces whose score is not NaN and not below ``score_thr``.

    The score of each face is read at offset 5 from its ``FACE_OFFSETS_41`` entry.

    Returns:
        List of ``(face_type, score)`` tuples in face-index order, with ``score`` as a Python float.
    """
    scored_faces = ((face_idx, float(pred_41[start + 5])) for face_idx, start in enumerate(FACE_OFFSETS_41))
    return [
        (face_idx, face_score)
        for face_idx, face_score in scored_faces
        if not (np.isnan(face_score) or face_score < score_thr)
    ]


def is_gt_face_cut(target_42, face_type):
    """Tell whether a GT face carries the "cut" sentinel pattern.

    A face counts as cut when the first six values of its 8-value block all equal -1 and the value at
    field 7 is <= 0. Any ``face_type`` outside 0-3 yields False.
    """
    if face_type in range(4):
        start = FACE_OFFSETS_42[face_type]
        face_block = target_42[start : start + 8]
        sentinel_fields = np.all(face_block[:6] == -1)
        # Field 7 is only read when the first six fields match the sentinel.
        return sentinel_fields and face_block[7] <= 0
    return False


def is_gt_cut_object(target_42):
    """Tell whether a GT face-based object is labeled as cut-in or cut-out.

    The object counts as cut when faces 2 and 3 are both cut and, in addition, face 1 or face 0 is cut.
    """
    face0_cut, face1_cut, face2_cut, face3_cut = (is_gt_face_cut(target_42, face_idx) for face_idx in range(4))
    faces_2_and_3_cut = face2_cut and face3_cut
    return (face1_cut and faces_2_and_3_cut) or (face0_cut and faces_2_and_3_cut)


def extract_3d_attrs_from_prediction(pred_41, anchor_xy, stride, calib, face_type=None, pred_edge_60=None):
    """Extract raw 3D attributes from a single 41-dim denormalized prediction.

    Args:
        pred_41: Denormalized prediction.
        anchor_xy: Anchor point in grid coordinates.
        stride: Anchor stride.
        calib: Per-sample calibration.
        face_type: Optional face index (0-3). When provided, decode depth/UV from the matching face branch.
        pred_edge_60: Optional denormalized auxiliary edge prediction aligned to the same anchor.

    Returns:
        Dict with keys ``center``, ``depth``, ``dims``, ``yaw``, ``edge_yaw``, ``uv``, ``visible_face_type`` and
        ``face_center``, or None when ``face_type`` is given but is not a valid face index (0-3).
    """
    # Validate before indexing: a negative index would silently wrap around FACE_OFFSETS_41 (-1 would read
    # face 3) and an index >= 4 would raise IndexError. Returning None matches the documented contract and
    # what extract_3d_attrs_from_gt does for the same input.
    if face_type is not None and face_type not in range(4):
        return None

    p = pred_41
    rot_y = _decode_yaw_from_prediction(p)
    dims = p[27:30].astype(np.float32)

    if face_type is None:
        # Whole-box branch: depth at index 24, UV offset at indices 25-26; no edge-based yaw.
        z = float(p[24])
        uv_offset = p[25:27]
        edge_yaw = float("nan")
    else:
        # Face branch: depth at the face offset, UV offset in the two values after it.
        off = FACE_OFFSETS_41[face_type]
        z = float(p[off])
        uv_offset = p[off + 1 : off + 3]
        edge_yaw = decode_multi_visible_face_yaw_from_prediction(
            p,
            pred_edge_60,
            anchor_xy,
            stride,
            calib,
            fallback_face_type=face_type,
        )

    # Anchor plus predicted offset is in grid units; the stride converts it to pixels.
    u = float((anchor_xy[0] + uv_offset[0]) * stride)
    v = float((anchor_xy[1] + uv_offset[1]) * stride)
    center = _back_project_metric_point(u, v, z, calib)
    return {
        "center": center,
        "depth": z,
        "dims": dims,
        "yaw": float(rot_y),
        "edge_yaw": float(edge_yaw),
        "uv": np.array([u, v], dtype=np.float32),
        "visible_face_type": None if face_type is None else int(face_type),
        # For face-based decoding the back-projected point is the face center itself.
        "face_center": None if face_type is None else center,
    }


def face_center_from_corners(corners_3d, face_type):
    """Average the corners listed in ``FACE_CORNERS`` for one face of a 3D box.

    Returns:
        The mean of the face's corner points, or None when ``corners_3d`` is None, ``face_type`` is not a key
        of ``FACE_CORNERS``, or the corners are not a finite (8, 3) array.
    """
    if corners_3d is not None and face_type in FACE_CORNERS:
        box_pts = np.asarray(corners_3d, dtype=np.float32)
        if box_pts.shape == (8, 3) and np.isfinite(box_pts).all():
            face_pts = box_pts[list(FACE_CORNERS[face_type])]
            return face_pts.mean(axis=0)
    return None


def rebuild_box_corners_for_visualization(
    corners_3d,
    dims,
    yaw,
    visible_face_type=None,
    face_center_3d=None,
    box_center_3d=None,
):
    """Recompute box corners from dims and yaw around the anchor that fits the object type.

    Anchor priority:
        1. ``visible_face_type`` given: the face center (``face_center_3d``, or derived from ``corners_3d``).
        2. ``box_center_3d`` given: that box center.
        3. Otherwise: the mean of ``corners_3d``.

    Returns:
        The result of ``compute_3d_box_corners``, or None when dims, yaw, or the chosen anchor is malformed
        or non-finite.
    """
    size = np.asarray(dims, dtype=np.float32)
    size_ok = size.shape == (3,) and np.isfinite(size).all()
    if not (size_ok and np.isfinite(float(yaw))):
        return None
    heading = float(yaw)

    if visible_face_type is not None:
        # Face-based object: stay anchored on the selected visible face center.
        anchor = (
            face_center_from_corners(corners_3d, int(visible_face_type))
            if face_center_3d is None
            else np.asarray(face_center_3d, dtype=np.float32)
        )
        if anchor is not None and anchor.shape == (3,) and np.isfinite(anchor).all():
            return compute_3d_box_corners(anchor, size, heading, face_type=int(visible_face_type))
        return None

    if box_center_3d is not None:
        # Whole-box object with an explicit geometric center.
        anchor = np.asarray(box_center_3d, dtype=np.float32)
        if anchor.shape == (3,) and np.isfinite(anchor).all():
            return compute_3d_box_corners(anchor, size, heading, face_type=-1)
        return None

    # Last resort: use the centroid of the existing corners as the box center.
    corner_pts = np.asarray(corners_3d, dtype=np.float32)
    if corner_pts.shape == (8, 3) and np.isfinite(corner_pts).all():
        return compute_3d_box_corners(corner_pts.mean(axis=0), size, heading, face_type=-1)
    return None


def extract_3d_attrs_from_gt(
    target_42,
    cls_id,
    calib,
    img_w,
    img_h,
    face_3d_classes,
    complete_3d_classes,
    face_type=None,
    score_thr=FACE_VISIBILITY_SCORE_THRESH,
):
    """Extract raw 3D attributes from a single 42-dim GT label.

    Args:
        target_42: GT 42-dim label.
        cls_id: Integer class ID.
        calib: Per-sample calibration; an optional ``depth_scale`` entry multiplies the label depth.
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        face_3d_classes: Class IDs that use face annotations.
        complete_3d_classes: Class IDs with whole-box-only 3D labels.
        face_type: Optional face index (0-3). When provided, decode only that GT-visible face.
        score_thr: Minimum visible-face score used to treat a GT face as valid.

    Returns:
        Dict with keys ``center``, ``depth``, ``dims``, ``yaw``, ``edge_yaw``, ``uv``, ``visible_face_type`` and
        ``face_center``, or None if the requested representation is invalid.
    """
    box_depth = target_42[2]
    if np.isnan(box_depth) or box_depth <= 0:
        return None

    # Only classes with some form of 3D annotation are decoded.
    is_face_class = cls_id in face_3d_classes
    if not is_face_class and cls_id not in complete_3d_classes:
        return None

    depth_scale = calib.get("depth_scale", 1.0) if calib else 1.0
    size = target_42[3:6].astype(np.float32)
    heading = float(target_42[6])
    edge_heading = float("nan")

    if face_type is not None:
        if not is_face_class or face_type not in range(4):
            return None
        start = FACE_OFFSETS_42[face_type]
        face_block = target_42[start : start + 8]
        visible_flag, vis_score = face_block[7], face_block[6]
        if visible_flag != 1 or np.isnan(vis_score) or vis_score < score_thr:
            return None
        if np.isnan(face_block[2]) or face_block[2] <= 0:
            return None
        # Face block layout used here: depth at field 2, normalized UV at fields 4 and 5.
        raw_depth, u_norm, v_norm = face_block[2], face_block[4], face_block[5]
    else:
        # Whole-box layout: depth at index 2, normalized UV at indices 7 and 8.
        raw_depth, u_norm, v_norm = box_depth, target_42[7], target_42[8]

    z = float(raw_depth * depth_scale)
    u = float(u_norm * img_w)
    v = float(v_norm * img_h)

    if face_type is not None:
        edge_heading = decode_multi_visible_face_yaw_from_gt(
            target_42,
            cls_id,
            calib,
            img_w,
            img_h,
            face_3d_classes,
            complete_3d_classes,
            fallback_face_type=face_type,
            score_thr=score_thr,
        )

    center = _back_project_metric_point(u, v, z, calib)
    attrs = {
        "center": center,
        "depth": z,
        "dims": size,
        "yaw": heading,
        "edge_yaw": float(edge_heading),
        "uv": np.array([u, v], dtype=np.float32),
        "visible_face_type": None if face_type is None else int(face_type),
        # For face-based decoding the back-projected point is the face center itself.
        "face_center": None if face_type is None else center,
    }
    return attrs


# ---- Bird's Eye View (BEV) visualization ----

def draw_bev_blank(max_range=200, lateral_range=50):
    """Create a blank BEV canvas with a distance grid and an ego marker.

    The canvas uses 10 pixels per meter, a dark gray background, horizontal grid lines every 20 m (labeled
    with their distance) and vertical grid lines every 10 m. The ego marker sits at the bottom center.

    Args:
        max_range: Forward range in meters.
        lateral_range: Lateral range in meters (±lateral_range).

    Returns:
        (bev_img, pixels_per_meter, ego_center_x, ego_center_y) tuple.
    """
    ppm = 10  # pixels per meter
    width_px = lateral_range * 2 * ppm
    height_px = max_range * ppm
    canvas = np.full((height_px, width_px, 3), 40, dtype=np.uint8)  # dark gray background

    # Ego sits at the bottom center of the canvas.
    ego_cx, ego_cy = width_px // 2, height_px
    grid_color = (80, 80, 80)
    label_color = (150, 150, 150)

    # Horizontal lines: one per 20 m of forward distance, each labeled with its range.
    for dist_m in range(0, max_range + 1, 20):
        row = ego_cy - dist_m * ppm
        if not 0 <= row < height_px:
            continue
        cv2.line(canvas, (0, row), (width_px, row), grid_color, 1)
        cv2.putText(canvas, f"{dist_m}m", (5, row - 3), cv2.FONT_HERSHEY_SIMPLEX, 0.4, label_color, 1)

    # Vertical lines: one per 10 m of lateral offset.
    for offset_m in range(-lateral_range, lateral_range + 1, 10):
        col = ego_cx + offset_m * ppm
        if not 0 <= col < width_px:
            continue
        cv2.line(canvas, (col, 0), (col, height_px), grid_color, 1)

    # Ego vehicle marker
    marker_top_left = (ego_cx - 8, ego_cy - 20)
    marker_bottom_right = (ego_cx + 8, ego_cy)
    cv2.rectangle(canvas, marker_top_left, marker_bottom_right, (255, 200, 0), -1)

    return canvas, ppm, ego_cx, ego_cy


def draw_bev_object(bev_img, center_3d, dims, rot_y, ppm, ego_cx, ego_cy, is_pred=True):
    """Draw a single object on BEV image.

    The object is drawn in place as a rotated rectangle plus an arrow along its heading. Nothing is drawn
    when the center, footprint, or yaw is non-finite, when the object is not in front of the camera
    (``z <= 0``), or when its center falls outside the canvas.

    Args:
        bev_img: BEV canvas image.
        center_3d: (x, y, z) in camera coordinates (x=right, z=forward).
        dims: (l, h, w) dimensions.
        rot_y: Rotation angle in radians.
        ppm: Pixels per meter.
        ego_cx: Ego center x in pixels.
        ego_cy: Ego center y in pixels.
        is_pred: True for predictions (red), False for GT (green).
    """
    x, _, z = center_3d
    length, _, width = dims

    # Every value below ends up inside int(...); NaN/inf there raises instead of skipping the object,
    # so length, width and yaw are validated together with the center.
    if not all(np.isfinite(value) for value in (x, z, length, width, rot_y)) or z <= 0:
        return

    # Camera coords: x=right, z=forward → BEV: right=+x, up=+z
    bev_x = int(ego_cx + x * ppm)
    bev_y = int(ego_cy - z * ppm)

    if not (0 <= bev_x < bev_img.shape[1] and 0 <= bev_y < bev_img.shape[0]):
        return

    color = (0, 0, 255) if is_pred else (0, 200, 0)  # Red for pred, green for GT

    # Draw rotated rectangle
    rect = ((bev_x, bev_y), (int(width * ppm), int(length * ppm)), -np.degrees(rot_y))
    box_pts = cv2.boxPoints(rect).astype(np.intp)
    cv2.drawContours(bev_img, [box_pts], 0, color, 2)

    # Arrow showing forward direction (half the object length, starting at its center)
    dx = int(length * 0.5 * ppm * np.sin(rot_y))
    dy = int(-length * 0.5 * ppm * np.cos(rot_y))
    cv2.arrowedLine(bev_img, (bev_x, bev_y), (bev_x + dx, bev_y + dy), color, 1, tipLength=0.3)


def create_bev_image(gt_3d_attrs_list, pred_3d_attrs_list, max_range=200, lateral_range=50):
    """Render a BEV image showing GT objects (green) and predictions (red).

    Args:
        gt_3d_attrs_list: List of dicts with center, dims, yaw (from extract_3d_attrs_from_gt).
        pred_3d_attrs_list: List of dicts with center, dims, yaw (from extract_3d_attrs_from_prediction).
        max_range: Forward range in meters.
        lateral_range: Lateral range in meters.

    Returns:
        RGB numpy image (H, W, 3).
    """
    canvas, ppm, ego_cx, ego_cy = draw_bev_blank(max_range, lateral_range)

    # GT is drawn first so predictions are painted on top; None entries are skipped.
    layers = ((gt_3d_attrs_list, False), (pred_3d_attrs_list, True))
    for attrs_list, is_pred in layers:
        for attrs in attrs_list:
            if attrs is None:
                continue
            draw_bev_object(
                canvas,
                attrs["center"],
                attrs["dims"],
                attrs["yaw"],
                ppm,
                ego_cx,
                ego_cy,
                is_pred=is_pred,
            )

    # Legend in the top-left corner, using the same colors as the objects.
    font = cv2.FONT_HERSHEY_SIMPLEX
    cv2.putText(canvas, "GT", (10, 20), font, 0.6, (0, 200, 0), 2)
    cv2.putText(canvas, "Pred", (10, 45), font, 0.6, (0, 0, 255), 2)

    # Drawing colors are BGR tuples; convert so the caller receives RGB.
    return cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)