# yolov26_3d/tools/scripts_for_gt/visualization/plots_3d.py

"""3D visualization plotting utilities.

Self-contained module extracted from yolov5-3d/utils/plots.py.
Contains only the functions required by visualize_single_frame.py and
visualize_batch.py for ground-truth 3D visualization, with no cross-repo
dependencies.
"""

import cv2
import numpy as np
import torch
from ultralytics.utils.plotting import Annotator  # noqa: F401  (re-exported)


# ---------------------------------------------------------------------------
# Color helpers
# ---------------------------------------------------------------------------

class Colors:
    """Fixed colour palette (RGB tuples) derived from the Ultralytics colour scheme.

    Instances are callable: ``palette_obj(i)`` returns the colour for index
    ``i``, wrapping around when ``i`` exceeds the palette size.
    """

    def __init__(self):
        hex_codes = (
            "FF3838", "FF9D97", "FF701F", "FFB21D", "CFD231",
            "48F90A", "92CC17", "3DDB86", "1A9334", "00D4BB",
            "2C99A8", "00C2FF", "344593", "6473FF", "0018EC",
            "8438FF", "520085", "CB38FF", "FF95C8", "FF37C7",
        )
        palette = []
        for code in hex_codes:
            palette.append(self.hex2rgb("#" + code))
        self.palette = palette
        self.n = len(palette)

    def __call__(self, i, bgr=False):
        """Look up the colour for index ``i``.

        Args:
            i: Index into the palette; converted with ``int`` and taken modulo
                the palette length.
            bgr (bool): When true the channels are returned in (B, G, R) order.

        Returns:
            tuple: Three integer channel values.
        """
        rgb = self.palette[int(i) % self.n]
        if bgr:
            return rgb[::-1]
        return rgb

    @staticmethod
    def hex2rgb(h):
        """Convert a ``'#RRGGBB'`` string into an ``(R, G, B)`` integer tuple."""
        digits = h[1:]  # drop the leading '#'
        return tuple(int(digits[k:k + 2], 16) for k in range(0, 6, 2))


# Shared palette instance, built once when this module is imported, so that
# every caller indexes into the same Colors object.
colors = Colors()  # module-level singleton; callers: from plots_3d import colors


# ---------------------------------------------------------------------------
# 3D geometry helpers
# ---------------------------------------------------------------------------

def rotation_3d_in_axis(points, angles, axis=0):
    """Rotate *points* about one coordinate axis.

    The points are treated as row vectors and right-multiplied by the
    rotation matrix (``points @ R``).

    Args:
        points (np.ndarray): (N, 3) array of 3D points.
        angles (float): Rotation angle in radians.
        axis (int): Axis to rotate about: 0=X, 1=Y, 2=Z.

    Returns:
        np.ndarray: Rotated points (N, 3).

    Raises:
        ValueError: If *axis* is not 0, 1 or 2.
    """
    s = np.sin(angles)
    c = np.cos(angles)
    one = np.ones_like(c)
    zero = np.zeros_like(c)

    # Rows of the matrix applied on the right-hand side of the points.
    if axis == 0:
        rows = (
            [one, zero, zero],
            [zero, c, s],
            [zero, -s, c],
        )
    elif axis == 1:
        rows = (
            [c, zero, -s],
            [zero, one, zero],
            [s, zero, c],
        )
    elif axis == 2:
        rows = (
            [c, s, zero],
            [-s, c, zero],
            [zero, zero, one],
        )
    else:
        raise ValueError(f"axis must be in {{0, 1, 2}}, got {axis}")

    matrix = np.stack([np.stack(row) for row in rows])
    return np.dot(points, matrix)


def compute_3d_box_corners_4face(center_3d, dimensions, rotation, face_type=0):
    """Build the 8 corners of a 3D box anchored at a face centre (or box centre).

    Args:
        center_3d (array-like): (x, y, z) of the anchor point in camera coords.
            Which point of the box this is depends on *face_type*.
        dimensions (array-like): (length, height, width) of the box in metres.
        rotation (float): rot_y — rotation about the Y axis in radians.
        face_type (int): 0=front, 1=tail/rear, 2=left, 3=right, -1=box centre.
            Any other value is treated like -1.

    Returns:
        np.ndarray: (8, 3) corner coordinates in camera frame. Corners 0-3 lie
        on the tail side of the length axis and corners 4-7 on the front side.
    """
    length, height, width = dimensions

    # Corners of the unit cube, ordered so that each group of four walks
    # around one face perpendicular to the length axis.
    unit_cube = np.array([
        [0, 0, 0], [0, 0, 1], [0, 1, 1], [0, 1, 0],
        [1, 0, 0], [1, 0, 1], [1, 1, 1], [1, 1, 0],
    ], dtype=np.float64)

    # Position of the anchor point inside the unit cube, per face type.
    anchor_by_face = {
        0: [1, 0.5, 0.5],      # front
        1: [0, 0.5, 0.5],      # tail
        2: [0.5, 0.5, 1],      # left
        3: [0.5, 0.5, 0],      # right
        -1: [0.5, 0.5, 0.5],   # whole centre
    }
    anchor = anchor_by_face.get(face_type, anchor_by_face[-1])
    local = unit_cube - anchor

    scaled = np.array([length, height, width]).reshape(1, 3) * local.reshape(8, 3)
    world = rotation_3d_in_axis(scaled, rotation, axis=1)
    world += np.array(center_3d).reshape(1, 3)
    return world


# ---------------------------------------------------------------------------
# Fisheye (KB) distortion helpers
# ---------------------------------------------------------------------------

def apply_fisheye_distortion(x, y, distort_coeffs):
    """Apply Kannala-Brandt fisheye distortion to normalised camera coordinates.

    With ``r = sqrt(x^2 + y^2)`` and ``theta = atan(r)`` the distorted angle is
    ``theta_d = theta * (1 + k1*theta^2 + k2*theta^4 + k3*theta^6 + k4*theta^8)``
    and both coordinates are scaled by ``theta_d / r``.

    Args:
        x (float): Normalised x coordinate (x3d / z3d).
        y (float): Normalised y coordinate (y3d / z3d).
        distort_coeffs (sequence | np.ndarray | None): [k1, k2, k3, k4] KB
            coefficients. ``None`` or fewer than four values disables the
            distortion. Extra values beyond the fourth are ignored.

    Returns:
        tuple[float, float]: Distorted normalised coordinates (xd, yd). The
        input is returned unchanged when no usable coefficients are given or
        when the point lies (numerically) on the optical axis.
    """
    # Use an explicit None/length test: ``not distort_coeffs`` raises
    # ValueError for NumPy arrays with more than one element.
    if distort_coeffs is None or len(distort_coeffs) < 4:
        return x, y

    k1, k2, k3, k4 = distort_coeffs[:4]
    r = np.sqrt(x * x + y * y)
    if r < 1e-8:
        # Avoid dividing by ~0; the distortion vanishes at the image centre.
        return x, y

    theta = np.arctan(r)
    theta_d = theta * (1 + k1 * theta**2 + k2 * theta**4 + k3 * theta**6 + k4 * theta**8)
    scale = theta_d / r
    return x * scale, y * scale


def remove_fisheye_distortion(xd, yd, distort_coeffs, max_iter=20):
    """Remove KB fisheye distortion from normalised camera coordinates.

    Solves ``theta * (1 + k1*theta^2 + k2*theta^4 + k3*theta^6 + k4*theta^8)
    = theta_d`` for ``theta`` with Newton-Raphson, where ``theta_d`` is the
    distorted radius, then rescales the point to radius ``tan(theta)``.

    Args:
        xd (float): Distorted normalised x coordinate.
        yd (float): Distorted normalised y coordinate.
        distort_coeffs (sequence | np.ndarray | None): [k1, k2, k3, k4] KB
            coefficients. ``None`` or fewer than four values disables the
            correction. Extra values beyond the fourth are ignored.
        max_iter (int): Maximum Newton-Raphson iterations.

    Returns:
        tuple[float, float]: Undistorted normalised coordinates (xn, yn). The
        input is returned unchanged when no usable coefficients are given or
        when the point lies (numerically) on the optical axis.
    """
    # Use an explicit None/length test: ``not distort_coeffs`` raises
    # ValueError for NumPy arrays with more than one element.
    if distort_coeffs is None or len(distort_coeffs) < 4:
        return xd, yd

    k1, k2, k3, k4 = distort_coeffs[:4]
    r_d = np.sqrt(xd * xd + yd * yd)
    if r_d < 1e-8:
        return xd, yd

    theta_d = r_d
    # Initial guess from the first-order model; fall back to theta_d itself if
    # that approximation would divide by zero.
    first_order = 1 + k1 * theta_d * theta_d
    theta = theta_d / first_order if first_order != 0 else theta_d

    for _ in range(max_iter):
        theta2 = theta * theta
        theta4 = theta2 * theta2
        theta6 = theta4 * theta2
        theta8 = theta4 * theta4

        f = theta * (1 + k1 * theta2 + k2 * theta4 + k3 * theta6 + k4 * theta8) - theta_d
        f_prime = 1 + 3 * k1 * theta2 + 5 * k2 * theta4 + 7 * k3 * theta6 + 9 * k4 * theta8
        if f_prime == 0:
            # Flat spot: a Newton step is undefined, keep the current estimate.
            break

        theta_new = theta - f / f_prime
        converged = abs(theta_new - theta) < 1e-8
        theta = theta_new
        if converged:
            break

    r = np.tan(theta)
    scale = r / r_d
    return xd * scale, yd * scale


# ---------------------------------------------------------------------------
# 3D-to-2D projection helpers
# ---------------------------------------------------------------------------

def project_3d_to_2d_with_distortion(points_3d, calib):
    """Project 3D points to 2D using KB fisheye camera calibration.

    Each point with depth above 0.1 is normalised by its depth, distorted
    with :func:`apply_fisheye_distortion` and mapped to pixels with the
    focal lengths and principal point from *calib*.

    Args:
        points_3d (np.ndarray): (N, 3) points in camera coordinates.
        calib (dict): Camera parameters ``fx``, ``fy``, ``cx``, ``cy``,
            and optional ``distort_coeffs``.

    Returns:
        np.ndarray: (N, 2) image coordinates. Points whose depth is not
        greater than 0.1 yield ``[NaN, NaN]``. An empty input yields an
        array of shape (0, 2).
    """
    fx, fy = calib['fx'], calib['fy']
    cx, cy = calib['cx'], calib['cy']
    distort_coeffs = calib.get('distort_coeffs', [])
    min_depth = 0.1  # points at or behind this depth are not projected

    points_2d = []
    for x, y, z in points_3d:
        if z > min_depth:
            xd, yd = apply_fisheye_distortion(x / z, y / z, distort_coeffs)
            points_2d.append([fx * xd + cx, fy * yd + cy])
        else:
            points_2d.append([np.nan, np.nan])

    # reshape keeps the documented (N, 2) shape even when N == 0.
    return np.array(points_2d).reshape(-1, 2)


def sample_3d_edge(p1, p2, num_samples=10):
    """Uniformly sample *num_samples* points along the 3D edge from *p1* to *p2*.

    Both endpoints are included (for ``num_samples >= 2``).

    Args:
        p1 (array-like): Start point (x, y, z). Lists and tuples are accepted.
        p2 (array-like): End point (x, y, z). Lists and tuples are accepted.
        num_samples (int): Number of sample points.

    Returns:
        np.ndarray: (num_samples, 3) sampled 3D points.
    """
    # Convert up front so plain Python sequences support the arithmetic below.
    start = np.asarray(p1)
    end = np.asarray(p2)
    t = np.linspace(0, 1, num_samples).reshape(-1, 1)
    return start + t * (end - start)


def project_3d_box_edges_with_distortion(corners_3d, calib, samples_per_edge=10):
    """Sample every box edge in 3D and project the samples with fisheye distortion.

    Sampling along each edge lets the edge be drawn as a polyline rather than
    a single straight segment between its projected endpoints.

    Args:
        corners_3d (np.ndarray): (8, 3) 3D corner coordinates.
        calib (dict): Camera calibration dict.
        samples_per_edge (int): Number of samples per edge.

    Returns:
        dict: Mapping edge_name → (N, 2) 2D projected points. Keys are
        ``back_0..3``, ``connect_0..3``, ``front_0..3`` and the two front-face
        diagonals ``front_x1``/``front_x2``, in that order.
    """
    # (edge name, start corner index, end corner index)
    edge_table = (
        ('back_0', 4, 5), ('back_1', 5, 6), ('back_2', 6, 7), ('back_3', 7, 4),
        ('connect_0', 0, 4), ('connect_1', 1, 5), ('connect_2', 2, 6), ('connect_3', 3, 7),
        ('front_0', 0, 1), ('front_1', 1, 2), ('front_2', 2, 3), ('front_3', 3, 0),
        ('front_x1', 0, 2), ('front_x2', 1, 3),
    )

    projected = {}
    for label, a, b in edge_table:
        samples = sample_3d_edge(corners_3d[a], corners_3d[b], samples_per_edge)
        projected[label] = project_3d_to_2d_with_distortion(samples, calib)
    return projected


def plot_box3d_on_img_with_distortion(img, edge_points_2d,
                                      color_front=(255, 0, 0),
                                      color_back=(0, 0, 255),
                                      color_side=(0, 255, 255),
                                      thickness=1):
    """Draw a 3D box as polylines from pre-projected, sampled edge points.

    An edge is skipped entirely when any of its sample points is NaN.

    Args:
        img (np.ndarray): BGR image to draw on.
        edge_points_2d (dict): Output of :func:`project_3d_box_edges_with_distortion`.
        color_front (tuple): BGR colour for front-face edges.
        color_back (tuple): BGR colour for back-face edges.
        color_side (tuple): BGR colour for side connecting edges (and any
            edge name that is neither a front nor a back edge).
        thickness (int): Line thickness in pixels.

    Returns:
        np.ndarray: Modified image.
    """
    # Edge name → colour; names missing from this table use the side colour.
    colour_by_edge = {}
    for label in ('front_0', 'front_1', 'front_2', 'front_3', 'front_x1', 'front_x2'):
        colour_by_edge[label] = color_front
    for label in ('back_0', 'back_1', 'back_2', 'back_3'):
        colour_by_edge[label] = color_back

    for label, samples in edge_points_2d.items():
        if np.isnan(samples).any():
            continue
        polyline = samples.astype(np.int32)
        line_colour = colour_by_edge.get(label, color_side)
        cv2.polylines(img, [polyline], isClosed=False, color=line_colour,
                      thickness=thickness, lineType=cv2.LINE_AA)

    return img


def project_3d_to_2d_with_calib(points_3d, calib):
    """Project 3D points to 2D using pinhole calibration (no distortion).

    Args:
        points_3d (np.ndarray): (N, 3) points in camera coordinates.
        calib (dict): Camera parameters ``fx``, ``fy``, ``cx``, ``cy``.

    Returns:
        np.ndarray: (N, 2) image coordinates. Points whose depth is not
        greater than 0.1 yield ``[NaN, NaN]``. An empty input yields an
        array of shape (0, 2).
    """
    fx, fy = calib['fx'], calib['fy']
    cx, cy = calib['cx'], calib['cy']
    min_depth = 0.1  # points at or behind this depth are not projected

    points_2d = [
        [fx * x / z + cx, fy * y / z + cy] if z > min_depth else [np.nan, np.nan]
        for x, y, z in points_3d
    ]

    # reshape keeps the documented (N, 2) shape even when N == 0.
    return np.array(points_2d).reshape(-1, 2)


def project_3d_to_2d_simple(points_3d, img_size):
    """Project 3D points to 2D using a simple estimated pinhole model.

    The intrinsics are guessed from the image size: both focal lengths are
    ``1.2 * width`` and the principal point is the image centre.

    Args:
        points_3d (np.ndarray): (N, 3) points in camera coordinates.
        img_size (tuple[int, int]): ``(width, height)`` of the image.

    Returns:
        np.ndarray: (N, 2) image coordinates. Points whose depth is not
        greater than 0.1 yield ``[NaN, NaN]``. An empty input yields an
        array of shape (0, 2).
    """
    w, h = img_size
    fx = fy = w * 1.2
    cx, cy = w / 2, h / 2
    min_depth = 0.1  # points at or behind this depth are not projected

    points_2d = [
        [fx * x / z + cx, fy * y / z + cy] if z > min_depth else [np.nan, np.nan]
        for x, y, z in points_3d
    ]

    # reshape keeps the documented (N, 2) shape even when N == 0.
    return np.array(points_2d).reshape(-1, 2)


def plot_box3d_on_img(img, corners_2d,
                      color_front=(255, 0, 0),
                      color_back=(0, 0, 255),
                      color_side=(0, 255, 255),
                      thickness=1):
    """Draw a 3D bounding box on *img* from 2D projected corners.

    Edges are drawn in the order back face, side edges, front face (including
    the two diagonals that mark the front with an X). Drawing is best-effort:
    an edge is skipped when one of its corners is missing or non-finite, or
    when OpenCV rejects the coordinates; other errors are no longer hidden.

    Args:
        img (np.ndarray): BGR image to draw on.
        corners_2d (np.ndarray): (8, 2) projected corner coordinates.
        color_front (tuple): BGR colour for front-face edges (indices 0-3).
        color_back (tuple): BGR colour for back-face edges (indices 4-7).
        color_side (tuple): BGR colour for connecting side edges.
        thickness (int): Line thickness in pixels.

    Returns:
        np.ndarray: Modified image.
    """
    edge_groups = (
        (color_back, ((4, 5), (5, 6), (6, 7), (7, 4))),                    # back face
        (color_side, ((0, 4), (1, 5), (2, 6), (3, 7))),                    # side edges
        (color_front, ((0, 1), (1, 2), (2, 3), (3, 0), (0, 2), (1, 3))),   # front face + X mark
    )

    corners = np.asarray(corners_2d, dtype=np.float64)
    if corners.ndim != 2 or corners.shape[1] < 2:
        # Nothing that can be interpreted as (u, v) rows: draw nothing.
        return img

    n_corners = corners.shape[0]
    # NaN/inf cannot be converted to pixel coordinates; mark those corners.
    usable = np.isfinite(corners[:, :2]).all(axis=1)

    for color, edges in edge_groups:
        for start, end in edges:
            if start >= n_corners or end >= n_corners:
                continue
            if not (usable[start] and usable[end]):
                continue
            # Plain Python ints: accepted by every OpenCV binding version.
            pt1 = (int(corners[start, 0]), int(corners[start, 1]))
            pt2 = (int(corners[end, 0]), int(corners[end, 1]))
            try:
                cv2.line(img, pt1, pt2, color, thickness, cv2.LINE_AA)
            except (cv2.error, OverflowError):
                # Coordinates outside the range OpenCV can handle: skip edge.
                continue

    return img


# ---------------------------------------------------------------------------
# 3D box reconstruction from target label format
# ---------------------------------------------------------------------------

def _reconstruct_3d_box_from_face(face_uv, face_z, dims, rot_y, face_type, calib):
    """Reconstruct 3D box corners from a visible face centre.

    Args:
        face_uv (tuple[float, float]): Pixel coordinates (u, v) of the face centre.
        face_z (float): Depth of the face centre in metres.
        dims (array-like): (length, height, width) in metres.
        rot_y (float): Yaw rotation in radians.
        face_type (int): 0=front, 1=rear, 2=left, 3=right.
        calib (dict): Camera calibration dict.

    Returns:
        tuple[np.ndarray, list] | None: ``(corners_3d, object_3d)`` or ``None`` on failure.
    """
    if calib is None:
        return None

    fx, fy = calib['fx'], calib['fy']
    cx, cy = calib['cx'], calib['cy']
    distort_coeffs = calib.get('distort_coeffs', [])

    u_face, v_face = face_uv
    xd = (u_face - cx) / fx
    yd = (v_face - cy) / fy

    if distort_coeffs and len(distort_coeffs) >= 4:
        xn, yn = remove_fisheye_distortion(xd, yd, distort_coeffs)
    else:
        xn, yn = xd, yd

    l, h, w = dims
    if np.isnan(l) or np.isnan(h) or np.isnan(w) or np.isnan(rot_y):
        return None

    face_center_3d = np.array([xn * face_z, yn * face_z, face_z])
    corners_3d = compute_3d_box_corners_4face(face_center_3d, dims, rot_y, face_type=face_type)
    object_3d = [face_center_3d[0], face_center_3d[1], face_center_3d[2], l, h, w, rot_y, face_type]
    return corners_3d, object_3d


def _select_best_visible_face(target, face_offsets, min_score=0.3):
    """Return ``(face_type, face_data)`` of the highest-scoring usable face, else ``(-1, None)``."""
    best_type, best_score, best_data = -1, -1.0, None

    for face_type, offset in enumerate(face_offsets):
        face_data = target[offset:offset + 8]
        _, _, z3d_face, _, xc_face, yc_face, score, is_visible = face_data

        # Only a flag of exactly 1 counts as visible; NaN and -1 fail this test.
        if is_visible != 1:
            continue
        if np.isnan(score) or score < min_score:
            continue
        if np.isnan(xc_face) or np.isnan(yc_face) or np.isnan(z3d_face) or z3d_face <= 0:
            continue

        # Strict comparison: on equal scores the earlier face wins.
        if score > best_score:
            best_type, best_score, best_data = face_type, score, face_data

    return best_type, best_data


def decode_and_reconstruct_3d_box_from_target(target, calib, img_width, img_height,
                                               face_3d_classes=None, complete_3d_classes=None):
    """Decode a ground-truth target vector and reconstruct its 3D box.

    The target array follows the 48-column format used in ``YOLOGround3DDataset``:

    * col 0  — image index
    * col 1  — class id
    * cols 2-5 — normalised 2D bbox [x, y, w, h]
    * cols 6-8 — 3D centre [x3d, y3d, z3d] in camera coords
    * cols 9-11 — dimensions [l, h, w]
    * col 12 — rot_y
    * cols 13-14 — normalised UV projection of 3D centre
    * col 15 — alpha
    * cols 16-23 — front face [x3d, y3d, z3d, alpha, xc, yc, score, visible]
    * cols 24-31 — rear face (same layout)
    * cols 32-39 — left face
    * cols 40-47 — right face

    Face-based classes are reconstructed from the visible face with the
    highest score (score >= 0.3, visibility flag == 1, valid centre and
    positive depth). Complete-box classes are reconstructed from the 3D
    centre in cols 6-8. Depth values are multiplied by ``calib['depth_scale']``.

    Args:
        target (np.ndarray): (48,) label vector. At least 15 values are
            required, and all 48 for face-based classes.
        calib (dict): Camera calibration dict; must contain ``depth_scale``.
        img_width (int): Image width in pixels.
        img_height (int): Image height in pixels.
        face_3d_classes (list | None): Class IDs using face-based reconstruction.
            Defaults to ``[0, 1, 2, 3, 4, 5, 6, 7, 8]`` (vehicles).
        complete_3d_classes (list | None): Class IDs using complete-box
            reconstruction.  Defaults to ``[9, 10, 11, 12]`` (pedestrian /
            cyclists).

    Returns:
        dict | None: Result dict with keys ``should_draw``, ``cls``,
        ``corners_3d``, ``face_center_2d``, ``face_color``, ``center_2d``,
        ``object_3d``; or ``None`` if the target is invalid / unsupported.
        For a face-based class without any usable face the dict is returned
        with ``corners_3d`` (and the other geometry entries) left as ``None``.
        ``center_2d`` stays ``None`` when the normalised centre is NaN.
    """
    if face_3d_classes is None:
        face_3d_classes = [0, 1, 2, 3, 4, 5, 6, 7, 8]  # vehicles
    if complete_3d_classes is None:
        complete_3d_classes = [9, 10, 11, 12]  # pedestrian / cyclists

    if len(target) < 15 or np.isnan(target[1]):
        return None

    cls = int(target[1])
    is_face_class = cls in face_3d_classes
    if not is_face_class and cls not in complete_3d_classes:
        return None

    depth_scale = calib['depth_scale']

    # Every documented key is present from the start so callers can index
    # the result without guarding against KeyError.
    result = {
        'should_draw': True,
        'cls': cls,
        'corners_3d': None,
        'face_center_2d': None,
        'face_color': None,
        'center_2d': None,
        'object_3d': None,
    }

    if is_face_class:
        if len(target) < 48:
            return None

        face_offsets = [16, 24, 32, 40]
        # BGR: front=red, rear=blue, left=green, right=yellow
        face_colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (0, 255, 255)]

        best_face_type, best_face_data = _select_best_visible_face(target, face_offsets)
        if best_face_data is None:
            return result

        # Face centre is stored normalised; depth is stored pre-scaling.
        u_face = best_face_data[4] * img_width
        v_face = best_face_data[5] * img_height
        z3d_face = best_face_data[2] * depth_scale

        result_face = _reconstruct_3d_box_from_face(
            (u_face, v_face), z3d_face, target[9:12], target[12], best_face_type, calib
        )
        if result_face is not None:
            corners_3d, object_3d = result_face
            result['corners_3d'] = corners_3d
            result['face_center_2d'] = (u_face, v_face)
            result['face_color'] = face_colors[best_face_type]
            result['object_3d'] = object_3d
        return result

    # Complete-box class: the 3D centre is given directly.
    x3d, y3d, z3d = target[6:9]
    dimensions = target[9:12]
    rot_y = target[12]
    xc_norm, yc_norm = target[13:15]

    z3d = z3d * depth_scale

    if np.isnan(z3d) or z3d <= 0 or np.any(np.isnan(dimensions)):
        return None
    # A NaN yaw would turn every corner into NaN, so reject it like the
    # other invalid fields (the face-based path does the same).
    if np.isnan(x3d) or np.isnan(y3d) or np.isnan(rot_y):
        return None

    result['corners_3d'] = compute_3d_box_corners_4face(
        np.array([x3d, y3d, z3d]), dimensions, rot_y, face_type=-1
    )
    if not (np.isnan(xc_norm) or np.isnan(yc_norm)):
        result['center_2d'] = (xc_norm * img_width, yc_norm * img_height)
    result['object_3d'] = [
        x3d, y3d, z3d,
        dimensions[0], dimensions[1], dimensions[2],
        rot_y, -1,
    ]
    return result


# ---------------------------------------------------------------------------
# Drawing helpers
# ---------------------------------------------------------------------------

def draw_3d_box_from_corners(im, corners_3d, calib, img_shape,
                              face_center_2d=None, face_color=None, thickness=1):
    """Project and draw a 3D box given raw corner coordinates.

    Three projection modes are used, in order of preference: fisheye
    (sampled edges) when *calib* carries a non-empty ``distort_coeffs``,
    plain pinhole when *calib* is given without coefficients, and an
    estimated pinhole model derived from *img_shape* when *calib* is ``None``.
    In the two pinhole modes the box is not drawn at all if any projected
    corner is NaN.

    Args:
        im (np.ndarray): BGR image array (H, W, 3).
        corners_3d (np.ndarray): (8, 3) corners in camera frame
            (output of :func:`compute_3d_box_corners_4face`).
        calib (dict | None): Camera calibration dict. ``distort_coeffs`` may
            be a list, tuple or NumPy array.
        img_shape (tuple[int, int]): ``(width, height)`` of *im*.
        face_center_2d (tuple[float, float] | None): Pixel coords to mark with a dot.
        face_color (tuple | None): BGR colour for the face-centre dot.
        thickness (int): Line thickness in pixels.

    Returns:
        np.ndarray: Modified image.
    """
    w, h = img_shape

    # compute_3d_box_corners_4face places rear at indices 0-3 and front at 4-7.
    # plot_box3d_on_img expects front at indices 0-3 (drawn red) and rear at 4-7.
    corners_3d = np.asarray(corners_3d)[[4, 5, 6, 7, 0, 1, 2, 3]]

    color_front = (0, 0, 255)   # Red  (BGR)
    color_back = (255, 0, 0)    # Blue (BGR)
    color_side = (255, 255, 0)  # Cyan (BGR)

    # Explicit None/length test: truth-testing a NumPy array raises ValueError.
    coeffs = calib.get('distort_coeffs') if calib else None
    use_fisheye = coeffs is not None and len(coeffs) > 0

    if use_fisheye:
        # Hand the projection helpers a plain list so they do not depend on
        # the container type stored in the calibration dict.
        fisheye_calib = dict(calib)
        fisheye_calib['distort_coeffs'] = list(coeffs)
        edge_pts = project_3d_box_edges_with_distortion(corners_3d, fisheye_calib, samples_per_edge=15)
        im = plot_box3d_on_img_with_distortion(
            im, edge_pts, color_front, color_back, color_side, thickness=thickness
        )
    else:
        if calib is not None:
            corners_2d = project_3d_to_2d_with_calib(corners_3d, calib)
        else:
            corners_2d = project_3d_to_2d_simple(corners_3d, (w, h))

        if not np.any(np.isnan(corners_2d)):
            im = plot_box3d_on_img(im, corners_2d, color_front, color_back, color_side, thickness=thickness)

    if face_center_2d is not None and face_color is not None:
        cv2.circle(im, (int(face_center_2d[0]), int(face_center_2d[1])), 2, face_color, -1, cv2.LINE_AA)

    return im


def plot_3d_boxes_from_decoded_targets(im, decoded_results, paths, calib=None, names=None,
                                       label_text=None, scale_factor=2,
                                       face_3d_classes=None, complete_3d_classes=None):
    """Render 3D boxes from pre-decoded ground-truth targets onto *im*.

    Only the first image of the batch and ``decoded_results[0]`` are used.

    Args:
        im (torch.Tensor): Batch tensor (N, 3, H, W) normalised [0, 1] in BGR
            format (as returned by the 3D dataloader). A single (3, H, W)
            image is also accepted.
        decoded_results (list[list[dict]]): ``decoded_results[i]`` holds a list of
            result dicts for image *i*, each produced by
            :func:`decode_and_reconstruct_3d_box_from_target`. An empty list
            means "no boxes".
        paths (list[str]): Image file paths (informational only; unused).
        calib (dict | list[dict] | None): Camera calibration dict (or first element
            of a list) used for 3D→2D projection.
        names (dict | None): Mapping from class id to class name (unused here but
            kept for API symmetry).
        label_text (str | None): Text to overlay in the top-left corner, e.g.
            ``"3D GT"``.
        scale_factor (int): Upscale factor applied to the output image.
        face_3d_classes (iterable | None): Class IDs drawn as face-based boxes
            (with a face-centre dot). Defaults to ids 0-8.
        complete_3d_classes (iterable | None): Class IDs drawn as complete
            boxes (with a centre marker). Defaults to ids 9-12.

    Returns:
        np.ndarray | None: (H*scale_factor, W*scale_factor, 3) RGB image with 3D
        boxes drawn, or ``None`` if the input batch is empty.
    """
    if im.ndim == 3:
        im = im.unsqueeze(0)
    if im.shape[0] == 0:
        return None

    face_ids = set(range(9)) if face_3d_classes is None else set(face_3d_classes)
    complete_ids = {9, 10, 11, 12} if complete_3d_classes is None else set(complete_3d_classes)

    im_np = im[0].detach().cpu().numpy().transpose(1, 2, 0)
    # Clip before the uint8 cast so values slightly outside [0, 1] do not wrap.
    im_np = np.ascontiguousarray(np.clip(im_np * 255, 0, 255), dtype=np.uint8)
    h_orig, w_orig = im_np.shape[:2]

    # cv2.resize needs integer sizes even if scale_factor is fractional.
    h_new = int(round(h_orig * scale_factor))
    w_new = int(round(w_orig * scale_factor))
    im_bgr = cv2.resize(im_np, (w_new, h_new), interpolation=cv2.INTER_LINEAR)

    if isinstance(calib, list):
        calib = calib[0] if calib else None

    # Intrinsics scale with the image; distortion coefficients act on
    # normalised coordinates and are passed through unchanged.
    calib_scaled = None
    if calib is not None:
        calib_scaled = {
            'fx': calib['fx'] * scale_factor,
            'fy': calib['fy'] * scale_factor,
            'cx': calib['cx'] * scale_factor,
            'cy': calib['cy'] * scale_factor,
            'distort_coeffs': calib.get('distort_coeffs', []),
        }

    boxes = decoded_results[0] if len(decoded_results) > 0 else []

    for decoded in boxes:
        if not decoded or not decoded['should_draw'] or decoded['corners_3d'] is None:
            continue

        cls = decoded['cls']

        if cls in face_ids:  # face-based reconstruction
            face_center_2d = decoded.get('face_center_2d')
            if face_center_2d is not None:
                face_center_2d = (
                    face_center_2d[0] * scale_factor,
                    face_center_2d[1] * scale_factor,
                )
            im_bgr = draw_3d_box_from_corners(
                im_bgr,
                decoded['corners_3d'],
                calib_scaled,
                (w_new, h_new),
                face_center_2d=face_center_2d,
                face_color=decoded.get('face_color'),
            )

        elif cls in complete_ids:  # complete-box reconstruction
            im_bgr = draw_3d_box_from_corners(
                im_bgr,
                decoded['corners_3d'],
                calib_scaled,
                (w_new, h_new),
                thickness=2,
            )
            center_2d = decoded.get('center_2d')
            if center_2d is not None and len(center_2d) == 2 and np.all(np.isfinite(center_2d)):
                # The canvas is BGR until the final conversion, so request
                # the palette colour in BGR order.
                color = colors(cls, bgr=True)
                u, v = center_2d
                u_s, v_s = int(u * scale_factor), int(v * scale_factor)
                cv2.circle(im_bgr, (u_s, v_s), 4, color, -1, cv2.LINE_AA)
                cv2.circle(im_bgr, (u_s, v_s), 6, (255, 255, 255), 2, cv2.LINE_AA)

    if label_text:
        cv2.putText(im_bgr, label_text, (10, 50), cv2.FONT_HERSHEY_SIMPLEX,
                    1.5, (0, 255, 255), 3, cv2.LINE_AA)

    return cv2.cvtColor(im_bgr, cv2.COLOR_BGR2RGB)


# ---------------------------------------------------------------------------
# Bird's-Eye View (BEV) visualization
# ---------------------------------------------------------------------------

def draw_bev_blank(max_range=200):
    """Create a white BEV canvas with range / lateral grid lines and the ego box.

    The canvas uses 20 pixels per metre, covers 100 m laterally (-50 m … +50 m)
    and *max_range* metres forward. The ego vehicle sits at the bottom-centre.

    Args:
        max_range (int): Maximum forward range in metres (default 200).

    Returns:
        np.ndarray: (H, W, 3) BGR image with grid and ego-vehicle box drawn.
    """
    ppm = 20                       # pixels per metre
    canvas_h = max_range * ppm     # e.g. 4000 px for 200 m
    canvas_w = 100 * ppm           # lateral range -50 m … +50 m

    canvas = np.full((canvas_h, canvas_w, 3), 255, dtype=np.uint8)
    ego_u = canvas_w // 2
    ego_v = canvas_h               # ego position at bottom-centre

    # Ego footprint: 4.5 m long, 1.8 m wide, centred on the ego position.
    half_len = int(4.5 * ppm) // 2
    half_wid = int(1.8 * ppm) // 2
    left, right = ego_u - half_wid, ego_u + half_wid
    top, bottom = ego_v - half_len, ego_v + half_len
    ego_outline = np.array(
        [[left, top], [right, top], [right, bottom], [left, bottom]],
        dtype=np.int32,
    )

    grid_colour = (180, 180, 180)
    text_colour = (80, 80, 80)

    # Horizontal (range) grid lines, one every 20 m.
    range_step_px = 20 * ppm
    for k in range(max_range // 20 + 1):
        row = ego_v - k * range_step_px
        if row < 0:
            continue
        cv2.line(canvas, (0, row), (canvas_w, row), grid_colour, 3, cv2.LINE_AA)
        cv2.putText(canvas, f"{k * 20}m", (ego_u + 15, row - 15),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, text_colour, 2, cv2.LINE_AA)

    # Vertical (lateral) grid lines, one every 10 m.
    lateral_step_px = 10 * ppm
    for k in range(-5, 6):
        col = ego_u + k * lateral_step_px
        if not (0 <= col < canvas_w):
            continue
        cv2.line(canvas, (col, 0), (col, canvas_h), grid_colour, 3, cv2.LINE_AA)
        if k != 0:
            cv2.putText(canvas, f"{k * 10}m", (col - 40, canvas_h - 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.0, text_colour, 2, cv2.LINE_AA)

    cv2.drawContours(canvas, [ego_outline], 0, (255, 0, 0), -1)  # blue-filled ego box
    return canvas


def cam_corners_front_rear(pred3d, facetype):
    """Compute 8 box corners from a face-centre 3D representation.

    Args:
        pred3d (array-like): ``[x, y, z, l, h, w, rot_y]`` (7 values). Lists
            and tuples are accepted as well as NumPy arrays.
        facetype (int | str): Face type identifier — ``0``/``'front'``,
            ``1``/``'tail'``, ``2``/``'left'``, ``3``/``'right'``,
            ``-1``/``'whole'``. It tells which point of the box the
            ``(x, y, z)`` in *pred3d* refers to.

    Returns:
        np.ndarray: (8, 3) 3D corner coordinates in camera frame.

    Raises:
        AssertionError: If *facetype* is not one of the identifiers above.
    """
    # Convert once so that slicing/reshape below also work for plain sequences.
    pred3d = np.asarray(pred3d)
    dims = pred3d[3:6]

    # (accepted identifiers, position of the anchor point inside the unit cube)
    anchors = (
        (('tail', 1), [0, 0.5, 0.5]),
        (('front', 0), [1, 0.5, 0.5]),
        (('right', 3), [0.5, 0.5, 0]),
        (('left', 2), [0.5, 0.5, 1]),
        (('whole', -1), [0.5, 0.5, 0.5]),
    )
    anchor = None
    for aliases, offset in anchors:
        if facetype in aliases:
            anchor = offset
            break
    if anchor is None:
        raise AssertionError(f"Non-valid face type: {facetype}")

    corners_norm = np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)
    corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]].astype(float)
    corners_norm -= anchor

    corners = dims.reshape(1, 3) * corners_norm.reshape(8, 3)
    corners = rotation_3d_in_axis(corners, pred3d[6], axis=1)
    corners += pred3d[:3].reshape(1, 3)
    return corners


def drawbev(bevimg, vehicle3d, is_pred=True):
    """Draw a single vehicle box on a BEV image.

    The box is drawn as a rotated rectangle around the box centre, plus an
    arrow from the centre towards the front face. Objects outside the
    lateral range (±50 m) or forward range (0–200 m), or with non-finite
    pose values, are skipped and the image is returned untouched.

    Args:
        bevimg (np.ndarray): BEV image produced by :func:`draw_bev_blank`.
        vehicle3d (list | np.ndarray): ``[x, y, z, l, h, w, ..., rot_y, face_type]``.
            The second-to-last element is ``rot_y`` and the last is ``face_type``.
        is_pred (bool): ``True`` → red box (prediction); ``False`` → green box (GT).

    Returns:
        np.ndarray: Modified BEV image.
    """
    x, y, z = vehicle3d[0], vehicle3d[1], vehicle3d[2]
    l, h, w = vehicle3d[3], vehicle3d[4], vehicle3d[5]
    rotation_y = vehicle3d[-2]
    face_type = vehicle3d[-1]

    pixels_per_meter = 20
    max_range = 200
    lateral_range = 50

    img_height = bevimg.shape[0]
    img_width = bevimg.shape[1]
    ego_center_x = img_width // 2
    ego_center_y = img_height

    pose = np.array([x, y, z, l, h, w, rotation_y])
    # NaN compares False against every range limit below and would later
    # make int() raise, so reject non-finite poses explicitly.
    if not np.all(np.isfinite(pose)):
        return bevimg

    if x > lateral_range or x < -lateral_range or z > max_range or z < 0:
        return bevimg

    corners = cam_corners_front_rear(pose, face_type)
    xyz3d_front = np.mean(corners[4:8, :], axis=0)   # centre of the front face
    xyz3d_center = np.mean(corners[0:8, :], axis=0)  # centre of the whole box

    # Camera X maps to image columns; camera Z (forward) maps to rows,
    # counted upwards from the ego position at the bottom edge.
    center = (
        int(ego_center_x + xyz3d_center[0] * pixels_per_meter),
        int(ego_center_y - xyz3d_center[2] * pixels_per_meter),
    )
    front_point = (
        int(ego_center_x + xyz3d_front[0] * pixels_per_meter),
        int(ego_center_y - xyz3d_front[2] * pixels_per_meter),
    )

    rect = (
        center,
        (float(l * pixels_per_meter), float(w * pixels_per_meter)),
        float(np.degrees(rotation_y)),
    )
    box = np.intp(cv2.boxPoints(rect))

    color = (0, 0, 255) if is_pred else (0, 255, 0)
    cv2.drawContours(bevimg, [box], 0, color, 3, cv2.LINE_AA)
    cv2.arrowedLine(bevimg, center, front_point, color, thickness=3, tipLength=0.3, line_type=cv2.LINE_AA)

    return bevimg