Files
2026-06-24 09:35:46 +08:00

850 lines
30 KiB
Python
Executable File

"""3D visualization plotting utilities.
Self-contained module extracted from yolov5-3d/utils/plots.py.
Contains only the functions required by visualize_single_frame.py and
visualize_batch.py for ground-truth 3D visualization, with no cross-repo
dependencies.
"""
import cv2
import numpy as np
import torch
from ultralytics.utils.plotting import Annotator # noqa: F401 (re-exported)
# ---------------------------------------------------------------------------
# Color helpers
# ---------------------------------------------------------------------------
class Colors:
"""Provides an RGB color palette derived from Ultralytics color scheme."""
def __init__(self):
hexs = (
"FF3838", "FF9D97", "FF701F", "FFB21D", "CFD231",
"48F90A", "92CC17", "3DDB86", "1A9334", "00D4BB",
"2C99A8", "00C2FF", "344593", "6473FF", "0018EC",
"8438FF", "520085", "CB38FF", "FF95C8", "FF37C7",
)
self.palette = [self.hex2rgb(f"#{c}") for c in hexs]
self.n = len(self.palette)
def __call__(self, i, bgr=False):
"""Return colour for index ``i`` (BGR if ``bgr=True``, else RGB)."""
c = self.palette[int(i) % self.n]
return (c[2], c[1], c[0]) if bgr else c
@staticmethod
def hex2rgb(h):
"""Convert hex colour string to (R, G, B) tuple."""
return tuple(int(h[1 + i: 1 + i + 2], 16) for i in (0, 2, 4))
colors = Colors() # module-level singleton; callers: from plots_3d import colors
# ---------------------------------------------------------------------------
# 3D geometry helpers
# ---------------------------------------------------------------------------
def rotation_3d_in_axis(points, angles, axis=0):
"""Rotate *points* around a specified camera-frame axis.
Args:
points (np.ndarray): (N, 3) array of 3D points.
angles (float): Rotation angle in radians.
axis (int): 0=X, 1=Y, 2=Z.
Returns:
np.ndarray: Rotated points (N, 3).
"""
rot_sin = np.sin(angles)
rot_cos = np.cos(angles)
ones = np.ones_like(rot_cos)
zeros = np.zeros_like(rot_cos)
if axis == 1:
rot_mat = np.stack([
np.stack([rot_cos, zeros, -rot_sin]),
np.stack([zeros, ones, zeros]),
np.stack([rot_sin, zeros, rot_cos]),
])
elif axis == 2:
rot_mat = np.stack([
np.stack([rot_cos, rot_sin, zeros]),
np.stack([-rot_sin, rot_cos, zeros]),
np.stack([zeros, zeros, ones]),
])
elif axis == 0:
rot_mat = np.stack([
np.stack([ones, zeros, zeros]),
np.stack([zeros, rot_cos, rot_sin]),
np.stack([zeros, -rot_sin, rot_cos]),
])
else:
raise ValueError(f"axis must be in {{0, 1, 2}}, got {axis}")
return np.dot(points, rot_mat)
def compute_3d_box_corners_4face(center_3d, dimensions, rotation, face_type=0):
"""Compute the 8 corners of a 3D bounding box from a face-center point.
Args:
center_3d (array-like): (x, y, z) centre of the specified face in camera coords.
dimensions (array-like): (length, height, width) of the box in metres.
rotation (float): rot_y — rotation about the Y axis in radians.
face_type (int): 0=front, 1=tail/rear, 2=left, 3=right, -1=box centre.
Returns:
np.ndarray: (8, 3) corner coordinates in camera frame.
"""
l, h, w = dimensions
corners_norm = np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1).astype(np.float64)
corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
offsets = {
1: [0, 0.5, 0.5], # tail
0: [1, 0.5, 0.5], # front
3: [0.5, 0.5, 0], # right
2: [0.5, 0.5, 1], # left
-1: [0.5, 0.5, 0.5], # whole centre
}
corners_norm = corners_norm - offsets.get(face_type, offsets[-1])
corners = np.array([l, h, w]).reshape(1, 3) * corners_norm.reshape(8, 3)
corners = rotation_3d_in_axis(corners, rotation, axis=1)
corners += np.array(center_3d).reshape(1, 3)
return corners
# ---------------------------------------------------------------------------
# Fisheye (KB) distortion helpers
# ---------------------------------------------------------------------------
def apply_fisheye_distortion(x, y, distort_coeffs):
"""Apply Kannala-Brandt fisheye distortion to normalised camera coordinates.
Args:
x (float): Normalised x coordinate (x3d / z3d).
y (float): Normalised y coordinate (y3d / z3d).
distort_coeffs (list): [k1, k2, k3, k4] KB coefficients.
Returns:
tuple[float, float]: Distorted normalised coordinates (xd, yd).
"""
if not distort_coeffs or len(distort_coeffs) < 4:
return x, y
k1, k2, k3, k4 = distort_coeffs[:4]
r = np.sqrt(x * x + y * y)
if r < 1e-8:
return x, y
theta = np.arctan(r)
theta_d = theta * (1 + k1 * theta**2 + k2 * theta**4 + k3 * theta**6 + k4 * theta**8)
scale = theta_d / r
return x * scale, y * scale
def remove_fisheye_distortion(xd, yd, distort_coeffs, max_iter=20):
"""Remove KB fisheye distortion from normalised camera coordinates.
Args:
xd (float): Distorted normalised x coordinate.
yd (float): Distorted normalised y coordinate.
distort_coeffs (list): [k1, k2, k3, k4] KB coefficients.
max_iter (int): Maximum Newton-Raphson iterations.
Returns:
tuple[float, float]: Undistorted normalised coordinates (xn, yn).
"""
if not distort_coeffs or len(distort_coeffs) < 4:
return xd, yd
k1, k2, k3, k4 = distort_coeffs[:4]
r_d = np.sqrt(xd * xd + yd * yd)
if r_d < 1e-8:
return xd, yd
theta_d = r_d
theta = theta_d / (1 + k1 * theta_d * theta_d)
for _ in range(max_iter):
theta2 = theta * theta
theta4 = theta2 * theta2
theta6 = theta4 * theta2
theta8 = theta4 * theta4
f = theta * (1 + k1 * theta2 + k2 * theta4 + k3 * theta6 + k4 * theta8) - theta_d
f_prime = 1 + 3 * k1 * theta2 + 5 * k2 * theta4 + 7 * k3 * theta6 + 9 * k4 * theta8
theta_new = theta - f / f_prime
if abs(theta_new - theta) < 1e-8:
theta = theta_new
break
theta = theta_new
r = np.tan(theta)
scale = r / r_d
return xd * scale, yd * scale
# ---------------------------------------------------------------------------
# 3D-to-2D projection helpers
# ---------------------------------------------------------------------------
def project_3d_to_2d_with_distortion(points_3d, calib):
"""Project 3D points to 2D using KB fisheye camera calibration.
Args:
points_3d (np.ndarray): (N, 3) points in camera coordinates.
calib (dict): Camera parameters ``fx``, ``fy``, ``cx``, ``cy``,
and optional ``distort_coeffs``.
Returns:
np.ndarray: (N, 2) image coordinates (NaN for behind-camera points).
"""
fx, fy = calib['fx'], calib['fy']
cx, cy = calib['cx'], calib['cy']
distort_coeffs = calib.get('distort_coeffs', [])
points_2d = []
for x, y, z in points_3d:
if z > 0.1:
xn, yn = x / z, y / z
xd, yd = apply_fisheye_distortion(xn, yn, distort_coeffs)
points_2d.append([fx * xd + cx, fy * yd + cy])
else:
points_2d.append([np.nan, np.nan])
return np.array(points_2d)
def sample_3d_edge(p1, p2, num_samples=10):
"""Uniformly sample *num_samples* points along the 3D edge from *p1* to *p2*.
Args:
p1 (array-like): Start point (x, y, z).
p2 (array-like): End point (x, y, z).
num_samples (int): Number of sample points.
Returns:
np.ndarray: (num_samples, 3) sampled 3D points.
"""
t = np.linspace(0, 1, num_samples).reshape(-1, 1)
return p1 + t * (p2 - p1)
def project_3d_box_edges_with_distortion(corners_3d, calib, samples_per_edge=10):
"""Project 3D box edges to 2D by sampling, handling fisheye distortion.
Args:
corners_3d (np.ndarray): (8, 3) 3D corner coordinates.
calib (dict): Camera calibration dict.
samples_per_edge (int): Number of samples per edge.
Returns:
dict: Mapping edge_name → (N, 2) 2D projected points.
"""
edges = {
'back_0': (4, 5), 'back_1': (5, 6), 'back_2': (6, 7), 'back_3': (7, 4),
'connect_0': (0, 4), 'connect_1': (1, 5), 'connect_2': (2, 6), 'connect_3': (3, 7),
'front_0': (0, 1), 'front_1': (1, 2), 'front_2': (2, 3), 'front_3': (3, 0),
'front_x1': (0, 2), 'front_x2': (1, 3),
}
return {
name: project_3d_to_2d_with_distortion(
sample_3d_edge(corners_3d[i], corners_3d[j], samples_per_edge), calib
)
for name, (i, j) in edges.items()
}
def plot_box3d_on_img_with_distortion(img, edge_points_2d,
color_front=(255, 0, 0),
color_back=(0, 0, 255),
color_side=(0, 255, 255),
thickness=1):
"""Draw a 3D box on *img* using pre-projected edge point lists (fisheye-aware).
Args:
img (np.ndarray): BGR image to draw on.
edge_points_2d (dict): Output of :func:`project_3d_box_edges_with_distortion`.
color_front (tuple): BGR colour for front-face edges.
color_back (tuple): BGR colour for back-face edges.
color_side (tuple): BGR colour for side connecting edges.
thickness (int): Line thickness in pixels.
Returns:
np.ndarray: Modified image.
"""
front_edges = {'front_0', 'front_1', 'front_2', 'front_3', 'front_x1', 'front_x2'}
back_edges = {'back_0', 'back_1', 'back_2', 'back_3'}
for edge_name, points in edge_points_2d.items():
if np.any(np.isnan(points)):
continue
pts = points.astype(np.int32)
if edge_name in front_edges:
color = color_front
elif edge_name in back_edges:
color = color_back
else:
color = color_side
cv2.polylines(img, [pts], isClosed=False, color=color, thickness=thickness, lineType=cv2.LINE_AA)
return img
def project_3d_to_2d_with_calib(points_3d, calib):
"""Project 3D points to 2D using pinhole calibration (no distortion).
Args:
points_3d (np.ndarray): (N, 3) points in camera coordinates.
calib (dict): Camera parameters ``fx``, ``fy``, ``cx``, ``cy``.
Returns:
np.ndarray: (N, 2) image coordinates (NaN for behind-camera points).
"""
fx, fy = calib['fx'], calib['fy']
cx, cy = calib['cx'], calib['cy']
points_2d = []
for x, y, z in points_3d:
if z > 0.1:
points_2d.append([fx * x / z + cx, fy * y / z + cy])
else:
points_2d.append([np.nan, np.nan])
return np.array(points_2d)
def project_3d_to_2d_simple(points_3d, img_size):
"""Project 3D points to 2D using a simple estimated pinhole model.
Args:
points_3d (np.ndarray): (N, 3) points in camera coordinates.
img_size (tuple[int, int]): ``(width, height)`` of the image.
Returns:
np.ndarray: (N, 2) image coordinates.
"""
w, h = img_size
fx = fy = w * 1.2
cx, cy = w / 2, h / 2
points_2d = []
for x, y, z in points_3d:
if z > 0.1:
points_2d.append([fx * x / z + cx, fy * y / z + cy])
else:
points_2d.append([np.nan, np.nan])
return np.array(points_2d)
def plot_box3d_on_img(img, corners_2d,
color_front=(255, 0, 0),
color_back=(0, 0, 255),
color_side=(0, 255, 255),
thickness=1):
"""Draw a 3D bounding box on *img* from 2D projected corners.
Args:
img (np.ndarray): BGR image to draw on.
corners_2d (np.ndarray): (8, 2) projected corner coordinates.
color_front (tuple): BGR colour for front-face edges (indices 0-3).
color_back (tuple): BGR colour for back-face edges (indices 4-7).
color_side (tuple): BGR colour for connecting side edges.
thickness (int): Line thickness in pixels.
Returns:
np.ndarray: Modified image.
"""
line_indices = (
(4, 5), (5, 6), (6, 7), (7, 4), # back face
(0, 4), (1, 5), (2, 6), (3, 7), # side edges
(0, 1), (1, 2), (2, 3), (3, 0), (0, 2), (1, 3), # front face + X mark
)
front_edges = {(0, 1), (1, 2), (2, 3), (3, 0), (0, 2), (1, 3)}
back_edges = {(4, 5), (5, 6), (6, 7), (7, 4)}
corners = corners_2d.astype(np.int32)
for start, end in line_indices:
try:
pt1 = (corners[start, 0], corners[start, 1])
pt2 = (corners[end, 0], corners[end, 1])
if (start, end) in front_edges:
cv2.line(img, pt1, pt2, color_front, thickness, cv2.LINE_AA)
elif (start, end) in back_edges:
cv2.line(img, pt1, pt2, color_back, thickness, cv2.LINE_AA)
else:
cv2.line(img, pt1, pt2, color_side, thickness, cv2.LINE_AA)
except Exception:
pass
return img
# ---------------------------------------------------------------------------
# 3D box reconstruction from target label format
# ---------------------------------------------------------------------------
def _reconstruct_3d_box_from_face(face_uv, face_z, dims, rot_y, face_type, calib):
"""Reconstruct 3D box corners from a visible face centre.
Args:
face_uv (tuple[float, float]): Pixel coordinates (u, v) of the face centre.
face_z (float): Depth of the face centre in metres.
dims (array-like): (length, height, width) in metres.
rot_y (float): Yaw rotation in radians.
face_type (int): 0=front, 1=rear, 2=left, 3=right.
calib (dict): Camera calibration dict.
Returns:
tuple[np.ndarray, list] | None: ``(corners_3d, object_3d)`` or ``None`` on failure.
"""
if calib is None:
return None
fx, fy = calib['fx'], calib['fy']
cx, cy = calib['cx'], calib['cy']
distort_coeffs = calib.get('distort_coeffs', [])
u_face, v_face = face_uv
xd = (u_face - cx) / fx
yd = (v_face - cy) / fy
if distort_coeffs and len(distort_coeffs) >= 4:
xn, yn = remove_fisheye_distortion(xd, yd, distort_coeffs)
else:
xn, yn = xd, yd
l, h, w = dims
if np.isnan(l) or np.isnan(h) or np.isnan(w) or np.isnan(rot_y):
return None
face_center_3d = np.array([xn * face_z, yn * face_z, face_z])
corners_3d = compute_3d_box_corners_4face(face_center_3d, dims, rot_y, face_type=face_type)
object_3d = [face_center_3d[0], face_center_3d[1], face_center_3d[2], l, h, w, rot_y, face_type]
return corners_3d, object_3d
def decode_and_reconstruct_3d_box_from_target(target, calib, img_width, img_height,
face_3d_classes=None, complete_3d_classes=None):
"""Decode a ground-truth target vector and reconstruct its 3D box.
The target array follows the 48-column format used in ``YOLOGround3DDataset``:
* col 0 — image index
* col 1 — class id
* cols 2-5 — normalised 2D bbox [x, y, w, h]
* cols 6-8 — 3D centre [x3d, y3d, z3d] in camera coords
* cols 9-11 — dimensions [l, h, w]
* col 12 — rot_y
* cols 13-14 — normalised UV projection of 3D centre
* col 15 — alpha
* cols 16-23 — front face [x3d, y3d, z3d, alpha, xc, yc, score, visible]
* cols 24-31 — rear face (same layout)
* cols 32-39 — left face
* cols 40-47 — right face
Args:
target (np.ndarray): (48,) label vector.
calib (dict): Camera calibration dict; must contain ``depth_scale``.
img_width (int): Image width in pixels.
img_height (int): Image height in pixels.
face_3d_classes (list | None): Class IDs using face-based reconstruction.
Defaults to ``[0, 13]`` (vehicles, tricycles).
complete_3d_classes (list | None): Class IDs using complete-box
reconstruction. Defaults to ``[1, 2, 3]`` (pedestrians, bicycles,
riders).
Returns:
dict | None: Result dict with keys ``should_draw``, ``cls``,
``corners_3d``, ``face_center_2d``, ``face_color``, ``center_2d``,
``object_3d``; or ``None`` if the target is invalid / unsupported.
"""
if face_3d_classes is None:
face_3d_classes = [0, 1, 2, 3, 4, 5, 6, 7, 8] # vehicles
if complete_3d_classes is None:
complete_3d_classes = [9, 10, 11, 12] # pedestrian / cyclists
if len(target) < 15 or np.isnan(target[1]):
return None
cls = int(target[1])
if cls not in face_3d_classes and cls not in complete_3d_classes:
return None
depth_scale = calib['depth_scale']
result = {
'should_draw': True,
'cls': cls,
'corners_3d': None,
'face_center_2d': None,
'face_color': None,
'center_2d': None,
}
if cls in face_3d_classes:
if len(target) < 48:
return None
face_offsets = [16, 24, 32, 40]
# BGR: front=red, rear=blue, left=green, right=yellow
face_colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (0, 255, 255)]
best_face_type = -1
best_score = -1.0
best_face_data = None
for face_type, face_offset in enumerate(face_offsets):
if target.shape[0] < face_offset + 8:
continue
face_data = target[face_offset:face_offset + 8]
_, _, z3d_face, _, xc_face, yc_face, score, is_visible = face_data
if is_visible == -1 or np.isnan(is_visible) or is_visible != 1:
continue
if np.isnan(score) or score < 0.3:
continue
if np.isnan(xc_face) or np.isnan(yc_face) or np.isnan(z3d_face) or z3d_face <= 0:
continue
if score > best_score:
best_score = score
best_face_type = face_type
best_face_data = face_data
if best_face_type != -1 and best_face_data is not None:
xc_face = best_face_data[4]
yc_face = best_face_data[5]
z3d_face = best_face_data[2] * depth_scale
u_face = xc_face * img_width
v_face = yc_face * img_height
dims = target[9:12]
rot_y = target[12]
result_face = _reconstruct_3d_box_from_face(
(u_face, v_face), z3d_face, dims, rot_y, best_face_type, calib
)
if result_face is not None:
corners_3d, object_3d = result_face
result['corners_3d'] = corners_3d
result['face_center_2d'] = (u_face, v_face)
result['face_color'] = face_colors[best_face_type]
result['object_3d'] = object_3d
elif cls in complete_3d_classes:
x3d, y3d, z3d = target[6:9]
dimensions = target[9:12]
rot_y = target[12]
xc_norm, yc_norm = target[13:15]
z3d = z3d * depth_scale
if np.isnan(z3d) or z3d <= 0 or np.any(np.isnan(dimensions)):
return None
if np.isnan(x3d) or np.isnan(y3d):
return None
corners_3d = compute_3d_box_corners_4face(
np.array([x3d, y3d, z3d]), dimensions, rot_y, face_type=-1
)
result['corners_3d'] = corners_3d
result['center_2d'] = (xc_norm * img_width, yc_norm * img_height)
result['object_3d'] = [
x3d, y3d, z3d,
dimensions[0], dimensions[1], dimensions[2],
rot_y, -1,
]
return result
# ---------------------------------------------------------------------------
# Drawing helpers
# ---------------------------------------------------------------------------
def draw_3d_box_from_corners(im, corners_3d, calib, img_shape,
face_center_2d=None, face_color=None, thickness=1):
"""Project and draw a 3D box given raw corner coordinates.
Args:
im (np.ndarray): BGR image array (H, W, 3).
corners_3d (np.ndarray): (8, 3) corners in camera frame
(output of :func:`compute_3d_box_corners_4face`).
calib (dict | None): Camera calibration dict.
img_shape (tuple[int, int]): ``(width, height)`` of *im*.
face_center_2d (tuple[float, float] | None): Pixel coords to mark with a dot.
face_color (tuple | None): BGR colour for the face-centre dot.
thickness (int): Line thickness in pixels.
Returns:
np.ndarray: Modified image.
"""
w, h = img_shape
# compute_3d_box_corners_4face places rear at indices 0-3 and front at 4-7.
# plot_box3d_on_img expects front at indices 0-3 (drawn red) and rear at 4-7.
corners_3d = corners_3d[[4, 5, 6, 7, 0, 1, 2, 3]]
color_front = (0, 0, 255) # Red (BGR)
color_back = (255, 0, 0) # Blue (BGR)
color_side = (255, 255, 0) # Cyan (BGR)
if calib and calib.get('distort_coeffs'):
edge_pts = project_3d_box_edges_with_distortion(corners_3d, calib, samples_per_edge=15)
im = plot_box3d_on_img_with_distortion(
im, edge_pts, color_front, color_back, color_side, thickness=thickness
)
else:
corners_2d = (project_3d_to_2d_with_calib(corners_3d, calib)
if calib is not None
else project_3d_to_2d_simple(corners_3d, (w, h)))
if not np.any(np.isnan(corners_2d)):
im = plot_box3d_on_img(im, corners_2d, color_front, color_back, color_side, thickness=thickness)
if face_center_2d is not None and face_color is not None:
cv2.circle(im, (int(face_center_2d[0]), int(face_center_2d[1])), 2, face_color, -1, cv2.LINE_AA)
return im
def plot_3d_boxes_from_decoded_targets(im, decoded_results, paths, calib=None, names=None,
label_text=None, scale_factor=2):
"""Render 3D boxes from pre-decoded ground-truth targets onto *im*.
Args:
im (torch.Tensor): Batch tensor (N, 3, H, W) normalised [0, 1] in BGR
format (as returned by the 3D dataloader).
decoded_results (list[list[dict]]): ``decoded_results[i]`` holds a list of
result dicts for image *i*, each produced by
:func:`decode_and_reconstruct_3d_box_from_target`.
paths (list[str]): Image file paths (informational only).
calib (dict | list[dict] | None): Camera calibration dict (or first element
of a list) used for 3D→2D projection.
names (dict | None): Mapping from class id to class name (unused here but
kept for API symmetry).
label_text (str | None): Text to overlay in the top-left corner, e.g.
``"3D GT"``.
scale_factor (int): Upscale factor applied to the output image.
Returns:
np.ndarray | None: (H*scale_factor, W*scale_factor, 3) RGB image with 3D
boxes drawn, or ``None`` if the input is empty.
"""
if im.ndim == 3:
im = im.unsqueeze(0)
im_np = im[0].cpu().numpy().transpose(1, 2, 0)
im_np = np.ascontiguousarray(im_np * 255, dtype=np.uint8)
h_orig, w_orig = im_np.shape[:2]
h_new = h_orig * scale_factor
w_new = w_orig * scale_factor
im_bgr = cv2.resize(im_np, (w_new, h_new), interpolation=cv2.INTER_LINEAR)
if isinstance(calib, list):
calib = calib[0] if calib else None
calib_scaled = None
if calib is not None:
calib_scaled = {
'fx': calib['fx'] * scale_factor,
'fy': calib['fy'] * scale_factor,
'cx': calib['cx'] * scale_factor,
'cy': calib['cy'] * scale_factor,
'distort_coeffs': calib.get('distort_coeffs', []),
}
for decoded in decoded_results[0]:
if not decoded or not decoded['should_draw'] or decoded['corners_3d'] is None:
continue
cls = decoded['cls']
if cls in range(9): # vehicles (car/suv/pickup/...special_vehicle/unknown) — face-based
face_center_2d = decoded.get('face_center_2d')
if face_center_2d is not None:
face_center_2d = (
face_center_2d[0] * scale_factor,
face_center_2d[1] * scale_factor,
)
im_bgr = draw_3d_box_from_corners(
im_bgr,
decoded['corners_3d'],
calib_scaled,
(w_new, h_new),
face_center_2d=face_center_2d,
face_color=decoded.get('face_color'),
)
elif cls in (9, 10, 11, 12): # pedestrian / bicyclists / bicycles / tricycles — complete box
im_bgr = draw_3d_box_from_corners(
im_bgr,
decoded['corners_3d'],
calib_scaled,
(w_new, h_new),
thickness=2,
)
if decoded.get('center_2d'):
color = colors(cls)
u, v = decoded['center_2d']
u_s, v_s = int(u * scale_factor), int(v * scale_factor)
cv2.circle(im_bgr, (u_s, v_s), 4, color, -1, cv2.LINE_AA)
cv2.circle(im_bgr, (u_s, v_s), 6, (255, 255, 255), 2, cv2.LINE_AA)
if label_text:
cv2.putText(im_bgr, label_text, (10, 50), cv2.FONT_HERSHEY_SIMPLEX,
1.5, (0, 255, 255), 3, cv2.LINE_AA)
return cv2.cvtColor(im_bgr, cv2.COLOR_BGR2RGB)
# ---------------------------------------------------------------------------
# Bird's-Eye View (BEV) visualization
# ---------------------------------------------------------------------------
def draw_bev_blank(max_range=200):
"""Create a blank BEV image with distance and lateral grid lines.
Args:
max_range (int): Maximum forward range in metres (default 200).
Returns:
np.ndarray: (H, W, 3) BGR image with grid and ego-vehicle box drawn.
"""
pixels_per_meter = 20
img_height = max_range * pixels_per_meter # e.g. 4000 px for 200 m
img_width = 100 * pixels_per_meter # lateral range -50 m … +50 m
bevimg = np.ones((img_height, img_width, 3), dtype=np.uint8) * 255
ego_center_x = img_width // 2
ego_center_y = img_height # ego position at bottom-centre
ego_half_l = int(4.5 * pixels_per_meter) // 2
ego_half_w = int(1.8 * pixels_per_meter) // 2
ego_box = np.array([
[ego_center_x - ego_half_w, ego_center_y - ego_half_l],
[ego_center_x + ego_half_w, ego_center_y - ego_half_l],
[ego_center_x + ego_half_w, ego_center_y + ego_half_l],
[ego_center_x - ego_half_w, ego_center_y + ego_half_l],
], dtype=np.int32)
# Horizontal (range) grid
grid_step_px = 20 * pixels_per_meter # every 20 m
for i in range(max_range // 20 + 1):
y_pos = ego_center_y - i * grid_step_px
if y_pos >= 0:
cv2.line(bevimg, (0, y_pos), (img_width, y_pos), (180, 180, 180), 3, cv2.LINE_AA)
cv2.putText(bevimg, f"{i * 20}m", (ego_center_x + 15, y_pos - 15),
cv2.FONT_HERSHEY_SIMPLEX, 1.2, (80, 80, 80), 2, cv2.LINE_AA)
# Vertical (lateral) grid
lat_step_px = 10 * pixels_per_meter # every 10 m
for i in range(-5, 6):
x_pos = ego_center_x + i * lat_step_px
if 0 <= x_pos < img_width:
cv2.line(bevimg, (x_pos, 0), (x_pos, img_height), (180, 180, 180), 3, cv2.LINE_AA)
if i != 0:
cv2.putText(bevimg, f"{i * 10}m", (x_pos - 40, img_height - 20),
cv2.FONT_HERSHEY_SIMPLEX, 1.0, (80, 80, 80), 2, cv2.LINE_AA)
cv2.drawContours(bevimg, [ego_box], 0, (255, 0, 0), -1) # blue-filled ego box
return bevimg
def cam_corners_front_rear(pred3d, facetype):
"""Compute 8 box corners from a face-centre 3D representation.
Args:
pred3d (array-like): ``[x, y, z, l, h, w, rot_y]`` (7 values).
facetype (int | str): Face type identifier — ``0``/``'front'``,
``1``/``'tail'``, ``2``/``'left'``, ``3``/``'right'``,
``-1``/``'whole'``.
Returns:
np.ndarray: (8, 3) 3D corner coordinates in camera frame.
"""
dims = pred3d[3:6]
corners_norm = np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)
corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]].astype(float)
if facetype in ('tail', 1):
corners_norm -= [0, 0.5, 0.5]
elif facetype in ('front', 0):
corners_norm -= [1, 0.5, 0.5]
elif facetype in ('right', 3):
corners_norm -= [0.5, 0.5, 0]
elif facetype in ('left', 2):
corners_norm -= [0.5, 0.5, 1]
elif facetype in ('whole', -1):
corners_norm -= [0.5, 0.5, 0.5]
else:
raise AssertionError(f"Non-valid face type: {facetype}")
corners = dims.reshape(1, 3) * corners_norm.reshape(8, 3)
corners = rotation_3d_in_axis(corners, pred3d[6], axis=1)
corners += pred3d[:3].reshape(1, 3)
return corners
def drawbev(bevimg, vehicle3d, is_pred=True):
"""Draw a single vehicle box on a BEV image.
Args:
bevimg (np.ndarray): BEV image produced by :func:`draw_bev_blank`.
vehicle3d (list | np.ndarray): ``[x, y, z, l, h, w, ..., rot_y, face_type]``.
The second-to-last element is ``rot_y`` and the last is ``face_type``.
is_pred (bool): ``True`` → red box (prediction); ``False`` → green box (GT).
Returns:
np.ndarray: Modified BEV image.
"""
x, y, z = vehicle3d[0], vehicle3d[1], vehicle3d[2]
l, h, w = vehicle3d[3], vehicle3d[4], vehicle3d[5]
rotation_y = vehicle3d[-2]
face_type = vehicle3d[-1]
pixels_per_meter = 20
max_range = 200
lateral_range = 50
img_height = bevimg.shape[0]
img_width = bevimg.shape[1]
ego_center_x = img_width // 2
ego_center_y = img_height
if x > lateral_range or x < -lateral_range or z > max_range or z < 0:
return bevimg
corners = cam_corners_front_rear(np.array([x, y, z, l, h, w, rotation_y]), face_type)
xyz3d_front = np.mean(corners[4:8, :], axis=0)
xyz3d_center = np.mean(corners[0:8, :], axis=0)
center = (
int(ego_center_x + xyz3d_center[0] * pixels_per_meter),
int(ego_center_y - xyz3d_center[2] * pixels_per_meter),
)
front_point = (
int(ego_center_x + xyz3d_front[0] * pixels_per_meter),
int(ego_center_y - xyz3d_front[2] * pixels_per_meter),
)
rect = (center, (l * pixels_per_meter, w * pixels_per_meter), np.degrees(rotation_y))
box = np.intp(cv2.boxPoints(rect))
color = (0, 0, 255) if is_pred else (0, 255, 0)
cv2.drawContours(bevimg, [box], 0, color, 3, cv2.LINE_AA)
cv2.arrowedLine(bevimg, center, front_point, color, thickness=3, tipLength=0.3, line_type=cv2.LINE_AA)
return bevimg