850 lines
30 KiB
Python
Executable File
850 lines
30 KiB
Python
Executable File
"""3D visualization plotting utilities.
|
|
|
|
Self-contained module extracted from yolov5-3d/utils/plots.py.
|
|
Contains only the functions required by visualize_single_frame.py and
|
|
visualize_batch.py for ground-truth 3D visualization, with no cross-repo
|
|
dependencies.
|
|
"""
|
|
|
|
import cv2
|
|
import numpy as np
|
|
import torch
|
|
from ultralytics.utils.plotting import Annotator # noqa: F401 (re-exported)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Color helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class Colors:
|
|
"""Provides an RGB color palette derived from Ultralytics color scheme."""
|
|
|
|
def __init__(self):
|
|
hexs = (
|
|
"FF3838", "FF9D97", "FF701F", "FFB21D", "CFD231",
|
|
"48F90A", "92CC17", "3DDB86", "1A9334", "00D4BB",
|
|
"2C99A8", "00C2FF", "344593", "6473FF", "0018EC",
|
|
"8438FF", "520085", "CB38FF", "FF95C8", "FF37C7",
|
|
)
|
|
self.palette = [self.hex2rgb(f"#{c}") for c in hexs]
|
|
self.n = len(self.palette)
|
|
|
|
def __call__(self, i, bgr=False):
|
|
"""Return colour for index ``i`` (BGR if ``bgr=True``, else RGB)."""
|
|
c = self.palette[int(i) % self.n]
|
|
return (c[2], c[1], c[0]) if bgr else c
|
|
|
|
@staticmethod
|
|
def hex2rgb(h):
|
|
"""Convert hex colour string to (R, G, B) tuple."""
|
|
return tuple(int(h[1 + i: 1 + i + 2], 16) for i in (0, 2, 4))
|
|
|
|
|
|
colors = Colors() # module-level singleton; callers: from plots_3d import colors
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 3D geometry helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def rotation_3d_in_axis(points, angles, axis=0):
|
|
"""Rotate *points* around a specified camera-frame axis.
|
|
|
|
Args:
|
|
points (np.ndarray): (N, 3) array of 3D points.
|
|
angles (float): Rotation angle in radians.
|
|
axis (int): 0=X, 1=Y, 2=Z.
|
|
|
|
Returns:
|
|
np.ndarray: Rotated points (N, 3).
|
|
"""
|
|
rot_sin = np.sin(angles)
|
|
rot_cos = np.cos(angles)
|
|
ones = np.ones_like(rot_cos)
|
|
zeros = np.zeros_like(rot_cos)
|
|
|
|
if axis == 1:
|
|
rot_mat = np.stack([
|
|
np.stack([rot_cos, zeros, -rot_sin]),
|
|
np.stack([zeros, ones, zeros]),
|
|
np.stack([rot_sin, zeros, rot_cos]),
|
|
])
|
|
elif axis == 2:
|
|
rot_mat = np.stack([
|
|
np.stack([rot_cos, rot_sin, zeros]),
|
|
np.stack([-rot_sin, rot_cos, zeros]),
|
|
np.stack([zeros, zeros, ones]),
|
|
])
|
|
elif axis == 0:
|
|
rot_mat = np.stack([
|
|
np.stack([ones, zeros, zeros]),
|
|
np.stack([zeros, rot_cos, rot_sin]),
|
|
np.stack([zeros, -rot_sin, rot_cos]),
|
|
])
|
|
else:
|
|
raise ValueError(f"axis must be in {{0, 1, 2}}, got {axis}")
|
|
|
|
return np.dot(points, rot_mat)
|
|
|
|
|
|
def compute_3d_box_corners_4face(center_3d, dimensions, rotation, face_type=0):
|
|
"""Compute the 8 corners of a 3D bounding box from a face-center point.
|
|
|
|
Args:
|
|
center_3d (array-like): (x, y, z) centre of the specified face in camera coords.
|
|
dimensions (array-like): (length, height, width) of the box in metres.
|
|
rotation (float): rot_y — rotation about the Y axis in radians.
|
|
face_type (int): 0=front, 1=tail/rear, 2=left, 3=right, -1=box centre.
|
|
|
|
Returns:
|
|
np.ndarray: (8, 3) corner coordinates in camera frame.
|
|
"""
|
|
l, h, w = dimensions
|
|
|
|
corners_norm = np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1).astype(np.float64)
|
|
corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
|
|
|
|
offsets = {
|
|
1: [0, 0.5, 0.5], # tail
|
|
0: [1, 0.5, 0.5], # front
|
|
3: [0.5, 0.5, 0], # right
|
|
2: [0.5, 0.5, 1], # left
|
|
-1: [0.5, 0.5, 0.5], # whole centre
|
|
}
|
|
corners_norm = corners_norm - offsets.get(face_type, offsets[-1])
|
|
|
|
corners = np.array([l, h, w]).reshape(1, 3) * corners_norm.reshape(8, 3)
|
|
corners = rotation_3d_in_axis(corners, rotation, axis=1)
|
|
corners += np.array(center_3d).reshape(1, 3)
|
|
return corners
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fisheye (KB) distortion helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def apply_fisheye_distortion(x, y, distort_coeffs):
|
|
"""Apply Kannala-Brandt fisheye distortion to normalised camera coordinates.
|
|
|
|
Args:
|
|
x (float): Normalised x coordinate (x3d / z3d).
|
|
y (float): Normalised y coordinate (y3d / z3d).
|
|
distort_coeffs (list): [k1, k2, k3, k4] KB coefficients.
|
|
|
|
Returns:
|
|
tuple[float, float]: Distorted normalised coordinates (xd, yd).
|
|
"""
|
|
if not distort_coeffs or len(distort_coeffs) < 4:
|
|
return x, y
|
|
|
|
k1, k2, k3, k4 = distort_coeffs[:4]
|
|
r = np.sqrt(x * x + y * y)
|
|
if r < 1e-8:
|
|
return x, y
|
|
|
|
theta = np.arctan(r)
|
|
theta_d = theta * (1 + k1 * theta**2 + k2 * theta**4 + k3 * theta**6 + k4 * theta**8)
|
|
scale = theta_d / r
|
|
return x * scale, y * scale
|
|
|
|
|
|
def remove_fisheye_distortion(xd, yd, distort_coeffs, max_iter=20):
|
|
"""Remove KB fisheye distortion from normalised camera coordinates.
|
|
|
|
Args:
|
|
xd (float): Distorted normalised x coordinate.
|
|
yd (float): Distorted normalised y coordinate.
|
|
distort_coeffs (list): [k1, k2, k3, k4] KB coefficients.
|
|
max_iter (int): Maximum Newton-Raphson iterations.
|
|
|
|
Returns:
|
|
tuple[float, float]: Undistorted normalised coordinates (xn, yn).
|
|
"""
|
|
if not distort_coeffs or len(distort_coeffs) < 4:
|
|
return xd, yd
|
|
|
|
k1, k2, k3, k4 = distort_coeffs[:4]
|
|
r_d = np.sqrt(xd * xd + yd * yd)
|
|
if r_d < 1e-8:
|
|
return xd, yd
|
|
|
|
theta_d = r_d
|
|
theta = theta_d / (1 + k1 * theta_d * theta_d)
|
|
|
|
for _ in range(max_iter):
|
|
theta2 = theta * theta
|
|
theta4 = theta2 * theta2
|
|
theta6 = theta4 * theta2
|
|
theta8 = theta4 * theta4
|
|
|
|
f = theta * (1 + k1 * theta2 + k2 * theta4 + k3 * theta6 + k4 * theta8) - theta_d
|
|
f_prime = 1 + 3 * k1 * theta2 + 5 * k2 * theta4 + 7 * k3 * theta6 + 9 * k4 * theta8
|
|
|
|
theta_new = theta - f / f_prime
|
|
if abs(theta_new - theta) < 1e-8:
|
|
theta = theta_new
|
|
break
|
|
theta = theta_new
|
|
|
|
r = np.tan(theta)
|
|
scale = r / r_d
|
|
return xd * scale, yd * scale
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 3D-to-2D projection helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def project_3d_to_2d_with_distortion(points_3d, calib):
|
|
"""Project 3D points to 2D using KB fisheye camera calibration.
|
|
|
|
Args:
|
|
points_3d (np.ndarray): (N, 3) points in camera coordinates.
|
|
calib (dict): Camera parameters ``fx``, ``fy``, ``cx``, ``cy``,
|
|
and optional ``distort_coeffs``.
|
|
|
|
Returns:
|
|
np.ndarray: (N, 2) image coordinates (NaN for behind-camera points).
|
|
"""
|
|
fx, fy = calib['fx'], calib['fy']
|
|
cx, cy = calib['cx'], calib['cy']
|
|
distort_coeffs = calib.get('distort_coeffs', [])
|
|
|
|
points_2d = []
|
|
for x, y, z in points_3d:
|
|
if z > 0.1:
|
|
xn, yn = x / z, y / z
|
|
xd, yd = apply_fisheye_distortion(xn, yn, distort_coeffs)
|
|
points_2d.append([fx * xd + cx, fy * yd + cy])
|
|
else:
|
|
points_2d.append([np.nan, np.nan])
|
|
|
|
return np.array(points_2d)
|
|
|
|
|
|
def sample_3d_edge(p1, p2, num_samples=10):
|
|
"""Uniformly sample *num_samples* points along the 3D edge from *p1* to *p2*.
|
|
|
|
Args:
|
|
p1 (array-like): Start point (x, y, z).
|
|
p2 (array-like): End point (x, y, z).
|
|
num_samples (int): Number of sample points.
|
|
|
|
Returns:
|
|
np.ndarray: (num_samples, 3) sampled 3D points.
|
|
"""
|
|
t = np.linspace(0, 1, num_samples).reshape(-1, 1)
|
|
return p1 + t * (p2 - p1)
|
|
|
|
|
|
def project_3d_box_edges_with_distortion(corners_3d, calib, samples_per_edge=10):
|
|
"""Project 3D box edges to 2D by sampling, handling fisheye distortion.
|
|
|
|
Args:
|
|
corners_3d (np.ndarray): (8, 3) 3D corner coordinates.
|
|
calib (dict): Camera calibration dict.
|
|
samples_per_edge (int): Number of samples per edge.
|
|
|
|
Returns:
|
|
dict: Mapping edge_name → (N, 2) 2D projected points.
|
|
"""
|
|
edges = {
|
|
'back_0': (4, 5), 'back_1': (5, 6), 'back_2': (6, 7), 'back_3': (7, 4),
|
|
'connect_0': (0, 4), 'connect_1': (1, 5), 'connect_2': (2, 6), 'connect_3': (3, 7),
|
|
'front_0': (0, 1), 'front_1': (1, 2), 'front_2': (2, 3), 'front_3': (3, 0),
|
|
'front_x1': (0, 2), 'front_x2': (1, 3),
|
|
}
|
|
|
|
return {
|
|
name: project_3d_to_2d_with_distortion(
|
|
sample_3d_edge(corners_3d[i], corners_3d[j], samples_per_edge), calib
|
|
)
|
|
for name, (i, j) in edges.items()
|
|
}
|
|
|
|
|
|
def plot_box3d_on_img_with_distortion(img, edge_points_2d,
|
|
color_front=(255, 0, 0),
|
|
color_back=(0, 0, 255),
|
|
color_side=(0, 255, 255),
|
|
thickness=1):
|
|
"""Draw a 3D box on *img* using pre-projected edge point lists (fisheye-aware).
|
|
|
|
Args:
|
|
img (np.ndarray): BGR image to draw on.
|
|
edge_points_2d (dict): Output of :func:`project_3d_box_edges_with_distortion`.
|
|
color_front (tuple): BGR colour for front-face edges.
|
|
color_back (tuple): BGR colour for back-face edges.
|
|
color_side (tuple): BGR colour for side connecting edges.
|
|
thickness (int): Line thickness in pixels.
|
|
|
|
Returns:
|
|
np.ndarray: Modified image.
|
|
"""
|
|
front_edges = {'front_0', 'front_1', 'front_2', 'front_3', 'front_x1', 'front_x2'}
|
|
back_edges = {'back_0', 'back_1', 'back_2', 'back_3'}
|
|
|
|
for edge_name, points in edge_points_2d.items():
|
|
if np.any(np.isnan(points)):
|
|
continue
|
|
pts = points.astype(np.int32)
|
|
if edge_name in front_edges:
|
|
color = color_front
|
|
elif edge_name in back_edges:
|
|
color = color_back
|
|
else:
|
|
color = color_side
|
|
cv2.polylines(img, [pts], isClosed=False, color=color, thickness=thickness, lineType=cv2.LINE_AA)
|
|
|
|
return img
|
|
|
|
|
|
def project_3d_to_2d_with_calib(points_3d, calib):
|
|
"""Project 3D points to 2D using pinhole calibration (no distortion).
|
|
|
|
Args:
|
|
points_3d (np.ndarray): (N, 3) points in camera coordinates.
|
|
calib (dict): Camera parameters ``fx``, ``fy``, ``cx``, ``cy``.
|
|
|
|
Returns:
|
|
np.ndarray: (N, 2) image coordinates (NaN for behind-camera points).
|
|
"""
|
|
fx, fy = calib['fx'], calib['fy']
|
|
cx, cy = calib['cx'], calib['cy']
|
|
|
|
points_2d = []
|
|
for x, y, z in points_3d:
|
|
if z > 0.1:
|
|
points_2d.append([fx * x / z + cx, fy * y / z + cy])
|
|
else:
|
|
points_2d.append([np.nan, np.nan])
|
|
|
|
return np.array(points_2d)
|
|
|
|
|
|
def project_3d_to_2d_simple(points_3d, img_size):
|
|
"""Project 3D points to 2D using a simple estimated pinhole model.
|
|
|
|
Args:
|
|
points_3d (np.ndarray): (N, 3) points in camera coordinates.
|
|
img_size (tuple[int, int]): ``(width, height)`` of the image.
|
|
|
|
Returns:
|
|
np.ndarray: (N, 2) image coordinates.
|
|
"""
|
|
w, h = img_size
|
|
fx = fy = w * 1.2
|
|
cx, cy = w / 2, h / 2
|
|
|
|
points_2d = []
|
|
for x, y, z in points_3d:
|
|
if z > 0.1:
|
|
points_2d.append([fx * x / z + cx, fy * y / z + cy])
|
|
else:
|
|
points_2d.append([np.nan, np.nan])
|
|
|
|
return np.array(points_2d)
|
|
|
|
|
|
def plot_box3d_on_img(img, corners_2d,
|
|
color_front=(255, 0, 0),
|
|
color_back=(0, 0, 255),
|
|
color_side=(0, 255, 255),
|
|
thickness=1):
|
|
"""Draw a 3D bounding box on *img* from 2D projected corners.
|
|
|
|
Args:
|
|
img (np.ndarray): BGR image to draw on.
|
|
corners_2d (np.ndarray): (8, 2) projected corner coordinates.
|
|
color_front (tuple): BGR colour for front-face edges (indices 0-3).
|
|
color_back (tuple): BGR colour for back-face edges (indices 4-7).
|
|
color_side (tuple): BGR colour for connecting side edges.
|
|
thickness (int): Line thickness in pixels.
|
|
|
|
Returns:
|
|
np.ndarray: Modified image.
|
|
"""
|
|
line_indices = (
|
|
(4, 5), (5, 6), (6, 7), (7, 4), # back face
|
|
(0, 4), (1, 5), (2, 6), (3, 7), # side edges
|
|
(0, 1), (1, 2), (2, 3), (3, 0), (0, 2), (1, 3), # front face + X mark
|
|
)
|
|
front_edges = {(0, 1), (1, 2), (2, 3), (3, 0), (0, 2), (1, 3)}
|
|
back_edges = {(4, 5), (5, 6), (6, 7), (7, 4)}
|
|
corners = corners_2d.astype(np.int32)
|
|
|
|
for start, end in line_indices:
|
|
try:
|
|
pt1 = (corners[start, 0], corners[start, 1])
|
|
pt2 = (corners[end, 0], corners[end, 1])
|
|
if (start, end) in front_edges:
|
|
cv2.line(img, pt1, pt2, color_front, thickness, cv2.LINE_AA)
|
|
elif (start, end) in back_edges:
|
|
cv2.line(img, pt1, pt2, color_back, thickness, cv2.LINE_AA)
|
|
else:
|
|
cv2.line(img, pt1, pt2, color_side, thickness, cv2.LINE_AA)
|
|
except Exception:
|
|
pass
|
|
|
|
return img
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 3D box reconstruction from target label format
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _reconstruct_3d_box_from_face(face_uv, face_z, dims, rot_y, face_type, calib):
|
|
"""Reconstruct 3D box corners from a visible face centre.
|
|
|
|
Args:
|
|
face_uv (tuple[float, float]): Pixel coordinates (u, v) of the face centre.
|
|
face_z (float): Depth of the face centre in metres.
|
|
dims (array-like): (length, height, width) in metres.
|
|
rot_y (float): Yaw rotation in radians.
|
|
face_type (int): 0=front, 1=rear, 2=left, 3=right.
|
|
calib (dict): Camera calibration dict.
|
|
|
|
Returns:
|
|
tuple[np.ndarray, list] | None: ``(corners_3d, object_3d)`` or ``None`` on failure.
|
|
"""
|
|
if calib is None:
|
|
return None
|
|
|
|
fx, fy = calib['fx'], calib['fy']
|
|
cx, cy = calib['cx'], calib['cy']
|
|
distort_coeffs = calib.get('distort_coeffs', [])
|
|
|
|
u_face, v_face = face_uv
|
|
xd = (u_face - cx) / fx
|
|
yd = (v_face - cy) / fy
|
|
|
|
if distort_coeffs and len(distort_coeffs) >= 4:
|
|
xn, yn = remove_fisheye_distortion(xd, yd, distort_coeffs)
|
|
else:
|
|
xn, yn = xd, yd
|
|
|
|
l, h, w = dims
|
|
if np.isnan(l) or np.isnan(h) or np.isnan(w) or np.isnan(rot_y):
|
|
return None
|
|
|
|
face_center_3d = np.array([xn * face_z, yn * face_z, face_z])
|
|
corners_3d = compute_3d_box_corners_4face(face_center_3d, dims, rot_y, face_type=face_type)
|
|
object_3d = [face_center_3d[0], face_center_3d[1], face_center_3d[2], l, h, w, rot_y, face_type]
|
|
return corners_3d, object_3d
|
|
|
|
|
|
def decode_and_reconstruct_3d_box_from_target(target, calib, img_width, img_height,
|
|
face_3d_classes=None, complete_3d_classes=None):
|
|
"""Decode a ground-truth target vector and reconstruct its 3D box.
|
|
|
|
The target array follows the 48-column format used in ``YOLOGround3DDataset``:
|
|
|
|
* col 0 — image index
|
|
* col 1 — class id
|
|
* cols 2-5 — normalised 2D bbox [x, y, w, h]
|
|
* cols 6-8 — 3D centre [x3d, y3d, z3d] in camera coords
|
|
* cols 9-11 — dimensions [l, h, w]
|
|
* col 12 — rot_y
|
|
* cols 13-14 — normalised UV projection of 3D centre
|
|
* col 15 — alpha
|
|
* cols 16-23 — front face [x3d, y3d, z3d, alpha, xc, yc, score, visible]
|
|
* cols 24-31 — rear face (same layout)
|
|
* cols 32-39 — left face
|
|
* cols 40-47 — right face
|
|
|
|
Args:
|
|
target (np.ndarray): (48,) label vector.
|
|
calib (dict): Camera calibration dict; must contain ``depth_scale``.
|
|
img_width (int): Image width in pixels.
|
|
img_height (int): Image height in pixels.
|
|
face_3d_classes (list | None): Class IDs using face-based reconstruction.
|
|
Defaults to ``[0, 13]`` (vehicles, tricycles).
|
|
complete_3d_classes (list | None): Class IDs using complete-box
|
|
reconstruction. Defaults to ``[1, 2, 3]`` (pedestrians, bicycles,
|
|
riders).
|
|
|
|
Returns:
|
|
dict | None: Result dict with keys ``should_draw``, ``cls``,
|
|
``corners_3d``, ``face_center_2d``, ``face_color``, ``center_2d``,
|
|
``object_3d``; or ``None`` if the target is invalid / unsupported.
|
|
"""
|
|
if face_3d_classes is None:
|
|
face_3d_classes = [0, 1, 2, 3, 4, 5, 6, 7, 8] # vehicles
|
|
if complete_3d_classes is None:
|
|
complete_3d_classes = [9, 10, 11, 12] # pedestrian / cyclists
|
|
|
|
if len(target) < 15 or np.isnan(target[1]):
|
|
return None
|
|
|
|
cls = int(target[1])
|
|
if cls not in face_3d_classes and cls not in complete_3d_classes:
|
|
return None
|
|
|
|
depth_scale = calib['depth_scale']
|
|
|
|
result = {
|
|
'should_draw': True,
|
|
'cls': cls,
|
|
'corners_3d': None,
|
|
'face_center_2d': None,
|
|
'face_color': None,
|
|
'center_2d': None,
|
|
}
|
|
|
|
if cls in face_3d_classes:
|
|
if len(target) < 48:
|
|
return None
|
|
|
|
face_offsets = [16, 24, 32, 40]
|
|
# BGR: front=red, rear=blue, left=green, right=yellow
|
|
face_colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (0, 255, 255)]
|
|
|
|
best_face_type = -1
|
|
best_score = -1.0
|
|
best_face_data = None
|
|
|
|
for face_type, face_offset in enumerate(face_offsets):
|
|
if target.shape[0] < face_offset + 8:
|
|
continue
|
|
face_data = target[face_offset:face_offset + 8]
|
|
_, _, z3d_face, _, xc_face, yc_face, score, is_visible = face_data
|
|
|
|
if is_visible == -1 or np.isnan(is_visible) or is_visible != 1:
|
|
continue
|
|
if np.isnan(score) or score < 0.3:
|
|
continue
|
|
if np.isnan(xc_face) or np.isnan(yc_face) or np.isnan(z3d_face) or z3d_face <= 0:
|
|
continue
|
|
|
|
if score > best_score:
|
|
best_score = score
|
|
best_face_type = face_type
|
|
best_face_data = face_data
|
|
|
|
if best_face_type != -1 and best_face_data is not None:
|
|
xc_face = best_face_data[4]
|
|
yc_face = best_face_data[5]
|
|
z3d_face = best_face_data[2] * depth_scale
|
|
|
|
u_face = xc_face * img_width
|
|
v_face = yc_face * img_height
|
|
dims = target[9:12]
|
|
rot_y = target[12]
|
|
|
|
result_face = _reconstruct_3d_box_from_face(
|
|
(u_face, v_face), z3d_face, dims, rot_y, best_face_type, calib
|
|
)
|
|
if result_face is not None:
|
|
corners_3d, object_3d = result_face
|
|
result['corners_3d'] = corners_3d
|
|
result['face_center_2d'] = (u_face, v_face)
|
|
result['face_color'] = face_colors[best_face_type]
|
|
result['object_3d'] = object_3d
|
|
|
|
elif cls in complete_3d_classes:
|
|
x3d, y3d, z3d = target[6:9]
|
|
dimensions = target[9:12]
|
|
rot_y = target[12]
|
|
xc_norm, yc_norm = target[13:15]
|
|
|
|
z3d = z3d * depth_scale
|
|
|
|
if np.isnan(z3d) or z3d <= 0 or np.any(np.isnan(dimensions)):
|
|
return None
|
|
if np.isnan(x3d) or np.isnan(y3d):
|
|
return None
|
|
|
|
corners_3d = compute_3d_box_corners_4face(
|
|
np.array([x3d, y3d, z3d]), dimensions, rot_y, face_type=-1
|
|
)
|
|
result['corners_3d'] = corners_3d
|
|
result['center_2d'] = (xc_norm * img_width, yc_norm * img_height)
|
|
result['object_3d'] = [
|
|
x3d, y3d, z3d,
|
|
dimensions[0], dimensions[1], dimensions[2],
|
|
rot_y, -1,
|
|
]
|
|
|
|
return result
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Drawing helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def draw_3d_box_from_corners(im, corners_3d, calib, img_shape,
|
|
face_center_2d=None, face_color=None, thickness=1):
|
|
"""Project and draw a 3D box given raw corner coordinates.
|
|
|
|
Args:
|
|
im (np.ndarray): BGR image array (H, W, 3).
|
|
corners_3d (np.ndarray): (8, 3) corners in camera frame
|
|
(output of :func:`compute_3d_box_corners_4face`).
|
|
calib (dict | None): Camera calibration dict.
|
|
img_shape (tuple[int, int]): ``(width, height)`` of *im*.
|
|
face_center_2d (tuple[float, float] | None): Pixel coords to mark with a dot.
|
|
face_color (tuple | None): BGR colour for the face-centre dot.
|
|
thickness (int): Line thickness in pixels.
|
|
|
|
Returns:
|
|
np.ndarray: Modified image.
|
|
"""
|
|
w, h = img_shape
|
|
|
|
# compute_3d_box_corners_4face places rear at indices 0-3 and front at 4-7.
|
|
# plot_box3d_on_img expects front at indices 0-3 (drawn red) and rear at 4-7.
|
|
corners_3d = corners_3d[[4, 5, 6, 7, 0, 1, 2, 3]]
|
|
|
|
color_front = (0, 0, 255) # Red (BGR)
|
|
color_back = (255, 0, 0) # Blue (BGR)
|
|
color_side = (255, 255, 0) # Cyan (BGR)
|
|
|
|
if calib and calib.get('distort_coeffs'):
|
|
edge_pts = project_3d_box_edges_with_distortion(corners_3d, calib, samples_per_edge=15)
|
|
im = plot_box3d_on_img_with_distortion(
|
|
im, edge_pts, color_front, color_back, color_side, thickness=thickness
|
|
)
|
|
else:
|
|
corners_2d = (project_3d_to_2d_with_calib(corners_3d, calib)
|
|
if calib is not None
|
|
else project_3d_to_2d_simple(corners_3d, (w, h)))
|
|
|
|
if not np.any(np.isnan(corners_2d)):
|
|
im = plot_box3d_on_img(im, corners_2d, color_front, color_back, color_side, thickness=thickness)
|
|
|
|
if face_center_2d is not None and face_color is not None:
|
|
cv2.circle(im, (int(face_center_2d[0]), int(face_center_2d[1])), 2, face_color, -1, cv2.LINE_AA)
|
|
|
|
return im
|
|
|
|
|
|
def plot_3d_boxes_from_decoded_targets(im, decoded_results, paths, calib=None, names=None,
|
|
label_text=None, scale_factor=2):
|
|
"""Render 3D boxes from pre-decoded ground-truth targets onto *im*.
|
|
|
|
Args:
|
|
im (torch.Tensor): Batch tensor (N, 3, H, W) normalised [0, 1] in BGR
|
|
format (as returned by the 3D dataloader).
|
|
decoded_results (list[list[dict]]): ``decoded_results[i]`` holds a list of
|
|
result dicts for image *i*, each produced by
|
|
:func:`decode_and_reconstruct_3d_box_from_target`.
|
|
paths (list[str]): Image file paths (informational only).
|
|
calib (dict | list[dict] | None): Camera calibration dict (or first element
|
|
of a list) used for 3D→2D projection.
|
|
names (dict | None): Mapping from class id to class name (unused here but
|
|
kept for API symmetry).
|
|
label_text (str | None): Text to overlay in the top-left corner, e.g.
|
|
``"3D GT"``.
|
|
scale_factor (int): Upscale factor applied to the output image.
|
|
|
|
Returns:
|
|
np.ndarray | None: (H*scale_factor, W*scale_factor, 3) RGB image with 3D
|
|
boxes drawn, or ``None`` if the input is empty.
|
|
"""
|
|
if im.ndim == 3:
|
|
im = im.unsqueeze(0)
|
|
|
|
im_np = im[0].cpu().numpy().transpose(1, 2, 0)
|
|
im_np = np.ascontiguousarray(im_np * 255, dtype=np.uint8)
|
|
h_orig, w_orig = im_np.shape[:2]
|
|
|
|
h_new = h_orig * scale_factor
|
|
w_new = w_orig * scale_factor
|
|
im_bgr = cv2.resize(im_np, (w_new, h_new), interpolation=cv2.INTER_LINEAR)
|
|
|
|
if isinstance(calib, list):
|
|
calib = calib[0] if calib else None
|
|
|
|
calib_scaled = None
|
|
if calib is not None:
|
|
calib_scaled = {
|
|
'fx': calib['fx'] * scale_factor,
|
|
'fy': calib['fy'] * scale_factor,
|
|
'cx': calib['cx'] * scale_factor,
|
|
'cy': calib['cy'] * scale_factor,
|
|
'distort_coeffs': calib.get('distort_coeffs', []),
|
|
}
|
|
|
|
for decoded in decoded_results[0]:
|
|
if not decoded or not decoded['should_draw'] or decoded['corners_3d'] is None:
|
|
continue
|
|
|
|
cls = decoded['cls']
|
|
|
|
if cls in range(9): # vehicles (car/suv/pickup/...special_vehicle/unknown) — face-based
|
|
face_center_2d = decoded.get('face_center_2d')
|
|
if face_center_2d is not None:
|
|
face_center_2d = (
|
|
face_center_2d[0] * scale_factor,
|
|
face_center_2d[1] * scale_factor,
|
|
)
|
|
im_bgr = draw_3d_box_from_corners(
|
|
im_bgr,
|
|
decoded['corners_3d'],
|
|
calib_scaled,
|
|
(w_new, h_new),
|
|
face_center_2d=face_center_2d,
|
|
face_color=decoded.get('face_color'),
|
|
)
|
|
|
|
elif cls in (9, 10, 11, 12): # pedestrian / bicyclists / bicycles / tricycles — complete box
|
|
im_bgr = draw_3d_box_from_corners(
|
|
im_bgr,
|
|
decoded['corners_3d'],
|
|
calib_scaled,
|
|
(w_new, h_new),
|
|
thickness=2,
|
|
)
|
|
if decoded.get('center_2d'):
|
|
color = colors(cls)
|
|
u, v = decoded['center_2d']
|
|
u_s, v_s = int(u * scale_factor), int(v * scale_factor)
|
|
cv2.circle(im_bgr, (u_s, v_s), 4, color, -1, cv2.LINE_AA)
|
|
cv2.circle(im_bgr, (u_s, v_s), 6, (255, 255, 255), 2, cv2.LINE_AA)
|
|
|
|
if label_text:
|
|
cv2.putText(im_bgr, label_text, (10, 50), cv2.FONT_HERSHEY_SIMPLEX,
|
|
1.5, (0, 255, 255), 3, cv2.LINE_AA)
|
|
|
|
return cv2.cvtColor(im_bgr, cv2.COLOR_BGR2RGB)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Bird's-Eye View (BEV) visualization
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def draw_bev_blank(max_range=200):
|
|
"""Create a blank BEV image with distance and lateral grid lines.
|
|
|
|
Args:
|
|
max_range (int): Maximum forward range in metres (default 200).
|
|
|
|
Returns:
|
|
np.ndarray: (H, W, 3) BGR image with grid and ego-vehicle box drawn.
|
|
"""
|
|
pixels_per_meter = 20
|
|
img_height = max_range * pixels_per_meter # e.g. 4000 px for 200 m
|
|
img_width = 100 * pixels_per_meter # lateral range -50 m … +50 m
|
|
|
|
bevimg = np.ones((img_height, img_width, 3), dtype=np.uint8) * 255
|
|
ego_center_x = img_width // 2
|
|
ego_center_y = img_height # ego position at bottom-centre
|
|
|
|
ego_half_l = int(4.5 * pixels_per_meter) // 2
|
|
ego_half_w = int(1.8 * pixels_per_meter) // 2
|
|
ego_box = np.array([
|
|
[ego_center_x - ego_half_w, ego_center_y - ego_half_l],
|
|
[ego_center_x + ego_half_w, ego_center_y - ego_half_l],
|
|
[ego_center_x + ego_half_w, ego_center_y + ego_half_l],
|
|
[ego_center_x - ego_half_w, ego_center_y + ego_half_l],
|
|
], dtype=np.int32)
|
|
|
|
# Horizontal (range) grid
|
|
grid_step_px = 20 * pixels_per_meter # every 20 m
|
|
for i in range(max_range // 20 + 1):
|
|
y_pos = ego_center_y - i * grid_step_px
|
|
if y_pos >= 0:
|
|
cv2.line(bevimg, (0, y_pos), (img_width, y_pos), (180, 180, 180), 3, cv2.LINE_AA)
|
|
cv2.putText(bevimg, f"{i * 20}m", (ego_center_x + 15, y_pos - 15),
|
|
cv2.FONT_HERSHEY_SIMPLEX, 1.2, (80, 80, 80), 2, cv2.LINE_AA)
|
|
|
|
# Vertical (lateral) grid
|
|
lat_step_px = 10 * pixels_per_meter # every 10 m
|
|
for i in range(-5, 6):
|
|
x_pos = ego_center_x + i * lat_step_px
|
|
if 0 <= x_pos < img_width:
|
|
cv2.line(bevimg, (x_pos, 0), (x_pos, img_height), (180, 180, 180), 3, cv2.LINE_AA)
|
|
if i != 0:
|
|
cv2.putText(bevimg, f"{i * 10}m", (x_pos - 40, img_height - 20),
|
|
cv2.FONT_HERSHEY_SIMPLEX, 1.0, (80, 80, 80), 2, cv2.LINE_AA)
|
|
|
|
cv2.drawContours(bevimg, [ego_box], 0, (255, 0, 0), -1) # blue-filled ego box
|
|
return bevimg
|
|
|
|
|
|
def cam_corners_front_rear(pred3d, facetype):
|
|
"""Compute 8 box corners from a face-centre 3D representation.
|
|
|
|
Args:
|
|
pred3d (array-like): ``[x, y, z, l, h, w, rot_y]`` (7 values).
|
|
facetype (int | str): Face type identifier — ``0``/``'front'``,
|
|
``1``/``'tail'``, ``2``/``'left'``, ``3``/``'right'``,
|
|
``-1``/``'whole'``.
|
|
|
|
Returns:
|
|
np.ndarray: (8, 3) 3D corner coordinates in camera frame.
|
|
"""
|
|
dims = pred3d[3:6]
|
|
corners_norm = np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)
|
|
corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]].astype(float)
|
|
|
|
if facetype in ('tail', 1):
|
|
corners_norm -= [0, 0.5, 0.5]
|
|
elif facetype in ('front', 0):
|
|
corners_norm -= [1, 0.5, 0.5]
|
|
elif facetype in ('right', 3):
|
|
corners_norm -= [0.5, 0.5, 0]
|
|
elif facetype in ('left', 2):
|
|
corners_norm -= [0.5, 0.5, 1]
|
|
elif facetype in ('whole', -1):
|
|
corners_norm -= [0.5, 0.5, 0.5]
|
|
else:
|
|
raise AssertionError(f"Non-valid face type: {facetype}")
|
|
|
|
corners = dims.reshape(1, 3) * corners_norm.reshape(8, 3)
|
|
corners = rotation_3d_in_axis(corners, pred3d[6], axis=1)
|
|
corners += pred3d[:3].reshape(1, 3)
|
|
return corners
|
|
|
|
|
|
def drawbev(bevimg, vehicle3d, is_pred=True):
|
|
"""Draw a single vehicle box on a BEV image.
|
|
|
|
Args:
|
|
bevimg (np.ndarray): BEV image produced by :func:`draw_bev_blank`.
|
|
vehicle3d (list | np.ndarray): ``[x, y, z, l, h, w, ..., rot_y, face_type]``.
|
|
The second-to-last element is ``rot_y`` and the last is ``face_type``.
|
|
is_pred (bool): ``True`` → red box (prediction); ``False`` → green box (GT).
|
|
|
|
Returns:
|
|
np.ndarray: Modified BEV image.
|
|
"""
|
|
x, y, z = vehicle3d[0], vehicle3d[1], vehicle3d[2]
|
|
l, h, w = vehicle3d[3], vehicle3d[4], vehicle3d[5]
|
|
rotation_y = vehicle3d[-2]
|
|
face_type = vehicle3d[-1]
|
|
|
|
pixels_per_meter = 20
|
|
max_range = 200
|
|
lateral_range = 50
|
|
|
|
img_height = bevimg.shape[0]
|
|
img_width = bevimg.shape[1]
|
|
ego_center_x = img_width // 2
|
|
ego_center_y = img_height
|
|
|
|
if x > lateral_range or x < -lateral_range or z > max_range or z < 0:
|
|
return bevimg
|
|
|
|
corners = cam_corners_front_rear(np.array([x, y, z, l, h, w, rotation_y]), face_type)
|
|
xyz3d_front = np.mean(corners[4:8, :], axis=0)
|
|
xyz3d_center = np.mean(corners[0:8, :], axis=0)
|
|
|
|
center = (
|
|
int(ego_center_x + xyz3d_center[0] * pixels_per_meter),
|
|
int(ego_center_y - xyz3d_center[2] * pixels_per_meter),
|
|
)
|
|
front_point = (
|
|
int(ego_center_x + xyz3d_front[0] * pixels_per_meter),
|
|
int(ego_center_y - xyz3d_front[2] * pixels_per_meter),
|
|
)
|
|
|
|
rect = (center, (l * pixels_per_meter, w * pixels_per_meter), np.degrees(rotation_y))
|
|
box = np.intp(cv2.boxPoints(rect))
|
|
|
|
color = (0, 0, 255) if is_pred else (0, 255, 0)
|
|
cv2.drawContours(bevimg, [box], 0, color, 3, cv2.LINE_AA)
|
|
cv2.arrowedLine(bevimg, center, front_point, color, thickness=3, tipLength=0.3, line_type=cv2.LINE_AA)
|
|
|
|
return bevimg
|