2777 lines
106 KiB
Python
Executable File
2777 lines
106 KiB
Python
Executable File
# Ultralytics AGPL-3.0 License - https://ultralytics.com/license
|
|
|
|
"""3D detection visualization utilities.
|
|
|
|
Provides functions for decoding 3D predictions, projecting 3D boxes to 2D,
|
|
and drawing 3D wireframe boxes on images. Ported from yolov5-3d/utils/plots.py.
|
|
"""
|
|
|
|
import cv2
|
|
import numpy as np
|
|
|
|
|
|
def _default_face_visibility_score_thresh():
|
|
"""Return the configured visible-face threshold, even when this module is imported standalone."""
|
|
try:
|
|
from ultralytics.utils import DEFAULT_CFG
|
|
|
|
return float(getattr(DEFAULT_CFG, "face_visibility_score_thresh", 0.05))
|
|
except Exception:
|
|
return 0.05
|
|
|
|
|
|
# Four yaw offsets in radians (presumably one per yaw bin -- confirm against the yaw decoder).
YAW_BIN_OFFSETS = (0.0, np.pi / 2, -np.pi / 2, np.pi)

# Start index of each face's block, indexed by face type (0=front, 1=rear, 2=left, 3=right):
# - in the 42-value GT target (8 values are read per face),
# - in the 41-value prediction vector,
# - in the 60-value edge prediction (15 values per face = 5 samples x 3 values).
FACE_OFFSETS_42 = (10, 18, 26, 34)
FACE_OFFSETS_41 = (0, 6, 12, 18)
FACE_EDGE_OFFSETS_60 = (0, 15, 30, 45)

# Corner indices (into the 8-corner box array) that make up each face.
FACE_CORNERS = {
    0: (4, 5, 6, 7),
    1: (0, 1, 2, 3),
    2: (1, 2, 5, 6),
    3: (0, 3, 4, 7),
}

# Corner index pair spanning the bottom edge of each face.
FACE_BOTTOM_EDGE_CORNERS = {
    0: (6, 7),
    1: (2, 3),
    2: (2, 6),
    3: (3, 7),
}

# Default visible-face score threshold, resolved once at import time.
FACE_VISIBILITY_SCORE_THRESH = _default_face_visibility_score_thresh()

# Edge-yaw keeps the face-based visible-face threshold for the primary face, but uses a stricter gate for the
# optional second face in the two-face bucket.
EDGE_YAW_VALID_VISIBILITY_SCORE_THRESH = 0.1
EDGE_YAW_CUT_SIDE_MIN_VISIBLE_LENGTH_RATIO = 0.5
EDGE_YAW_MAX_LATERAL_DIST_M = 30.0

# Cut-object states; the integer values match the argmax index of the predicted cut logits.
CUT_STATE_NORMAL = 0
CUT_STATE_IN = 1
CUT_STATE_OUT = 2

# One color triple per face type (presumably BGR for OpenCV drawing -- confirm at the call sites).
FACE_COLORS = (
    (0, 0, 255),
    (255, 0, 0),
    (0, 255, 0),
    (0, 255, 255),
)
|
|
|
|
|
|
def rotation_3d_in_axis(points, angles, axis=1):
    """Rotate 3D points about one coordinate axis.

    The rotation matrix is applied on the right (``points @ R``).

    Args:
        points: (N, 3) array of 3D points.
        angles: Rotation angle in radians (scalar).
        axis: Axis to rotate about: 0=X, 1=Y, 2=Z.

    Returns:
        Rotated points (N, 3).

    Raises:
        ValueError: If ``axis`` is not 0, 1 or 2.
    """
    s = np.sin(angles)
    c = np.cos(angles)
    one = np.ones_like(c)
    zero = np.zeros_like(c)

    if axis == 0:
        rows = ((one, zero, zero), (zero, c, s), (zero, -s, c))
    elif axis == 1:  # Y axis (X=right, Y=down, Z=forward)
        rows = ((c, zero, -s), (zero, one, zero), (s, zero, c))
    elif axis == 2:
        rows = ((c, s, zero), (-s, c, zero), (zero, zero, one))
    else:
        raise ValueError(f"axis should be in [0, 1, 2], got {axis}")

    rot_mat = np.stack([np.stack(list(row)) for row in rows])
    return np.dot(points, rot_mat)
|
|
|
|
|
|
def compute_3d_box_corners(center_3d, dimensions, rotation, face_type=-1):
    """Build the 8 corners of a yaw-rotated 3D box.

    With ``face_type`` in 0..3 the reference point ``center_3d`` is the center of that
    face; with any other value it is the center of the whole box.

    Args:
        center_3d: (x, y, z) reference position in camera coordinates.
        dimensions: (length, height, width) of the box.
        rotation: rot_y (rotation around the y-axis in radians).
        face_type: -1=box center, 0=front, 1=rear, 2=left, 3=right.

    Returns:
        corners: (8, 3) array of corner coordinates.
    """
    length, height, width = dimensions

    # Enumerate the unit cube corners as binary triples, then apply the fixed corner ordering.
    unit_cube = np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1).astype(np.float64)
    unit_cube = unit_cube[[0, 1, 3, 2, 4, 5, 7, 6]]

    # Position of the reference point inside the unit cube, per face type.
    reference_by_face = {
        0: [1, 0.5, 0.5],
        1: [0, 0.5, 0.5],
        2: [0.5, 0.5, 1],
        3: [0.5, 0.5, 0],
    }
    unit_cube -= reference_by_face.get(face_type, [0.5, 0.5, 0.5])

    # Scale to metric size, rotate about Y, then translate to the reference point.
    extents = np.array([length, height, width]).reshape(1, 3)
    corners = rotation_3d_in_axis(extents * unit_cube.reshape(8, 3), rotation, axis=1)
    corners += np.array(center_3d).reshape(1, 3)
    return corners
|
|
|
|
|
|
def apply_fisheye_distortion(x, y, distort_coeffs):
    """Apply Kannala-Brandt fisheye distortion to normalized camera coordinates.

    Args:
        x: Undistorted normalized x coordinate (scalar).
        y: Undistorted normalized y coordinate (scalar).
        distort_coeffs: Sequence whose first four entries are k1..k4. ``None`` or fewer
            than four entries disables distortion.

    Returns:
        Tuple ``(x_distorted, y_distorted)``; the input is returned unchanged when
        distortion is disabled or the point is within 1e-8 of the optical axis.
    """
    distortion_enabled = distort_coeffs is not None and len(distort_coeffs) >= 4
    if not distortion_enabled:
        return x, y

    k1, k2, k3, k4 = distort_coeffs[:4]
    radius = np.sqrt(x * x + y * y)
    if radius < 1e-8:
        # Avoid dividing by a vanishing radius.
        return x, y

    theta = np.arctan(radius)
    t2 = theta * theta
    t4 = t2 * t2
    t6 = t4 * t2
    t8 = t4 * t4
    theta_distorted = theta * (1 + k1 * t2 + k2 * t4 + k3 * t6 + k4 * t8)

    ratio = theta_distorted / radius
    return x * ratio, y * ratio
|
|
|
|
|
|
def remove_fisheye_distortion(xd, yd, distort_coeffs, max_iter=20):
    """Remove Kannala-Brandt fisheye distortion from normalized camera coordinates.

    The undistorted angle is found with Newton's method on the distortion polynomial.

    Args:
        xd: Distorted normalized x coordinate (scalar).
        yd: Distorted normalized y coordinate (scalar).
        distort_coeffs: Sequence whose first four entries are k1..k4. ``None`` or fewer
            than four entries disables undistortion.
        max_iter: Maximum number of Newton iterations.

    Returns:
        Tuple ``(x, y)`` of undistorted normalized coordinates; the input is returned
        unchanged when undistortion is disabled or the point is within 1e-8 of the axis.
    """
    if distort_coeffs is None or len(distort_coeffs) < 4:
        return xd, yd

    k1, k2, k3, k4 = distort_coeffs[:4]
    radius_d = np.sqrt(xd * xd + yd * yd)
    if radius_d < 1e-8:
        return xd, yd

    # The distorted radius is the distorted angle; seed Newton with a first-order inverse.
    target = radius_d
    theta = target / (1 + k1 * (target * target))

    for _ in range(max_iter):
        t2 = theta * theta
        t4 = t2 * t2
        t6 = t4 * t2
        t8 = t4 * t4
        residual = theta * (1 + k1 * t2 + k2 * t4 + k3 * t6 + k4 * t8) - target
        slope = 1 + 3 * k1 * t2 + 5 * k2 * t4 + 7 * k3 * t6 + 9 * k4 * t8
        previous, theta = theta, theta - residual / slope
        if abs(theta - previous) < 1e-8:
            break

    ratio = np.tan(theta) / radius_d
    return xd * ratio, yd * ratio
|
|
|
|
|
|
def project_3d_to_2d_with_distortion(points_3d, calib):
    """Project 3D camera-frame points to pixels through the fisheye distortion model.

    Args:
        points_3d: Iterable of (x, y, z) points.
        calib: Mapping with ``fx``, ``fy``, ``cx``, ``cy`` and optionally ``distort_coeffs``.

    Returns:
        (N, 2) array of pixel coordinates; rows for points with ``z <= 0.1`` stay NaN.
    """
    fx, fy = calib["fx"], calib["fy"]
    cx, cy = calib["cx"], calib["cy"]
    coeffs = calib.get("distort_coeffs", [])

    pixels = np.full((len(points_3d), 2), np.nan)
    for row, point in enumerate(points_3d):
        x, y, z = point
        if not z > 0.1:
            # Behind or too close to the camera: leave the NaN placeholder.
            continue
        x_dist, y_dist = apply_fisheye_distortion(x / z, y / z, coeffs)
        pixels[row] = [fx * x_dist + cx, fy * y_dist + cy]
    return pixels
|
|
|
|
|
|
def project_3d_to_2d_with_calib(points_3d, calib):
    """Project 3D camera-frame points to pixels with a plain pinhole model.

    Args:
        points_3d: Iterable of (x, y, z) points.
        calib: Mapping with ``fx``, ``fy``, ``cx``, ``cy``.

    Returns:
        (N, 2) array of pixel coordinates; rows for points with ``z <= 0.1`` stay NaN.
    """
    fx, fy = calib["fx"], calib["fy"]
    cx, cy = calib["cx"], calib["cy"]

    pixels = np.full((len(points_3d), 2), np.nan)
    for row, point in enumerate(points_3d):
        x, y, z = point
        if not z > 0.1:
            # Behind or too close to the camera: leave the NaN placeholder.
            continue
        pixels[row] = [fx * x / z + cx, fy * y / z + cy]
    return pixels
|
|
|
|
|
|
def project_3d_to_2d(points_3d, calib):
    """Project 3D points to 2D, choosing the camera model from the calibration.

    Args:
        points_3d: Sequence of (x, y, z) points.
        calib: Calibration mapping, or ``None``.

    Returns:
        (N, 2) array of pixel coordinates. All NaN when ``calib`` is ``None``. The
        fisheye path is used when ``calib["distort_coeffs"]`` has at least four entries,
        otherwise the pinhole path.
    """
    if calib is None:
        return np.full((len(points_3d), 2), np.nan)

    coeffs = calib.get("distort_coeffs")
    use_fisheye = coeffs is not None and len(coeffs) >= 4
    projector = project_3d_to_2d_with_distortion if use_fisheye else project_3d_to_2d_with_calib
    return projector(points_3d, calib)
|
|
|
|
|
|
def sample_3d_edge(p1, p2, num_samples=10):
    """Sample 3D points uniformly along a box edge.

    Args:
        p1: Edge start point; any array-like of 3 coordinates (list, tuple or ndarray).
        p2: Edge end point; same layout as ``p1``.
        num_samples: Number of samples, endpoints included.

    Returns:
        (num_samples, 3) array of points running from ``p1`` to ``p2``.
    """
    # Convert up front so plain lists/tuples work: `p2 - p1` is undefined for Python sequences.
    start = np.asarray(p1)
    end = np.asarray(p2)
    t = np.linspace(0, 1, num_samples).reshape(-1, 1)
    return start + t * (end - start)
|
|
|
|
|
|
def _point_inside_image(point_2d, img_w, img_h):
|
|
"""Return whether a projected point lies inside the image bounds."""
|
|
x, y = float(point_2d[0]), float(point_2d[1])
|
|
return np.isfinite(x) and np.isfinite(y) and 0.0 <= x <= img_w - 1 and 0.0 <= y <= img_h - 1
|
|
|
|
|
|
def _solve_edge_image_boundary_t(p0_2d, p1_2d, img_w, img_h):
|
|
"""Return the parametric interval whose projected segment lies inside the image."""
|
|
p0 = np.asarray(p0_2d, dtype=np.float64)
|
|
p1 = np.asarray(p1_2d, dtype=np.float64)
|
|
if not np.isfinite(p0).all() or not np.isfinite(p1).all():
|
|
return None
|
|
|
|
dx, dy = p1 - p0
|
|
t_min, t_max = 0.0, 1.0
|
|
for p, q in ((-dx, p0[0]), (dx, (img_w - 1) - p0[0]), (-dy, p0[1]), (dy, (img_h - 1) - p0[1])):
|
|
if abs(p) < 1e-12:
|
|
if q < 0:
|
|
return None
|
|
continue
|
|
t = q / p
|
|
if p < 0:
|
|
t_min = max(t_min, t)
|
|
else:
|
|
t_max = min(t_max, t)
|
|
if t_min > t_max:
|
|
return None
|
|
return t_min, t_max
|
|
|
|
|
|
def _project_edge_point_at_t(p1, p2, t, calib):
    """Evaluate a 3D edge at parameter ``t`` and project that point to the image.

    Returns:
        Tuple ``(point_3d, point_2d)`` where ``point_3d = p1 + t * (p2 - p1)``.
    """
    start = np.asarray(p1, dtype=np.float64)
    end = np.asarray(p2, dtype=np.float64)
    point_3d = start + float(t) * (end - start)
    projected = project_3d_to_2d(point_3d[np.newaxis, :], calib)
    return point_3d, projected[0]
|
|
|
|
|
|
def _refine_visible_edge_boundary(p1, p2, calib, img_w, img_h, t_out, t_in, steps=12):
    """Refine one visible/hidden transition on a projected 3D edge by bisection.

    Args:
        p1: Edge start point (3D).
        p2: Edge end point (3D).
        calib: Calibration passed through to the projection.
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        t_out: Edge parameter whose projection is outside the image.
        t_in: Edge parameter whose projection is inside the image. May be larger or
            smaller than ``t_out``.
        steps: Number of bisection steps.

    Returns:
        The refined edge parameter closest to the transition that was still observed
        to project inside the image (``t_in`` itself if no midpoint was inside).
    """
    # Track the brackets by what they ARE (inside/outside), not by which one is numerically
    # larger, so the search moves toward the transition in both directions.
    t_outside, t_inside = float(t_out), float(t_in)
    for _ in range(steps):
        mid = 0.5 * (t_outside + t_inside)
        _, point_2d = _project_edge_point_at_t(p1, p2, mid, calib)
        if _point_inside_image(point_2d, img_w, img_h):
            t_inside = mid
        else:
            t_outside = mid
    return t_inside
|
|
|
|
|
|
def sample_partial_3d_edge(p1, p2, calib, img_w, img_h, num_samples=5, dense_samples=129):
    """Sample exactly ``num_samples`` points from the visible sub-segment of a projected 3D edge.

    The edge is first sampled densely to find the longest contiguous run of samples that
    project inside the image; both ends of that run are then refined towards the true
    visibility transition, and the final samples are spread uniformly over the result.

    Args:
        p1: Edge start point (3D).
        p2: Edge end point (3D).
        calib: Calibration used for projection.
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        num_samples: Number of output samples.
        dense_samples: Number of samples in the coarse visibility scan.

    Returns:
        Tuple ``(points_3d, points_2d)`` ordered by increasing image x, or
        ``(None, None)`` when no usable visible sub-segment exists.
    """
    ends = np.asarray([p1, p2], dtype=np.float64)
    origin = ends[0:1]
    direction = ends[1:2] - ends[0:1]

    coarse_t = np.linspace(0.0, 1.0, dense_samples, dtype=np.float64)
    coarse_2d = project_3d_to_2d(origin + coarse_t[:, None] * direction, calib)
    inside = np.array([_point_inside_image(uv, img_w, img_h) for uv in coarse_2d], dtype=bool)
    if not inside.any():
        return None, None

    # Split the visible sample indices into contiguous runs and keep the longest one.
    inside_idx = np.flatnonzero(inside)
    run_breaks = np.where(np.diff(inside_idx) > 1)[0] + 1
    longest_run = max(np.split(inside_idx, run_breaks), key=len)
    first, last = int(longest_run[0]), int(longest_run[-1])

    if first > 0:
        t_start = _refine_visible_edge_boundary(
            ends[0], ends[1], calib, img_w, img_h, coarse_t[first - 1], coarse_t[first]
        )
    else:
        t_start = coarse_t[first]

    if last < len(coarse_t) - 1:
        t_end = _refine_visible_edge_boundary(
            ends[0], ends[1], calib, img_w, img_h, coarse_t[last + 1], coarse_t[last]
        )
    else:
        t_end = coarse_t[last]

    if t_end - t_start < 1e-6:
        return None, None

    fine_t = np.linspace(t_start, t_end, num_samples, dtype=np.float64)
    fine_3d = origin + fine_t[:, None] * direction
    fine_2d = project_3d_to_2d(fine_3d, calib)
    if np.isnan(fine_2d).any():
        return None, None
    if not all(_point_inside_image(uv, img_w, img_h) for uv in fine_2d):
        return None, None

    left_to_right = np.argsort(fine_2d[:, 0], kind="stable")
    return fine_3d[left_to_right], fine_2d[left_to_right]
|
|
|
|
|
|
def project_3d_box_edges_with_distortion(corners_3d, calib, samples_per_edge=10):
    """Sample every wireframe edge of a 3D box and project the samples with distortion.

    Args:
        corners_3d: (8, 3) box corners.
        calib: Calibration passed to the distortion-aware projection.
        samples_per_edge: Number of samples taken along each edge.

    Returns:
        Dict mapping edge name to its array of projected 2D samples.
    """
    named_edges = (
        ("back_0", 4, 5), ("back_1", 5, 6), ("back_2", 6, 7), ("back_3", 7, 4),
        ("connect_0", 0, 4), ("connect_1", 1, 5), ("connect_2", 2, 6), ("connect_3", 3, 7),
        ("front_0", 0, 1), ("front_1", 1, 2), ("front_2", 2, 3), ("front_3", 3, 0),
        ("front_x1", 0, 2), ("front_x2", 1, 3),
    )
    return {
        name: project_3d_to_2d_with_distortion(
            sample_3d_edge(corners_3d[a], corners_3d[b], samples_per_edge), calib
        )
        for name, a, b in named_edges
    }
|
|
|
|
|
|
def plot_box3d_on_img_with_distortion(
    img, edge_points_2d, color_front=(0, 0, 255), color_back=(255, 0, 0), color_side=(255, 255, 0), thickness=1
):
    """Draw a 3D box as polylines through distortion-aware projected edge samples.

    Args:
        img: Image to draw on (modified in place).
        edge_points_2d: Dict mapping edge name to an array of 2D samples.
        color_front: Color for edges whose name is in the front set.
        color_back: Color for edges whose name is in the back set.
        color_side: Color for all remaining edges.
        thickness: Line thickness.

    Returns:
        The same ``img`` object. Edges containing any NaN sample are skipped.
    """
    front_names = {"front_0", "front_1", "front_2", "front_3", "front_x1", "front_x2"}
    back_names = {"back_0", "back_1", "back_2", "back_3", "back_x1", "back_x2"}

    for name, samples in edge_points_2d.items():
        if np.isnan(samples).any():
            continue
        if name in front_names:
            color = color_front
        elif name in back_names:
            color = color_back
        else:
            color = color_side
        polyline = samples.astype(np.int32)
        cv2.polylines(img, [polyline], isClosed=False, color=color, thickness=thickness, lineType=cv2.LINE_AA)
    return img
|
|
|
|
|
|
def plot_box3d_on_img(img, corners_2d, color_front=(0, 0, 255), color_back=(255, 0, 0), color_side=(255, 255, 0), thickness=1):
    """Draw a 3D wireframe box from projected 2D corners.

    Args:
        img: Image to draw on (modified in place).
        corners_2d: (8, 2) array-like of projected corner pixels. Rows may be NaN for
            corners that could not be projected.
        color_front: Color for the segments listed in ``front_edges`` (including the two diagonals).
        color_back: Color for the segments listed in ``back_edges``.
        color_side: Color for the four connecting segments.
        thickness: Line thickness.

    Returns:
        The same ``img`` object. Segments with a non-finite endpoint are skipped.
    """
    line_indices = (
        (4, 5), (5, 6), (6, 7), (7, 4),
        (0, 4), (1, 5), (2, 6), (3, 7),
        (0, 1), (1, 2), (2, 3), (3, 0), (0, 2), (1, 3),
    )
    front_edges = {(0, 1), (1, 2), (2, 3), (3, 0), (0, 2), (1, 3)}
    back_edges = {(4, 5), (5, 6), (6, 7), (7, 4)}

    corners = np.asarray(corners_2d, dtype=np.float64)
    drawable = np.isfinite(corners).all(axis=1)
    # Casting NaN/inf to int32 is undefined, so zero-fill those rows first; they are never drawn.
    pts = np.where(drawable[:, None], corners, 0.0).astype(np.int32)

    for i, j in line_indices:
        if not (drawable[i] and drawable[j]):
            continue
        if (i, j) in front_edges:
            color = color_front
        elif (i, j) in back_edges:
            color = color_back
        else:
            color = color_side
        # Hand OpenCV plain Python ints rather than NumPy scalars.
        start = (int(pts[i][0]), int(pts[i][1]))
        end = (int(pts[j][0]), int(pts[j][1]))
        cv2.line(img, start, end, color, thickness, cv2.LINE_AA)
    return img
|
|
|
|
|
|
def back_project_2d_to_3d(uv, depth, calib):
    """Lift a pixel with known depth to a 3D point in camera coordinates.

    Fisheye distortion is removed first when the calibration carries at least four
    distortion coefficients.

    Args:
        uv: Pixel coordinates ``(u, v)``.
        depth: Depth along the camera z axis; must be positive.
        calib: Mapping with ``fx``, ``fy``, ``cx``, ``cy`` and optionally ``distort_coeffs``.

    Returns:
        float64 array ``[x, y, z]``, or ``None`` when ``calib`` is ``None`` or ``depth <= 0``.
    """
    if calib is None or depth <= 0:
        return None

    fx, fy = calib["fx"], calib["fy"]
    cx, cy = calib["cx"], calib["cy"]
    u, v = uv
    x_norm = (u - cx) / fx
    y_norm = (v - cy) / fy

    coeffs = calib.get("distort_coeffs", [])
    if coeffs is not None and len(coeffs) >= 4:
        x_norm, y_norm = remove_fisheye_distortion(x_norm, y_norm, coeffs)

    return np.array([x_norm * depth, y_norm * depth, depth], dtype=np.float64)
|
|
|
|
|
|
def reconstruct_3d_box_from_face(face_uv, face_z, dims, rot_y, face_type, calib):
    """Rebuild the 8 box corners from the pixel position and depth of one face center.

    Args:
        face_uv: Pixel coordinates of the face center.
        face_z: Depth of the face center; must be positive.
        dims: (length, height, width) of the box.
        rot_y: Yaw in radians.
        face_type: Face the center belongs to (0=front, 1=rear, 2=left, 3=right).
        calib: Calibration mapping, or ``None``.

    Returns:
        (8, 3) corner array, or ``None`` when the calibration is missing, the depth is
        not positive, back-projection fails, or any dimension / the yaw is NaN.
    """
    if calib is None or face_z <= 0:
        return None

    face_center = back_project_2d_to_3d(face_uv, face_z, calib)
    if face_center is None:
        return None

    length, height, width = dims
    for value in (length, height, width, rot_y):
        if np.isnan(value):
            return None

    return compute_3d_box_corners(face_center, dims, rot_y, face_type)
|
|
|
|
|
|
def reconstruct_3d_box_from_whole(uv, z3d, dims, rot_y, calib):
    """Rebuild the 8 box corners from the pixel position and depth of the box center.

    Args:
        uv: Pixel coordinates of the box center.
        z3d: Depth of the box center; must be positive.
        dims: (length, height, width) of the box.
        rot_y: Yaw in radians.
        calib: Calibration mapping, or ``None``.

    Returns:
        (8, 3) corner array, or ``None`` when the calibration is missing, the depth is
        not positive, back-projection fails, or any dimension / the yaw is NaN.
    """
    if calib is None or z3d <= 0:
        return None

    box_center = back_project_2d_to_3d(uv, z3d, calib)
    if box_center is None:
        return None

    length, height, width = dims
    for value in (length, height, width, rot_y):
        if np.isnan(value):
            return None

    return compute_3d_box_corners(box_center, dims, rot_y, face_type=-1)
|
|
|
|
|
|
def get_face_bottom_edge_points(corners_3d, face_type, num_samples=5):
    """Sample 3D points along the bottom edge of the requested face.

    Args:
        corners_3d: (8, 3) box corners, or ``None``.
        face_type: Face whose bottom edge is sampled; must be a key of
            ``FACE_BOTTOM_EDGE_CORNERS``.
        num_samples: Number of samples along the edge.

    Returns:
        (num_samples, 3) array, or ``None`` when the corners are missing or the face
        type is unknown.
    """
    if corners_3d is None:
        return None
    if face_type not in FACE_BOTTOM_EDGE_CORNERS:
        return None
    first_corner, second_corner = FACE_BOTTOM_EDGE_CORNERS[face_type]
    return sample_3d_edge(corners_3d[first_corner], corners_3d[second_corner], num_samples=num_samples)
|
|
|
|
|
|
def project_face_bottom_edge(corners_3d, face_type, calib, num_samples=5):
    """Sample a face's bottom edge in 3D and project the samples to the image.

    Returns:
        ``(points_3d, points_2d)`` sorted by increasing image x; ``(None, None)`` when
        the edge cannot be sampled; ``(points_3d, None)`` (unsorted) when any sample
        fails to project.
    """
    edge_3d = get_face_bottom_edge_points(corners_3d, face_type, num_samples=num_samples)
    if edge_3d is None:
        return None, None

    edge_2d = project_3d_to_2d(edge_3d, calib)
    if np.isnan(edge_2d).any():
        return edge_3d, None

    left_to_right = np.argsort(edge_2d[:, 0], kind="stable")
    return edge_3d[left_to_right], edge_2d[left_to_right]
|
|
|
|
|
|
def project_partial_face_bottom_edge(corners_3d, face_type, calib, img_w, img_h, num_samples=5):
    """Project exactly ``num_samples`` points from the visible part of a face's bottom edge.

    Returns:
        The ``(points_3d, points_2d)`` pair produced by ``sample_partial_3d_edge`` for
        the face's bottom edge, or ``(None, None)`` when the corners are missing or
        the face type is unknown.
    """
    if corners_3d is None:
        return None, None
    if face_type not in FACE_BOTTOM_EDGE_CORNERS:
        return None, None
    first_corner, second_corner = FACE_BOTTOM_EDGE_CORNERS[face_type]
    return sample_partial_3d_edge(
        corners_3d[first_corner], corners_3d[second_corner], calib, img_w, img_h, num_samples=num_samples
    )
|
|
|
|
|
|
def collect_face_bottom_edges(corners_3d, face_types, calib, num_samples=5):
    """Project the bottom edge of every requested face and gather the results.

    Faces whose edge cannot be sampled or fully projected are dropped.

    Returns:
        ``(points_3d, points_2d)`` as float32 arrays: the single edge's arrays when
        exactly one face survives, arrays stacked along a new leading axis when several
        do, and ``(None, None)`` when none do or ``corners_3d`` is ``None``.
    """
    if corners_3d is None:
        return None, None

    batches_3d, batches_2d = [], []
    for face_type in face_types:
        pts_3d, pts_2d = project_face_bottom_edge(corners_3d, face_type, calib, num_samples=num_samples)
        if pts_3d is not None and pts_2d is not None:
            batches_3d.append(pts_3d.astype(np.float32, copy=False))
            batches_2d.append(pts_2d.astype(np.float32, copy=False))

    kept = len(batches_2d)
    if kept == 0:
        return None, None
    if kept == 1:
        return batches_3d[0], batches_2d[0]
    return np.stack(batches_3d, axis=0), np.stack(batches_2d, axis=0)
|
|
|
|
|
|
def _edge_batches_to_list(edge_points):
|
|
"""Normalize edge sample arrays to a list of `(5, D)` arrays."""
|
|
if edge_points is None:
|
|
return []
|
|
arr = np.asarray(edge_points, dtype=np.float32)
|
|
if arr.ndim == 2:
|
|
return [arr]
|
|
return [arr[i] for i in range(arr.shape[0])]
|
|
|
|
|
|
def _stack_edge_batches(edge_batches):
|
|
"""Convert a list of edge sample arrays back to the legacy stacked representation."""
|
|
if not edge_batches:
|
|
return None
|
|
if len(edge_batches) == 1:
|
|
return edge_batches[0]
|
|
return np.stack(edge_batches, axis=0)
|
|
|
|
|
|
def _append_edge_batch(edge_points_3d, edge_points_2d, decoded_edge):
|
|
"""Append one decoded edge sample set to stacked edge arrays."""
|
|
if decoded_edge is None:
|
|
return edge_points_3d, edge_points_2d
|
|
edge3d_list = _edge_batches_to_list(edge_points_3d)
|
|
edge2d_list = _edge_batches_to_list(edge_points_2d)
|
|
edge3d_list.append(np.asarray(decoded_edge["points_3d"], dtype=np.float32))
|
|
edge2d_list.append(np.asarray(decoded_edge["points_2d"], dtype=np.float32))
|
|
return _stack_edge_batches(edge3d_list), _stack_edge_batches(edge2d_list)
|
|
|
|
|
|
def collect_precomputed_edge_points_2d(edge_faces_points_2d, edge_faces_valid=None, visible_face_types=()):
    """Pick the valid per-face edge polylines of one object, ready for drawing.

    Args:
        edge_faces_points_2d: 3-D array-like indexed by face type on its first axis, or ``None``.
        edge_faces_valid: Optional per-face validity flags. Shorter inputs are padded
            with ``False``; longer ones are truncated. All faces are valid when omitted.
        visible_face_types: Face types to list first (in the given order) when valid.

    Returns:
        The selected polylines in stacked form (a single array for one face, a stacked
        array for several), or ``None`` when nothing is selectable.
    """
    if edge_faces_points_2d is None:
        return None

    points = np.asarray(edge_faces_points_2d, dtype=np.float32)
    if points.ndim != 3 or points.shape[0] == 0:
        return None
    n_faces = points.shape[0]

    if edge_faces_valid is None:
        valid = np.ones(n_faces, dtype=bool)
    else:
        flags = np.asarray(edge_faces_valid, dtype=bool).reshape(-1)
        # Fit the flags to the number of faces: missing entries count as invalid.
        valid = np.zeros(n_faces, dtype=bool)
        usable = min(n_faces, flags.shape[0])
        valid[:usable] = flags[:usable]

    ordered_faces = []
    # Preferred faces first, then every remaining valid face in index order.
    for preferred in visible_face_types or ():
        preferred = int(preferred)
        if 0 <= preferred < n_faces and valid[preferred] and preferred not in ordered_faces:
            ordered_faces.append(preferred)
    for remaining in np.flatnonzero(valid):
        remaining = int(remaining)
        if remaining not in ordered_faces:
            ordered_faces.append(remaining)

    if not ordered_faces:
        return None
    selected = [points[face].astype(np.float32, copy=False) for face in ordered_faces]
    return _stack_edge_batches(selected)
|
|
|
|
|
|
def decode_visible_face_edge_from_prediction(pred_edge_60, face_type, anchor_xy, stride):
    """Decode one face's block of the auxiliary edge prediction.

    The face's 15 values are read as 5 samples of (u offset, v offset, depth). Offsets
    are added to the anchor position and multiplied by the stride to obtain pixels.

    Args:
        pred_edge_60: Edge prediction vector, or ``None``.
        face_type: Face type in 0..3.
        anchor_xy: Anchor position ``(x, y)`` the offsets are relative to.
        stride: Multiplier converting anchor-relative units to pixels.

    Returns:
        Dict with ``points_2d`` (5, 2), ``depths`` (5,) and ``face_type``, with samples
        sorted by increasing pixel x; ``None`` for a missing prediction or an
        out-of-range face type.
    """
    if pred_edge_60 is None or face_type not in range(4):
        return None

    start = FACE_EDGE_OFFSETS_60[face_type]
    samples = np.asarray(pred_edge_60[start : start + 15], dtype=np.float32).reshape(5, 3)

    pixels = np.empty((5, 2), dtype=np.float32)
    for axis in (0, 1):
        pixels[:, axis] = (anchor_xy[axis] + samples[:, axis]) * stride

    left_to_right = np.argsort(pixels[:, 0], kind="stable")
    return {
        "points_2d": pixels[left_to_right],
        "depths": samples[left_to_right, 2].astype(np.float32),
        "face_type": face_type,
    }
|
|
|
|
|
|
def _is_gt_face_cut(target_42, face_type):
|
|
"""Return whether a GT face was invalidated by crop handling."""
|
|
if face_type not in range(4):
|
|
return False
|
|
off = FACE_OFFSETS_42[face_type]
|
|
face = target_42[off : off + 8]
|
|
return np.all(face[:6] == -1) and face[7] <= 0
|
|
|
|
|
|
def get_gt_cut_state(target_42):
    """Derive the cut state of a GT object from which of its faces are invalidated.

    Returns:
        ``CUT_STATE_IN`` when the rear and both side faces are cut, ``CUT_STATE_OUT``
        when the front and both side faces are cut (checked in that order), otherwise
        ``CUT_STATE_NORMAL`` -- also for a missing or too-short target.
    """
    if target_42 is None or len(target_42) < 42:
        return CUT_STATE_NORMAL

    front_cut, rear_cut, left_cut, right_cut = (_is_gt_face_cut(target_42, face) for face in range(4))
    both_sides_cut = left_cut and right_cut

    if rear_cut and both_sides_cut:
        return CUT_STATE_IN
    if front_cut and both_sides_cut:
        return CUT_STATE_OUT
    return CUT_STATE_NORMAL
|
|
|
|
|
|
def get_gt_cut_side(target_42, img_w, img_h, tol=1e-4, score_thr=FACE_VISIBILITY_SCORE_THRESH):
    """Infer whether a cut GT object is clipped on the left or right image border.

    A reference x position is taken from the best-scoring visible face; when side faces
    (types 2 and 3) carry a usable x coordinate, their x (or the mean of both) is used
    instead. The reference is then compared against the image borders.

    Args:
        target_42: GT target vector.
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        tol: Tolerance in pixels for touching a border.
        score_thr: Minimum visibility score for a face to count as visible.

    Returns:
        ``"left"``, ``"right"``, or ``None`` when there is no visible face or the
        reference x touches neither border.
    """
    candidates = []
    for off in FACE_OFFSETS_42:
        block = target_42[off : off + 8]
        if block[7] != 1:
            continue
        if np.isnan(block[6]) or block[6] < score_thr:
            continue
        if np.isnan(block[4]) or np.isnan(block[5]):
            continue
        if block[4] < 0 or block[5] < 0:
            continue
        candidates.append((float(block[6]), block[4] * img_w, block[5] * img_h))

    if not candidates:
        return None

    # max() keeps the first candidate on ties.
    reference_u = max(candidates, key=lambda entry: entry[0])[1]

    side_us = []
    for side_face in (2, 3):
        start = FACE_OFFSETS_42[side_face]
        block = target_42[start : start + 8]
        if np.isnan(block[4]) or block[4] < 0:
            continue
        side_us.append(block[4] * img_w)

    if len(side_us) == 1:
        reference_u = side_us[0]
    elif side_us:
        reference_u = float(np.mean(side_us))

    if reference_u <= tol:
        return "left"
    if reference_u >= img_w - 1 - tol:
        return "right"
    return None
|
|
|
|
|
|
def get_cut_side_from_bbox_xyxy(bbox_xyxy, img_w, tol=1.0):
    """Infer which horizontal image border a clipped 2D box touches.

    Args:
        bbox_xyxy: Box as ``(x1, y1, x2, y2)`` in pixels, or ``None``.
        img_w: Image width in pixels.
        tol: Distance in pixels within which a box edge counts as touching a border.

    Returns:
        ``"left"`` or ``"right"`` when the box touches exactly one of the two borders,
        otherwise ``None`` (also when it touches both, neither, or the box is ``None``).
    """
    if bbox_xyxy is None:
        return None

    x1, _, x2, _ = np.asarray(bbox_xyxy, dtype=np.float64)
    right_limit = img_w - 1 - tol
    on_left = x1 <= tol and x2 > tol
    on_right = x2 >= right_limit and x1 < right_limit

    if on_left and not on_right:
        return "left"
    if on_right and not on_left:
        return "right"
    return None
|
|
|
|
|
|
def _get_camera_facing_side_face_from_corners(corners_3d):
|
|
"""Return the side face whose outward normal points most toward the camera."""
|
|
if corners_3d is None:
|
|
return None
|
|
|
|
corners = np.asarray(corners_3d, dtype=np.float64)
|
|
if corners.shape != (8, 3) or not np.isfinite(corners).all():
|
|
return None
|
|
|
|
box_center = corners.mean(axis=0)
|
|
best_face_type, best_score = None, -np.inf
|
|
for face_type in (2, 3):
|
|
face_points = corners[list(FACE_CORNERS[face_type])]
|
|
face_center = face_points.mean(axis=0)
|
|
view_dir = -face_center
|
|
view_norm = float(np.linalg.norm(view_dir))
|
|
if view_norm < 1e-8:
|
|
continue
|
|
|
|
edge_a = face_points[1] - face_points[0]
|
|
edge_b = face_points[2] - face_points[1]
|
|
normal = np.cross(edge_a, edge_b)
|
|
normal_norm = float(np.linalg.norm(normal))
|
|
if normal_norm < 1e-8:
|
|
continue
|
|
|
|
if np.dot(normal, face_center - box_center) < 0:
|
|
normal = -normal
|
|
score = float(np.dot(normal / normal_norm, view_dir / view_norm))
|
|
if score > best_score:
|
|
best_face_type, best_score = face_type, score
|
|
|
|
return best_face_type
|
|
|
|
|
|
def get_cut_object_side_face(face_type_or_state, cut_side=None, corners_3d=None):
    """Resolve the partially visible side face for a cut object.

    Reconstructed box geometry is preferred when available, so the chosen side can
    follow the yaw. Otherwise the image-border heuristic is used: a box clipped on the
    left maps to face 3, one clipped on the right to face 2.

    Args:
        face_type_or_state: Cut state; anything other than cut-in / cut-out yields ``None``.
        cut_side: ``"left"``, ``"right"`` or ``None``.
        corners_3d: Optional (8, 3) box corners.

    Returns:
        2 or 3, or ``None`` when the object is not cut or the side cannot be resolved.
    """
    if face_type_or_state not in {CUT_STATE_IN, CUT_STATE_OUT}:
        return None

    from_geometry = _get_camera_facing_side_face_from_corners(corners_3d)
    if from_geometry in (2, 3):
        return from_geometry

    # Fallback: border heuristic based on which image side clips the object.
    return {"left": 3, "right": 2}.get(cut_side)
|
|
|
|
|
|
def get_cut_object_side_face_from_yaw(cut_state, yaw):
    """Infer the partially visible side face from the cut state and the whole-box yaw.

    Returns:
        For cut-in: 3 when ``sin(yaw) > 0``, else 2. For cut-out: 2 when
        ``sin(yaw) < 0``, else 3. ``None`` for any other state.
    """
    if cut_state == CUT_STATE_IN:
        yaw_sin = np.sin(float(yaw))
        if yaw_sin > 0:
            return 3
        return 2
    if cut_state == CUT_STATE_OUT:
        yaw_sin = np.sin(float(yaw))
        if yaw_sin < 0:
            return 2
        return 3
    return None
|
|
|
|
|
|
def get_pred_cut_state(pred_41):
    """Return the predicted cut state: the argmax over entries 38..40 of the prediction."""
    logits = np.asarray(pred_41[38:41], dtype=np.float32)
    winner = np.argmax(logits)
    return int(winner)
|
|
|
|
|
|
def get_pred_cut_primary_face(cut_state):
    """Return the mandated longitudinal face for a cut prediction.

    Returns:
        0 (front) for cut-in, 1 (rear) for cut-out, ``None`` for any other state.
    """
    if cut_state == CUT_STATE_IN:
        primary = 0
    elif cut_state == CUT_STATE_OUT:
        primary = 1
    else:
        primary = None
    return primary
|
|
|
|
|
|
def _reconstruct_pred_corners_for_cut_edge(pred_41, anchor_xy, stride, calib, cut_state=None):
|
|
"""Reconstruct predicted box corners for cut-edge side-face selection."""
|
|
if calib is None:
|
|
return None
|
|
|
|
cut_state = get_pred_cut_state(pred_41) if cut_state is None else int(cut_state)
|
|
dims = np.asarray(pred_41[27:30], dtype=np.float32)
|
|
rot_y = _decode_yaw_from_prediction(pred_41)
|
|
if np.any(np.isnan(dims)) or not np.isfinite(rot_y):
|
|
return None
|
|
|
|
primary_face = get_pred_cut_primary_face(cut_state)
|
|
if primary_face is not None:
|
|
off = FACE_OFFSETS_41[primary_face]
|
|
z_face = float(pred_41[off])
|
|
uv_face_offset = np.asarray(pred_41[off + 1 : off + 3], dtype=np.float32)
|
|
if np.isfinite(z_face) and z_face > 0 and np.isfinite(uv_face_offset).all():
|
|
u_face = float((anchor_xy[0] + uv_face_offset[0]) * stride)
|
|
v_face = float((anchor_xy[1] + uv_face_offset[1]) * stride)
|
|
corners = reconstruct_3d_box_from_face((u_face, v_face), z_face, dims, rot_y, primary_face, calib)
|
|
if corners is not None:
|
|
return corners
|
|
|
|
z_whole = float(pred_41[24])
|
|
uv_whole_offset = np.asarray(pred_41[25:27], dtype=np.float32)
|
|
if not np.isfinite(z_whole) or z_whole <= 0 or not np.isfinite(uv_whole_offset).all():
|
|
return None
|
|
|
|
u_whole = float((anchor_xy[0] + uv_whole_offset[0]) * stride)
|
|
v_whole = float((anchor_xy[1] + uv_whole_offset[1]) * stride)
|
|
return reconstruct_3d_box_from_whole((u_whole, v_whole), z_whole, dims, rot_y, calib)
|
|
|
|
|
|
def _resolve_pred_cut_state_for_decode(pred_41, bbox_xyxy=None, img_w=None):
    """Accept a predicted cut state only when the 2D box is clipped at a side border.

    Returns:
        ``(cut_state, cut_side)``. A predicted cut state is downgraded to
        ``CUT_STATE_NORMAL`` (with side ``None``) unless the box touches exactly one of
        the left/right image borders; this includes the case where the box or the
        image width is not provided.
    """
    predicted_state = get_pred_cut_state(pred_41)
    if predicted_state == CUT_STATE_NORMAL:
        return predicted_state, None

    have_box = bbox_xyxy is not None and img_w is not None
    side = get_cut_side_from_bbox_xyxy(bbox_xyxy, img_w) if have_box else None

    if side in {"left", "right"}:
        return predicted_state, side
    return CUT_STATE_NORMAL, None
|
|
|
|
|
|
def _select_best_pred_face_score(pred_41):
    """Find the predicted face with the highest finite score, ignoring any threshold.

    Returns:
        ``(face_type, score)`` for the best face (the first one on ties), or ``None``
        when no face has a finite score.
    """
    best = None
    for face_type, off in enumerate(FACE_OFFSETS_41):
        score = float(pred_41[off + 5])
        if not np.isfinite(score):
            continue
        if best is None or score > best[1]:
            best = (int(face_type), score)
    return best
|
|
|
|
|
|
def select_pred_visible_faces_for_decode(pred_41, score_thr=FACE_VISIBILITY_SCORE_THRESH, bbox_xyxy=None, img_w=None):
    """Return the visible faces used for decoding and drawing.

    Cut objects get exactly one face:
    - cut_in  -> front face only
    - cut_out -> rear face only
    Normal objects keep the thresholded visible-face list, and the top-scoring face is
    always included even when its score is below the threshold. The partial side edge
    of cut objects is handled separately by the cut-edge decoder.

    Args:
        pred_41: Prediction vector.
        score_thr: Visibility score threshold for normal objects.
        bbox_xyxy: Optional 2D box used to confirm the object is clipped at a border.
        img_w: Optional image width used together with ``bbox_xyxy``.

    Returns:
        List of ``(face_type, score)`` tuples.
    """
    cut_state, _ = _resolve_pred_cut_state_for_decode(pred_41, bbox_xyxy=bbox_xyxy, img_w=img_w)
    forced_face = get_pred_cut_primary_face(cut_state)
    if forced_face is not None:
        forced_score = float(pred_41[FACE_OFFSETS_41[forced_face] + 5])
        return [(forced_face, forced_score)]

    faces = list(select_pred_visible_faces(pred_41, score_thr=score_thr))
    top = _select_best_pred_face_score(pred_41)
    if top is None:
        return faces

    top_type, top_score = top
    already_listed = any(int(face_type) == int(top_type) for face_type, _ in faces)
    if not already_listed:
        faces.append((int(top_type), float(top_score)))
    return faces
|
|
|
|
|
|
def decode_cut_partial_side_edge_from_prediction(
    pred_41, pred_edge_60, anchor_xy, stride, img_w, cut_side=None, calib=None, corners_3d=None
):
    """Decode the partially visible side bottom edge for a cut prediction.

    Args:
        pred_41: Prediction vector.
        pred_edge_60: Auxiliary edge prediction vector, or ``None``.
        anchor_xy: Anchor position the predicted offsets are relative to.
        stride: Multiplier converting anchor-relative units to pixels.
        img_w: Image width (not read by this function).
        cut_side: ``"left"``, ``"right"`` or ``None``.
        calib: Optional calibration used to reconstruct corners when none are given.
        corners_3d: Optional precomputed (8, 3) box corners.

    Returns:
        The decoded edge dict extended with ``cut_state``, ``cut_side`` and
        ``is_partial=True``; ``None`` when there is no edge prediction, the predicted
        state is normal, or the side face cannot be resolved.
    """
    if pred_edge_60 is None:
        return None

    state = get_pred_cut_state(pred_41)
    if state == CUT_STATE_NORMAL:
        return None

    box_corners = corners_3d
    if box_corners is None and calib is not None:
        box_corners = _reconstruct_pred_corners_for_cut_edge(pred_41, anchor_xy, stride, calib, cut_state=state)

    side_face = get_cut_object_side_face(state, cut_side, corners_3d=box_corners)
    if side_face is None:
        return None

    edge = decode_visible_face_edge_from_prediction(pred_edge_60, side_face, anchor_xy, stride)
    if edge is None:
        return None

    edge["cut_state"] = state
    edge["cut_side"] = cut_side
    edge["is_partial"] = True
    return edge
|
|
|
|
|
|
def _resolve_gt_cut_partial_side_face(target_42, img_w, img_h, bbox_xyxy=None, score_thr=FACE_VISIBILITY_SCORE_THRESH):
    """Resolve the cut state and clipped image side of a GT object.

    The side is taken from the 2D box when it touches exactly one border, otherwise
    from the face data in the target.

    Returns:
        ``(cut_state, cut_side)``; the side is ``None`` for normal objects or when it
        cannot be determined.
    """
    state = get_gt_cut_state(target_42)
    if state == CUT_STATE_NORMAL:
        return state, None

    side = get_cut_side_from_bbox_xyxy(bbox_xyxy, img_w)
    if side is not None:
        return state, side
    return state, get_gt_cut_side(target_42, img_w, img_h, score_thr=score_thr)
|
|
|
|
|
|
def _reconstruct_gt_corners_for_cut_edge(
    target_42, cls_id, calib, img_w, img_h, face_3d_classes, complete_3d_classes, score_thr=FACE_VISIBILITY_SCORE_THRESH
):
    """Reconstruct GT box corners for choosing the cut-edge side face.

    For face-3D classes the best-scoring visible face is tried first; if that fails, or
    for complete-3D classes, the whole-box center is used. Face and box positions are
    stored normalized and scaled by the image size; depths are multiplied by
    ``calib["depth_scale"]`` (default 1.0).

    Args:
        target_42: GT target vector (NumPy array).
        cls_id: Class id of the object.
        calib: Calibration mapping, or ``None``.
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        face_3d_classes: Class ids annotated with per-face 3D data.
        complete_3d_classes: Class ids annotated with whole-box 3D data.
        score_thr: Visibility score threshold for selecting GT faces.

    Returns:
        (8, 3) corner array, or ``None`` when no reconstruction is possible.
    """
    if calib is None:
        return None

    depth_scale = calib.get("depth_scale", 1.0)
    box_dims = target_42[3:6].astype(np.float32)
    yaw = float(target_42[6])
    if np.any(np.isnan(box_dims)) or not np.isfinite(yaw):
        return None

    is_face_class = cls_id in face_3d_classes
    if is_face_class:
        faces = select_gt_visible_faces(target_42, score_thr=score_thr)
        if faces:
            face_type, face = max(faces, key=lambda entry: float(entry[1][6]))
            face_u = float(face[4] * img_w)
            face_v = float(face[5] * img_h)
            face_depth = float(face[2] * depth_scale)
            usable = np.isfinite(face_u) and np.isfinite(face_v) and np.isfinite(face_depth) and face_depth > 0
            if usable:
                corners = reconstruct_3d_box_from_face((face_u, face_v), face_depth, box_dims, yaw, face_type, calib)
                if corners is not None:
                    return corners

    if not is_face_class and cls_id not in complete_3d_classes:
        return None

    # Fall back to the whole-box center.
    whole_depth = float(target_42[2])
    whole_uv = target_42[7:9]
    if np.any(np.isnan(whole_uv)) or not np.isfinite(whole_depth) or whole_depth <= 0:
        return None

    center_px = (float(whole_uv[0] * img_w), float(whole_uv[1] * img_h))
    return reconstruct_3d_box_from_whole(center_px, float(whole_depth * depth_scale), box_dims, yaw, calib)
|
|
|
|
|
|
def decode_cut_partial_side_edge_from_gt(
    target_42,
    cls_id,
    calib,
    img_w,
    img_h,
    face_3d_classes,
    complete_3d_classes,
    bbox_xyxy=None,
    corners_3d=None,
    score_thr=FACE_VISIBILITY_SCORE_THRESH,
):
    """Decode the partially visible side bottom edge of a cut GT object.

    Args:
        target_42: GT label vector.
        cls_id: Class id; only members of ``face_3d_classes`` are decoded.
        calib: Calibration mapping forwarded to the projection helpers.
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        face_3d_classes: Container of class ids decoded from visible faces.
        complete_3d_classes: Container of class ids decoded from the whole box.
        bbox_xyxy: Optional 2D box used to resolve the cut side.
        corners_3d: Optional pre-computed box corners; reconstructed from the label when None.
        score_thr: Visibility-score threshold forwarded to the helpers.

    Returns:
        Dict with ``points_3d``, ``points_2d``, ``depths`` (float32 arrays), ``face_type``,
        ``cut_state``, ``cut_side`` and ``is_partial=True``; None whenever any step fails.
    """
    if cls_id not in face_3d_classes:
        return None

    cut_state, cut_side = _resolve_gt_cut_partial_side_face(target_42, img_w, img_h, bbox_xyxy=bbox_xyxy, score_thr=score_thr)
    if cut_side not in {"left", "right"}:
        return None

    box_corners = corners_3d
    if box_corners is None:
        box_corners = _reconstruct_gt_corners_for_cut_edge(
            target_42, cls_id, calib, img_w, img_h, face_3d_classes, complete_3d_classes, score_thr=score_thr
        )
    if box_corners is None:
        return None

    side_face = get_cut_object_side_face(cut_state, cut_side, corners_3d=box_corners)
    if side_face is None:
        return None
    if not _is_gt_face_cut(target_42, side_face):
        return None

    edge_3d, edge_2d = project_partial_face_bottom_edge(box_corners, side_face, calib, img_w, img_h, num_samples=5)
    if edge_3d is None or edge_2d is None:
        return None

    return {
        "points_3d": edge_3d.astype(np.float32),
        "points_2d": edge_2d.astype(np.float32),
        "depths": edge_3d[:, 2].astype(np.float32),
        "face_type": side_face,
        "cut_state": cut_state,
        "cut_side": cut_side,
        "is_partial": True,
    }
|
|
|
|
|
|
def decode_visible_face_edge_from_gt(
    target_42,
    cls_id,
    calib,
    img_w,
    img_h,
    face_3d_classes,
    complete_3d_classes,
    face_type=None,
    score_thr=FACE_VISIBILITY_SCORE_THRESH,
    bbox_xyxy=None,
):
    """Decode bottom-edge samples of one GT visible face.

    A cut object's partial side edge takes precedence when it exists and matches the
    requested face (or no face was requested). Otherwise the edge is projected from the
    decoded GT box.

    Args:
        target_42: GT label vector.
        cls_id: Class id; only members of ``face_3d_classes`` are decoded.
        calib: Calibration mapping forwarded to the decoding helpers.
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        face_3d_classes: Container of class ids decoded from visible faces.
        complete_3d_classes: Container of class ids decoded from the whole box.
        face_type: Face to decode (0..3). None selects the decoded target's primary face.
        score_thr: Visibility-score threshold.
        bbox_xyxy: Optional 2D box used for the cut-side lookup.

    Returns:
        The partial-edge dict, or a dict with ``points_3d``, ``points_2d``, ``depths`` and
        ``face_type``; None if nothing can be decoded or the requested face is not visible.
    """
    if cls_id not in face_3d_classes:
        return None

    cut_edge = decode_cut_partial_side_edge_from_gt(
        target_42,
        cls_id,
        calib,
        img_w,
        img_h,
        face_3d_classes,
        complete_3d_classes,
        bbox_xyxy=bbox_xyxy,
        score_thr=score_thr,
    )
    if cut_edge is not None:
        if face_type is None or face_type == cut_edge["face_type"]:
            return cut_edge

    decoded = decode_3d_target(
        target_42, cls_id, calib, img_w, img_h, face_3d_classes, complete_3d_classes, score_thr=score_thr
    )
    if decoded is None or decoded.get("corners_3d") is None:
        return None

    gt_visible = tuple(int(kind) for kind, _ in select_gt_visible_faces(target_42, score_thr=score_thr))
    if face_type is None:
        chosen = decoded.get("visible_face_type")
    else:
        chosen = face_type
    if chosen not in range(4):
        return None
    # An explicitly requested face must also be among the GT-visible faces.
    if face_type is not None and chosen not in gt_visible:
        return None

    edge_3d, edge_2d = project_face_bottom_edge(decoded["corners_3d"], chosen, calib, num_samples=5)
    if edge_3d is None or edge_2d is None:
        return None

    return {
        "points_3d": edge_3d.astype(np.float32),
        "points_2d": edge_2d.astype(np.float32),
        "depths": edge_3d[:, 2].astype(np.float32),
        "face_type": chosen,
    }
|
|
|
|
|
|
def _decoded_edge_to_points_3d(decoded_edge, calib):
|
|
"""Back-project one decoded edge sample set into 3D camera coordinates."""
|
|
if decoded_edge is None:
|
|
return None
|
|
points_3d = []
|
|
for pt, depth in zip(decoded_edge["points_2d"], decoded_edge["depths"]):
|
|
point_3d = back_project_2d_to_3d(tuple(pt), float(depth), calib)
|
|
if point_3d is None:
|
|
return None
|
|
points_3d.append(point_3d)
|
|
return np.asarray(points_3d, dtype=np.float32)
|
|
|
|
|
|
def _decoded_edge_points_are_drawable(points_2d, img_w=None, img_h=None, min_endpoint_dist_px=2.0):
|
|
"""Return whether decoded edge points correspond to a visible, drawable in-image segment."""
|
|
if points_2d is None:
|
|
return False
|
|
|
|
pts = np.asarray(points_2d, dtype=np.float32)
|
|
if pts.ndim != 2 or pts.shape[0] < 2 or pts.shape[1] != 2 or not np.isfinite(pts).all():
|
|
return False
|
|
|
|
if img_w is not None and img_h is not None:
|
|
if not np.all([_point_inside_image(point_2d, img_w, img_h) for point_2d in pts]):
|
|
return False
|
|
|
|
endpoint_dist = float(np.linalg.norm(pts[-1] - pts[0]))
|
|
return endpoint_dist >= float(min_endpoint_dist_px)
|
|
|
|
|
|
def _edge_segment_length_3d(points_3d):
|
|
"""Return the visible BEV length of one decoded bottom-edge segment.
|
|
|
|
Bottom-edge size recovery should ignore vertical noise in the decoded points and only measure the
|
|
ground-plane extent (x/z).
|
|
"""
|
|
if points_3d is None:
|
|
return None
|
|
pts = np.asarray(points_3d, dtype=np.float32)
|
|
if pts.ndim != 2 or pts.shape[0] < 2 or pts.shape[1] != 3 or not np.isfinite(pts).all():
|
|
return None
|
|
return float(np.linalg.norm(pts[-1, [0, 2]] - pts[0, [0, 2]]))
|
|
|
|
|
|
def _prediction_lateral_distance_m_from_center(center):
|
|
"""Return absolute lateral distance from any predicted metric-space anchor center."""
|
|
if center is None:
|
|
return None
|
|
center = np.asarray(center, dtype=np.float32).reshape(-1)
|
|
if center.shape[0] < 1 or not np.isfinite(center[0]):
|
|
return None
|
|
return float(abs(center[0]))
|
|
|
|
|
|
def edge_points_to_yaw(points_3d, face_type):
    """Infer whole-box yaw from the 3D bottom-edge samples of one visible face.

    The heading is derived in the x/z plane from the vector between the first and last
    finite sample. For face types 0 and 1 the forward direction is perpendicular to the
    edge; for types 2 and 3 it is parallel to it.

    Args:
        points_3d: Array-like of shape (N, >=3) holding at least two samples; only
            columns 0 (x) and 2 (z) define the heading. Rows with non-finite values are
            dropped.
        face_type: Visible face index in ``range(4)``.

    Returns:
        Yaw in radians wrapped to [-pi, pi), or NaN when the input is missing,
        mis-shaped, has fewer than two finite samples, or the edge has zero length.
    """
    if points_3d is None or len(points_3d) < 2 or face_type not in range(4):
        return float("nan")

    pts = np.asarray(points_3d, dtype=np.float64)
    # Malformed input (e.g. a single flat point or 2-column samples) must yield NaN like
    # every other failure here, not an axis/index error from the row filter below.
    if pts.ndim != 2 or pts.shape[1] < 3:
        return float("nan")

    pts = pts[np.isfinite(pts).all(axis=1)]
    if len(pts) < 2:
        return float("nan")

    tangent = np.array([pts[-1, 0] - pts[0, 0], pts[-1, 2] - pts[0, 2]], dtype=np.float64)
    tangent_norm = float(np.linalg.norm(tangent))
    if tangent_norm < 1e-8:
        return float("nan")
    tangent /= tangent_norm

    midpoint = np.mean(pts[:, [0, 2]], axis=0)

    def _rot_cw(v):
        return np.array([v[1], -v[0]], dtype=np.float64)

    def _rot_ccw(v):
        return np.array([-v[1], v[0]], dtype=np.float64)

    if face_type in (0, 1):
        forward_candidates = (_rot_cw(tangent), -_rot_cw(tangent))
    else:
        forward_candidates = (tangent, -tangent)

    def _face_normal(forward):
        """Outward normal of the requested face for a given forward direction."""
        if face_type == 0:
            return forward
        if face_type == 1:
            return -forward
        if face_type == 2:
            return _rot_ccw(forward)
        return -_rot_ccw(forward)

    # The edge samples arrive sorted left-to-right in image space, so the tangent has an
    # unavoidable 180-degree ambiguity in world space. Resolve it by selecting the forward
    # direction whose face normal points most toward the camera for the requested visible face.
    best_forward = min(forward_candidates, key=lambda forward: float(np.dot(_face_normal(forward), midpoint)))
    yaw = np.arctan2(-best_forward[1], best_forward[0])

    return float((yaw + np.pi) % (2 * np.pi) - np.pi)
|
|
|
|
|
|
def visible_face_edges_to_yaw(face_edges_3d, face_scores=None):
    """Estimate whole-box yaw from one or more visible-face bottom edges.

    When both a front/rear edge (types 0/1) and a side edge (types 2/3) are present, the
    best-scoring pair is handed to the two-edge estimator and a finite result is returned
    directly. Otherwise each edge votes with its own yaw and the votes are combined as a
    score-weighted mean of unit heading vectors.

    Args:
        face_edges_3d: Mapping ``face_type -> points_3d`` or an iterable of
            ``(face_type, points_3d)`` pairs; None yields NaN.
        face_scores: Optional per-face weights, either a mapping (missing keys weigh 1.0)
            or an indexable. Non-finite or non-positive weights are replaced by 1.0.

    Returns:
        Yaw in radians (wrapped to [-pi, pi) on the averaging path), or NaN when no edge
        produces a finite yaw.
    """
    if face_edges_3d is None:
        return float("nan")

    def _weight_for(face_key):
        """Resolve the weight of one face, defaulting to 1.0 for unusable values."""
        if face_scores is None:
            return 1.0
        if hasattr(face_scores, "get"):
            raw = face_scores.get(face_key, 1.0)
        else:
            raw = face_scores[face_key]
        return raw if np.isfinite(raw) and raw > 0 else 1.0

    entries = list(face_edges_3d.items() if hasattr(face_edges_3d, "items") else face_edges_3d)

    scored = []
    for face_key, edge_points in entries:
        weight = _weight_for(face_key)
        scored.append(
            {
                "face_type": int(face_key),
                "points_3d": np.asarray(edge_points, dtype=np.float32),
                "score": float(weight),
            }
        )

    front_rear_group = [entry for entry in scored if entry["face_type"] in (0, 1)]
    side_group = [entry for entry in scored if entry["face_type"] in (2, 3)]
    if front_rear_group and side_group:
        top_front_rear = max(front_rear_group, key=lambda entry: entry["score"])
        top_side = max(side_group, key=lambda entry: entry["score"])
        paired_yaw = _estimate_two_edge_yaw_from_candidates(top_front_rear, top_side)
        if np.isfinite(paired_yaw):
            return paired_yaw

    # Per-edge voting; the weights resolved above are reused for the finite votes.
    headings, masses = [], []
    for (face_key, edge_points), entry in zip(entries, scored):
        vote = edge_points_to_yaw(edge_points, face_key)
        if np.isfinite(vote):
            headings.append(float(vote))
            masses.append(entry["score"])

    if not headings:
        return float("nan")
    if len(headings) == 1:
        return float(headings[0])

    directions = np.stack([np.cos(headings), -np.sin(headings)], axis=1)
    blended = np.sum(directions * np.asarray(masses, dtype=np.float64)[:, None], axis=0)
    magnitude = float(np.linalg.norm(blended))
    if magnitude < 1e-8:
        # Votes cancel out: fall back to the heaviest single vote.
        return float(headings[int(np.argmax(masses))])

    blended /= magnitude
    angle = np.arctan2(-blended[1], blended[0])
    return float((angle + np.pi) % (2 * np.pi) - np.pi)
|
|
|
|
|
|
def _bev_edge_points(points_3d):
|
|
"""Return finite (x, z) BEV points for one decoded edge."""
|
|
pts = np.asarray(points_3d, dtype=np.float64)
|
|
if pts.ndim != 2 or pts.shape[0] < 2 or pts.shape[1] != 3:
|
|
return None
|
|
valid = np.isfinite(pts).all(axis=1)
|
|
pts = pts[valid]
|
|
if len(pts) < 2:
|
|
return None
|
|
return pts[:, [0, 2]]
|
|
|
|
|
|
def _fit_bev_edge_axis(points_3d):
    """Fit the dominant bird's-eye-view line direction of one decoded edge.

    The direction is the first right-singular vector of the mean-centred (x, z) samples.
    Its sign is arbitrary; callers resolve the 180-degree ambiguity themselves.

    Args:
        points_3d: Array-like of shape (N, 3) with N >= 2.

    Returns:
        Tuple ``(axis, midpoint)`` with a unit-length 2-vector and the mean (x, z) point,
        or ``(None, None)`` when the samples are unusable, the SVD fails, or the samples
        have no measurable spread (so no direction is defined).
    """
    bev_points = _bev_edge_points(points_3d)
    if bev_points is None:
        return None, None

    midpoint = np.mean(bev_points, axis=0)
    centered = bev_points - midpoint
    try:
        _, singular_values, vh = np.linalg.svd(centered, full_matrices=False)
    except np.linalg.LinAlgError:
        return None, None

    # Rows of ``vh`` are always unit vectors, so a norm check on the axis cannot detect a
    # degenerate edge. Coincident samples show up as a vanishing leading singular value;
    # without this guard they would yield an arbitrary direction.
    if singular_values.size == 0:
        return None, None
    spread = float(singular_values[0])
    if not np.isfinite(spread) or spread < 1e-8:
        return None, None

    axis = np.asarray(vh[0], dtype=np.float64)
    norm = float(np.linalg.norm(axis))
    if norm < 1e-8:
        return None, None
    return axis / norm, midpoint
|
|
|
|
|
|
def _estimate_two_edge_yaw_from_candidates(
|
|
longitudinal_candidate,
|
|
side_candidate,
|
|
reference_yaw=None,
|
|
):
|
|
"""Estimate yaw from two edges in BEV while keeping the box as parallel as possible to the side edge."""
|
|
if longitudinal_candidate is None or side_candidate is None:
|
|
return float("nan")
|
|
if int(longitudinal_candidate["face_type"]) not in (0, 1) or int(side_candidate["face_type"]) not in (2, 3):
|
|
return float("nan")
|
|
|
|
side_axis, side_midpoint = _fit_bev_edge_axis(side_candidate["points_3d"])
|
|
long_axis, long_midpoint = _fit_bev_edge_axis(longitudinal_candidate["points_3d"])
|
|
if side_axis is None or long_midpoint is None or side_midpoint is None:
|
|
return float("nan")
|
|
|
|
long_face_type = int(longitudinal_candidate["face_type"])
|
|
side_face_type = int(side_candidate["face_type"])
|
|
|
|
def _rot_ccw(v):
|
|
return np.array([-v[1], v[0]], dtype=np.float64)
|
|
|
|
def _face_normal(forward, face_type):
|
|
if face_type == 0:
|
|
return forward
|
|
if face_type == 1:
|
|
return -forward
|
|
if face_type == 2:
|
|
return _rot_ccw(forward)
|
|
return -_rot_ccw(forward)
|
|
|
|
forward_candidates = (side_axis, -side_axis)
|
|
best_forward = min(
|
|
forward_candidates,
|
|
key=lambda forward: float(np.dot(_face_normal(forward, long_face_type), long_midpoint))
|
|
+ float(np.dot(_face_normal(forward, side_face_type), side_midpoint)),
|
|
)
|
|
|
|
if reference_yaw is not None and np.isfinite(reference_yaw):
|
|
ref_forward = np.array([np.cos(float(reference_yaw)), -np.sin(float(reference_yaw))], dtype=np.float64)
|
|
if float(np.dot(best_forward, ref_forward)) < 0.0:
|
|
best_forward = -best_forward
|
|
|
|
yaw = np.arctan2(-best_forward[1], best_forward[0])
|
|
return float((yaw + np.pi) % (2 * np.pi) - np.pi)
|
|
|
|
|
|
def _resolve_two_face_candidate_roles(candidates, yaw):
|
|
"""Assign one decoded edge to the longitudinal face and the other to the side face from geometry."""
|
|
if candidates is None or len(candidates) < 2 or not np.isfinite(float(yaw)):
|
|
return None
|
|
|
|
forward_bev = np.array([np.cos(float(yaw)), -np.sin(float(yaw))], dtype=np.float64)
|
|
right_bev = np.array([np.sin(float(yaw)), np.cos(float(yaw))], dtype=np.float64)
|
|
|
|
role_candidates = []
|
|
for index, candidate in enumerate(candidates[:2]):
|
|
axis, midpoint = _fit_bev_edge_axis(candidate["points_3d"])
|
|
if axis is None or midpoint is None:
|
|
return None
|
|
role_candidates.append(
|
|
{
|
|
"index": int(index),
|
|
"candidate": candidate,
|
|
"axis": axis,
|
|
"midpoint": midpoint,
|
|
"forward_align": abs(float(np.dot(axis, forward_bev))),
|
|
"right_align": abs(float(np.dot(axis, right_bev))),
|
|
}
|
|
)
|
|
|
|
def _role_label_penalty(info, role):
|
|
face_type = int(info["candidate"].get("face_type", -1))
|
|
if role == "longitudinal":
|
|
return 0 if face_type in (0, 1) else 1
|
|
return 0 if face_type in (2, 3) else 1
|
|
|
|
assignments = ((0, 1), (1, 0))
|
|
best_assignment = min(
|
|
assignments,
|
|
key=lambda assignment: (
|
|
(1.0 - role_candidates[assignment[0]]["right_align"]) + (1.0 - role_candidates[assignment[1]]["forward_align"]),
|
|
_role_label_penalty(role_candidates[assignment[0]], "longitudinal")
|
|
+ _role_label_penalty(role_candidates[assignment[1]], "side"),
|
|
-(role_candidates[assignment[0]]["right_align"] + role_candidates[assignment[1]]["forward_align"]),
|
|
),
|
|
)
|
|
longitudinal_info = role_candidates[best_assignment[0]]
|
|
side_info = role_candidates[best_assignment[1]]
|
|
return {
|
|
"forward_bev": forward_bev,
|
|
"right_bev": right_bev,
|
|
"longitudinal": longitudinal_info,
|
|
"side": side_info,
|
|
}
|
|
|
|
|
|
def _resolve_two_face_center_from_geometry(longitudinal_info, side_info, length_m, width_m):
|
|
"""Recover the two-face box center from the pair of perpendicular visible edges."""
|
|
if longitudinal_info is None or side_info is None:
|
|
return None
|
|
|
|
forward_bev = np.asarray(longitudinal_info["forward_bev"], dtype=np.float64)
|
|
right_bev = np.asarray(longitudinal_info["right_bev"], dtype=np.float64)
|
|
long_mid = np.asarray(longitudinal_info["midpoint"], dtype=np.float64)
|
|
side_mid = np.asarray(side_info["midpoint"], dtype=np.float64)
|
|
if not np.isfinite(long_mid).all() or not np.isfinite(side_mid).all():
|
|
return None
|
|
|
|
raw_longitudinal_face_type = int(longitudinal_info["candidate"].get("face_type", -1))
|
|
if raw_longitudinal_face_type == 0:
|
|
longitudinal_options = ((1.0, 0),)
|
|
elif raw_longitudinal_face_type == 1:
|
|
longitudinal_options = ((-1.0, 1),)
|
|
else:
|
|
longitudinal_options = ((1.0, 0), (-1.0, 1))
|
|
|
|
best = None
|
|
for longitudinal_sign, longitudinal_face_type in longitudinal_options:
|
|
center_from_longitudinal = long_mid - longitudinal_sign * forward_bev * (float(length_m) * 0.5)
|
|
for side_sign, side_face_type in ((1.0, 2), (-1.0, 3)):
|
|
center_from_side = side_mid - side_sign * right_bev * (float(width_m) * 0.5)
|
|
disagreement = float(np.linalg.norm(center_from_longitudinal - center_from_side))
|
|
if best is None or disagreement < best["disagreement"]:
|
|
best = {
|
|
"center_from_longitudinal": center_from_longitudinal,
|
|
"center_from_side": center_from_side,
|
|
"longitudinal_face_type": int(longitudinal_face_type),
|
|
"side_face_type": int(side_face_type),
|
|
"disagreement": disagreement,
|
|
}
|
|
|
|
if best is None:
|
|
return None
|
|
|
|
longitudinal_coord = float(np.dot(best["center_from_longitudinal"], forward_bev))
|
|
lateral_coord = float(np.dot(best["center_from_side"], right_bev))
|
|
center_bev = longitudinal_coord * forward_bev + lateral_coord * right_bev
|
|
return {
|
|
"center_bev": center_bev,
|
|
"longitudinal_face_type": int(best["longitudinal_face_type"]),
|
|
"side_face_type": int(best["side_face_type"]),
|
|
"center_from_longitudinal": best["center_from_longitudinal"],
|
|
"center_from_side": best["center_from_side"],
|
|
}
|
|
|
|
|
|
def _estimate_single_edge_yaw_with_cut_primary_face(candidate, cut_state, reference_yaw=None):
|
|
"""Resolve single-edge yaw with cut-state longitudinal semantics when available."""
|
|
if candidate is None or cut_state not in (CUT_STATE_IN, CUT_STATE_OUT):
|
|
return float("nan")
|
|
|
|
face_type = int(candidate["face_type"])
|
|
if face_type in (0, 1):
|
|
yaw = edge_points_to_yaw(candidate["points_3d"], face_type)
|
|
if reference_yaw is not None and np.isfinite(reference_yaw):
|
|
return _align_yaw_to_reference_pi_periodic(yaw, reference_yaw)
|
|
primary_face = get_pred_cut_primary_face(cut_state)
|
|
if primary_face in (0, 1) and int(primary_face) != face_type:
|
|
return float((float(yaw) + 2 * np.pi) % (2 * np.pi) - np.pi)
|
|
return float(yaw)
|
|
if face_type not in (2, 3):
|
|
return float("nan")
|
|
|
|
axis, _ = _fit_bev_edge_axis(candidate["points_3d"])
|
|
midpoint = _bev_edge_points(candidate["points_3d"])
|
|
if axis is None or midpoint is None:
|
|
return float("nan")
|
|
midpoint = np.mean(midpoint, axis=0)
|
|
|
|
yaw_candidates = [float((np.arctan2(-forward[1], forward[0]) + np.pi) % (2 * np.pi) - np.pi) for forward in (axis, -axis)]
|
|
primary_face = get_pred_cut_primary_face(cut_state)
|
|
if primary_face in (0, 1):
|
|
matched = []
|
|
for yaw in yaw_candidates:
|
|
forward = np.array([np.cos(float(yaw)), -np.sin(float(yaw))], dtype=np.float64)
|
|
longitudinal_score = float(np.dot(forward, midpoint))
|
|
if (int(primary_face) == 0 and longitudinal_score > 0.0) or (int(primary_face) == 1 and longitudinal_score < 0.0):
|
|
matched.append(float(yaw))
|
|
candidates = matched or yaw_candidates
|
|
else:
|
|
candidates = yaw_candidates
|
|
yaw = float(candidates[0])
|
|
if reference_yaw is not None and np.isfinite(reference_yaw):
|
|
return _align_yaw_to_reference_pi_periodic(yaw, reference_yaw)
|
|
return yaw
|
|
|
|
|
|
def extract_face_regressed_size_priors_from_prediction(pred_41):
    """Collect the per-face size regression values of one denormalized 41-dim prediction.

    For each face block (starting at ``FACE_OFFSETS_41``) the two values at block offsets
    3 and 4 are read as absolute sizes. Faces 0/1 expose them as ``height``/``width``,
    faces 2/3 as ``length``/``height``.

    Args:
        pred_41: Prediction vector; flattened before slicing.

    Returns:
        Dict mapping face type to a two-entry size dict. Faces whose pair is incomplete
        or not finite are omitted.
    """
    flat = np.asarray(pred_41, dtype=np.float32).reshape(-1)
    priors = {}
    for face_index, start in enumerate(FACE_OFFSETS_41):
        pair = np.asarray(flat[start + 3 : start + 5], dtype=np.float32).reshape(-1)
        if pair.shape != (2,) or not np.isfinite(pair).all():
            continue
        names = ("height", "width") if face_index in (0, 1) else ("length", "height")
        priors[int(face_index)] = {name: float(abs(value)) for name, value in zip(names, pair)}
    return priors
|
|
|
|
|
|
def _select_edge_or_regressed_size(measured_size_m, regressed_size_m, min_fraction=0.85, max_fraction=1.35):
|
|
"""Use edge-measured size when it is geometrically sane, otherwise fall back to regression."""
|
|
regressed = float(abs(regressed_size_m))
|
|
if not np.isfinite(regressed) or regressed <= 1e-6:
|
|
return None, None
|
|
|
|
measured = None if measured_size_m is None else float(abs(measured_size_m))
|
|
if measured is None or not np.isfinite(measured) or measured <= 1e-6:
|
|
return regressed, "regressed"
|
|
|
|
fraction = measured / regressed
|
|
if fraction < float(min_fraction) or fraction > float(max_fraction):
|
|
return regressed, "regressed"
|
|
return measured, "edge"
|
|
|
|
|
|
def reconstruct_edge_based_box_from_selection(edge_selection, box_center_y_m, regressed_dims, face_regressed_dims_by_type=None):
    """Rebuild a full 3D box from one or two selected visible-face bottom edges.

    Two-face mode (both edge roles resolved from geometry):
        - the side edge supplies the length and the lateral anchor
        - the front/rear edge supplies the width and the longitudinal anchor

    One-face mode:
        - a front/rear edge supplies the width; the center sits half a regressed length
          behind (face 0) or ahead of (face 1) the edge midpoint
        - a side edge supplies the length; the center sits half a regressed width beside
          the edge midpoint

    Measured sizes are only kept when they are plausible against the regressed (or
    per-face regressed) size. Partial edges never contribute a measured size. Height
    always comes from ``regressed_dims``.

    Args:
        edge_selection: Mapping with ``yaw``, ``face_types``, ``edge_points_3d`` and
            optionally ``face_is_partial``; None yields None.
        box_center_y_m: Fallback vertical center, used only if the edge samples provide no
            finite y values.
        regressed_dims: Three regressed sizes read as (length, height, width).
        face_regressed_dims_by_type: Optional ``face_type -> {size name: value}`` priors.

    Returns:
        Dict describing the box (``center``, ``dims``, ``yaw``, ``corners_3d``, ``mode``,
        sizes with their sources, and resolved face types), or None when the inputs are
        unusable.
    """
    if edge_selection is None:
        return None

    heading = float(edge_selection.get("yaw", float("nan")))
    if not np.isfinite(heading):
        return None

    reg = np.asarray(regressed_dims, dtype=np.float32).reshape(-1)
    if reg.shape != (3,) or not np.isfinite(reg).all():
        return None
    reg_length, box_height, reg_width = (float(abs(value)) for value in reg)
    if min(reg_length, box_height, reg_width) <= 1e-6:
        return None

    labels = tuple(int(label) for label in (edge_selection.get("face_types") or ()))
    batches = _edge_batches_to_list(edge_selection.get("edge_points_3d"))
    if len(labels) != len(batches):
        return None

    # Missing partial flags default to False (a non-positive repeat count adds nothing).
    partial_flags = tuple(bool(flag) for flag in (edge_selection.get("face_is_partial") or ()))
    partial_flags = partial_flags + (False,) * (len(labels) - len(partial_flags))

    candidates = []
    for label, batch, partial in zip(labels, batches, partial_flags):
        samples = np.asarray(batch, dtype=np.float32)
        if samples.ndim != 2 or samples.shape[0] < 2 or samples.shape[1] != 3 or not np.isfinite(samples).all():
            return None
        candidates.append({"face_type": int(label), "points_3d": samples, "is_partial": bool(partial)})

    forward = np.array([np.cos(heading), 0.0, -np.sin(heading)], dtype=np.float64)
    right = np.array([np.sin(heading), 0.0, np.cos(heading)], dtype=np.float64)
    priors_by_face = face_regressed_dims_by_type or {}

    def _prior_or_fallback(edge, key, fallback, max_ratio=1.25):
        """Per-face regressed size when usable and within ``max_ratio`` of the fallback."""
        if edge is None or bool(edge.get("is_partial")):
            return float(fallback)
        raw = priors_by_face.get(int(edge["face_type"]), {}).get(key)
        if raw is None or not np.isfinite(float(raw)) or float(raw) <= 1e-6:
            return float(fallback)
        prior_value = float(raw)
        fallback_value = float(abs(fallback))
        if fallback_value <= 1e-6:
            return prior_value
        ratio = max(prior_value / fallback_value, fallback_value / prior_value)
        return fallback_value if ratio > float(max_ratio) else prior_value

    def _measured_extent(edge):
        """BEV length of a fully visible edge; partial edges give no measurement."""
        if bool(edge.get("is_partial")):
            return None
        return _edge_segment_length_3d(edge["points_3d"])

    resolved_labels = list(labels)
    long_face_out = None
    side_face_out = None

    roles = _resolve_two_face_candidate_roles(candidates, heading) if len(candidates) >= 2 else None
    if roles is not None:
        shared_axes = {"forward_bev": roles["forward_bev"], "right_bev": roles["right_bev"]}
        long_info = {**roles["longitudinal"], **shared_axes}
        side_info = {**roles["side"], **shared_axes}
        long_edge = long_info["candidate"]
        side_edge = side_info["candidate"]

        measured_length = _measured_extent(side_edge)
        measured_width = _measured_extent(long_edge)
        length_m, length_source = _select_edge_or_regressed_size(
            measured_length,
            _prior_or_fallback(side_edge, "length", reg_length),
        )
        width_m, width_source = _select_edge_or_regressed_size(
            measured_width,
            _prior_or_fallback(long_edge, "width", reg_width),
        )
        if length_m is None or width_m is None:
            return None

        resolution = _resolve_two_face_center_from_geometry(long_info, side_info, length_m, width_m)
        if resolution is None:
            return None
        center_bev = np.asarray(resolution["center_bev"], dtype=np.float64)
        if center_bev.shape != (2,) or not np.isfinite(center_bev).all():
            return None
        center_x = float(center_bev[0])
        center_z = float(center_bev[1])

        # Face labels are overwritten with the geometry-consistent ones.
        long_face_out = int(resolution["longitudinal_face_type"])
        side_face_out = int(resolution["side_face_type"])
        resolved_labels[int(long_info["index"])] = long_face_out
        resolved_labels[int(side_info["index"])] = side_face_out
        mode = "two-face"
    else:
        long_edge = next((edge for edge in candidates if edge["face_type"] in (0, 1)), None)
        side_edge = next((edge for edge in candidates if edge["face_type"] in (2, 3)), None)

        if long_edge is not None:
            anchor_mid = np.mean(np.asarray(long_edge["points_3d"], dtype=np.float64), axis=0)
            measured_width = _measured_extent(long_edge)
            if not np.isfinite(anchor_mid).all():
                return None
            width_m, width_source = _select_edge_or_regressed_size(
                measured_width,
                _prior_or_fallback(long_edge, "width", reg_width),
            )
            if width_m is None:
                return None
            sign = 1.0 if int(long_edge["face_type"]) == 0 else -1.0
            anchored_center = anchor_mid - sign * forward * (float(reg_length) * 0.5)
            center_x = float(anchored_center[0])
            center_z = float(anchored_center[2])
            length_m = float(reg_length)
            width_source = width_source or "regressed"
            length_source = "regressed"
            long_face_out = int(long_edge["face_type"])
            mode = "front-rear"
        elif side_edge is not None:
            anchor_mid = np.mean(np.asarray(side_edge["points_3d"], dtype=np.float64), axis=0)
            measured_length = _measured_extent(side_edge)
            if not np.isfinite(anchor_mid).all():
                return None
            length_m, length_source = _select_edge_or_regressed_size(
                measured_length,
                _prior_or_fallback(side_edge, "length", reg_length),
            )
            if length_m is None:
                return None
            sign = 1.0 if int(side_edge["face_type"]) == 2 else -1.0
            anchored_center = anchor_mid - sign * right * (float(reg_width) * 0.5)
            center_x = float(anchored_center[0])
            center_z = float(anchored_center[2])
            width_m = float(reg_width)
            width_source = "regressed"
            side_face_out = int(side_edge["face_type"])
            mode = "side"
        else:
            return None

    # The edge samples' y is treated as the box bottom: the center is half a height away.
    edge_y = np.concatenate([edge["points_3d"][:, 1] for edge in candidates], axis=0)
    if edge_y.size == 0 or not np.isfinite(edge_y).all():
        if box_center_y_m is None or not np.isfinite(float(box_center_y_m)):
            return None
        center_y = float(box_center_y_m)
    else:
        center_y = float(np.mean(edge_y) - box_height * 0.5)

    center = np.array([float(center_x), float(center_y), float(center_z)], dtype=np.float32)
    if not np.isfinite(center).all():
        return None

    dims = np.array([float(length_m), float(box_height), float(width_m)], dtype=np.float32)
    corners_3d = compute_3d_box_corners(center, dims, float(heading), face_type=-1)
    return {
        "center": center,
        "dims": dims,
        "yaw": float(heading),
        "corners_3d": corners_3d.astype(np.float32),
        "mode": mode,
        "side_length_m": float(length_m),
        "width_m": float(width_m),
        "length_source": length_source,
        "width_source": width_source,
        "face_types": tuple(int(label) for label in resolved_labels),
        "longitudinal_face_type": long_face_out,
        "side_face_type": side_face_out,
    }
|
|
|
|
|
|
def reconstruct_two_face_box_from_edge_selection(edge_selection, box_height_m):
    """Two-face-only wrapper kept for backward compatibility.

    Delegates to ``reconstruct_edge_based_box_from_selection`` with placeholder regressed
    length and width of 1.0 and no vertical-center fallback. Only results reconstructed
    in ``"two-face"`` mode are returned.

    NOTE(review): the generalized function validates measured sizes against the regressed
    sizes, so the 1.0 placeholders also act as the reference for that check here.
    Confirm this is intended for callers of this wrapper.

    Args:
        edge_selection: Edge selection mapping forwarded unchanged.
        box_height_m: Box height placed in the middle slot of the regressed dims.

    Returns:
        The reconstructed box dict, or None when reconstruction fails or falls back to a
        one-face mode.
    """
    placeholder_dims = np.array([1.0, float(box_height_m), 1.0], dtype=np.float32)
    result = reconstruct_edge_based_box_from_selection(
        edge_selection,
        box_center_y_m=None,
        regressed_dims=placeholder_dims,
    )
    if result is not None and result.get("mode") == "two-face":
        return result
    return None
|
|
|
|
|
|
def classify_edge_yaw_prediction_bucket(face_types, is_valid):
    """Name the edge-yaw bucket of one prediction from its face types and validity flag.

    Args:
        face_types: Iterable of face type ids (0/1 = front/rear, 2/3 = side), or None.
        is_valid: Truthy when the two-face combination is considered valid.

    Returns:
        ``"two-face"`` when valid and both a front/rear and a side face are present,
        ``"side only"`` when only side faces are present, ``"front_rear_only"`` whenever a
        front/rear face is present otherwise, and None when neither kind is present.
    """
    labels = {int(label) for label in (face_types or ())}
    valid = bool(is_valid)
    front_rear_seen = bool(labels & {0, 1})
    side_seen = bool(labels & {2, 3})

    if front_rear_seen:
        # An invalid two-face combination is reported in the front/rear bucket.
        return "two-face" if (valid and side_seen) else "front_rear_only"
    if side_seen:
        return "side only"
    return None
|
|
|
|
|
|
def _align_yaw_to_reference_pi_periodic(yaw, reference_yaw):
|
|
"""Choose the pi-equivalent yaw closest to a reference heading."""
|
|
if not np.isfinite(yaw) or not np.isfinite(reference_yaw):
|
|
return float(yaw)
|
|
|
|
base = float((float(yaw) + np.pi) % (2 * np.pi) - np.pi)
|
|
alt = float((float(yaw) + 2 * np.pi) % (2 * np.pi) - np.pi)
|
|
return min(
|
|
(base, alt),
|
|
key=lambda candidate: abs(float((candidate - float(reference_yaw) + np.pi) % (2 * np.pi) - np.pi)),
|
|
)
|
|
|
|
|
|
def _draw_edge_points(img, edge_points_2d=None, edge_color=(0, 255, 0), thickness=1):
|
|
"""Draw sampled bottom-edge points and the connecting polylines."""
|
|
if edge_points_2d is None:
|
|
return img
|
|
|
|
pts = np.asarray(edge_points_2d, dtype=np.float32)
|
|
if pts.size == 0 or np.any(np.isnan(pts)):
|
|
return img
|
|
if pts.ndim == 2:
|
|
pts = pts[None, ...]
|
|
if pts.ndim != 3 or pts.shape[1] == 0:
|
|
return img
|
|
|
|
radius = max(1, thickness + 1)
|
|
for poly in pts:
|
|
pts_i = np.round(poly).astype(np.int32)
|
|
cv2.polylines(img, [pts_i], isClosed=False, color=edge_color, thickness=thickness, lineType=cv2.LINE_AA)
|
|
for pt in pts_i:
|
|
cv2.circle(img, tuple(pt), radius, edge_color, -1, cv2.LINE_AA)
|
|
return img
|
|
|
|
|
|
def decode_3d_target(
    target_42,
    cls_id,
    calib,
    img_w,
    img_h,
    face_3d_classes,
    complete_3d_classes,
    score_thr=FACE_VISIBILITY_SCORE_THRESH,
    bbox_xyxy=None,
):
    """Decode one 42-dim GT label into 3D box corners for visualization.

    Face classes are reconstructed from their best-scoring visible face and also receive
    bottom-edge samples for every visible face, plus the partial side edge of cut
    objects. Complete classes are reconstructed from the whole-box center and carry no
    face or edge information.

    Args:
        target_42: GT label vector. Index 2 is the whole-box depth, 3:6 the dims, 6 the
            yaw and 7:9 the normalized whole-box center. Each face block starts at an
            offset from ``FACE_OFFSETS_42`` and holds depth (slot 2), normalized center
            (slots 4, 5), score (slot 6) and a visibility flag (slot 7).
        cls_id: Class id of the object.
        calib: Calibration mapping (may be falsy); ``depth_scale`` defaults to 1.0.
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        face_3d_classes: Container of class ids decoded from visible faces.
        complete_3d_classes: Container of class ids decoded from the whole box.
        score_thr: Minimum face score for a face to count as visible.
        bbox_xyxy: Optional 2D box forwarded to the cut-edge decoder.

    Returns:
        Dict with ``corners_3d``, ``face_center_2d``, ``face_color``,
        ``visible_face_type``, ``visible_face_types``, ``edge_points_2d``,
        ``edge_points_3d`` and ``cls``; None when the label cannot be decoded.
    """
    label = target_42
    if np.isnan(label[2]) or label[2] <= 0:
        return None

    depth_scale = calib.get("depth_scale", 1.0) if calib else 1.0
    box_dims = label[3:6]
    heading = label[6]

    if cls_id in face_3d_classes:
        top_type, top_score, top_face = -1, -1.0, None
        seen_faces = []
        for kind, start in enumerate(FACE_OFFSETS_42):
            block = label[start : start + 8]
            flag, score = block[7], block[6]
            if flag != 1 or np.isnan(score) or score < score_thr:
                continue
            face_depth = block[2]
            if np.isnan(face_depth) or face_depth <= 0:
                continue
            seen_faces.append(kind)
            if score > top_score:
                top_score, top_type, top_face = float(score), kind, block

        if top_type < 0:
            return None

        u = top_face[4] * img_w
        v = top_face[5] * img_h
        anchor_depth = top_face[2] * depth_scale
        corners = reconstruct_3d_box_from_face((u, v), anchor_depth, box_dims, heading, top_type, calib)
        if corners is None:
            return None

        edge_points_3d, edge_points_2d = collect_face_bottom_edges(corners, seen_faces, calib, num_samples=5)
        cut_edge = decode_cut_partial_side_edge_from_gt(
            target_42,
            cls_id,
            calib,
            img_w,
            img_h,
            face_3d_classes,
            complete_3d_classes,
            bbox_xyxy=bbox_xyxy,
            corners_3d=corners,
            score_thr=score_thr,
        )
        if cut_edge is not None:
            edge_points_3d, edge_points_2d = _append_edge_batch(edge_points_3d, edge_points_2d, cut_edge)
            # Register the cut face once, keeping the original face order.
            if cut_edge["face_type"] not in seen_faces:
                seen_faces = [*seen_faces, cut_edge["face_type"]]

        return {
            "corners_3d": corners,
            "face_center_2d": (u, v),
            "face_color": FACE_COLORS[top_type],
            "visible_face_type": top_type,
            "visible_face_types": tuple(seen_faces),
            "edge_points_2d": edge_points_2d,
            "edge_points_3d": edge_points_3d,
            "cls": cls_id,
        }

    if cls_id not in complete_3d_classes:
        return None

    u = label[7] * img_w
    v = label[8] * img_h
    whole_depth = label[2] * depth_scale
    corners = reconstruct_3d_box_from_whole((u, v), whole_depth, box_dims, heading, calib)
    if corners is None:
        return None
    return {
        "corners_3d": corners,
        "face_center_2d": None,
        "face_color": None,
        "visible_face_type": None,
        "visible_face_types": (),
        "edge_points_2d": None,
        "edge_points_3d": None,
        "cls": cls_id,
    }
|
|
|
|
|
|
def decode_3d_prediction(
    pred_41,
    anchor_xy,
    stride,
    calib,
    img_w,
    img_h,
    face_3d_classes,
    complete_3d_classes,
    cls_id,
    pred_edge_60=None,
    score_thr=FACE_VISIBILITY_SCORE_THRESH,
    bbox_xyxy=None,
):
    """Decode a single 41-dim denormalized prediction to 3D box corners.

    Classes in ``face_3d_classes`` are reconstructed from the best-scoring visible face anchor; classes in
    ``complete_3d_classes`` are reconstructed from the whole-box depth/UV branch. Any other class yields None.

    Args:
        pred_41: Denormalized prediction vector. Indices 24 (depth), 25:27 (UV offset) and 27:30 (dims) are read
            here; the remaining layout is consumed by the helper functions.
        anchor_xy: Anchor position; it is added to the predicted UV offset and multiplied by ``stride``.
        stride: Anchor stride used to convert grid units to pixels.
        calib: Calibration passed through to the projection/back-projection helpers.
        img_w: Image width forwarded to the cut-state and visible-face helpers.
        img_h: Image height (accepted for interface symmetry; not read in this function).
        face_3d_classes: Container of class IDs decoded through the face branch.
        complete_3d_classes: Container of class IDs decoded through the whole-box branch.
        cls_id: Class ID of this prediction.
        pred_edge_60: Optional auxiliary edge prediction. When given, successfully decoded predicted edges
            replace the edges sampled from the reconstructed box.
        score_thr: Visible-face score threshold forwarded to the face selection helper.
        bbox_xyxy: Optional 2D box forwarded to the cut-state and visible-face helpers.

    Returns:
        Dict with keys ``corners_3d``, ``face_center_2d``, ``face_color``, ``visible_face_type``,
        ``visible_face_types``, ``edge_points_2d``, ``edge_points_3d`` and ``cls``, or None when the class is
        not handled or no valid anchor/box could be reconstructed.
    """
    p = pred_41
    rot_y = _decode_yaw_from_prediction(p)
    z_whole = p[24]
    uv_whole_offset = p[25:27]
    dims_whole = p[27:30]
    # Whole-box center in pixels: (anchor + predicted offset) * stride.
    u_whole = (anchor_xy[0] + uv_whole_offset[0]) * stride
    v_whole = (anchor_xy[1] + uv_whole_offset[1]) * stride

    if cls_id in face_3d_classes:
        _, cut_side = _resolve_pred_cut_state_for_decode(p, bbox_xyxy=bbox_xyxy, img_w=img_w)
        visible_faces = select_pred_visible_faces_for_decode(p, score_thr=score_thr, bbox_xyxy=bbox_xyxy, img_w=img_w)
        anchor_face = select_best_score_pred_face_anchor(
            p,
            anchor_xy,
            stride,
            calib,
            visible_faces,
        )
        if anchor_face is None:
            return None

        anchor_face_type = int(anchor_face["face_type"])
        anchor_face_center_3d = np.asarray(anchor_face["center_3d"], dtype=np.float32)
        if anchor_face_center_3d.shape != (3,) or not np.isfinite(anchor_face_center_3d).all():
            return None
        # The box is anchored on the selected face center, using the whole-box dims and yaw.
        corners = compute_3d_box_corners(anchor_face_center_3d, dims_whole, rot_y, anchor_face_type)

        # Default edges come from the reconstructed box geometry.
        edge_points_3d, edge_points_2d = collect_face_bottom_edges(
            corners, [face_type for face_type, _ in visible_faces], calib, num_samples=5
        )
        if pred_edge_60 is not None:
            pred_edge_points_2d, pred_edge_points_3d = [], []
            for face_type, _ in visible_faces:
                pred_edge = decode_visible_face_edge_from_prediction(pred_edge_60, face_type, anchor_xy, stride)
                if pred_edge is None:
                    continue
                points_3d = [
                    back_project_2d_to_3d(tuple(pt), depth, calib) for pt, depth in zip(pred_edge["points_2d"], pred_edge["depths"])
                ]
                # Drop the whole edge if any sample could not be back-projected.
                if any(point is None for point in points_3d):
                    continue
                pred_edge_points_2d.append(pred_edge["points_2d"].astype(np.float32, copy=False))
                pred_edge_points_3d.append(np.asarray(points_3d, dtype=np.float32))
            # Predicted edges, when at least one decoded, override the box-derived edges.
            if pred_edge_points_2d:
                edge_points_2d = _stack_edge_batches(pred_edge_points_2d)
                edge_points_3d = _stack_edge_batches(pred_edge_points_3d)

        # NOTE(review): pred_edge_60 may be None here; presumably the helper returns None in that case - confirm.
        partial_edge = decode_cut_partial_side_edge_from_prediction(
            p,
            pred_edge_60,
            anchor_xy,
            stride,
            img_w,
            cut_side=cut_side,
            corners_3d=corners,
        )
        if partial_edge is not None:
            partial_points_3d = [
                back_project_2d_to_3d(tuple(pt), depth, calib)
                for pt, depth in zip(partial_edge["points_2d"], partial_edge["depths"])
            ]
            if all(point is not None for point in partial_points_3d):
                partial_edge = {**partial_edge, "points_3d": np.asarray(partial_points_3d, dtype=np.float32)}
                visible_face_types = {face_type for face_type, _ in visible_faces}
                # Only add the partial side edge when that face is not already among the visible faces.
                if partial_edge["face_type"] not in visible_face_types:
                    edge_points_3d, edge_points_2d = _append_edge_batch(edge_points_3d, edge_points_2d, partial_edge)
                    visible_faces = [*visible_faces, (partial_edge["face_type"], 1.0)]

        return {
            "corners_3d": corners,
            "face_center_2d": tuple(np.asarray(anchor_face["center_2d"], dtype=np.float32).tolist()),
            "face_color": FACE_COLORS[anchor_face_type],
            "visible_face_type": anchor_face_type,
            "visible_face_types": tuple(face_type for face_type, _ in visible_faces),
            "edge_points_2d": edge_points_2d,
            "edge_points_3d": edge_points_3d,
            "cls": cls_id,
        }

    if cls_id in complete_3d_classes:
        corners = reconstruct_3d_box_from_whole((u_whole, v_whole), z_whole, dims_whole, rot_y, calib)
        if corners is None:
            return None
        return {
            "corners_3d": corners,
            "face_center_2d": None,
            "face_color": None,
            "visible_face_type": None,
            "visible_face_types": (),
            "edge_points_2d": None,
            "edge_points_3d": None,
            "cls": cls_id,
        }

    return None
|
|
|
|
|
|
def draw_3d_box(
    img,
    corners_3d,
    calib,
    face_center_2d=None,
    face_color=None,
    edge_points_2d=None,
    edge_color=(0, 255, 0),
    thickness=1,
):
    """Project a 3D box onto ``img`` and draw its wireframe in place.

    Args:
        img: Image to draw on (modified in place and also returned).
        corners_3d: Indexable (8, 3) corner array; it must support fancy indexing.
        calib: Calibration dict or None. A ``distort_coeffs`` entry with at least four values selects
            the distortion-aware drawing path.
        face_center_2d: Optional (u, v) of the anchor face center; drawn as a filled dot.
        face_color: Color of the face-center dot; the dot is drawn only when both values are given.
        edge_points_2d: Optional sampled edge points forwarded to ``_draw_edge_points``.
        edge_color: Color forwarded to ``_draw_edge_points``.
        thickness: Line thickness for the wireframe and edge points.

    Returns:
        The same ``img`` object.
    """
    # Swap the two groups of four corners before plotting
    # (presumably to match the front/back ordering the plot helpers expect - confirm).
    box_corners = corners_3d[[4, 5, 6, 7, 0, 1, 2, 3]]
    wire_style = {
        "color_front": (0, 0, 255),
        "color_back": (255, 0, 0),
        "color_side": (255, 255, 0),
        "thickness": thickness,
    }

    coeffs = [] if calib is None else calib.get("distort_coeffs", [])
    has_distortion = coeffs is not None and len(coeffs) >= 4
    if has_distortion:
        sampled_edges = project_3d_box_edges_with_distortion(box_corners, calib, samples_per_edge=15)
        plot_box3d_on_img_with_distortion(img, sampled_edges, **wire_style)
    else:
        projected = project_3d_to_2d(box_corners, calib)
        # A NaN projection aborts all drawing for this box, including the face dot and edge points.
        if np.isnan(projected).any():
            return img
        plot_box3d_on_img(img, projected, **wire_style)

    if not (face_center_2d is None or face_color is None):
        dot_center = (int(face_center_2d[0]), int(face_center_2d[1]))
        cv2.circle(img, dot_center, 2, face_color, -1, cv2.LINE_AA)

    _draw_edge_points(img, edge_points_2d=edge_points_2d, edge_color=edge_color, thickness=thickness)
    return img
|
|
|
|
|
|
def plot_3d_boxes_on_image(img_tensor, decoded_results, calib=None, label_text=None, scale_factor=2):
    """Draw decoded 3D boxes on an image tensor.

    Args:
        img_tensor: (3, H, W) or (N, 3, H, W) tensor normalized [0, 1] BGR. For a batch only the first
            image is used.
        decoded_results: Iterable of dicts from decode_3d_target/decode_3d_prediction (None entries and
            entries without ``corners_3d`` are skipped). None is treated as "no boxes".
        calib: Dict with fx, fy, cx, cy (optionally distort_coeffs, depth_scale). When None, a default
            pinhole calibration derived from the upscaled image size is used.
        label_text: Optional text overlay (e.g., "3D GT" or "3D Pred").
        scale_factor: Upscale factor for clearer visualization. Non-integer factors are supported; the
            output size and line thickness are rounded to integers as OpenCV requires.

    Returns:
        (H*scale, W*scale, 3) RGB uint8 numpy image. The upscaled image is returned even when no box
        was drawn.
    """
    if img_tensor.ndim == 4:
        img_tensor = img_tensor[0]

    # detach() allows tensors that still track gradients to be converted to numpy.
    im = img_tensor.detach().cpu().numpy().transpose(1, 2, 0)
    # Clip before the uint8 cast so values slightly outside [0, 1] saturate instead of wrapping around.
    im = np.ascontiguousarray(np.clip(im * 255, 0, 255), dtype=np.uint8)
    h, w = im.shape[:2]

    # cv2.resize needs an integer output size, so round in case scale_factor is a float.
    h_new = max(1, int(round(h * scale_factor)))
    w_new = max(1, int(round(w * scale_factor)))
    im = cv2.resize(im, (w_new, h_new), interpolation=cv2.INTER_LINEAR)

    # Scale calibration
    if calib is not None:
        calib_s = {
            "fx": calib["fx"] * scale_factor,
            "fy": calib["fy"] * scale_factor,
            "cx": calib["cx"] * scale_factor,
            "cy": calib["cy"] * scale_factor,
            "distort_coeffs": calib.get("distort_coeffs", []),
            "depth_scale": calib.get("depth_scale", 1.0),
        }
    else:
        calib_s = {"fx": w_new * 1.2, "fy": w_new * 1.2, "cx": w_new / 2, "cy": h_new / 2, "distort_coeffs": []}

    # OpenCV drawing calls require an integer thickness.
    line_thickness = max(1, int(round(scale_factor)))

    for d in decoded_results or ():
        if d is None or d.get("corners_3d") is None:
            continue
        # 2D annotations were decoded at the original resolution, so scale them with the image.
        fc = d.get("face_center_2d")
        if fc is not None:
            fc = (fc[0] * scale_factor, fc[1] * scale_factor)
        edge_points_2d = d.get("edge_points_2d")
        if edge_points_2d is not None:
            edge_points_2d = np.asarray(edge_points_2d, dtype=np.float32) * scale_factor
        draw_3d_box(
            im,
            d["corners_3d"],
            calib_s,
            fc,
            d.get("face_color"),
            edge_points_2d=edge_points_2d,
            thickness=line_thickness,
        )

    if label_text:
        cv2.putText(im, label_text, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 255), 3, cv2.LINE_AA)

    return cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
|
|
|
|
|
|
def decode_3d_prediction_batch(preds_3d_sel, anchors, strides, cls_ids, calib, img_w, img_h,
                               face_3d_classes, complete_3d_classes):
    """Decode a batch of selected 3D predictions one by one for visualization.

    Args:
        preds_3d_sel: (k, 41) numpy array of denormalized 3D predictions.
        anchors: (2, k) numpy array of anchor xy in grid coords (column ``i`` belongs to prediction ``i``).
        strides: (k,) numpy array with the stride of each anchor.
        cls_ids: (k,) numpy array of class IDs.
        calib: Dict with fx, fy, cx, cy.
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        face_3d_classes: Set of class IDs with face annotations.
        complete_3d_classes: Set of class IDs with whole-box 3D only.

    Returns:
        List with one entry per prediction, each being the result of decode_3d_prediction
        (a dict or None).
    """
    return [
        decode_3d_prediction(
            preds_3d_sel[idx],
            anchors[:, idx],
            float(strides[idx]),
            calib,
            img_w,
            img_h,
            face_3d_classes,
            complete_3d_classes,
            int(cls_ids[idx]),
        )
        for idx in range(len(preds_3d_sel))
    ]
|
|
|
|
|
|
def decode_pred_face_anchor(pred_41, anchor_xy, stride, calib, face_type):
    """Decode the predicted center of one face so it can serve as a visualization anchor.

    Returns a dict with ``face_type``, ``center_3d`` (float32, shape (3,)) and ``center_2d``
    (float32 pixel coordinates), or None when the face index is outside 0-3, the predicted
    depth/offsets are not usable, or back-projection fails.
    """
    if face_type not in range(4):
        return None

    face_idx = int(face_type)
    base = FACE_OFFSETS_41[face_idx]
    depth = float(pred_41[base])
    uv_delta = np.asarray(pred_41[base + 1 : base + 3], dtype=np.float32)
    usable = np.isfinite(depth) and depth > 0 and np.isfinite(uv_delta).all()
    if not usable:
        return None

    # Pixel position of the face center: (anchor + predicted offset) * stride.
    u_px = float((anchor_xy[0] + uv_delta[0]) * stride)
    v_px = float((anchor_xy[1] + uv_delta[1]) * stride)
    lifted = back_project_2d_to_3d((u_px, v_px), depth, calib)
    if lifted is None:
        return None

    center = np.asarray(lifted, dtype=np.float32)
    if center.shape != (3,) or not np.isfinite(center).all():
        return None

    return {
        "face_type": face_idx,
        "center_3d": center,
        "center_2d": np.array([u_px, v_px], dtype=np.float32),
    }
|
|
|
|
|
|
def select_best_score_pred_face_anchor(
    pred_41,
    anchor_xy,
    stride,
    calib,
    visible_faces,
):
    """Pick the visible face with the highest score and decode it as the anchor face.

    ``visible_faces`` is an iterable of ``(face_type, score)`` pairs. Pairs whose face index is
    outside 0-3 are ignored. Returns the dict produced by decode_pred_face_anchor, or None when
    no eligible face exists or decoding fails.
    """
    if not visible_faces:
        return None

    ranked = [(int(face_type), float(score)) for face_type, score in visible_faces if int(face_type) in range(4)]
    if not ranked:
        return None

    winner_type = max(ranked, key=lambda pair: pair[1])[0]
    return decode_pred_face_anchor(pred_41, anchor_xy, stride, calib, winner_type)
|
|
|
|
|
|
def _decode_yaw_from_prediction(pred_41):
    """Decode whole-box yaw from a 41-dim denormalized prediction.

    The bin with the largest logit in ``pred_41[30:34]`` is selected; the yaw is that bin's offset
    plus the arcsine of the matching residual in ``pred_41[34:38]`` (clipped to [-1, 1] so arcsin
    stays defined).
    """
    winner = int(np.argmax(pred_41[30:34]))
    sin_residual = np.clip(pred_41[34:38], -1.0, 1.0)[winner]
    return np.arcsin(sin_residual) + YAW_BIN_OFFSETS[winner]
|
|
|
|
|
|
def decode_visible_face_yaw_from_prediction(pred_41, pred_edge_60, anchor_xy, stride, face_type, calib):
    """Decode the auxiliary yaw of one visible face from its predicted bottom-edge samples.

    Returns NaN when no edge prediction is available, the face index is outside 0-3, or the edge
    cannot be lifted to 3D points.
    """
    not_available = float("nan")
    if pred_edge_60 is None:
        return not_available
    if face_type not in range(4):
        return not_available

    edge = decode_visible_face_edge_from_prediction(pred_edge_60, face_type, anchor_xy, stride)
    edge_3d = _decoded_edge_to_points_3d(edge, calib)
    return not_available if edge_3d is None else edge_points_to_yaw(edge_3d, face_type)
|
|
|
|
|
|
def decode_visible_face_yaw_from_gt(
    target_42,
    cls_id,
    calib,
    img_w,
    img_h,
    face_3d_classes,
    complete_3d_classes,
    face_type,
    score_thr=FACE_VISIBILITY_SCORE_THRESH,
    bbox_xyxy=None,
):
    """Decode the yaw of one GT visible face from its sampled bottom-edge geometry.

    All arguments are forwarded to decode_visible_face_edge_from_gt. Returns NaN when that helper
    yields no edge for the requested face.
    """
    edge = decode_visible_face_edge_from_gt(
        target_42,
        cls_id,
        calib,
        img_w,
        img_h,
        face_3d_classes,
        complete_3d_classes,
        face_type=face_type,
        score_thr=score_thr,
        bbox_xyxy=bbox_xyxy,
    )
    if edge is not None:
        return edge_points_to_yaw(edge["points_3d"], edge["face_type"])
    return float("nan")
|
|
|
|
|
|
def decode_edge_yaw_selection_from_prediction(
    pred_41,
    pred_edge_60,
    anchor_xy,
    stride,
    calib,
    score_thr=EDGE_YAW_VALID_VISIBILITY_SCORE_THRESH,
    bbox_xyxy=None,
    img_w=None,
    img_h=None,
    max_lateral_dist_m=None,
    cut_side_min_visible_length_ratio=EDGE_YAW_CUT_SIDE_MIN_VISIBLE_LENGTH_RATIO,
    max_faces=2,
):
    """Select the face-edge geometry used for prediction-time edge-yaw re-estimation.

    The selection intentionally uses a face-based primary face plus an optional strict two-face companion:
    - choose the first face exactly as face-based reconstruction would choose its visible-face anchor
    - then choose at most one companion face from the opposite face family using the stricter threshold
    - for cut states, the cut classification chooses the longitudinal face first
    - for true border-cut objects, prefer the decoded partial side edge over a full side edge

    Args:
        pred_41: Denormalized 41-dim prediction.
        pred_edge_60: Auxiliary edge prediction; when None the "empty" result is returned immediately.
        anchor_xy: Anchor position in grid coordinates.
        stride: Anchor stride.
        calib: Calibration forwarded to the back-projection helpers.
        score_thr: Score threshold used when collecting companion face candidates.
        bbox_xyxy: Optional 2D box; also used to guess the image width when ``img_w`` is None.
        img_w: Optional image width. When None it is inferred from ``bbox_xyxy`` or from the whole-box center.
        img_h: Optional image height used by the in-image (drawable) checks.
        max_lateral_dist_m: Optional limit on the anchor-face lateral distance; None disables the check.
        cut_side_min_visible_length_ratio: Minimum visible-length / box-length ratio for a cut side edge.
        max_faces: Maximum number of faces to select.

    Returns:
        Dict with keys ``yaw``, ``face_types``, ``face_is_partial``, ``edge_points_2d``, ``edge_points_3d``,
        ``two_face_eligible``, ``lateral_distance_m``, ``lateral_ok``, ``cut_side_visible_length_m``,
        ``cut_side_visible_length_ratio``, ``cut_side_visible_ratio_ok`` and ``is_valid``.
    """
    # Result template returned when nothing can be selected.
    empty = {
        "yaw": float("nan"),
        "face_types": (),
        "face_is_partial": (),
        "edge_points_2d": None,
        "edge_points_3d": None,
        "two_face_eligible": False,
        "lateral_distance_m": None,
        "lateral_ok": False if max_lateral_dist_m is not None else True,
        "cut_side_visible_length_m": None,
        "cut_side_visible_length_ratio": None,
        "cut_side_visible_ratio_ok": None,
        "is_valid": False,
    }
    if pred_edge_60 is None:
        return empty

    inferred_img_w = float(img_w) if img_w is not None else None
    inferred_img_h = float(img_h) if img_h is not None else None
    if inferred_img_w is None:
        # Heuristic width: the bbox right edge if available, otherwise twice the whole-box center u.
        if bbox_xyxy is not None:
            inferred_img_w = max(float(np.asarray(bbox_xyxy, dtype=np.float64)[2]), 1.0)
        else:
            inferred_img_w = max(float((anchor_xy[0] + pred_41[25]) * stride) * 2.0, 1.0)

    # Primary face: same visible-face selection (module-level threshold) that face-based decoding uses.
    decode_visible_faces = list(
        select_pred_visible_faces_for_decode(
            pred_41,
            score_thr=FACE_VISIBILITY_SCORE_THRESH,
            bbox_xyxy=bbox_xyxy,
            img_w=inferred_img_w,
        )
    )
    anchor_face = select_best_score_pred_face_anchor(pred_41, anchor_xy, stride, calib, decode_visible_faces)
    lateral_distance_m = None if anchor_face is None else _prediction_lateral_distance_m_from_center(anchor_face.get("center_3d"))
    lateral_ok = bool(
        max_lateral_dist_m is None or (lateral_distance_m is not None and lateral_distance_m < float(max_lateral_dist_m))
    )
    # Highest-scoring decode-visible face index, or -1 when there is none.
    primary_candidate_face_type = max(
        ((int(face_type), float(score)) for face_type, score in decode_visible_faces if int(face_type) in range(4)),
        key=lambda item: item[1],
        default=(-1, float("-inf")),
    )[0]

    raw_cut_state = get_pred_cut_state(pred_41)
    primary_face = get_pred_cut_primary_face(raw_cut_state)
    # Companion candidates use the caller-provided (stricter by default) threshold.
    visible_faces = list(select_pred_visible_faces(pred_41, score_thr=score_thr))
    longitudinal_faces = {face_type for face_type, _ in visible_faces if face_type in (0, 1)}
    # When both longitudinal faces (0 and 1) pass, keep only the one the cut state designates.
    if primary_face in longitudinal_faces and len(longitudinal_faces) > 1:
        visible_faces = [(face_type, score) for face_type, score in visible_faces if face_type not in (0, 1) or face_type == primary_face]

    def _decode_face_candidate(face_type, score, require_in_image=True):
        # Decode one face edge into a candidate dict, or return None when it is unusable.
        if face_type not in range(4):
            return None
        decoded = decode_visible_face_edge_from_prediction(pred_edge_60, face_type, anchor_xy, stride)
        if decoded is None:
            return None
        if require_in_image:
            drawable = _decoded_edge_points_are_drawable(decoded["points_2d"], inferred_img_w, inferred_img_h)
        else:
            # The primary edge should follow face-based anchor selection even when one sample lands just
            # outside the image. Companions stay fully in-image so the strict two-face case remains stable.
            drawable = _decoded_edge_points_are_drawable(decoded["points_2d"])
        if not drawable:
            return None
        points_3d = _decoded_edge_to_points_3d(decoded, calib)
        if points_3d is None:
            return None
        return {
            "face_type": int(face_type),
            "score": float(score),
            "is_partial": False,
            "points_2d": np.asarray(decoded["points_2d"], dtype=np.float32),
            "points_3d": np.asarray(points_3d, dtype=np.float32),
        }

    face_candidates = {}
    for face_type, score in visible_faces:
        candidate = _decode_face_candidate(face_type, score)
        if candidate is not None:
            face_candidates[int(face_type)] = candidate

    primary_candidate = None
    if primary_candidate_face_type in range(4):
        primary_score = next(
            (float(score) for face_type, score in decode_visible_faces if int(face_type) == int(primary_candidate_face_type)),
            float("-inf"),
        )
        primary_candidate = _decode_face_candidate(
            int(primary_candidate_face_type),
            primary_score,
            require_in_image=False,
        )
        # The primary face must not be offered again as a companion.
        if primary_candidate is not None:
            face_candidates.pop(int(primary_candidate_face_type), None)

    resolved_cut_state, cut_side = _resolve_pred_cut_state_for_decode(pred_41, bbox_xyxy=bbox_xyxy, img_w=inferred_img_w)
    partial_candidate = None
    cut_side_visible_length_m = None
    cut_side_visible_length_ratio = None
    cut_side_visible_ratio_ok = None
    if resolved_cut_state != CUT_STATE_NORMAL:
        cut_corners = _reconstruct_pred_corners_for_cut_edge(pred_41, anchor_xy, stride, calib, cut_state=resolved_cut_state)
        partial_edge = decode_cut_partial_side_edge_from_prediction(
            pred_41,
            pred_edge_60,
            anchor_xy,
            stride,
            img_w=inferred_img_w,
            cut_side=cut_side,
            corners_3d=cut_corners,
        )
        if partial_edge is not None and not _decoded_edge_points_are_drawable(
            partial_edge["points_2d"], inferred_img_w, inferred_img_h
        ):
            partial_edge = None
        partial_points_3d = _decoded_edge_to_points_3d(partial_edge, calib)
        cut_side_visible_length_m = _edge_segment_length_3d(partial_points_3d)
        # pred_41[27] is the first predicted dimension; it is used here as the box length.
        box_length_m = float(abs(pred_41[27])) if np.isfinite(pred_41[27]) else None
        if cut_side_visible_length_m is not None and box_length_m is not None and box_length_m > 1e-6:
            cut_side_visible_length_ratio = float(cut_side_visible_length_m / box_length_m)
            cut_side_visible_ratio_ok = bool(cut_side_visible_length_ratio > float(cut_side_min_visible_length_ratio))
        else:
            cut_side_visible_ratio_ok = False
        if partial_edge is not None and partial_points_3d is not None:
            partial_face_type = int(partial_edge["face_type"])
            # Reuse the full-edge candidate's score for this face if there is one, else default to 1.0.
            partial_score = face_candidates.get(partial_face_type, {}).get("score", 1.0)
            partial_candidate = {
                "face_type": partial_face_type,
                "score": float(partial_score),
                "is_partial": True,
                "points_2d": np.asarray(partial_edge["points_2d"], dtype=np.float32),
                "points_3d": np.asarray(partial_points_3d, dtype=np.float32),
            }
            # The partial edge replaces the full side edge of the same face.
            face_candidates.pop(partial_face_type, None)

    # A cut side edge that is too short relative to the box length is not used.
    if resolved_cut_state != CUT_STATE_NORMAL and not cut_side_visible_ratio_ok:
        partial_candidate = None

    selected_candidates = []

    def _best_candidate(candidates):
        # Highest score wins; ties are broken in favour of the lower face index.
        if not candidates:
            return None
        return max(candidates, key=lambda item: (float(item["score"]), -int(item["face_type"])))

    cut_expected_side_face = None
    if raw_cut_state != CUT_STATE_NORMAL:
        # Reuse the corners reconstructed above when available; otherwise rebuild them from the raw cut state.
        cut_corners_for_side = (
            cut_corners
            if resolved_cut_state != CUT_STATE_NORMAL and cut_corners is not None
            else _reconstruct_pred_corners_for_cut_edge(pred_41, anchor_xy, stride, calib, cut_state=raw_cut_state)
        )
        cut_expected_side_face = get_cut_object_side_face(raw_cut_state, corners_3d=cut_corners_for_side)

    if primary_candidate is not None:
        selected_candidates.append(primary_candidate)

    if len(selected_candidates) < int(max_faces):
        secondary_candidate = None
        if primary_candidate is not None and int(primary_candidate["face_type"]) in (0, 1):
            # Longitudinal primary: companion is a side face (2/3). Preference order is the partial cut edge,
            # then the side face expected from the cut state, then the best remaining side face.
            secondary_candidate = partial_candidate
            if secondary_candidate is None and cut_expected_side_face in (2, 3) and (resolved_cut_state == CUT_STATE_NORMAL or cut_side_visible_ratio_ok):
                secondary_candidate = face_candidates.pop(int(cut_expected_side_face), None)
            if secondary_candidate is None and (resolved_cut_state == CUT_STATE_NORMAL or cut_side_visible_ratio_ok):
                secondary_candidate = _best_candidate([candidate for candidate in face_candidates.values() if candidate["face_type"] in (2, 3)])
                if secondary_candidate is not None:
                    face_candidates.pop(int(secondary_candidate["face_type"]), None)
        elif primary_candidate is not None and int(primary_candidate["face_type"]) in (2, 3):
            # Side primary: companion is a longitudinal face (0/1), preferring the cut-state primary face.
            longitudinal_candidate = None
            if primary_face is not None:
                longitudinal_candidate = face_candidates.pop(int(primary_face), None)
            if longitudinal_candidate is None:
                longitudinal_candidate = _best_candidate([candidate for candidate in face_candidates.values() if candidate["face_type"] in (0, 1)])
                if longitudinal_candidate is not None:
                    face_candidates.pop(int(longitudinal_candidate["face_type"]), None)
            secondary_candidate = longitudinal_candidate
        if secondary_candidate is not None:
            selected_candidates.append(secondary_candidate)

    if not selected_candidates:
        # Nothing usable: return the empty template but keep the cut-side diagnostics computed above.
        return {
            **empty,
            "cut_side_visible_length_m": cut_side_visible_length_m,
            "cut_side_visible_length_ratio": cut_side_visible_length_ratio,
            "cut_side_visible_ratio_ok": cut_side_visible_ratio_ok,
        }

    edge_points_3d = _stack_edge_batches([candidate["points_3d"] for candidate in selected_candidates])
    edge_points_2d = _stack_edge_batches([candidate["points_2d"] for candidate in selected_candidates])
    face_types = tuple(int(candidate["face_type"]) for candidate in selected_candidates)
    face_is_partial = tuple(bool(candidate.get("is_partial", False)) for candidate in selected_candidates)

    if len(selected_candidates) >= 2:
        longitudinal_selected = next((candidate for candidate in selected_candidates if candidate["face_type"] in (0, 1)), None)
        side_selected = next((candidate for candidate in selected_candidates if candidate["face_type"] in (2, 3)), None)
        yaw = _estimate_two_edge_yaw_from_candidates(
            longitudinal_selected,
            side_selected,
            reference_yaw=_decode_yaw_from_prediction(pred_41),
        )
        # Fall back to the generic multi-edge estimate when the two-edge estimate is not finite.
        if not np.isfinite(yaw):
            yaw = visible_face_edges_to_yaw(
                {candidate["face_type"]: candidate["points_3d"] for candidate in selected_candidates},
                face_scores={candidate["face_type"]: candidate["score"] for candidate in selected_candidates},
            )
    else:
        only_candidate = selected_candidates[0]
        if raw_cut_state in (CUT_STATE_IN, CUT_STATE_OUT):
            yaw = _estimate_single_edge_yaw_with_cut_primary_face(
                only_candidate,
                cut_state=raw_cut_state,
                reference_yaw=_decode_yaw_from_prediction(pred_41),
            )
            if not np.isfinite(yaw):
                yaw = edge_points_to_yaw(only_candidate["points_3d"], only_candidate["face_type"])
        else:
            yaw = edge_points_to_yaw(only_candidate["points_3d"], only_candidate["face_type"])

    has_longitudinal = any(candidate["face_type"] in (0, 1) for candidate in selected_candidates)
    has_side = any(candidate["face_type"] in (2, 3) for candidate in selected_candidates)
    two_face_eligible = len(selected_candidates) >= 2 and has_longitudinal and has_side
    # Valid only with one longitudinal plus one side face, a finite yaw, and a passing lateral check.
    is_valid = bool(two_face_eligible and np.isfinite(yaw) and lateral_ok)
    return {
        "yaw": float(yaw),
        "face_types": face_types,
        "face_is_partial": face_is_partial,
        "edge_points_2d": edge_points_2d,
        "edge_points_3d": edge_points_3d,
        "two_face_eligible": bool(two_face_eligible),
        "lateral_distance_m": lateral_distance_m,
        "lateral_ok": lateral_ok,
        "cut_side_visible_length_m": cut_side_visible_length_m,
        "cut_side_visible_length_ratio": cut_side_visible_length_ratio,
        "cut_side_visible_ratio_ok": cut_side_visible_ratio_ok,
        "is_valid": bool(is_valid),
    }
|
|
|
|
|
|
def decode_multi_visible_face_yaw_from_prediction(
    pred_41,
    pred_edge_60,
    anchor_xy,
    stride,
    calib,
    fallback_face_type=None,
    score_thr=FACE_VISIBILITY_SCORE_THRESH,
    bbox_xyxy=None,
    img_w=None,
):
    """Decode visible-face yaw using the same direct two-edge logic as prediction-time edge-yaw selection.

    Resolution order:
    1. the two-face yaw from decode_edge_yaw_selection_from_prediction, when eligible and finite;
    2. the single-face yaw of ``fallback_face_type`` when it is a valid face index (0-3);
    3. otherwise a yaw combined from every decodable visible face edge.

    Args:
        pred_41: Denormalized 41-dim prediction.
        pred_edge_60: Auxiliary edge prediction aligned to the same anchor, or None.
        anchor_xy: Anchor position in grid coordinates.
        stride: Anchor stride.
        calib: Calibration forwarded to the back-projection helpers.
        fallback_face_type: Optional face index used when the two-face estimate is unavailable.
        score_thr: Visible-face score threshold.
        bbox_xyxy: Optional 2D box; also used to guess the image width when ``img_w`` is None.
        img_w: Optional image width.

    Returns:
        Yaw as a float; NaN when it cannot be determined.
    """
    has_fallback = fallback_face_type in range(4)

    if pred_edge_60 is None:
        return (
            decode_visible_face_yaw_from_prediction(pred_41, pred_edge_60, anchor_xy, stride, fallback_face_type, calib)
            if has_fallback
            else float("nan")
        )

    inferred_img_w = float(img_w) if img_w is not None else None
    if inferred_img_w is None:
        # Heuristic width: the bbox right edge if available, otherwise twice the whole-box center u.
        if bbox_xyxy is not None:
            inferred_img_w = max(float(np.asarray(bbox_xyxy, dtype=np.float64)[2]), 1.0)
        else:
            inferred_img_w = max(float((anchor_xy[0] + pred_41[25]) * stride) * 2.0, 1.0)

    selection = decode_edge_yaw_selection_from_prediction(
        pred_41,
        pred_edge_60,
        anchor_xy,
        stride,
        calib,
        score_thr=score_thr,
        bbox_xyxy=bbox_xyxy,
        img_w=inferred_img_w,
    )
    if selection.get("two_face_eligible") and np.isfinite(selection.get("yaw", float("nan"))):
        return float(selection["yaw"])

    # With a valid fallback face the per-face edges below are never consulted, so return before
    # decoding them instead of building dictionaries that would be thrown away.
    if has_fallback:
        return decode_visible_face_yaw_from_prediction(pred_41, pred_edge_60, anchor_xy, stride, fallback_face_type, calib)

    face_edges_3d, face_scores = {}, {}
    for face_type, score in select_pred_visible_faces_for_decode(
        pred_41, score_thr=score_thr, bbox_xyxy=bbox_xyxy, img_w=inferred_img_w
    ):
        decoded = decode_visible_face_edge_from_prediction(pred_edge_60, face_type, anchor_xy, stride)
        points_3d = _decoded_edge_to_points_3d(decoded, calib)
        if points_3d is None:
            continue
        face_edges_3d[face_type] = points_3d
        face_scores[face_type] = float(score)

    return visible_face_edges_to_yaw(face_edges_3d, face_scores=face_scores)
|
|
|
|
|
|
def decode_multi_visible_face_yaw_from_gt(
    target_42,
    cls_id,
    calib,
    img_w,
    img_h,
    face_3d_classes,
    complete_3d_classes,
    fallback_face_type=None,
    score_thr=FACE_VISIBILITY_SCORE_THRESH,
    bbox_xyxy=None,
):
    """Decode visible-face yaw from GT edge geometry with the same direct two-edge logic.

    Edges of all GT-visible faces (plus a cut partial side edge, if any) are collected. When at
    least two faces contribute and the combined yaw is finite, it is returned. Otherwise the
    single-face yaw of ``fallback_face_type`` is used when that is a valid face index, and the
    combined estimate is returned as a last resort.
    """
    # Positional arguments shared by every GT edge helper below.
    gt_args = (target_42, cls_id, calib, img_w, img_h, face_3d_classes, complete_3d_classes)

    edges_by_face, score_by_face = {}, {}
    for candidate_type, face_row in select_gt_visible_faces(target_42, score_thr=score_thr):
        edge = decode_visible_face_edge_from_gt(
            *gt_args,
            face_type=candidate_type,
            score_thr=score_thr,
            bbox_xyxy=bbox_xyxy,
        )
        if edge is not None:
            edges_by_face[edge["face_type"]] = edge["points_3d"]
            score_by_face[edge["face_type"]] = float(face_row[6])

    partial = decode_cut_partial_side_edge_from_gt(*gt_args, bbox_xyxy=bbox_xyxy, score_thr=score_thr)
    if partial is not None:
        partial_type = partial["face_type"]
        # The partial edge overrides any full edge of the same face and gets a score of at least 1.0.
        edges_by_face[partial_type] = partial["points_3d"]
        score_by_face[partial_type] = max(score_by_face.get(partial_type, 0.0), 1.0)

    if len(edges_by_face) >= 2:
        combined = visible_face_edges_to_yaw(edges_by_face, face_scores=score_by_face)
        if np.isfinite(combined):
            return combined

    if fallback_face_type in range(4):
        return decode_visible_face_yaw_from_gt(
            *gt_args,
            fallback_face_type,
            score_thr=score_thr,
            bbox_xyxy=bbox_xyxy,
        )
    return visible_face_edges_to_yaw(edges_by_face, face_scores=score_by_face)
|
|
|
|
|
|
def _back_project_metric_point(u, v, z, calib):
|
|
"""Back-project a metric point to 3D center coordinates."""
|
|
if calib is not None and z > 0:
|
|
center_3d = back_project_2d_to_3d((u, v), z, calib)
|
|
if center_3d is None:
|
|
x3d, y3d = float("nan"), float("nan")
|
|
else:
|
|
x3d, y3d = center_3d[0], center_3d[1]
|
|
else:
|
|
x3d, y3d = float("nan"), float("nan")
|
|
return np.array([x3d, y3d, z], dtype=np.float32)
|
|
|
|
|
|
def select_gt_visible_faces(target_42, score_thr=FACE_VISIBILITY_SCORE_THRESH):
    """Return ``(face_type, face_slice)`` pairs for GT faces eligible for face-based metrics.

    A face qualifies when its visibility flag (slot 7) equals 1, its score (slot 6) is not NaN and
    reaches ``score_thr``, and its depth (slot 2) is a positive, non-NaN number.
    """

    def _is_eligible(face):
        score = face[6]
        if face[7] != 1 or np.isnan(score) or score < score_thr:
            return False
        depth = face[2]
        return not (np.isnan(depth) or depth <= 0)

    face_rows = ((face_type, target_42[off : off + 8]) for face_type, off in enumerate(FACE_OFFSETS_42))
    return [(face_type, face) for face_type, face in face_rows if _is_eligible(face)]
|
|
|
|
|
|
def select_pred_visible_faces(pred_41, score_thr=FACE_VISIBILITY_SCORE_THRESH):
    """Return ``(face_type, score)`` pairs for predicted faces whose score clears ``score_thr``.

    The score of each face is read from slot 5 of its block in ``pred_41``; NaN scores are dropped.
    """
    scored = ((face_type, float(pred_41[off + 5])) for face_type, off in enumerate(FACE_OFFSETS_41))
    return [(face_type, score) for face_type, score in scored if not (np.isnan(score) or score < score_thr)]
|
|
|
|
|
|
def is_gt_face_cut(target_42, face_type):
    """Return whether a GT face was invalidated by crop handling.

    A face counts as cut when the first six values of its 8-value block are all -1 and its
    visibility flag (slot 7) is not positive. Face indices outside 0-3 yield False.
    """
    if face_type not in range(4):
        return False
    start = FACE_OFFSETS_42[face_type]
    block = target_42[start : start + 8]
    geometry_wiped = np.all(block[:6] == -1)
    return geometry_wiped and block[7] <= 0
|
|
|
|
|
|
def is_gt_cut_object(target_42):
    """Return whether a GT face-based object is labeled as cut-in or cut-out.

    That is the case when both side faces (2 and 3) are cut together with either face 1 or face 0.
    """
    face0_cut, face1_cut, face2_cut, face3_cut = (is_gt_face_cut(target_42, idx) for idx in range(4))
    return (face1_cut and face2_cut and face3_cut) or (face0_cut and face2_cut and face3_cut)
|
|
|
|
|
|
def extract_3d_attrs_from_prediction(pred_41, anchor_xy, stride, calib, face_type=None, pred_edge_60=None):
    """Extract raw 3D attributes from a single 41-dim denormalized prediction.

    Args:
        pred_41: Denormalized prediction.
        anchor_xy: Anchor point in grid coordinates.
        stride: Anchor stride.
        calib: Per-sample calibration.
        face_type: Optional face index (0-3). When provided, decode depth/UV from the matching face branch.
        pred_edge_60: Optional denormalized auxiliary edge prediction aligned to the same anchor.

    Returns:
        Dict with center, depth, dims, yaw, edge_yaw, uv, visible_face_type and face_center, or None if
        the requested face branch is invalid (``face_type`` given but not in 0-3).
    """
    # Reject bad face indices up front: a negative index would otherwise silently wrap around to
    # another face's offset, and an index above 3 would raise IndexError.
    if face_type is not None and face_type not in range(4):
        return None

    p = pred_41
    rot_y = _decode_yaw_from_prediction(p)
    dims = p[27:30].astype(np.float32)

    if face_type is None:
        # Whole-box branch: depth at index 24, UV offset at 25:27; no edge-based yaw.
        z = float(p[24])
        uv_offset = p[25:27]
        edge_yaw = float("nan")
    else:
        off = FACE_OFFSETS_41[int(face_type)]
        z = float(p[off])
        uv_offset = p[off + 1 : off + 3]
        edge_yaw = decode_multi_visible_face_yaw_from_prediction(
            p,
            pred_edge_60,
            anchor_xy,
            stride,
            calib,
            fallback_face_type=face_type,
        )

    # Pixel position: (anchor + predicted offset) * stride.
    u = float((anchor_xy[0] + uv_offset[0]) * stride)
    v = float((anchor_xy[1] + uv_offset[1]) * stride)
    center = _back_project_metric_point(u, v, z, calib)
    return {
        "center": center,
        "depth": z,
        "dims": dims,
        "yaw": float(rot_y),
        "edge_yaw": float(edge_yaw),
        "uv": np.array([u, v], dtype=np.float32),
        "visible_face_type": None if face_type is None else int(face_type),
        "face_center": None if face_type is None else center,
    }
|
|
|
|
|
|
def face_center_from_corners(corners_3d, face_type):
    """Return the mean of the four corners belonging to one face of a 3D box.

    Returns None when no corners are given, the face index is unknown, or the corners are not a
    finite (8, 3) array.
    """
    if corners_3d is None:
        return None
    if face_type not in FACE_CORNERS:
        return None

    pts = np.asarray(corners_3d, dtype=np.float32)
    well_formed = pts.shape == (8, 3) and np.isfinite(pts).all()
    if not well_formed:
        return None

    vertex_ids = list(FACE_CORNERS[face_type])
    return pts[vertex_ids].mean(axis=0)
|
|
|
|
|
|
def rebuild_box_corners_for_visualization(
    corners_3d,
    dims,
    yaw,
    visible_face_type=None,
    face_center_3d=None,
    box_center_3d=None,
):
    """Rebuild box corners for visualization while keeping the appropriate anchor fixed.

    Anchor priority:
    1. ``visible_face_type`` given: anchor on that face's center (``face_center_3d``, or derived
       from ``corners_3d`` when it is None).
    2. ``box_center_3d`` given: anchor on that box center.
    3. otherwise: anchor on the mean of ``corners_3d``.

    Returns None whenever dims, yaw, or the chosen anchor is malformed or non-finite.
    """
    size = np.asarray(dims, dtype=np.float32)
    if size.shape != (3,) or not np.isfinite(size).all():
        return None
    heading = float(yaw)
    if not np.isfinite(heading):
        return None

    def _is_point(arr):
        # A usable anchor is a finite 3-vector.
        return arr.shape == (3,) and np.isfinite(arr).all()

    if visible_face_type is not None:
        face_idx = int(visible_face_type)
        if face_center_3d is None:
            anchor = face_center_from_corners(corners_3d, face_idx)
        else:
            anchor = np.asarray(face_center_3d, dtype=np.float32)
        if anchor is None or not _is_point(anchor):
            return None
        return compute_3d_box_corners(anchor, size, heading, face_type=face_idx)

    if box_center_3d is not None:
        anchor = np.asarray(box_center_3d, dtype=np.float32)
        if not _is_point(anchor):
            return None
        return compute_3d_box_corners(anchor, size, heading, face_type=-1)

    box_pts = np.asarray(corners_3d, dtype=np.float32)
    if box_pts.shape != (8, 3) or not np.isfinite(box_pts).all():
        return None
    return compute_3d_box_corners(box_pts.mean(axis=0), size, heading, face_type=-1)
|
|
|
|
|
|
def extract_3d_attrs_from_gt(
    target_42,
    cls_id,
    calib,
    img_w,
    img_h,
    face_3d_classes,
    complete_3d_classes,
    face_type=None,
    score_thr=FACE_VISIBILITY_SCORE_THRESH,
):
    """Decode the raw 3D attributes of one ground-truth label.

    Without ``face_type`` the whole-box anchor is read from the label (depth at index 2, normalized
    image position at indices 7 and 8). With ``face_type`` the anchor comes from that face's 8-value
    slice starting at ``FACE_OFFSETS_42[face_type]`` (depth at 2, normalized position at 4 and 5,
    score at 6, visibility flag at 7), and an edge yaw is additionally decoded.

    Args:
        target_42: GT 42-dim label (must support slicing and ``.astype``).
        cls_id: Integer class ID.
        calib: Per-sample calibration mapping; ``depth_scale`` is read from it (default 1.0).
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        face_3d_classes: Class IDs that use face annotations.
        complete_3d_classes: Class IDs with whole-box-only 3D labels.
        face_type: Optional face index (0-3). When provided, decode only that GT-visible face.
        score_thr: Minimum visible-face score used to treat a GT face as valid.

    Returns:
        Dict with keys ``center``, ``depth``, ``dims``, ``yaw``, ``edge_yaw``, ``uv``,
        ``visible_face_type`` and ``face_center``, or None if the requested representation is invalid
        (non-positive/NaN depth, class without 3D labels, face requested for a non-face class, face
        index outside 0-3, or a face that is not visible / scored below ``score_thr``).
    """
    label = target_42
    box_depth = label[2]
    if np.isnan(box_depth) or box_depth <= 0:
        return None

    uses_faces = cls_id in face_3d_classes
    if not (uses_faces or cls_id in complete_3d_classes):
        return None

    depth_scale = 1.0 if not calib else calib.get("depth_scale", 1.0)
    dims = label[3:6].astype(np.float32)
    rot_y = float(label[6])

    if face_type is None:
        # Whole-box anchor.
        raw_depth, raw_u, raw_v = box_depth, label[7], label[8]
    else:
        if not uses_faces or face_type not in range(4):
            return None
        start = FACE_OFFSETS_42[face_type]
        face = label[start : start + 8]
        score, is_vis = face[6], face[7]
        if is_vis != 1 or np.isnan(score) or score < score_thr:
            return None
        raw_depth, raw_u, raw_v = face[2], face[4], face[5]
        if np.isnan(raw_depth) or raw_depth <= 0:
            return None

    # Scale depth to metric units and normalized image coordinates to pixels.
    z = float(raw_depth * depth_scale)
    u = float(raw_u * img_w)
    v = float(raw_v * img_h)

    if face_type is None:
        edge_yaw = float("nan")  # edge yaw is only decoded for face-based requests
    else:
        edge_yaw = decode_multi_visible_face_yaw_from_gt(
            label,
            cls_id,
            calib,
            img_w,
            img_h,
            face_3d_classes,
            complete_3d_classes,
            fallback_face_type=face_type,
            score_thr=score_thr,
        )

    center = _back_project_metric_point(u, v, z, calib)
    face_based = face_type is not None
    return {
        "center": center,
        "depth": z,
        "dims": dims,
        "yaw": rot_y,
        "edge_yaw": float(edge_yaw),
        "uv": np.array([u, v], dtype=np.float32),
        "visible_face_type": int(face_type) if face_based else None,
        "face_center": center if face_based else None,
    }
|
|
|
|
|
|
# ---- Bird's Eye View (BEV) visualization ----
|
|
|
|
def draw_bev_blank(max_range=200, lateral_range=50):
    """Build an empty bird's-eye-view canvas with a metric grid and an ego marker.

    The canvas is rendered at a fixed 10 pixels per meter. The ego vehicle sits at the bottom center,
    forward distance grows upwards and lateral offset grows to the right.

    Args:
        max_range: Forward range in meters (canvas height is ``max_range * 10`` pixels).
        lateral_range: Lateral range in meters on each side (canvas width is ``lateral_range * 20`` pixels).

    Returns:
        (bev_img, pixels_per_meter, ego_center_x, ego_center_y) tuple.
    """
    ppm = 10  # pixels per meter
    width_px = lateral_range * 2 * ppm
    height_px = max_range * ppm
    canvas = np.full((height_px, width_px, 3), 40, dtype=np.uint8)  # dark gray background

    ego_cx = width_px // 2
    ego_cy = height_px  # bottom edge, horizontally centered

    grid_color = (80, 80, 80)

    # Horizontal lines every 20 m, each labelled with its forward distance.
    # The 0 m row equals the canvas height, so it lies just outside the image and is skipped.
    for dist_m in range(0, max_range + 1, 20):
        row = ego_cy - dist_m * ppm
        if row < 0 or row >= height_px:
            continue
        cv2.line(canvas, (0, row), (width_px, row), grid_color, 1)
        cv2.putText(canvas, f"{dist_m}m", (5, row - 3), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (150, 150, 150), 1)

    # Vertical lines every 10 m of lateral offset.
    for offset_m in range(-lateral_range, lateral_range + 1, 10):
        col = ego_cx + offset_m * ppm
        if col < 0 or col >= width_px:
            continue
        cv2.line(canvas, (col, 0), (col, height_px), grid_color, 1)

    # Filled rectangle marking the ego vehicle.
    cv2.rectangle(canvas, (ego_cx - 8, ego_cy - 20), (ego_cx + 8, ego_cy), (255, 200, 0), -1)

    return canvas, ppm, ego_cx, ego_cy
|
|
|
|
|
|
def draw_bev_object(bev_img, center_3d, dims, rot_y, ppm, ego_cx, ego_cy, is_pred=True):
    """Draw a single object on BEV image.

    The object is rendered as a rotated rectangle outline plus an arrow of half the object length
    starting at its center. Objects are silently skipped (nothing is drawn) when the center is
    non-finite or not in front of the camera, when length, width or yaw are non-finite, or when the
    projected center falls outside the canvas.

    Args:
        bev_img: BEV canvas image, modified in place.
        center_3d: (x, y, z) in camera coordinates (x=right, z=forward).
        dims: (l, h, w) dimensions.
        rot_y: Rotation angle in radians.
        ppm: Pixels per meter.
        ego_cx: Ego center x in pixels.
        ego_cy: Ego center y in pixels.
        is_pred: True for predictions (red), False for GT (green).

    Returns:
        None.
    """
    x, _, z = center_3d
    length, _, width = dims

    if not (np.isfinite(x) and np.isfinite(z) and z > 0):
        return

    # Non-finite size or yaw cannot be converted to pixel integers (int(nan) / int(inf) raise), so
    # skip such objects instead of aborting the whole BEV render.
    if not (np.isfinite(length) and np.isfinite(width) and np.isfinite(rot_y)):
        return

    # Camera coords: x=right, z=forward → BEV: right=+x, up=+z
    bev_x = int(ego_cx + x * ppm)
    bev_y = int(ego_cy - z * ppm)

    if not (0 <= bev_x < bev_img.shape[1] and 0 <= bev_y < bev_img.shape[0]):
        return

    color = (0, 0, 255) if is_pred else (0, 200, 0)  # Red for pred, green for GT

    # Draw rotated rectangle: width spans image x and length spans image y at zero rotation.
    # NOTE(review): the rectangle is rotated by -rot_y while the arrow below points along
    # (sin(rot_y), -cos(rot_y)); these appear to mirror each other for yaws that are not multiples
    # of 90 degrees — confirm the intended yaw convention before changing either sign.
    rect = ((bev_x, bev_y), (int(width * ppm), int(length * ppm)), -np.degrees(rot_y))
    box_pts = cv2.boxPoints(rect).astype(np.intp)
    cv2.drawContours(bev_img, [box_pts], 0, color, 2)

    # Arrow showing forward direction
    dx = int(length * 0.5 * ppm * np.sin(rot_y))
    dy = int(-length * 0.5 * ppm * np.cos(rot_y))
    cv2.arrowedLine(bev_img, (bev_x, bev_y), (bev_x + dx, bev_y + dy), color, 1, tipLength=0.3)
|
|
|
|
|
|
def create_bev_image(gt_3d_attrs_list, pred_3d_attrs_list, max_range=200, lateral_range=50):
    """Render ground truth and predictions together on a bird's-eye-view canvas.

    Ground-truth objects are drawn first (green) so that predictions (red) are painted on top.
    ``None`` entries in either list are ignored.

    Args:
        gt_3d_attrs_list: Iterable of dicts (or None) providing ``center``, ``dims`` and ``yaw``.
        pred_3d_attrs_list: Iterable of dicts (or None) providing ``center``, ``dims`` and ``yaw``.
        max_range: Forward range in meters.
        lateral_range: Lateral range in meters.

    Returns:
        RGB numpy image (H, W, 3).
    """
    canvas, scale, origin_x, origin_y = draw_bev_blank(max_range, lateral_range)

    # Order matters: GT goes down first, predictions overlay it.
    layers = ((gt_3d_attrs_list, False), (pred_3d_attrs_list, True))
    for objects, predicted in layers:
        for entry in objects:
            if entry is None:
                continue
            draw_bev_object(
                canvas,
                entry["center"],
                entry["dims"],
                entry["yaw"],
                scale,
                origin_x,
                origin_y,
                is_pred=predicted,
            )

    # Legend in the top-left corner, colored like the objects it names.
    font = cv2.FONT_HERSHEY_SIMPLEX
    cv2.putText(canvas, "GT", (10, 20), font, 0.6, (0, 200, 0), 2)
    cv2.putText(canvas, "Pred", (10, 45), font, 0.6, (0, 0, 255), 2)

    # Drawing used BGR colors; convert for RGB consumers.
    return cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
|