# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

# Ground 3D Detection Dataset for Mono3D
# Joint 2D + 3D detection with fisheye camera support
# Label format: 19-col (complete_3d) or 51-col (face_3d) per object

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt,
# or 3) list: [path/to/imgs1, path/to/imgs2, ..]
#
# NOTE(review): `path` points at the driving_png_20260202 snapshot while `train`
# and `val` are absolute paths into driving_png_20260320 - confirm the root is
# not stale.

# Dataset root dir of images
path: /mnt/nfs/mono3d/xdzhu_data/Mono3d/Mono3d_4face_2m_g1m3/driving_png_20260202
# Train GT list
train: /mnt/nfs/mono3d/xdzhu_data/Mono3d/Mono3d_4face_2m_g1m3/driving_png_20260320/train.txt
# Val GT list
val: /mnt/nfs/mono3d/xdzhu_data/Mono3d/Mono3d_4face_2m_g1m3/driving_png_20260320/val.txt
# Test images (optional); explicitly null because no test split is configured
test: null

# Class mapping: string class names to numeric IDs
# Format: class_name: class_id. Several names may share one ID, which merges
# them into a single class (e.g., truck: 6 and tanker: 6 below).
class_map:
  car: 0
  suv: 1
  pickup: 2
  medium_car: 3
  van: 4
  bus: 5
  # ID 6 is shared by truck, tanker, large_truck, construction_vehicle,
  # car_carrier and platform_truck (the last two are listed at the end).
  truck: 6
  tanker: 6
  large_truck: 6
  construction_vehicle: 6
  special_vehicle: 7
  unknown: 8
  pedestrian: 9
  # Riders share ID 10; riderless two-wheelers share ID 11.
  bicyclist: 10
  motorcyclist: 10
  bicycle: 11
  motorcycle: 11
  tricycle: 12
  tricyclist: 12
  traffic_sign: 13
  wheel: 14
  plate: 15
  face: 16
  car_fake: 17
  bicyclist_fake: 18
  pedestrian_fake: 19
  car_carrier: 6
  platform_truck: 6

# Training parameters

# Keep boxes whose width or height is at least this many pixels.
# NOTE(review): confirm whether the filter needs either side or both sides to
# reach this size.
min_wh: 8.0

# Color space of input images
use_yuv444: false  # Standard RGB/BGR images (not YUV444)

# Difficulty-based loss weighting: one weight per difficulty level (4 entries).
# NOTE(review): presumably indexed by the label's difficulty value 0..3 -
# confirm against the loss code.
difficulty_weights: [1.0, 1.0, 0.7, 0.3]

# 3D Detection parameters
# Class groups for 3D label parsing (by mapped class ID, i.e. the values of
# class_map, not the class names).
# NOTE(review): IDs 13-16 (traffic_sign, wheel, plate, face) appear in no group;
# presumably they are 2D-only - confirm.

# Vehicles with 4-face annotations (51-col labels):
# car, suv, pickup, medium_car, van, bus, truck group, special_vehicle,
# unknown, car_fake
face_3d_classes: [0, 1, 2, 3, 4, 5, 6, 7, 8, 17]
# Pedestrian/bike with whole-box 3D only (19-col labels):
# pedestrian, riders, two-wheelers, tricycles, bicyclist_fake, pedestrian_fake
complete_3d_classes: [9, 10, 11, 12, 18, 19]
# Fake classes with an additional dedicated 3D prediction head:
# car_fake, bicyclist_fake, pedestrian_fake
fake_3d_classes: [17, 18, 19]

# Camera
ori_img_size: [1920, 1080]  # Original image size [width, height]

# ROI-specific presets resolved by train_mono3d.py via --roi=<name>
# NOTE(review): default_roi is presumably the preset used when --roi is not
# given - confirm in train_mono3d.py.
default_roi: roi0
roi_configs:
  roi0:
    # ROI size [width, height], crop from [w//2, vanishing_point_y].
    # Keeps the full image width and only crops part of the top/bottom area;
    # suited to a wide field of view.
    roi: [1920, 880]
    # Target focal length x for the virtual camera (pixels).
    # 537 is not computed at runtime; it is a manually chosen target focal
    # length / normalization reference, typically derived from the target input
    # size, the typical focal length after ROI cropping, or earlier model
    # settings.
    virtual_fx: 537
    # -1 = always use virtual_fx only, >0 = probability of virtual camera
    # augmentation. With -1 the virtual-camera branch is never taken; only the
    # plain ROI crop + resize + depth normalization runs.
    virtual_camera_prob: -1.0
    # Crop center mode: cxvy = crop around image center x and vanishing point y
    crop_center_mode: cxvy
  roi1:
    # ROI size [width, height], crop from [vanishing_point_x, vanishing_point_y]
    roi: [768, 352]
    # Target focal length x for the virtual camera (pixels)
    virtual_fx: 537
    # 50% probability of taking the virtual-camera branch
    virtual_camera_prob: 0.5
    # Enable virtual-camera random zoom during validation/TensorBoard while
    # keeping the crop center fixed
    virtual_camera_val_zoom: true
    # Crop center mode: vxvy = crop around vanishing point x and vanishing
    # point y
    crop_center_mode: vxvy

# 3D normalization scales (for loss computation).
# These should normally come from offline statistics (e.g. the mean/scale of z
# depth and object size over the training set) or be carried over from earlier
# models. They serve to:
#   1. give the 3D head a sensible physical prior;
#   2. keep the magnitudes of the z and size losses stable;
#   3. keep training outputs and decode/visualization on the same scale.
norm_scales_3d:
  # z3d_norm = (z3d - z3d_offset) / z3d_scale
  # At a true depth of 39.937 m the normalized value is 0; every additional
  # 24.415 m of true depth adds 1 to it.
  z3d_scale: 24.415
  z3d_offset: 39.937
  # size_norm = (size - size_offset) / size_scale
  # At a size of 3.78 m the normalized value is 0; every additional 1.945 m
  # adds 1 to it.
  size_scale: 1.945
  size_offset: 3.780
  yaw_scale: 1.5707963  # pi/2