Files
yolov26_3d/tests/test_train_mono3d.py
2026-06-24 09:35:46 +08:00

656 lines
21 KiB
Python
Executable File

import json
from pathlib import Path
import cv2
import numpy as np
import pytest
from PIL import Image
from train_mono3d import resolve_data_yaml_for_roi
from ultralytics.data.dataset import Ground3DCalibrationError, YOLOGround3DDataset
from ultralytics.data.ground3d_augment import read_calib_from_path
from ultralytics.utils import YAML
def write_dataset_yaml(path: Path) -> None:
    """Save a dataset config with two ROI presets (``roi0`` and ``roi1``) to ``path``.

    ``roi0`` is declared as the default preset. Each preset carries its own
    ROI size, ``virtual_fx``, ``virtual_camera_prob`` and ``crop_center_mode``;
    ``roi1`` additionally sets ``virtual_camera_val_zoom``.
    """
    roi0_preset = {
        "roi": [1920, 880],
        "virtual_fx": 537,
        "virtual_camera_prob": -1.0,
        "crop_center_mode": "cxvy",
    }
    roi1_preset = {
        "roi": [768, 352],
        "virtual_fx": 537,
        "virtual_camera_prob": 0.5,
        "virtual_camera_val_zoom": True,
        "crop_center_mode": "vxvy",
    }
    config = {
        "path": "/tmp/dataset",
        "train": "train.txt",
        "val": "val.txt",
        "class_map": {"car": 0},
        "default_roi": "roi0",
        "roi_configs": {"roi0": roi0_preset, "roi1": roi1_preset},
    }
    YAML.save(file=path, data=config)
def write_clip_level_camera4(calib_dir: Path, image_size: tuple[int, int], focal_u: float = 50.0) -> Path:
    """Write ``<calib_dir>/L2_calib/camera4.json`` and return its path.

    The JSON holds ``focal_u`` for both focal lengths, a principal point at
    half of each ``image_size`` component, zero pitch and an empty
    distortion-coefficient list. Missing parent directories are created.
    """
    target = calib_dir.joinpath("L2_calib", "camera4.json")
    target.parent.mkdir(parents=True, exist_ok=True)
    intrinsics = {
        "focal_u": focal_u,
        "focal_v": focal_u,
        "cu": image_size[0] / 2,
        "cv": image_size[1] / 2,
        "pitch": 0.0,
        "distort_coeffs": [],
    }
    payload = json.dumps(intrinsics)
    target.write_text(payload, encoding="utf-8")
    return target
def create_ground3d_dataset(
    tmp_path: Path,
    image_sizes: list[tuple[int, int]],
    imgsz: tuple[int, int] = (64, 32),
    roi: tuple[int, int] | None = None,
    ori_img_size: tuple[int, int] | None = None,
) -> tuple[YOLOGround3DDataset, list[str]]:
    """Create an on-disk fixture and return the dataset plus its image paths.

    For every entry of ``image_sizes`` (sequences numbered from 1) a label
    file, a PNG image of that size and a clip-level ``camera4.json`` are
    written. Labels and calibration go under ``tmp_path / "gt"``, images under
    ``tmp_path / "dataset"``. ``roi`` and ``ori_img_size`` fall back to
    ``imgsz`` when not given.

    Returns:
        The constructed ``YOLOGround3DDataset`` and the resolved image paths,
        ordered like ``image_sizes``.
    """
    if not roi:
        roi = imgsz
    if not ori_img_size:
        ori_img_size = imgsz

    gt_root = tmp_path / "gt"
    image_root = tmp_path / "dataset"
    list_lines: list[str] = []
    resolved_images: list[str] = []

    for idx, image_size in enumerate(image_sizes, start=1):
        # Label entry, referenced relative to the label root.
        rel_label = Path(f"labels/seq{idx}/frame_{idx:04d}.txt")
        label_path = gt_root / rel_label
        label_path.parent.mkdir(parents=True, exist_ok=True)
        label_path.write_text("car 0.5 0.5 0.25 0.25 0\n", encoding="utf-8")

        # Matching image under the image root.
        image_path = image_root / "images" / f"seq{idx}" / f"frame_{idx:04d}.png"
        image_path.parent.mkdir(parents=True, exist_ok=True)
        Image.new("RGB", image_size, color=(32, 64, 96)).save(image_path)

        # Clip-level calibration stored next to the labels.
        write_clip_level_camera4(gt_root / "calib" / f"seq{idx}", image_size)

        list_lines.append(rel_label.as_posix())
        resolved_images.append(str(image_path.resolve()))

    list_file = gt_root / "train.txt"
    list_file.write_text("\n".join(list_lines) + "\n", encoding="utf-8")

    data_cfg = {
        "path": str(image_root),
        "class_map": {"car": 0},
        "roi": list(roi),
        "ori_img_size": list(ori_img_size),
        "virtual_fx": 50,
        "virtual_camera_prob": -1.0,
        "crop_center_mode": "cxvy",
    }
    dataset = YOLOGround3DDataset(
        img_path=str(list_file),
        imgsz=list(imgsz),
        batch_size=1,
        augment=False,
        rect=False,
        stride=32,
        pad=0.5,
        prefix="test: ",
        task="detect",
        data=data_cfg,
    )
    return dataset, resolved_images
def test_resolve_data_yaml_for_roi_uses_default_roi(tmp_path):
    """With no preset requested, ``roi0`` is selected and flattened into a new YAML."""
    source_yaml = tmp_path / "mono3d_ground.yaml"
    write_dataset_yaml(source_yaml)

    resolved_path, selected_roi = resolve_data_yaml_for_roi(str(source_yaml), None)

    assert selected_roi == "roi0"
    # The resolved config must be a different file from the input.
    assert resolved_path != str(source_yaml)

    flattened = YAML.load(resolved_path)
    expected_fields = (
        ("roi", [1920, 880]),
        ("virtual_camera_prob", -1.0),
        ("crop_center_mode", "cxvy"),
    )
    for key, expected in expected_fields:
        assert flattened[key] == expected
    # Preset bookkeeping keys must not be present in the resolved config.
    for preset_key in ("default_roi", "roi_configs"):
        assert preset_key not in flattened
def test_resolve_data_yaml_for_roi_supports_explicit_override(tmp_path):
    """Requesting ``roi1`` explicitly yields that preset's fields in the resolved YAML."""
    source_yaml = tmp_path / "mono3d_ground.yaml"
    write_dataset_yaml(source_yaml)

    resolved_path, selected_roi = resolve_data_yaml_for_roi(str(source_yaml), "roi1")
    assert selected_roi == "roi1"

    flattened = YAML.load(resolved_path)
    assert flattened["roi"] == [768, 352]
    assert flattened["virtual_camera_prob"] == 0.5
    # Identity check: the flag must round-trip as a real boolean.
    assert flattened["virtual_camera_val_zoom"] is True
    assert flattened["crop_center_mode"] == "vxvy"
def test_resolve_data_yaml_for_roi_uses_unique_temp_paths(tmp_path):
    """Two resolutions of the same preset must return different output paths."""
    source_yaml = tmp_path / "mono3d_ground.yaml"
    write_dataset_yaml(source_yaml)
    source_arg = str(source_yaml)

    first_path, first_roi = resolve_data_yaml_for_roi(source_arg, "roi1")
    second_path, second_roi = resolve_data_yaml_for_roi(source_arg, "roi1")

    assert first_roi == "roi1"
    assert second_roi == "roi1"
    assert first_path != second_path
def test_resolve_data_yaml_for_roi_rejects_unknown_preset(tmp_path):
    """An undefined preset name raises ``ValueError`` listing the available presets."""
    source_yaml = tmp_path / "mono3d_ground.yaml"
    write_dataset_yaml(source_yaml)
    source_arg = str(source_yaml)

    with pytest.raises(ValueError, match="Available presets: roi0, roi1"):
        resolve_data_yaml_for_roi(source_arg, "roi2")
def test_resolve_data_yaml_for_roi_rejects_missing_required_ground3d_fields(tmp_path):
    """A preset lacking ``crop_center_mode`` raises ``ValueError`` naming that field."""
    source_yaml = tmp_path / "mono3d_ground.yaml"
    write_dataset_yaml(source_yaml)

    # Rewrite the config with the field removed from roi1.
    broken_cfg = YAML.load(source_yaml)
    broken_cfg["roi_configs"]["roi1"].pop("crop_center_mode")
    YAML.save(source_yaml, broken_cfg)

    source_arg = str(source_yaml)
    with pytest.raises(ValueError, match="crop_center_mode"):
        resolve_data_yaml_for_roi(source_arg, "roi1")
def test_ground3d_dataset_resolves_gt_list_to_image_and_calib(tmp_path):
    """A label-list entry resolves to its image and to the clip-level calibration.

    Labels and calibration are written under ``gt``; the image is written
    under ``dataset``. The list file references the label by relative path.
    """
    gt_root = tmp_path / "gt"
    image_root = tmp_path / "dataset"
    rel_label = Path("labels/seq0/frame_0001.txt")
    calib_dir = gt_root / "calib" / "seq0"

    label_path = gt_root / rel_label
    label_path.parent.mkdir(parents=True, exist_ok=True)
    label_path.write_text("car 0.5 0.5 0.25 0.25 0\n", encoding="utf-8")

    image_path = image_root / "images" / "seq0" / "frame_0001.png"
    image_path.parent.mkdir(parents=True, exist_ok=True)
    Image.new("RGB", (64, 32), color=(32, 64, 96)).save(image_path)

    write_clip_level_camera4(calib_dir, (64, 32))

    list_file = gt_root / "train.txt"
    list_file.write_text(f"{rel_label.as_posix()}\n", encoding="utf-8")

    data_cfg = {
        "path": str(image_root),
        "class_map": {"car": 0},
        "roi": [64, 32],
        "ori_img_size": [64, 32],
        "virtual_fx": 50,
        "virtual_camera_prob": -1.0,
        "crop_center_mode": "cxvy",
    }
    dataset = YOLOGround3DDataset(
        img_path=str(list_file),
        imgsz=[64, 32],
        batch_size=1,
        augment=False,
        rect=False,
        stride=32,
        pad=0.5,
        prefix="test: ",
        task="detect",
        data=data_cfg,
    )

    # Labels are stored as (resolved label root, relative label path).
    assert len(dataset.labels) == 1
    assert dataset.labels[0] == (str(gt_root.resolve()), rel_label.as_posix())

    # The per-frame candidate file does not exist; only camera4.json was written.
    frame_candidate = calib_dir / "frame_0001.json"
    raw_calib = read_calib_from_path(
        str(image_path.resolve()),
        image_root=image_root,
        extra_calib_candidates=[str(frame_candidate.resolve())],
    )
    assert raw_calib["focal_u"] == 50.0

    sample = dataset.get_image_and_label(0)
    assert sample["im_file"] == str(image_path.resolve())
    assert sample["img"].shape[:2] == (32, 64)
    assert sample["calib"]["fx"] == pytest.approx(50.0)
def test_ground3d_dataset_prefers_label_root_calibration_over_image_root(tmp_path):
    """Calibration under the label root wins over one under the image root.

    The label-root file carries ``focal_u=80`` and the image-root file
    ``focal_u=50``; the loaded sample must report ``fx`` of 80.
    """
    gt_root = tmp_path / "gt"
    image_root = tmp_path / "dataset"
    rel_label = Path("labels/seq0/frame_0001.txt")

    label_path = gt_root / rel_label
    label_path.parent.mkdir(parents=True, exist_ok=True)
    label_path.write_text("car 0.5 0.5 0.25 0.25 0\n", encoding="utf-8")

    image_path = image_root / "images" / "seq0" / "frame_0001.png"
    image_path.parent.mkdir(parents=True, exist_ok=True)
    Image.new("RGB", (64, 32), color=(32, 64, 96)).save(image_path)

    # Calibration that is expected to be used (label root).
    write_clip_level_camera4(gt_root / "calib" / "seq0", (64, 32), focal_u=80.0)

    # Competing per-frame calibration under the image root.
    competing_calib = image_root / "calib" / "seq0" / "frame_0001.json"
    competing_calib.parent.mkdir(parents=True, exist_ok=True)
    competing_payload = {
        "focal_u": 50.0,
        "focal_v": 50.0,
        "cu": 32.0,
        "cv": 16.0,
        "pitch": 0.0,
        "distort_coeffs": [],
    }
    competing_calib.write_text(json.dumps(competing_payload), encoding="utf-8")

    list_file = gt_root / "train.txt"
    list_file.write_text(f"{rel_label.as_posix()}\n", encoding="utf-8")

    data_cfg = {
        "path": str(image_root),
        "class_map": {"car": 0},
        "roi": [64, 32],
        "ori_img_size": [64, 32],
        "virtual_fx": 50,
        "virtual_camera_prob": -1.0,
        "crop_center_mode": "cxvy",
    }
    dataset = YOLOGround3DDataset(
        img_path=str(list_file),
        imgsz=[64, 32],
        batch_size=1,
        augment=False,
        rect=False,
        stride=32,
        pad=0.5,
        prefix="test: ",
        task="detect",
        data=data_cfg,
    )

    sample = dataset.get_image_and_label(0)
    assert sample["calib"]["fx"] == pytest.approx(80.0)
def test_ground3d_dataset_reads_clip_level_camera4_from_label_root(tmp_path):
    """Clip-level ``camera4.json`` under the label root is found for a ``seq/clip`` layout.

    The list file references the label with a leading ``./``. The raw
    calibration's pitch is compared against ``np.deg2rad`` of the stored value.
    """
    gt_root = tmp_path / "gt_20260320"
    image_root = tmp_path / "dataset_20260202"
    rel_label = Path("seq0/clip0/labels/frame_0001.txt")
    clip_calib_dir = gt_root / "seq0" / "clip0" / "calib"

    label_path = gt_root / rel_label
    label_path.parent.mkdir(parents=True, exist_ok=True)
    label_path.write_text(
        "car 0.5 0.5 0.25 0.25 1 2 3 4 5 6 0.1 0.2 0.3 9 10 11 12 0\n",
        encoding="utf-8",
    )

    image_path = image_root / "seq0" / "clip0" / "images" / "frame_0001.png"
    image_path.parent.mkdir(parents=True, exist_ok=True)
    Image.new("RGB", (1920, 1080), color=(32, 64, 96)).save(image_path)

    camera4_path = clip_calib_dir / "L2_calib" / "camera4.json"
    camera4_path.parent.mkdir(parents=True, exist_ok=True)
    camera4_payload = {
        "focal_u": 1450.9230324555967,
        "focal_v": 1458.0023697476843,
        "cu": 949.5149041625389,
        "cv": 569.9146363123367,
        "distort_coeffs": [-0.6, 0.7, -0.5, 0.2],
        "pitch": 0.214,
        "roll": 1.077,
        "yaw": -0.643,
    }
    camera4_path.write_text(json.dumps(camera4_payload), encoding="utf-8")

    list_file = gt_root / "train.txt"
    list_file.write_text(f"./{rel_label.as_posix()}\n", encoding="utf-8")

    # The per-frame candidate is never created; only camera4.json exists.
    frame_candidate = clip_calib_dir / "frame_0001.json"
    raw_calib = read_calib_from_path(
        str(image_path.resolve()),
        image_root=image_root,
        extra_calib_candidates=[str(frame_candidate.resolve())],
    )
    assert raw_calib is not None
    assert raw_calib["focal_u"] == pytest.approx(1450.9230324555967)
    assert raw_calib["pitch"] == pytest.approx(np.deg2rad(0.214))

    data_cfg = {
        "path": str(image_root),
        "class_map": {"car": 0},
        "complete_3d_classes": [0],
        "roi": [768, 352],
        "ori_img_size": [1920, 1080],
        "virtual_fx": 537,
        "virtual_camera_prob": -1.0,
        "crop_center_mode": "cxvy",
    }
    dataset = YOLOGround3DDataset(
        img_path=str(list_file),
        imgsz=[768, 352],
        batch_size=1,
        augment=False,
        rect=False,
        stride=32,
        pad=0.5,
        prefix="test: ",
        task="detect",
        data=data_cfg,
    )

    sample = dataset.get_image_and_label(0)
    assert sample["im_file"] == str(image_path.resolve())
    assert sample["calib"]["fx"] > 0
def test_ground3d_dataset_applies_class_filter_and_rect_lazily(tmp_path):
    """With ``rect=True`` and ``classes=[1]`` only the truck label survives.

    The label file holds one car (class 0) and one truck (class 1). The test
    also checks the shapes of ``dataset.batch`` and ``dataset.batch_shapes``.
    """
    gt_root = tmp_path / "gt"
    image_root = tmp_path / "dataset"
    rel_label = Path("labels/seq0/frame_0001.txt")

    label_path = gt_root / rel_label
    label_path.parent.mkdir(parents=True, exist_ok=True)
    label_path.write_text(
        "car 0.5 0.5 0.25 0.25 0\ntruck 0.4 0.4 0.2 0.2 0\n",
        encoding="utf-8",
    )

    image_path = image_root / "images" / "seq0" / "frame_0001.png"
    image_path.parent.mkdir(parents=True, exist_ok=True)
    Image.new("RGB", (64, 32), color=(32, 64, 96)).save(image_path)

    write_clip_level_camera4(gt_root / "calib" / "seq0", (64, 32))

    list_file = gt_root / "train.txt"
    list_file.write_text(f"{rel_label.as_posix()}\n", encoding="utf-8")

    data_cfg = {
        "path": str(image_root),
        "class_map": {"car": 0, "truck": 1},
        "roi": [64, 32],
        "ori_img_size": [64, 32],
        "virtual_fx": 50,
        "virtual_camera_prob": -1.0,
        "crop_center_mode": "cxvy",
    }
    dataset = YOLOGround3DDataset(
        img_path=str(list_file),
        imgsz=[64, 32],
        batch_size=1,
        augment=False,
        rect=True,
        stride=32,
        pad=0.5,
        prefix="test: ",
        task="detect",
        classes=[1],
        data=data_cfg,
    )

    # One image -> one batch index and one (h, w) batch shape.
    assert dataset.batch.shape == (1,)
    assert dataset.batch_shapes.shape == (1, 2)

    sample = dataset.get_image_and_label(0)
    remaining_classes = sample["cls"].reshape(-1).tolist()
    assert remaining_classes == [1.0]
def test_ground3d_dataset_keeps_missing_3d_targets_as_nan_for_2d_only_labels(tmp_path):
    """A label without 3D fields yields an all-NaN ``(1, 42)`` ``labels_3d`` array.

    Checked both on the raw sample from ``get_image_and_label`` and on the
    sample returned by indexing the dataset.
    """
    gt_root = tmp_path / "gt"
    image_root = tmp_path / "dataset"
    rel_label = Path("labels/seq0/frame_0001.txt")

    label_path = gt_root / rel_label
    label_path.parent.mkdir(parents=True, exist_ok=True)
    label_path.write_text("car 0.5 0.5 0.25 0.25 1 0\n", encoding="utf-8")

    image_path = image_root / "images" / "seq0" / "frame_0001.png"
    image_path.parent.mkdir(parents=True, exist_ok=True)
    Image.new("RGB", (64, 32), color=(32, 64, 96)).save(image_path)

    write_clip_level_camera4(gt_root / "calib" / "seq0", (64, 32))

    list_file = gt_root / "train.txt"
    list_file.write_text(f"{rel_label.as_posix()}\n", encoding="utf-8")

    data_cfg = {
        "path": str(image_root),
        "class_map": {"car": 0},
        "roi": [64, 32],
        "ori_img_size": [64, 32],
        "virtual_fx": 50,
        "virtual_camera_prob": -1.0,
        "crop_center_mode": "cxvy",
    }
    dataset = YOLOGround3DDataset(
        img_path=str(list_file),
        imgsz=[64, 32],
        batch_size=1,
        augment=False,
        rect=False,
        stride=32,
        pad=0.5,
        prefix="test: ",
        task="detect",
        data=data_cfg,
    )

    # Raw sample: checked with NumPy.
    raw_sample = dataset.get_image_and_label(0)
    raw_targets = raw_sample["labels_3d"]
    assert raw_targets.shape == (1, 42)
    assert np.isnan(raw_targets).all()

    # Indexed sample: checked through the object's own isnan() method.
    sample = dataset[0]
    indexed_targets = sample["labels_3d"]
    assert indexed_targets.shape == (1, 42)
    assert indexed_targets.isnan().all().item()
def test_ground3d_dataset_falls_back_to_jpg_when_png_is_missing(tmp_path):
    """When only a ``.jpg`` image exists on disk, the sample points at that file."""
    gt_root = tmp_path / "gt"
    image_root = tmp_path / "dataset"
    rel_label = Path("labels/seq0/frame_0001.txt")

    label_path = gt_root / rel_label
    label_path.parent.mkdir(parents=True, exist_ok=True)
    label_path.write_text("car 0.5 0.5 0.25 0.25 0\n", encoding="utf-8")

    # Deliberately a JPEG: no frame_0001.png is created.
    jpg_path = image_root / "images" / "seq0" / "frame_0001.jpg"
    jpg_path.parent.mkdir(parents=True, exist_ok=True)
    Image.new("RGB", (64, 32), color=(32, 64, 96)).save(jpg_path)

    write_clip_level_camera4(gt_root / "calib" / "seq0", (64, 32))

    list_file = gt_root / "train.txt"
    list_file.write_text(f"{rel_label.as_posix()}\n", encoding="utf-8")

    data_cfg = {
        "path": str(image_root),
        "class_map": {"car": 0},
        "roi": [64, 32],
        "ori_img_size": [64, 32],
        "virtual_fx": 50,
        "virtual_camera_prob": -1.0,
        "crop_center_mode": "cxvy",
    }
    dataset = YOLOGround3DDataset(
        img_path=str(list_file),
        imgsz=[64, 32],
        batch_size=1,
        augment=False,
        rect=False,
        stride=32,
        pad=0.5,
        prefix="test: ",
        task="detect",
        data=data_cfg,
    )

    sample = dataset.get_image_and_label(0)
    assert sample["im_file"] == str(jpg_path.resolve())
    assert sample["img"].shape[:2] == (32, 64)
def test_ground3d_dataset_skips_to_next_image_when_imread_fails(tmp_path, monkeypatch):
    """When decoding the first image fails, indexing returns the next image.

    ``cv2.imread`` is patched to return ``None`` for the first image only.
    Indexing sample 0 must then return the second image and flag index 0 in
    ``dataset._bad_image_mask``.
    """
    dataset, image_files = create_ground3d_dataset(tmp_path, [(64, 32), (64, 32)])
    original_imread = cv2.imread

    def fake_imread(path, *args, **kwargs):
        # ``flags`` is optional in cv2.imread, so forward whatever the caller
        # passed (positional, keyword or nothing). This mirrors the
        # ``tracked_resize`` stubs elsewhere in this file.
        if str(path) == image_files[0]:
            return None
        return original_imread(path, *args, **kwargs)

    monkeypatch.setattr(cv2, "imread", fake_imread)

    sample = dataset[0]
    assert sample["im_file"] == image_files[1]
    assert dataset._bad_image_mask[0]
def test_ground3d_dataset_allows_missing_calibration_for_2d_only_samples(tmp_path):
    """A 2D-only sample still loads after its calibration file is deleted.

    The first sequence's ``camera4.json`` is removed after the dataset is
    built; the sample must still come from the first image, with a 32x64
    image and ``camera_mode`` equal to ``"roi"``.
    """
    source_sizes = [(128, 64), (128, 64)]
    dataset, image_files = create_ground3d_dataset(
        tmp_path,
        source_sizes,
        imgsz=(64, 32),
        roi=(64, 32),
        ori_img_size=(128, 64),
    )

    # Remove calibration for sequence 1 only.
    seq1_calib = tmp_path.joinpath("gt", "calib", "seq1", "L2_calib", "camera4.json")
    seq1_calib.unlink()

    sample = dataset.get_image_and_label(0)
    assert sample["im_file"] == image_files[0]
    assert sample["ori_shape"] == (32, 64)
    assert sample["img"].shape[:2] == (32, 64)
    assert sample["camera_mode"] == "roi"
def test_ground3d_dataset_fails_on_missing_calibration_for_3d_samples(tmp_path):
    """A complete-3D sample without calibration raises ``Ground3DCalibrationError``.

    The calibration file is written and then deleted before the dataset is
    built, so its directory exists but the file does not.
    """
    gt_root = tmp_path / "gt"
    image_root = tmp_path / "dataset"
    rel_label = Path("labels/seq0/frame_0001.txt")

    label_path = gt_root / rel_label
    label_path.parent.mkdir(parents=True, exist_ok=True)
    # 19-col complete_3d label: class + 18 numeric fields.
    label_path.write_text(
        "car 0.5 0.5 0.25 0.25 1 2 3 4 5 6 0.1 0.2 0.3 9 10 11 12 0\n",
        encoding="utf-8",
    )

    image_path = image_root / "images" / "seq0" / "frame_0001.png"
    image_path.parent.mkdir(parents=True, exist_ok=True)
    Image.new("RGB", (64, 32), color=(32, 64, 96)).save(image_path)

    camera4_path = gt_root / "calib" / "seq0" / "L2_calib" / "camera4.json"
    camera4_path.parent.mkdir(parents=True, exist_ok=True)
    camera4_payload = {
        "focal_u": 50.0,
        "focal_v": 50.0,
        "cu": 32.0,
        "cv": 16.0,
        "pitch": 0.0,
        "distort_coeffs": [],
    }
    camera4_path.write_text(json.dumps(camera4_payload), encoding="utf-8")

    list_file = gt_root / "train.txt"
    list_file.write_text(f"{rel_label.as_posix()}\n", encoding="utf-8")

    # Drop the calibration again before the dataset is created.
    camera4_path.unlink()

    data_cfg = {
        "path": str(image_root),
        "class_map": {"car": 0},
        "complete_3d_classes": [0],
        "roi": [64, 32],
        "ori_img_size": [64, 32],
        "virtual_fx": 50,
        "virtual_camera_prob": -1.0,
        "crop_center_mode": "cxvy",
    }
    dataset = YOLOGround3DDataset(
        img_path=str(list_file),
        imgsz=[64, 32],
        batch_size=1,
        augment=False,
        rect=False,
        stride=32,
        pad=0.5,
        prefix="test: ",
        task="detect",
        data=data_cfg,
    )

    with pytest.raises(Ground3DCalibrationError, match="calibration file not found"):
        dataset[0]
def test_ground3d_dataset_rejects_missing_required_ground3d_fields(tmp_path):
    """Omitting ``virtual_camera_prob`` and ``crop_center_mode`` raises ``ValueError``.

    The error message must match ``"virtual_camera_prob, crop_center_mode"``.
    """
    # Config without the two fields under test.
    incomplete_cfg = {
        "path": str(tmp_path / "dataset"),
        "class_map": {"car": 0},
        "roi": [64, 32],
        "ori_img_size": [64, 32],
        "virtual_fx": 50,
    }
    list_arg = str(tmp_path / "unused.txt")

    with pytest.raises(ValueError, match="virtual_camera_prob, crop_center_mode"):
        YOLOGround3DDataset(
            img_path=list_arg,
            imgsz=[64, 32],
            batch_size=1,
            augment=False,
            rect=False,
            stride=32,
            pad=0.5,
            prefix="test: ",
            task="detect",
            data=incomplete_cfg,
        )
def test_ground3d_dataset_skips_images_with_invalid_decoded_shape(tmp_path):
    """An image whose size differs from the configured one is skipped.

    The first image is 32x16 while the helper's default configuration is
    64x32. Indexing sample 0 must return the second image and flag index 0 in
    ``dataset._bad_image_mask``.
    """
    source_sizes = [(32, 16), (64, 32)]
    dataset, image_files = create_ground3d_dataset(tmp_path, source_sizes)

    sample = dataset[0]

    assert sample["im_file"] == image_files[1]
    assert dataset._bad_image_mask[0]
def test_ground3d_dataset_resizes_in_half_steps_for_quarter_scale(tmp_path, monkeypatch):
    """A 256x128 source reaches 64x32 through two ``cv2.resize`` calls.

    The recorded target sizes must be ``(128, 64)`` then ``(64, 32)``, and the
    sample's ``fx`` must be 12.5 (the helper writes ``focal_u=50``).
    """
    dataset, _ = create_ground3d_dataset(
        tmp_path,
        [(256, 128)],
        imgsz=(64, 32),
        roi=(256, 128),
        ori_img_size=(256, 128),
    )

    requested_sizes = []
    real_resize = cv2.resize

    def recording_resize(img, dsize, *args, **kwargs):
        # Log the requested target size, then delegate to the real resize.
        requested_sizes.append(dsize)
        return real_resize(img, dsize, *args, **kwargs)

    monkeypatch.setattr(cv2, "resize", recording_resize)

    sample = dataset.get_image_and_label(0)

    assert requested_sizes == [(128, 64), (64, 32)]
    assert sample["img"].shape[:2] == (32, 64)
    assert sample["calib"]["fx"] == pytest.approx(12.5)
def test_ground3d_dataset_resizes_in_half_steps_then_remainder(tmp_path, monkeypatch):
    """A 160x80 source reaches 64x32 via one halving and one remainder resize.

    The recorded target sizes must be ``(80, 40)`` then ``(64, 32)``, and the
    sample's ``fx`` must be 20.0 (the helper writes ``focal_u=50``).
    """
    dataset, _ = create_ground3d_dataset(
        tmp_path,
        [(160, 80)],
        imgsz=(64, 32),
        roi=(160, 80),
        ori_img_size=(160, 80),
    )

    requested_sizes = []
    real_resize = cv2.resize

    def recording_resize(img, dsize, *args, **kwargs):
        # Log the requested target size, then delegate to the real resize.
        requested_sizes.append(dsize)
        return real_resize(img, dsize, *args, **kwargs)

    monkeypatch.setattr(cv2, "resize", recording_resize)

    sample = dataset.get_image_and_label(0)

    assert requested_sizes == [(80, 40), (64, 32)]
    assert sample["img"].shape[:2] == (32, 64)
    assert sample["calib"]["fx"] == pytest.approx(20.0)