#!/usr/bin/env python3
"""
Visualize 2D FN cases from analyze_2d_fp_fn.py results on source images.

This script is designed for image-based inspection of false negatives,
especially FN-localization. False-positive examples can be rendered as well
via ``--mode fp`` or ``--mode both``.

It reads ``analysis_report.json``, reloads the corresponding GT/detections
using the same evaluator pipeline, and saves:

- frame-level overlays (all GT / active detections / highlighted FN targets)
- per-example panels (full-frame + local crop)
- a summary index JSON
"""

import argparse
import json
import sys
from collections import defaultdict
from pathlib import Path

import cv2
import numpy as np

# Make the repository root importable so the ``eval_tools`` package resolves
# when this file is executed directly as a script.
REPO_ROOT = Path(__file__).resolve().parents[2]
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))

from eval_tools.analysis.analyze_2d_fp_fn import build_config, class_name, parse_class_ids
from eval_tools.evaluator.evaluator import Evaluator


# Colors are passed straight to OpenCV drawing calls, i.e. they are BGR tuples.
BOX_COLORS = {
    "gt_all": (80, 220, 80),
    # Keep normal detections visually quiet so highlighted error targets stand out.
    "det_all": (150, 150, 150),
    "fn_gt": (40, 40, 255),
    "fn_det": (0, 215, 255),
    "fp_det": (255, 0, 220),
    "fp_ref_gt": (255, 255, 0),
    "title_bg": (30, 30, 30),
}


def parse_args():
    """Build the command-line interface and return the parsed arguments."""
    parser = argparse.ArgumentParser(
        description="Visualize FN cases from analysis_report.json on source images."
    )
    parser.add_argument(
        "--analysis-report",
        type=str,
        required=True,
        help="Path to analysis_report.json generated by analyze_2d_fp_fn.py",
    )
    parser.add_argument("--config", type=str, help="Path to YAML evaluation config")
    parser.add_argument("--det-path", type=str, help="Detection results root directory")
    parser.add_argument("--gt-path", type=str, help="Ground-truth labels root directory")
    parser.add_argument("--path-depth", type=int, choices=[1, 2], help="Directory depth")
    parser.add_argument(
        "--det-format",
        type=str,
        choices=["auto", "json", "txt"],
        help="Detection file format",
    )
    parser.add_argument(
        "--gt-format",
        type=str,
        choices=["auto", "json", "txt"],
        help="Ground-truth file format",
    )
    parser.add_argument("--img-width", type=int, help="Image width")
    parser.add_argument("--img-height", type=int, help="Image height")
    parser.add_argument(
        "--coord-system",
        type=str,
        choices=["camera", "ego"],
        help="Coordinate system used by the parser/evaluator",
    )
    parser.add_argument(
        "--iou-threshold",
        type=float,
        help="IoU threshold used for evaluator loading",
    )
    parser.add_argument(
        "--conf-threshold",
        type=float,
        help="Confidence threshold for active detections shown on overlays",
    )
    parser.add_argument(
        "--mode",
        type=str,
        default="fn",
        choices=["fn", "fp", "both"],
        help="Which example pool to visualize",
    )
    parser.add_argument(
        "--error-types",
        nargs="+",
        default=["localization"],
        help="Error types to visualize. Default: localization",
    )
    parser.add_argument(
        "--classes",
        nargs="+",
        default=None,
        help="Optional class filter, e.g. vehicle pedestrian bicycle rider",
    )
    parser.add_argument(
        "--case-names",
        nargs="+",
        default=None,
        help="Optional case-name filter",
    )
    parser.add_argument(
        "--min-confidence",
        type=float,
        default=None,
        help="Minimum confidence for the associated detection (best det for FN, det confidence for FP)",
    )
    parser.add_argument(
        "--max-confidence",
        type=float,
        default=None,
        help="Maximum confidence for the associated detection",
    )
    parser.add_argument(
        "--min-distance",
        type=float,
        default=None,
        help="Minimum target distance in metres",
    )
    parser.add_argument(
        "--max-distance",
        type=float,
        default=None,
        help="Maximum target distance in metres",
    )
    parser.add_argument(
        "--max-best-iou",
        type=float,
        default=None,
        help="Maximum best IoU. Useful for focusing on badly localized examples.",
    )
    parser.add_argument(
        "--top-k",
        type=int,
        default=200,
        help="Maximum number of examples to visualize after filtering",
    )
    parser.add_argument(
        "--dedup-frame",
        action="store_true",
        help="Keep at most one example per case/frame/class/error combination",
    )
    parser.add_argument(
        "--line-thickness",
        type=int,
        default=2,
        help="Base line thickness for non-highlight boxes",
    )
    parser.add_argument(
        "--crop-scale",
        type=float,
        default=1.8,
        help="Expand crop window around GT/det union box by this factor",
    )
    parser.add_argument(
        "--jpeg-quality",
        type=int,
        default=92,
        help="JPEG quality for saved visualizations",
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default=None,
        # The fallback is computed by default_output_dir(): a sibling folder of
        # the report named after the report file's stem.
        help="Output directory. Defaults to fn_vis_[report stem] next to the analysis report",
    )
    return parser.parse_args()


def get_confidence(example):
    """Return the confidence associated with an example, or ``None``.

    ``confidence`` is preferred; ``best_det_confidence`` is the fallback.
    """
    if example.get("confidence") is not None:
        return float(example["confidence"])
    if example.get("best_det_confidence") is not None:
        return float(example["best_det_confidence"])
    return None


def get_best_iou(example):
    """Return the best IoU recorded for an example as a float.

    ``best_det_iou`` wins when present; otherwise the larger of
    ``best_same_class_iou`` and ``best_other_class_iou`` is used. Missing or
    null values count as 0.0 so that a JSON ``null`` does not crash ``float``.
    """
    best_det_iou = example.get("best_det_iou")
    if best_det_iou is not None:
        return float(best_det_iou)
    return max(
        float(example.get("best_same_class_iou") or 0.0),
        float(example.get("best_other_class_iou") or 0.0),
    )


def normalize_token_set(values):
    """Lower-case and strip ``values`` into a set; return ``None`` if empty/None."""
    if not values:
        return None
    return {str(v).strip().lower() for v in values if str(v).strip()}


def rank_examples(examples):
    """Return ``examples`` sorted by descending (confidence, -IoU, area, distance).

    High-confidence, low-IoU examples therefore come first. Missing confidence
    counts as 0.0, missing distance as -1.0 and missing area as 0.0.
    """

    def sort_key(item):
        conf = get_confidence(item) or 0.0
        best_iou = get_best_iou(item)
        distance = item.get("distance_m")
        distance = float(distance) if distance is not None else -1.0
        # Fall back to the detection area whenever the GT area is unavailable,
        # including when the key exists but holds None.
        area = item.get("gt_bbox_area")
        if area is None:
            area = item.get("det_bbox_area")
        area = float(area or 0.0)
        return (conf, -best_iou, area, distance)

    return sorted(examples, key=sort_key, reverse=True)


def filter_examples(report, args):
    """Select, rank, optionally de-duplicate and truncate examples from ``report``.

    The pool is taken from ``false_negative_examples`` and/or
    ``false_positive_examples`` depending on ``args.mode``; every filter on
    ``args`` that is set must pass for an example to be kept.
    """
    pools = []
    if args.mode in ("fn", "both"):
        pools.extend(report.get("false_negative_examples", []))
    if args.mode in ("fp", "both"):
        pools.extend(report.get("false_positive_examples", []))

    class_filter = normalize_token_set(args.classes)
    error_filter = normalize_token_set(args.error_types)
    # Case names are matched exactly (no case folding), unlike classes/errors.
    case_filter = set(args.case_names) if args.case_names else None

    filtered = []
    for item in pools:
        class_str = str(item.get("class_name", "")).lower()
        error_type = str(item.get("error_type", "")).lower()
        case_name = item.get("case_name")
        conf = get_confidence(item)
        distance = item.get("distance_m")
        best_iou = get_best_iou(item)

        if class_filter and class_str not in class_filter:
            continue
        if error_filter and error_type not in error_filter:
            continue
        if case_filter and case_name not in case_filter:
            continue
        # An example without the measured quantity cannot satisfy a bound on it.
        if args.min_confidence is not None and (conf is None or conf < args.min_confidence):
            continue
        if args.max_confidence is not None and (conf is None or conf > args.max_confidence):
            continue
        if args.min_distance is not None and (distance is None or float(distance) < args.min_distance):
            continue
        if args.max_distance is not None and (distance is None or float(distance) > args.max_distance):
            continue
        if args.max_best_iou is not None and best_iou > args.max_best_iou:
            continue
        filtered.append(item)

    filtered = rank_examples(filtered)

    if args.dedup_frame:
        # Ranking happened first, so the best-ranked example of each key survives.
        deduped = []
        seen = set()
        for item in filtered:
            key = (
                item.get("case_name"),
                item.get("frame_name"),
                item.get("class_name"),
                item.get("error_type"),
            )
            if key in seen:
                continue
            seen.add(key)
            deduped.append(item)
        filtered = deduped

    if args.top_k is not None:
        filtered = filtered[: args.top_k]
    return filtered


def bbox_to_int(bbox):
    """Round every coordinate of ``bbox`` to the nearest integer."""
    return [int(round(float(v))) for v in bbox]


def get_example_gt_bbox(example):
    """Return the example's ``gt_bbox`` entry (``None`` when absent)."""
    return example.get("gt_bbox")


def get_example_det_bbox(example):
    """Return ``best_det_bbox`` if set, otherwise ``det_bbox`` (may be ``None``)."""
    if example.get("best_det_bbox") is not None:
        return example.get("best_det_bbox")
    return example.get("det_bbox")


def is_fn_example(example):
    """Treat an example as a false negative when it carries a ``gt_bbox``."""
    return example.get("gt_bbox") is not None


def parse_generated_gt_index(gt_id):
    """Return N for ids of the form ``gt_N`` (N all digits), else ``None``."""
    if not gt_id:
        return None
    gt_id = str(gt_id)
    if gt_id.startswith("gt_") and gt_id[3:].isdigit():
        return int(gt_id[3:])
    return None


def resolve_reference_gt(example, gts):
    """Locate the GT box referenced by an example's best-match ids.

    Resolution order:

    1. explicit ``id`` match on ``best_same_gt_id``, then ``best_other_gt_id``;
    2. ``best_same_gt_id`` of the form ``gt_N`` used as an index into the GTs
       whose label equals the example's ``class_id``;
    3. ``best_other_gt_id`` of the form ``gt_N`` used as an index into ``gts``.

    Returns:
        ``(bbox_2d, class name, gt id)`` or ``(None, None, None)`` if nothing
        could be resolved.
    """
    if not gts:
        return None, None, None

    def find_by_explicit_id(target_id):
        if target_id is None:
            return None
        for gt in gts:
            # Compare as strings so numeric and textual ids match each other.
            if gt.get("id") is not None and str(gt.get("id")) == str(target_id):
                return gt
        return None

    best_same_gt_id = example.get("best_same_gt_id")
    best_other_gt_id = example.get("best_other_gt_id")

    gt = find_by_explicit_id(best_same_gt_id)
    if gt is not None:
        return gt.get("bbox_2d"), class_name(gt["label"]), best_same_gt_id

    gt = find_by_explicit_id(best_other_gt_id)
    if gt is not None:
        return gt.get("bbox_2d"), class_name(gt["label"]), best_other_gt_id

    same_idx = parse_generated_gt_index(best_same_gt_id)
    if same_idx is not None:
        same_class_gts = [gt for gt in gts if gt["label"] == example.get("class_id")]
        if 0 <= same_idx < len(same_class_gts):
            gt = same_class_gts[same_idx]
            return gt.get("bbox_2d"), class_name(gt["label"]), best_same_gt_id

    other_idx = parse_generated_gt_index(best_other_gt_id)
    if other_idx is not None and 0 <= other_idx < len(gts):
        gt = gts[other_idx]
        return gt.get("bbox_2d"), class_name(gt["label"]), best_other_gt_id

    return None, None, None


def get_target_box_color(example, kind):
    """Pick the highlight color for a ``"gt"`` or ``"det"`` box of an example."""
    if is_fn_example(example):
        return BOX_COLORS["fn_gt"] if kind == "gt" else BOX_COLORS["fn_det"]
    if kind == "det":
        return BOX_COLORS["fp_det"]
    return BOX_COLORS["fp_ref_gt"]


def draw_box(image, bbox, color, label=None, thickness=2):
    """Draw ``bbox`` on ``image`` in place, with an optional filled label tag."""
    x1, y1, x2, y2 = bbox_to_int(bbox)
    cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness, cv2.LINE_AA)
    if label:
        (tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.55, 1)
        # Place the tag above the box, clamped so it never starts above row 0.
        y_text = max(0, y1 - th - 8)
        cv2.rectangle(image, (x1, y_text), (x1 + tw + 8, y_text + th + 8), color, -1)
        cv2.putText(
            image,
            label,
            (x1 + 4, y_text + th + 2),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.55,
            (255, 255, 255),
            1,
            cv2.LINE_AA,
        )


def add_header(image, text):
    """Blend a dark 42-pixel banner onto the top of ``image`` and write ``text``."""
    w = image.shape[1]
    overlay = image.copy()
    cv2.rectangle(overlay, (0, 0), (w, 42), BOX_COLORS["title_bg"], -1)
    cv2.addWeighted(overlay, 0.55, image, 0.45, 0, image)
    cv2.putText(
        image,
        text,
        (10, 28),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.7,
        (255, 255, 255),
        2,
        cv2.LINE_AA,
    )


def make_crop(image, boxes, scale=1.8):
    """Crop ``image`` around the union of ``boxes`` enlarged by ``scale``.

    ``None`` entries in ``boxes`` are ignored. The window is at least 32 pixels
    wide/high before clamping to the image bounds.

    Returns:
        ``(crop, (x_offset, y_offset))`` where the offset is the crop's
        top-left corner in ``image``. The full image with offset ``(0, 0)`` is
        returned when there is no valid box or the window does not intersect
        the image.
    """
    h, w = image.shape[:2]
    valid = [bbox for bbox in boxes if bbox is not None]
    if not valid:
        return image.copy(), (0, 0)

    x1 = min(float(b[0]) for b in valid)
    y1 = min(float(b[1]) for b in valid)
    x2 = max(float(b[2]) for b in valid)
    y2 = max(float(b[3]) for b in valid)

    cx = 0.5 * (x1 + x2)
    cy = 0.5 * (y1 + y2)
    bw = max(32.0, (x2 - x1) * scale)
    bh = max(32.0, (y2 - y1) * scale)

    crop_x1 = max(0, int(round(cx - bw / 2)))
    crop_y1 = max(0, int(round(cy - bh / 2)))
    crop_x2 = min(w, int(round(cx + bw / 2)))
    crop_y2 = min(h, int(round(cy + bh / 2)))

    # Boxes lying outside the frame would yield an empty slice, which later
    # breaks resizing/stacking; fall back to the whole frame instead.
    if crop_x2 <= crop_x1 or crop_y2 <= crop_y1:
        return image.copy(), (0, 0)

    return image[crop_y1:crop_y2, crop_x1:crop_x2].copy(), (crop_x1, crop_y1)
def _detection_label(example, best_det_prefix, plain_det_prefix):
    """Build the text tag for an example's detection box.

    ``best_det_prefix`` is used (with ``best_det_class``) when the example has a
    ``best_det_bbox``; otherwise ``plain_det_prefix`` is used with
    ``class_name``. Confidence (if known) and best IoU are appended.
    """
    if example.get("best_det_bbox") is not None:
        label = f"{best_det_prefix} {example.get('best_det_class', '-')}"
    else:
        label = f"{plain_det_prefix} {example.get('class_name', '-')}"
    conf = get_confidence(example)
    if conf is not None:
        label += f" {conf:.2f}"
    label += f" IoU {get_best_iou(example):.3f}"
    return label


def _write_jpeg(path, image, quality):
    """Save ``image`` as JPEG and warn on failure; returns cv2.imwrite's result."""
    written = cv2.imwrite(
        str(path),
        image,
        [int(cv2.IMWRITE_JPEG_QUALITY), int(quality)],
    )
    if not written:
        print(f"Warning: failed to write image: {path}")
    return written


def draw_crop_panel(image, example, gts, crop_scale):
    """Return a local crop around the example's boxes with those boxes drawn.

    The crop window covers the example's GT box, its detection box and the
    resolved reference GT box (whichever exist). Boxes are shifted into crop
    coordinates before drawing.
    """
    gt_bbox = get_example_gt_bbox(example)
    det_bbox = get_example_det_bbox(example)
    ref_gt_bbox, ref_gt_class, _ref_gt_id = resolve_reference_gt(example, gts)
    crop, (off_x, off_y) = make_crop(
        image, [gt_bbox, det_bbox, ref_gt_bbox], scale=crop_scale
    )

    def shift_box(box):
        if box is None:
            return None
        return [
            float(box[0]) - off_x,
            float(box[1]) - off_y,
            float(box[2]) - off_x,
            float(box[3]) - off_y,
        ]

    gt_local = shift_box(gt_bbox)
    det_local = shift_box(det_bbox)
    ref_gt_local = shift_box(ref_gt_bbox)

    # The reference GT is only drawn when the example has no GT box of its own.
    if gt_local is not None:
        draw_box(
            crop,
            gt_local,
            get_target_box_color(example, "gt"),
            label=f"GT {example['class_name']}",
            thickness=3,
        )
    elif ref_gt_local is not None:
        draw_box(
            crop,
            ref_gt_local,
            get_target_box_color(example, "gt"),
            label=f"RefGT {ref_gt_class or '-'}",
            thickness=3,
        )

    if det_local is not None:
        label = _detection_label(example, "BestDet", "FP Det")
        draw_box(crop, det_local, get_target_box_color(example, "det"), label=label, thickness=3)

    add_header(
        crop,
        f"crop | {'FN' if is_fn_example(example) else 'FP'} | {example['class_name']} | {example['error_type']} | dist={example.get('distance_m')}",
    )
    return crop


def add_sidebar(panel, example):
    """Append a 360-pixel-wide dark sidebar listing the example's metadata."""
    h, _ = panel.shape[:2]
    sidebar = np.full((h, 360, 3), 28, dtype=np.uint8)
    lines = [
        f"case: {example.get('case_name')}",
        f"frame: {example.get('frame_name')}",
        f"class: {example.get('class_name')}",
        f"error: {example.get('error_type')}",
        f"mode: {'fn' if is_fn_example(example) else 'fp'}",
        f"gt_id: {example.get('gt_id', '-')}",
        f"ref_gt_id: {example.get('best_same_gt_id') or example.get('best_other_gt_id') or '-'}",
        f"best_det_id: {example.get('best_det_id', '-')}",
        f"best_det_cls: {example.get('best_det_class', '-')}",
        f"det_id: {example.get('det_id', '-')}",
        f"conf: {get_confidence(example)}",
        f"best_iou: {get_best_iou(example):.4f}",
        f"distance_m: {example.get('distance_m')}",
        f"lateral_m: {example.get('lateral_m')}",
        f"gt_area: {example.get('gt_bbox_area')}",
        f"det_area: {example.get('det_bbox_area')}",
    ]
    y = 36
    for line in lines:
        cv2.putText(
            sidebar,
            str(line),
            (12, y),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.56,
            (235, 235, 235),
            1,
            cv2.LINE_AA,
        )
        y += 30
    return np.hstack([panel, sidebar])


def resize_to_height(image, target_height):
    """Scale ``image`` to ``target_height`` rows, keeping its aspect ratio.

    The input array itself is returned when it already has that height.
    """
    h, w = image.shape[:2]
    if h == target_height:
        return image
    scale = target_height / max(h, 1)
    return cv2.resize(image, (max(1, int(round(w * scale))), target_height))


def combine_full_and_crop(full_image, crop_image, example):
    """Place the full frame and the crop side by side and add the sidebar."""
    # np.hstack needs equal heights, so both images are scaled to the taller one.
    target_h = max(full_image.shape[0], crop_image.shape[0])
    full_resized = resize_to_height(full_image, target_h)
    crop_resized = resize_to_height(crop_image, target_h)
    panel = np.hstack([full_resized, crop_resized])
    return add_sidebar(panel, example)


def find_pair_map(config):
    """Load the dataset through ``Evaluator`` and index its image pairs.

    Returns:
        ``(pair_map, evaluator)`` where ``pair_map`` maps
        ``(case key, frame)`` to the evaluator's pair record. The case key is
        ``"level1_name/case"`` when the pair has a ``level1_name``, otherwise
        just the case.
    """
    evaluator = Evaluator(
        config=config,
        iou_threshold=float(config.get("matching", {}).get("iou_threshold", 0.5)),
        num_workers=1,
        save_detailed_matches=False,
    )
    dataset_cfg = config["dataset"]
    image_cfg = config["image"]
    evaluator.load_data_from_paths(
        det_root=dataset_cfg["det_path"],
        gt_root=dataset_cfg["gt_path"],
        img_width=image_cfg.get("width", 1920),
        img_height=image_cfg.get("height", 1080),
        path_depth=dataset_cfg.get("path_depth", 1),
        det_format=dataset_cfg.get("det_format", "auto"),
        gt_format=dataset_cfg.get("gt_format", "auto"),
    )

    pair_map = {}
    for pair in evaluator.image_pairs:
        level1_name = pair.get("level1_name")
        if level1_name:
            case_key = f"{level1_name}/{pair['case']}"
        else:
            case_key = pair["case"]
        pair_map[(case_key, pair["frame"])] = pair
    return pair_map, evaluator


def find_image_path(pair):
    """Return the source image belonging to ``pair``'s GT file, or ``None``.

    The image is looked up in an ``images`` directory that is a sibling of the
    GT file's parent directory, using the GT file's stem. Known suffixes are
    tried in a fixed order first; any other ``stem.*`` file is the fallback.
    """
    gt_file = Path(pair["gt_file"])
    case_dir = gt_file.parent.parent
    images_dir = case_dir / "images"
    stem = gt_file.stem
    for suffix in (".png", ".jpg", ".jpeg", ".bmp"):
        candidate = images_dir / f"{stem}{suffix}"
        if candidate.exists():
            return candidate
    # glob() yields entries in arbitrary order; sort so the pick is reproducible.
    matches = sorted(images_dir.glob(f"{stem}.*"))
    return matches[0] if matches else None


def render_frame_overlay(image, gts, active_dets, frame_examples, class_ids, line_thickness):
    """Return a copy of ``image`` with GTs, detections and error targets drawn.

    GT and detection boxes are restricted to ``class_ids``. Each entry of
    ``frame_examples`` is then highlighted on top with thicker boxes and a
    1-based ``FN#``/``FP#`` tag, and a legend header is added.
    """
    canvas = image.copy()
    selected_class_ids = set(class_ids)

    for gt in gts:
        if gt["label"] not in selected_class_ids:
            continue
        draw_box(
            canvas,
            gt["bbox_2d"],
            BOX_COLORS["gt_all"],
            label=f"GT {class_name(gt['label'])}",
            thickness=line_thickness,
        )

    for det in active_dets:
        if det["label"] not in selected_class_ids:
            continue
        conf = float(det.get("confidence", 0.0))
        draw_box(
            canvas,
            det["bbox_2d"],
            BOX_COLORS["det_all"],
            label=f"Det {class_name(det['label'])} {conf:.2f}",
            thickness=line_thickness,
        )

    highlight_thickness = max(3, line_thickness + 1)
    for idx, example in enumerate(frame_examples, 1):
        gt_bbox = get_example_gt_bbox(example)
        det_bbox = get_example_det_bbox(example)
        ref_gt_bbox, ref_gt_class, _ref_gt_id = resolve_reference_gt(example, gts)

        if gt_bbox is not None:
            draw_box(
                canvas,
                gt_bbox,
                get_target_box_color(example, "gt"),
                label=f"FN#{idx} GT {example['class_name']}",
                thickness=highlight_thickness,
            )
        elif ref_gt_bbox is not None:
            draw_box(
                canvas,
                ref_gt_bbox,
                get_target_box_color(example, "gt"),
                label=f"FP#{idx} RefGT {ref_gt_class or '-'}",
                thickness=highlight_thickness,
            )

        if det_bbox is not None:
            label = _detection_label(example, f"FN#{idx} BestDet", f"FP#{idx} Det")
            draw_box(
                canvas,
                det_bbox,
                get_target_box_color(example, "det"),
                label=label,
                thickness=highlight_thickness,
            )

    example_modes = {("FN" if is_fn_example(example) else "FP") for example in frame_examples}
    if len(example_modes) == 1:
        mode_label = next(iter(example_modes))
    else:
        mode_label = "MIXED"
    # Legend names must mirror BOX_COLORS; regular detections are drawn in gray.
    headline = (
        f"2D error visualization | mode={mode_label} | examples={len(frame_examples)} | "
        f"GT=green Det=gray FN-GT=red FN-det=yellow FP-det=magenta FP-refGT=cyan"
    )
    add_header(canvas, headline)
    return canvas


def ensure_dir(path):
    """Create ``path`` (and parents) if needed and return it."""
    path.mkdir(parents=True, exist_ok=True)
    return path


def sanitize_token(value):
    """Make ``value`` safe for a file name: slashes become ``__``, spaces ``_``."""
    return str(value).replace("/", "__").replace("\\", "__").replace(" ", "_")


def default_output_dir(report_path):
    """Return ``fn_vis_<report stem>`` located next to the report file."""
    report_path = Path(report_path)
    return report_path.parent / f"fn_vis_{report_path.stem}"


def main():
    """Render frame overlays and per-example panels and write ``index.json``."""
    args = parse_args()
    with open(args.analysis_report, "r", encoding="utf-8") as handle:
        report = json.load(handle)

    config = build_config(args)
    if args.classes:
        class_ids = parse_class_ids(args.classes)
    else:
        class_ids = parse_class_ids(report["summary"]["classes"])
    filtered_examples = filter_examples(report, args)
    if not filtered_examples:
        print("No examples matched the current filters.")
        return

    pair_map, evaluator = find_pair_map(config)
    output_dir = Path(args.output_dir) if args.output_dir else default_output_dir(args.analysis_report)
    frame_dir = ensure_dir(output_dir / "frames")
    example_dir = ensure_dir(output_dir / "examples")

    # Group examples per frame so each source image is loaded only once.
    by_frame = defaultdict(list)
    for item in filtered_examples:
        by_frame[(item["case_name"], item["frame_name"])].append(item)

    index = {
        "analysis_report": str(Path(args.analysis_report).resolve()),
        "num_examples": len(filtered_examples),
        "num_frames": len(by_frame),
        "mode": args.mode,
        "error_types": args.error_types,
        "classes": [class_name(cid) for cid in class_ids],
        "frames": [],
    }

    conf_threshold = float(config.get("metrics_2d", {}).get("conf_threshold", 0.5))

    for frame_idx, ((case_name, frame_name), frame_examples) in enumerate(by_frame.items(), 1):
        pair = pair_map.get((case_name, frame_name))
        if pair is None:
            print(f"Warning: failed to locate pair for {case_name}/{frame_name}, skipping")
            continue

        image_path = find_image_path(pair)
        if image_path is None or not image_path.exists():
            print(f"Warning: image not found for {case_name}/{frame_name}, skipping")
            continue

        image = cv2.imread(str(image_path))
        if image is None:
            print(f"Warning: failed to read image: {image_path}")
            continue

        gts = Evaluator._parse_ground_truths_for_pair(pair, evaluator.coord_system)
        dets = Evaluator._parse_detections_for_pair(pair, evaluator.coord_system)
        active_dets = [det for det in dets if float(det.get("confidence", 0.0)) >= conf_threshold]

        frame_overlay = render_frame_overlay(
            image,
            gts,
            active_dets,
            frame_examples,
            class_ids,
            line_thickness=args.line_thickness,
        )
        frame_rel = Path("frames") / (
            f"{frame_idx:04d}_{sanitize_token(case_name)}_{sanitize_token(frame_name)}.jpg"
        )
        _write_jpeg(output_dir / frame_rel, frame_overlay, args.jpeg_quality)

        frame_entry = {
            "case_name": case_name,
            "frame_name": frame_name,
            "image_path": str(image_path),
            "frame_visualization": str(frame_rel),
            "num_examples": len(frame_examples),
            "examples": [],
        }

        for ex_idx, example in enumerate(frame_examples, 1):
            # Crop from a clean copy so the panel shows only this example's boxes.
            crop_image = draw_crop_panel(
                image.copy(), example, gts, crop_scale=args.crop_scale
            )
            panel = combine_full_and_crop(frame_overlay.copy(), crop_image, example)
            rel = Path("examples") / (
                f"{frame_idx:04d}_{ex_idx:02d}_"
                f"{sanitize_token(case_name)}_{sanitize_token(frame_name)}_"
                f"{sanitize_token(example['class_name'])}_{sanitize_token(example['error_type'])}.jpg"
            )
            _write_jpeg(output_dir / rel, panel, args.jpeg_quality)

            example_record = dict(example)
            example_record["visualization"] = str(rel)
            frame_entry["examples"].append(example_record)

        index["frames"].append(frame_entry)

    index_path = output_dir / "index.json"
    with open(index_path, "w", encoding="utf-8") as handle:
        json.dump(index, handle, indent=2)

    print(f"Saved visualization index to: {index_path}")
    print(f"Saved frame overlays to: {frame_dir}")
    print(f"Saved example panels to: {example_dir}")


if __name__ == "__main__":
    main()