# yolov26_3d/eval_tools/analysis/visualize_2d_fn_cases.py

#!/usr/bin/env python3
"""
Visualize 2D FN (and optionally FP) cases from analyze_2d_fp_fn.py results on source images.

This script is designed for image-based inspection of false negatives,
especially FN-localization. It reads ``analysis_report.json``, reloads the
corresponding GT/detections using the same evaluator pipeline, and saves:

  - frame-level overlays (all GT / active detections / highlighted FN targets)
  - per-example panels (full-frame + local crop)
  - a summary index JSON
"""

import argparse
import json
import sys
from collections import defaultdict
from pathlib import Path

import cv2
import numpy as np


# Third ancestor of this file (the directory that contains ``eval_tools``).
# It is put at the front of ``sys.path`` so the ``eval_tools.*`` imports below
# resolve when this file is executed directly as a script.
REPO_ROOT = Path(__file__).resolve().parents[2]
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))

from eval_tools.analysis.analyze_2d_fp_fn import build_config, class_name, parse_class_ids
from eval_tools.evaluator.evaluator import Evaluator


# Drawing palette. Tuples are in OpenCV channel order (B, G, R), since they
# are passed straight to cv2 drawing calls on images loaded with cv2.imread.
BOX_COLORS = {
    "gt_all": (80, 220, 80),  # green: every ground-truth box
    # Keep normal detections visually quiet so highlighted error targets stand out.
    "det_all": (150, 150, 150),  # gray: every active detection
    "fn_gt": (40, 40, 255),  # red: the missed GT of an FN example
    "fn_det": (0, 215, 255),  # yellow: best detection associated with an FN
    "fp_det": (255, 0, 220),  # magenta: the detection of an FP example
    "fp_ref_gt": (255, 255, 0),  # cyan: reference GT associated with an FP
    "title_bg": (30, 30, 30),  # dark gray: header banner background
}


def parse_args():
    """Parse the command-line options of the visualization script.

    Options declared without an explicit ``default`` are ``None`` when they
    are omitted on the command line. The resulting namespace is handed to
    ``build_config`` by ``main`` and is also used for example filtering and
    rendering parameters.

    Returns:
        argparse.Namespace: the parsed options read from ``sys.argv``.
    """
    parser = argparse.ArgumentParser(
        description="Visualize FN/FP cases from analysis_report.json on source images."
    )
    parser.add_argument(
        "--analysis-report",
        type=str,
        required=True,
        help="Path to analysis_report.json generated by analyze_2d_fp_fn.py",
    )
    parser.add_argument("--config", type=str, help="Path to YAML evaluation config")
    parser.add_argument("--det-path", type=str, help="Detection results root directory")
    parser.add_argument("--gt-path", type=str, help="Ground-truth labels root directory")
    parser.add_argument("--path-depth", type=int, choices=[1, 2], help="Directory depth")
    parser.add_argument(
        "--det-format",
        type=str,
        choices=["auto", "json", "txt"],
        help="Detection file format",
    )
    parser.add_argument(
        "--gt-format",
        type=str,
        choices=["auto", "json", "txt"],
        help="Ground-truth file format",
    )
    parser.add_argument("--img-width", type=int, help="Image width")
    parser.add_argument("--img-height", type=int, help="Image height")
    parser.add_argument(
        "--coord-system",
        type=str,
        choices=["camera", "ego"],
        help="Coordinate system used by the parser/evaluator",
    )
    parser.add_argument(
        "--iou-threshold",
        type=float,
        help="IoU threshold used for evaluator loading",
    )
    parser.add_argument(
        "--conf-threshold",
        type=float,
        help="Confidence threshold for active detections shown on overlays",
    )
    parser.add_argument(
        "--mode",
        type=str,
        default="fn",
        choices=["fn", "fp", "both"],
        help="Which example pool to visualize",
    )
    parser.add_argument(
        "--error-types",
        nargs="+",
        default=["localization"],
        help="Error types to visualize. Default: localization",
    )
    parser.add_argument(
        "--classes",
        nargs="+",
        default=None,
        help="Optional class filter, e.g. vehicle pedestrian bicycle rider",
    )
    parser.add_argument(
        "--case-names",
        nargs="+",
        default=None,
        help="Optional case-name filter",
    )
    parser.add_argument(
        "--min-confidence",
        type=float,
        default=None,
        help="Minimum confidence for the associated detection (best det for FN, det confidence for FP)",
    )
    parser.add_argument(
        "--max-confidence",
        type=float,
        default=None,
        help="Maximum confidence for the associated detection",
    )
    parser.add_argument(
        "--min-distance",
        type=float,
        default=None,
        help="Minimum target distance in metres",
    )
    parser.add_argument(
        "--max-distance",
        type=float,
        default=None,
        help="Maximum target distance in metres",
    )
    parser.add_argument(
        "--max-best-iou",
        type=float,
        default=None,
        help="Maximum best IoU. Useful for focusing on badly localized examples.",
    )
    parser.add_argument(
        "--top-k",
        type=int,
        default=200,
        help="Maximum number of examples to visualize after filtering",
    )
    parser.add_argument(
        "--dedup-frame",
        action="store_true",
        help="Keep at most one example per case/frame/class/error combination",
    )
    parser.add_argument(
        "--line-thickness",
        type=int,
        default=2,
        help="Base line thickness for non-highlight boxes",
    )
    parser.add_argument(
        "--crop-scale",
        type=float,
        default=1.8,
        help="Expand crop window around GT/det union box by this factor",
    )
    parser.add_argument(
        "--jpeg-quality",
        type=int,
        default=92,
        help="JPEG quality for saved visualizations",
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default=None,
        # The fallback directory is created next to the report file, not under
        # a fixed "evaluation_results" folder.
        help="Output directory. Defaults to <report_dir>/fn_vis_<report_stem>",
    )
    return parser.parse_args()


def get_confidence(example):
    """Return the confidence associated with an example, or ``None``.

    The ``confidence`` field takes precedence; ``best_det_confidence`` is the
    fallback. A field whose value is ``None`` is treated as absent.
    """
    for field in ("confidence", "best_det_confidence"):
        raw = example.get(field)
        if raw is not None:
            return float(raw)
    return None


def get_best_iou(example):
    """Return the best IoU recorded for an example as a float.

    ``best_det_iou`` is used when it is present and not ``None``. Otherwise
    the larger of ``best_same_class_iou`` and ``best_other_class_iou`` is
    returned. A missing or ``None`` (JSON ``null``) field counts as 0.0 rather
    than raising ``TypeError``.
    """
    primary = example.get("best_det_iou")
    if primary is not None:
        return float(primary)

    def as_iou(field):
        raw = example.get(field)
        return 0.0 if raw is None else float(raw)

    return max(as_iou("best_same_class_iou"), as_iou("best_other_class_iou"))


def normalize_token_set(values):
    """Turn a list of CLI tokens into a lower-cased, whitespace-stripped set.

    Returns ``None`` when ``values`` is empty or ``None``. Tokens that are
    blank after stripping are dropped, so the result may be an empty set.
    """
    if not values:
        return None
    tokens = set()
    for value in values:
        token = str(value).strip()
        if token:
            tokens.add(token.lower())
    return tokens


def rank_examples(examples):
    """Return a new list of examples ordered from most to least interesting.

    Examples are sorted in descending order of the tuple
    ``(confidence, -best_iou, bbox_area, distance)``: higher confidence first,
    then lower IoU, then larger box area, then larger distance. A missing
    confidence counts as 0.0, a missing distance as -1.0 and a missing area as
    0.0. The input sequence is not modified.
    """

    def sort_key(example):
        confidence = get_confidence(example) or 0.0
        iou = get_best_iou(example)
        raw_distance = example.get("distance_m")
        raw_area = example.get("gt_bbox_area", example.get("det_bbox_area", 0.0))
        return (
            confidence,
            -iou,
            float(raw_area or 0.0),
            -1.0 if raw_distance is None else float(raw_distance),
        )

    ranked = list(examples)
    ranked.sort(key=sort_key, reverse=True)
    return ranked


def filter_examples(report, args):
    """Select, rank and truncate the examples to visualize.

    The candidate pool is taken from ``false_negative_examples`` and/or
    ``false_positive_examples`` of ``report`` depending on ``args.mode``.
    Candidates are filtered by class, error type, case name, confidence range,
    distance range and maximum best IoU, then ranked with ``rank_examples``.
    With ``args.dedup_frame`` only the best-ranked example per
    (case, frame, class, error type) is kept. The result is finally cut to
    ``args.top_k`` entries.

    An example lacking a confidence (or distance) is dropped whenever a bound
    on that quantity is requested.
    """
    candidates = []
    for accepted_modes, report_key in (
        (("fn", "both"), "false_negative_examples"),
        (("fp", "both"), "false_positive_examples"),
    ):
        if args.mode in accepted_modes:
            candidates.extend(report.get(report_key, []))

    allowed_classes = normalize_token_set(args.classes)
    allowed_errors = normalize_token_set(args.error_types)
    allowed_cases = set(args.case_names) if args.case_names else None

    def outside(raw, lower, upper):
        # A bound that is set rejects both missing values and values beyond it.
        if lower is not None and (raw is None or float(raw) < lower):
            return True
        if upper is not None and (raw is None or float(raw) > upper):
            return True
        return False

    def keep(example):
        cls = str(example.get("class_name", "")).lower()
        err = str(example.get("error_type", "")).lower()
        case = example.get("case_name")
        confidence = get_confidence(example)
        distance = example.get("distance_m")
        iou = get_best_iou(example)

        if allowed_classes and cls not in allowed_classes:
            return False
        if allowed_errors and err not in allowed_errors:
            return False
        if allowed_cases and case not in allowed_cases:
            return False
        if outside(confidence, args.min_confidence, args.max_confidence):
            return False
        if outside(distance, args.min_distance, args.max_distance):
            return False
        if args.max_best_iou is not None and iou > args.max_best_iou:
            return False
        return True

    selected = rank_examples([example for example in candidates if keep(example)])

    if args.dedup_frame:
        unique = []
        visited = set()
        for example in selected:
            signature = (
                example.get("case_name"),
                example.get("frame_name"),
                example.get("class_name"),
                example.get("error_type"),
            )
            if signature not in visited:
                visited.add(signature)
                unique.append(example)
        selected = unique

    if args.top_k is not None:
        selected = selected[: args.top_k]

    return selected


def bbox_to_int(bbox):
    """Round every coordinate of ``bbox`` to the nearest integer pixel."""
    pixels = []
    for coord in bbox:
        pixels.append(int(round(float(coord))))
    return pixels


def get_example_gt_bbox(example):
    """Return the example's ``gt_bbox`` entry, or ``None`` when it has none."""
    gt_bbox = example.get("gt_bbox")
    return gt_bbox


def get_example_det_bbox(example):
    """Return the detection box of an example.

    ``best_det_bbox`` is preferred; ``det_bbox`` is the fallback. ``None`` is
    returned when neither is available.
    """
    best = example.get("best_det_bbox")
    return best if best is not None else example.get("det_bbox")


def is_fn_example(example):
    """Tell whether an example is treated as a false negative.

    An example counts as FN exactly when it carries a non-``None`` ``gt_bbox``;
    everything else is handled as a false positive.
    """
    gt_bbox = example.get("gt_bbox")
    return gt_bbox is not None


def parse_generated_gt_index(gt_id):
    """Extract the integer index from an id of the form ``gt_<N>``.

    Returns ``None`` for falsy ids and for ids that do not match that form.

    ``str.isdecimal`` is used instead of ``str.isdigit`` because ``isdigit``
    also accepts characters such as superscript digits that ``int()`` rejects,
    which would raise ``ValueError`` instead of returning ``None``.
    """
    if not gt_id:
        return None
    text = str(gt_id)
    suffix = text[3:]
    if text.startswith("gt_") and suffix.isdecimal():
        return int(suffix)
    return None


def resolve_reference_gt(example, gts):
    """Find the ground-truth object an example refers to.

    Resolution order:

    1. a GT whose explicit ``id`` equals ``best_same_gt_id`` (string compare);
    2. a GT whose explicit ``id`` equals ``best_other_gt_id``;
    3. ``best_same_gt_id`` read as a generated ``gt_<N>`` index into the GTs
       whose label equals the example's ``class_id``;
    4. ``best_other_gt_id`` read as a generated ``gt_<N>`` index into all GTs.

    Returns:
        tuple: ``(bbox_2d, class name, reference id)`` of the resolved GT, or
        ``(None, None, None)`` when ``gts`` is empty or nothing matches.
    """
    unresolved = (None, None, None)
    if not gts:
        return unresolved

    same_ref = example.get("best_same_gt_id")
    other_ref = example.get("best_other_gt_id")

    def describe(gt, ref_id):
        return gt.get("bbox_2d"), class_name(gt["label"]), ref_id

    # Explicit ids win over positional "gt_<N>" indices; same-class id first.
    for ref_id in (same_ref, other_ref):
        if ref_id is None:
            continue
        wanted = str(ref_id)
        for gt in gts:
            if gt.get("id") is not None and str(gt.get("id")) == wanted:
                return describe(gt, ref_id)

    same_idx = parse_generated_gt_index(same_ref)
    if same_idx is not None:
        class_id = example.get("class_id")
        peers = [gt for gt in gts if gt["label"] == class_id]
        if 0 <= same_idx < len(peers):
            return describe(peers[same_idx], same_ref)

    other_idx = parse_generated_gt_index(other_ref)
    if other_idx is not None and 0 <= other_idx < len(gts):
        return describe(gts[other_idx], other_ref)

    return unresolved


def get_target_box_color(example, kind):
    """Pick the highlight colour for an example's GT or detection box.

    For FN examples ``kind == "gt"`` selects ``fn_gt`` and anything else
    ``fn_det``. For FP examples ``kind == "det"`` selects ``fp_det`` and
    anything else ``fp_ref_gt``.
    """
    if is_fn_example(example):
        palette_key = "fn_gt" if kind == "gt" else "fn_det"
    else:
        palette_key = "fp_det" if kind == "det" else "fp_ref_gt"
    return BOX_COLORS[palette_key]


def draw_box(image, bbox, color, label=None, thickness=2):
    """Draw a rectangle, and optionally a filled text tag, onto ``image`` in place.

    Args:
        image: image array that is drawn on.
        bbox: ``(x1, y1, x2, y2)`` box; coordinates are rounded to integers.
        color: colour of the outline and of the tag background.
        label: optional text; when truthy it is rendered in white on a filled
            tag placed above the box (clamped so it does not start above y=0).
        thickness: outline thickness forwarded to ``cv2.rectangle``.
    """
    left, top, right, bottom = bbox_to_int(bbox)
    cv2.rectangle(image, (left, top), (right, bottom), color, thickness, cv2.LINE_AA)
    if not label:
        return

    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.55
    (text_w, text_h), _baseline = cv2.getTextSize(label, font, font_scale, 1)
    tag_top = max(0, top - text_h - 8)
    tag_corner = (left + text_w + 8, tag_top + text_h + 8)
    cv2.rectangle(image, (left, tag_top), tag_corner, color, -1)
    text_origin = (left + 4, tag_top + text_h + 2)
    cv2.putText(
        image, label, text_origin, font, font_scale, (255, 255, 255), 1, cv2.LINE_AA
    )


def add_header(image, text):
    """Blend a dark 42-pixel banner across the top of ``image`` and write ``text`` on it.

    The image is modified in place. The banner is drawn on a copy and mixed
    back in at 55% opacity so the underlying pixels stay partly visible.
    """
    width = image.shape[1]
    banner = image.copy()
    cv2.rectangle(banner, (0, 0), (width, 42), BOX_COLORS["title_bg"], -1)
    cv2.addWeighted(banner, 0.55, image, 0.45, 0, image)
    font = cv2.FONT_HERSHEY_SIMPLEX
    cv2.putText(image, text, (10, 28), font, 0.7, (255, 255, 255), 2, cv2.LINE_AA)


def make_crop(image, boxes, scale=1.8):
    """Crop ``image`` around the union of the given boxes.

    Args:
        image: source image array indexed as ``[row, column, ...]``.
        boxes: iterable of ``(x1, y1, x2, y2)`` boxes; ``None`` entries are
            ignored.
        scale: factor by which the union box is enlarged around its centre.
            The window is at least 32 pixels wide and high before clamping to
            the image bounds.

    Returns:
        tuple: ``(crop, (offset_x, offset_y))`` where ``crop`` is a copy of the
        selected region and the offset is its top-left corner in ``image``.
        When there are no boxes, or the window does not overlap the image at
        all, a copy of the full image with offset ``(0, 0)`` is returned.
    """
    h, w = image.shape[:2]
    valid = [bbox for bbox in boxes if bbox is not None]
    if not valid:
        return image.copy(), (0, 0)

    x1 = min(float(b[0]) for b in valid)
    y1 = min(float(b[1]) for b in valid)
    x2 = max(float(b[2]) for b in valid)
    y2 = max(float(b[3]) for b in valid)

    cx = 0.5 * (x1 + x2)
    cy = 0.5 * (y1 + y2)
    bw = max(32.0, (x2 - x1) * scale)
    bh = max(32.0, (y2 - y1) * scale)

    crop_x1 = max(0, int(round(cx - bw / 2)))
    crop_y1 = max(0, int(round(cy - bh / 2)))
    crop_x2 = min(w, int(round(cx + bw / 2)))
    crop_y2 = min(h, int(round(cy + bh / 2)))

    # Boxes lying entirely outside the frame would yield an empty slice (or,
    # with a negative end index, a slice counted from the far edge). Neither is
    # a usable crop, so fall back to the whole frame.
    if crop_x2 <= crop_x1 or crop_y2 <= crop_y1:
        return image.copy(), (0, 0)

    return image[crop_y1:crop_y2, crop_x1:crop_x2].copy(), (crop_x1, crop_y1)


def draw_crop_panel(image, example, gts, crop_scale):
    """Build the annotated close-up view for one example.

    The crop window covers the example's GT box, its detection box and the
    resolved reference GT (whichever exist). The example's own GT box is drawn
    when present, otherwise the reference GT; the detection box is drawn with
    its confidence and IoU; a header banner summarises the example.

    Returns:
        The annotated crop image.
    """
    gt_bbox = get_example_gt_bbox(example)
    det_bbox = get_example_det_bbox(example)
    ref_gt_bbox, ref_gt_class, _ref_gt_id = resolve_reference_gt(example, gts)
    crop, (dx, dy) = make_crop(image, [gt_bbox, det_bbox, ref_gt_bbox], scale=crop_scale)

    def to_crop_coords(box):
        # Translate full-frame coordinates into the crop's local frame.
        if box is None:
            return None
        shifts = (dx, dy, dx, dy)
        return [float(box[i]) - shifts[i] for i in range(4)]

    gt_local = to_crop_coords(gt_bbox)
    det_local = to_crop_coords(det_bbox)
    ref_gt_local = to_crop_coords(ref_gt_bbox)

    gt_target, gt_label = None, None
    if gt_local is not None:
        gt_target, gt_label = gt_local, f"GT {example['class_name']}"
    elif ref_gt_local is not None:
        gt_target, gt_label = ref_gt_local, f"RefGT {ref_gt_class or '-'}"
    if gt_target is not None:
        draw_box(
            crop,
            gt_target,
            get_target_box_color(example, "gt"),
            label=gt_label,
            thickness=3,
        )

    if det_local is not None:
        conf = get_confidence(example)
        iou = get_best_iou(example)
        if example.get("best_det_bbox") is not None:
            det_label = f"BestDet {example.get('best_det_class', '-')}"
        else:
            det_label = f"FP Det {example.get('class_name', '-')}"
        if conf is not None:
            det_label += f" {conf:.2f}"
        det_label += f" IoU {iou:.3f}"
        draw_box(
            crop,
            det_local,
            get_target_box_color(example, "det"),
            label=det_label,
            thickness=3,
        )

    mode_tag = "FN" if is_fn_example(example) else "FP"
    add_header(
        crop,
        f"crop | {mode_tag} | {example['class_name']} | {example['error_type']} | dist={example.get('distance_m')}",
    )
    return crop


def add_sidebar(panel, example):
    """Append a 360-pixel-wide text sidebar describing ``example`` to ``panel``.

    The sidebar has the same height as ``panel``, a dark background, and one
    metadata line every 30 pixels starting at y=36.

    Returns:
        A new image: ``panel`` with the sidebar stacked on its right.
    """
    height = panel.shape[0]
    sidebar = np.full((height, 360, 3), 28, dtype=np.uint8)

    ref_gt_id = example.get("best_same_gt_id") or example.get("best_other_gt_id") or "-"
    rows = [
        ("case", example.get("case_name")),
        ("frame", example.get("frame_name")),
        ("class", example.get("class_name")),
        ("error", example.get("error_type")),
        ("mode", "fn" if is_fn_example(example) else "fp"),
        ("gt_id", example.get("gt_id", "-")),
        ("ref_gt_id", ref_gt_id),
        ("best_det_id", example.get("best_det_id", "-")),
        ("best_det_cls", example.get("best_det_class", "-")),
        ("det_id", example.get("det_id", "-")),
        ("conf", get_confidence(example)),
        ("best_iou", f"{get_best_iou(example):.4f}"),
        ("distance_m", example.get("distance_m")),
        ("lateral_m", example.get("lateral_m")),
        ("gt_area", example.get("gt_bbox_area")),
        ("det_area", example.get("det_bbox_area")),
    ]

    font = cv2.FONT_HERSHEY_SIMPLEX
    for row_idx, (name, value) in enumerate(rows):
        baseline_y = 36 + 30 * row_idx
        cv2.putText(
            sidebar,
            f"{name}: {value}",
            (12, baseline_y),
            font,
            0.56,
            (235, 235, 235),
            1,
            cv2.LINE_AA,
        )

    return np.hstack([panel, sidebar])


def resize_to_height(image, target_height):
    """Scale ``image`` to ``target_height`` pixels high, keeping its aspect ratio.

    The input object itself is returned when it already has the requested
    height. The resulting width is never smaller than one pixel.
    """
    src_h, src_w = image.shape[:2]
    if src_h != target_height:
        factor = target_height / max(src_h, 1)
        new_w = max(1, int(round(src_w * factor)))
        image = cv2.resize(image, (new_w, target_height))
    return image


def combine_full_and_crop(full_image, crop_image, example):
    """Lay out the full frame, the crop and the metadata sidebar side by side.

    Both images are resized to the taller of the two heights before being
    stacked horizontally; the sidebar for ``example`` is appended on the right.
    """
    common_h = max(full_image.shape[0], crop_image.shape[0])
    tiles = [resize_to_height(tile, common_h) for tile in (full_image, crop_image)]
    return add_sidebar(np.hstack(tiles), example)


def find_pair_map(config):
    """Load the dataset through ``Evaluator`` and index its image pairs.

    Args:
        config: evaluation config; the ``dataset`` and ``image`` sections are
            required, ``matching.iou_threshold`` is optional (default 0.5).

    Returns:
        tuple: ``(pair_map, evaluator)`` where ``pair_map`` maps
        ``(case_key, frame)`` to the evaluator's pair record. ``case_key`` is
        ``"<level1_name>/<case>"`` when the pair has a truthy ``level1_name``
        and just ``<case>`` otherwise.
    """
    matching_cfg = config.get("matching", {})
    evaluator = Evaluator(
        config=config,
        iou_threshold=float(matching_cfg.get("iou_threshold", 0.5)),
        num_workers=1,
        save_detailed_matches=False,
    )

    dataset_cfg = config["dataset"]
    image_cfg = config["image"]
    load_kwargs = {
        "det_root": dataset_cfg["det_path"],
        "gt_root": dataset_cfg["gt_path"],
        "img_width": image_cfg.get("width", 1920),
        "img_height": image_cfg.get("height", 1080),
        "path_depth": dataset_cfg.get("path_depth", 1),
        "det_format": dataset_cfg.get("det_format", "auto"),
        "gt_format": dataset_cfg.get("gt_format", "auto"),
    }
    evaluator.load_data_from_paths(**load_kwargs)

    def case_key_of(pair):
        parent = pair.get("level1_name")
        return f"{parent}/{pair['case']}" if parent else pair["case"]

    pair_map = {
        (case_key_of(pair), pair["frame"]): pair for pair in evaluator.image_pairs
    }
    return pair_map, evaluator


def find_image_path(pair):
    """Locate the source image that belongs to a GT file.

    The image is looked up in the ``images`` directory that sits next to the
    GT file's parent directory, using the GT file's stem as the image name.
    ``.png``, ``.jpg``, ``.jpeg`` and ``.bmp`` are tried in that order; after
    that any file named ``<stem>.*`` is accepted.

    Args:
        pair: mapping with a ``gt_file`` path entry.

    Returns:
        pathlib.Path | None: the image path, or ``None`` when nothing matches.
    """
    # Local import: the module only imports ``Path`` from the standard library.
    from glob import escape as glob_escape

    gt_file = Path(pair["gt_file"])
    images_dir = gt_file.parent.parent / "images"
    stem = gt_file.stem
    for suffix in (".png", ".jpg", ".jpeg", ".bmp"):
        candidate = images_dir / f"{stem}{suffix}"
        if candidate.exists():
            return candidate

    # Escape the stem so characters such as "[" or "*" in a frame name are
    # matched literally, and sort so the chosen file does not depend on the
    # directory listing order.
    matches = sorted(images_dir.glob(f"{glob_escape(stem)}.*"))
    return matches[0] if matches else None


def render_frame_overlay(image, gts, active_dets, frame_examples, class_ids, line_thickness):
    """Render the frame-level overlay for all examples of one frame.

    Drawing order (later layers sit on top):

    1. every GT whose label is in ``class_ids`` (green);
    2. every active detection whose label is in ``class_ids`` (gray);
    3. for each example, its GT box (or the resolved reference GT when the
       example has no GT box) and its detection box, drawn thicker and in the
       FN/FP highlight colours.

    Args:
        image: source frame; it is copied, not modified.
        gts: parsed ground-truth objects with ``label`` and ``bbox_2d``.
        active_dets: detections to show, with ``label``, ``bbox_2d`` and an
            optional ``confidence``.
        frame_examples: FN/FP example records belonging to this frame.
        class_ids: class ids whose GT/detections are drawn in the base layers.
        line_thickness: thickness of the base-layer boxes; highlighted boxes
            use ``max(3, line_thickness + 1)``.

    Returns:
        The annotated copy of ``image`` with a legend banner on top.
    """
    canvas = image.copy()
    selected_class_ids = set(class_ids)
    highlight_thickness = max(3, line_thickness + 1)

    for gt in gts:
        if gt["label"] not in selected_class_ids:
            continue
        draw_box(
            canvas,
            gt["bbox_2d"],
            BOX_COLORS["gt_all"],
            label=f"GT {class_name(gt['label'])}",
            thickness=line_thickness,
        )

    for det in active_dets:
        if det["label"] not in selected_class_ids:
            continue
        conf = float(det.get("confidence", 0.0))
        draw_box(
            canvas,
            det["bbox_2d"],
            BOX_COLORS["det_all"],
            label=f"Det {class_name(det['label'])} {conf:.2f}",
            thickness=line_thickness,
        )

    for idx, example in enumerate(frame_examples, 1):
        gt_bbox = get_example_gt_bbox(example)
        det_bbox = get_example_det_bbox(example)
        ref_gt_bbox, ref_gt_class, _ref_gt_id = resolve_reference_gt(example, gts)
        if gt_bbox is not None:
            draw_box(
                canvas,
                gt_bbox,
                get_target_box_color(example, "gt"),
                label=f"FN#{idx} GT {example['class_name']}",
                thickness=highlight_thickness,
            )
        elif ref_gt_bbox is not None:
            draw_box(
                canvas,
                ref_gt_bbox,
                get_target_box_color(example, "gt"),
                label=f"FP#{idx} RefGT {ref_gt_class or '-'}",
                thickness=highlight_thickness,
            )
        if det_bbox is not None:
            conf = get_confidence(example)
            iou = get_best_iou(example)
            if example.get("best_det_bbox") is not None:
                label = f"FN#{idx} BestDet {example.get('best_det_class', '-')}"
            else:
                label = f"FP#{idx} Det {example.get('class_name', '-')}"
            if conf is not None:
                label += f" {conf:.2f}"
            label += f" IoU {iou:.3f}"
            draw_box(
                canvas,
                det_bbox,
                get_target_box_color(example, "det"),
                label=label,
                thickness=highlight_thickness,
            )

    example_modes = {("FN" if is_fn_example(example) else "FP") for example in frame_examples}
    if len(example_modes) == 1:
        mode_label = next(iter(example_modes))
    else:
        mode_label = "MIXED"

    # The legend must name the colours actually used: regular detections are
    # drawn with BOX_COLORS["det_all"], which is gray, not orange.
    headline = (
        f"2D error visualization | mode={mode_label} | examples={len(frame_examples)} | "
        f"GT=green Det=gray FN-GT=red FN-det=yellow FP-det=magenta FP-refGT=cyan"
    )
    add_header(canvas, headline)
    return canvas


def ensure_dir(path):
    """Create ``path`` (and missing parents) if needed and return it unchanged."""
    path.mkdir(exist_ok=True, parents=True)
    return path


def sanitize_token(value):
    """Make ``value`` safe to embed in a file name.

    Forward and backward slashes become ``__`` and spaces become ``_``.
    """
    substitutions = str.maketrans({"/": "__", "\\": "__", " ": "_"})
    return str(value).translate(substitutions)


def default_output_dir(report_path):
    """Return the fallback output directory for a report.

    The directory is named ``fn_vis_<report stem>`` and sits next to the
    report file.
    """
    report = Path(report_path)
    folder_name = "fn_vis_" + report.stem
    return report.parent.joinpath(folder_name)


def main():
    """Entry point: render frame overlays and per-example panels for a report.

    Steps:

    1. read the analysis report and build the evaluation config from the CLI;
    2. filter and rank the examples; stop early when none match;
    3. load the dataset pairs through the evaluator and group the examples by
       ``(case_name, frame_name)``;
    4. for each frame, load the image, re-parse GT and detections, write the
       frame overlay to ``<output>/frames`` and one panel per example to
       ``<output>/examples``;
    5. write ``<output>/index.json`` describing everything that was rendered.

    Frames whose pair, image file or image data cannot be obtained are skipped
    with a warning. A failed image write is reported with a warning.
    """
    args = parse_args()

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(args.analysis_report, "r", encoding="utf-8") as file:
        report = json.load(file)

    config = build_config(args)
    class_ids = parse_class_ids(args.classes) if args.classes else parse_class_ids(report["summary"]["classes"])
    filtered_examples = filter_examples(report, args)

    if not filtered_examples:
        print("No examples matched the current filters.")
        return

    pair_map, evaluator = find_pair_map(config)

    output_dir = Path(args.output_dir) if args.output_dir else default_output_dir(args.analysis_report)
    frame_dir = ensure_dir(output_dir / "frames")
    example_dir = ensure_dir(output_dir / "examples")

    by_frame = defaultdict(list)
    for item in filtered_examples:
        by_frame[(item["case_name"], item["frame_name"])].append(item)

    index = {
        "analysis_report": str(Path(args.analysis_report).resolve()),
        "num_examples": len(filtered_examples),
        "num_frames": len(by_frame),
        "mode": args.mode,
        "error_types": args.error_types,
        "classes": [class_name(cid) for cid in class_ids],
        "frames": [],
    }

    conf_threshold = float(config.get("metrics_2d", {}).get("conf_threshold", 0.5))
    jpeg_params = [int(cv2.IMWRITE_JPEG_QUALITY), int(args.jpeg_quality)]

    for frame_idx, ((case_name, frame_name), frame_examples) in enumerate(by_frame.items(), 1):
        pair = pair_map.get((case_name, frame_name))
        if pair is None:
            print(f"Warning: failed to locate pair for {case_name}/{frame_name}, skipping")
            continue

        image_path = find_image_path(pair)
        if image_path is None or not image_path.exists():
            print(f"Warning: image not found for {case_name}/{frame_name}, skipping")
            continue

        image = cv2.imread(str(image_path))
        if image is None:
            print(f"Warning: failed to read image: {image_path}")
            continue

        gts = Evaluator._parse_ground_truths_for_pair(pair, evaluator.coord_system)
        dets = Evaluator._parse_detections_for_pair(pair, evaluator.coord_system)
        # Only detections at or above the configured confidence are shown.
        active_dets = [det for det in dets if float(det.get("confidence", 0.0)) >= conf_threshold]

        frame_overlay = render_frame_overlay(
            image,
            gts,
            active_dets,
            frame_examples,
            class_ids,
            line_thickness=args.line_thickness,
        )

        frame_rel = Path("frames") / (
            f"{frame_idx:04d}_{sanitize_token(case_name)}_{sanitize_token(frame_name)}.jpg"
        )
        frame_path = output_dir / frame_rel
        # cv2.imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(str(frame_path), frame_overlay, jpeg_params):
            print(f"Warning: failed to write frame overlay: {frame_path}")

        frame_entry = {
            "case_name": case_name,
            "frame_name": frame_name,
            "image_path": str(image_path),
            "frame_visualization": str(frame_rel),
            "num_examples": len(frame_examples),
            "examples": [],
        }

        for ex_idx, example in enumerate(frame_examples, 1):
            # Each panel is drawn on fresh copies so annotations do not
            # accumulate across examples of the same frame.
            crop_image = draw_crop_panel(
                image.copy(), example, gts, crop_scale=args.crop_scale
            )
            panel = combine_full_and_crop(frame_overlay.copy(), crop_image, example)
            rel = Path("examples") / (
                f"{frame_idx:04d}_{ex_idx:02d}_"
                f"{sanitize_token(case_name)}_{sanitize_token(frame_name)}_"
                f"{sanitize_token(example['class_name'])}_{sanitize_token(example['error_type'])}.jpg"
            )
            panel_path = output_dir / rel
            if not cv2.imwrite(str(panel_path), panel, jpeg_params):
                print(f"Warning: failed to write example panel: {panel_path}")

            example_record = dict(example)
            example_record["visualization"] = str(rel)
            frame_entry["examples"].append(example_record)

        index["frames"].append(frame_entry)

    index_path = output_dir / "index.json"
    with open(index_path, "w", encoding="utf-8") as file:
        json.dump(index, file, indent=2)

    print(f"Saved visualization index to: {index_path}")
    print(f"Saved frame overlays to:      {frame_dir}")
    print(f"Saved example panels to:      {example_dir}")


if __name__ == "__main__":
    main()