platform/as_platform/audit/review.py

"""标注质检 — 逐张审核标注质量（Good/Fine/Bad 评分 + PIL 优化渲染）。"""
from __future__ import annotations

import io
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

from PIL import Image, ImageDraw, ImageFont
from sqlalchemy import Column, DateTime, ForeignKey, Integer, String, Text

from as_platform.data.batch import IMG_EXTS
from as_platform.db.engine import session_scope
from as_platform.db.models import Base

# Lower-cased copy of the batch module's IMG_EXTS, frozen as a tuple.
# NOTE(review): not referenced elsewhere in this part of the file — confirm it is still used.
IMAGE_EXTS = tuple(ext.lower() for ext in IMG_EXTS)

# ── PIL font cache ──
# Loaded fonts keyed by pixel size, so each size is resolved only once per process.
_font_cache: dict[int, ImageFont.FreeTypeFont | ImageFont.ImageFont] = {}


def _get_font(size: int) -> ImageFont.FreeTypeFont | ImageFont.ImageFont:
    """Return a font of the given size, loading and caching it on first use.

    Tries DejaVu Sans, then Noto Sans CJK, and finally falls back to PIL's
    built-in default font when neither TrueType file can be loaded.
    """
    cached = _font_cache.get(size)
    if cached is None:
        candidates = (
            "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
            "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
        )
        for font_path in candidates:
            try:
                cached = ImageFont.truetype(font_path, size)
            except Exception:
                # Best effort: any load failure just moves on to the next candidate.
                continue
            break
        else:
            cached = ImageFont.load_default()
        _font_cache[size] = cached
    return cached


# ── YOLO bbox utils ──

def _parse_yolo_line(line: str) -> dict[str, Any] | None:
    """Parse one YOLO label line into ``{"class_id": int, "bbox": (cx, cy, w, h)}``.

    The line must contain at least five whitespace-separated fields; extra
    trailing fields are ignored. The class id may be written as a float
    (``"2.0"``) and is truncated to an int.

    Returns:
        The parsed annotation, or ``None`` when the line is too short, a field
        is not numeric, or any bbox value is non-finite (``inf`` / ``nan``).
    """
    import math

    parts = line.split()
    if len(parts) < 5:
        return None
    try:
        class_id = int(float(parts[0]))
        bbox = tuple(float(p) for p in parts[1:5])
    except (ValueError, OverflowError):
        # ValueError: non-numeric field or nan class id; OverflowError: inf class id.
        return None
    # Non-finite coordinates cannot be converted to pixel positions later on
    # (int(inf) / int(nan) raise), so treat the whole line as malformed.
    if not all(math.isfinite(v) for v in bbox):
        return None
    return {"class_id": class_id, "bbox": bbox}


def _bbox_to_xyxy(bbox: tuple[float, ...], w: int, h: int) -> tuple[int, int, int, int]:
    """Convert a normalized centre-format box to clamped pixel corners.

    Args:
        bbox: ``(cx, cy, bw, bh)`` as fractions of the image size; only the
            first four values are used.
        w: Image width in pixels.
        h: Image height in pixels.

    Returns:
        ``(x1, y1, x2, y2)`` with every coordinate clamped into the image
        (``0..w`` horizontally, ``0..h`` vertically). For non-negative
        ``bw`` / ``bh`` this guarantees ``x1 <= x2`` and ``y1 <= y2`` even when
        the box lies partly or entirely outside the image.
    """
    cx, cy, bw, bh = bbox[:4]

    def _clamp(value: float, upper: int) -> int:
        # Truncate toward zero first (as before), then clamp on BOTH sides.
        return min(upper, max(0, int(value)))

    x1 = _clamp((cx - bw / 2) * w, w)
    y1 = _clamp((cy - bh / 2) * h, h)
    x2 = _clamp((cx + bw / 2) * w, w)
    y2 = _clamp((cy + bh / 2) * h, h)
    return x1, y1, x2, y2


def _parse_labels(label_path: Path) -> list[dict[str, Any]]:
    """Read a YOLO label file and return its usable annotations.

    A falsy or non-existent path yields an empty list. Lines that fail to
    parse, or whose box width or height is not strictly positive, are dropped.
    """
    if not (label_path and label_path.is_file()):
        return []
    raw_lines = label_path.read_text().strip().splitlines()
    parsed = (_parse_yolo_line(raw) for raw in raw_lines)
    return [box for box in parsed if box and box["bbox"][2] > 0 and box["bbox"][3] > 0]


def _class_names_for_campaign(camp) -> dict[int, str]:
    """Map class id to class name for the campaign's task.

    Only campaigns whose ``project`` is ``"dms"`` are supported; anything else
    (including a missing campaign) yields an empty mapping. Names are read from
    the project's registry YAML: from the task entry itself, or from the
    entry of ``camp.mode`` when the task is of type ``"multi"``. ``names`` may
    be a list (index becomes the class id) or a dict (keys are cast to int).
    """
    import yaml
    from as_platform.data.core import load_wf, proj_root

    if not camp or camp.project != "dms":
        return {}

    wf = load_wf()
    registry_path = proj_root(wf, "dms") / wf["projects"]["dms"]["registry"]
    registry = yaml.safe_load(registry_path.read_text(encoding="utf-8")) or {}
    task_cfg = (registry.get("tasks") or {}).get(camp.task) or {}

    # Multi-mode tasks keep their class names under the selected mode.
    names_source = task_cfg
    if camp.mode and task_cfg.get("type") == "multi":
        names_source = (task_cfg.get("modes") or {}).get(camp.mode) or {}
    names = names_source.get("names")

    if isinstance(names, dict):
        return {int(key): str(label) for key, label in names.items()}
    if isinstance(names, list):
        return {idx: str(label) for idx, label in enumerate(names)}
    return {}


def _name_to_class_id(name: str, class_names: dict[int, str]) -> int:
    """Look up a class id by name, case-insensitively.

    Returns 0 when no class matches. If several classes share the same
    lower-cased name, the last one in ``class_names`` wins.
    """
    wanted = name.lower()
    found = 0
    for class_id, label in class_names.items():
        if label.lower() == wanted:
            # No early exit: later duplicates must override earlier ones.
            found = class_id
    return found


def _resolve_yolo_label_path(batch_dir: Path, img_path: Path) -> Path | None:
    """Find the YOLO ``.txt`` label file that belongs to an image.

    Searches ``labels/``, ``labels/train/``, ``labels/val/`` and
    ``labels/yolo/`` under ``batch_dir`` (in that order) for ``<stem>.txt``
    and returns the first existing file, or ``None`` if there is none.
    """
    label_name = f"{img_path.stem}.txt"
    search_dirs = ("labels", "labels/train", "labels/val", "labels/yolo")
    candidates = (batch_dir / sub / label_name for sub in search_dirs)
    return next((path for path in candidates if path.is_file()), None)


def _parse_ls_annotations(path: Path, class_names: dict[int, str]) -> list[dict[str, Any]]:
    """Parse rectangle annotations from a Label Studio style JSON file.

    Reads the ``"result"`` list of the JSON object at ``path`` and converts
    every ``rectanglelabels`` / ``rectangle`` item (percent-based x, y, width,
    height of the top-left corner) into the normalized centre format used by
    the YOLO helpers: ``{"class_id": int, "bbox": (cx, cy, w, h)}``.

    Malformed input is skipped rather than raised: an unreadable or
    undecodable file, JSON that is not an object, a non-list ``result``,
    non-dict items, and items whose geometry is non-numeric, non-finite or
    has a non-positive width/height.

    Args:
        path: JSON file to read (UTF-8).
        class_names: class id -> name mapping used to resolve the first label
            of each item; a missing label is looked up as ``"unknown"``.

    Returns:
        The converted annotations, possibly empty.
    """
    import json
    import math

    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return []
    if not isinstance(data, dict):
        return []
    results = data.get("result") or []
    if not isinstance(results, list):
        return []

    out: list[dict[str, Any]] = []
    for item in results:
        if not isinstance(item, dict) or item.get("type") not in ("rectanglelabels", "rectangle"):
            continue
        val = item.get("value") or {}
        if not isinstance(val, dict):
            continue
        try:
            w_pct = float(val.get("width") or 0)
            h_pct = float(val.get("height") or 0)
            if w_pct <= 0 or h_pct <= 0:
                continue
            x_pct = float(val.get("x") or 0)
            y_pct = float(val.get("y") or 0)
        except (TypeError, ValueError):
            # Geometry that is not numeric cannot be drawn; skip the item.
            continue
        if not all(math.isfinite(v) for v in (x_pct, y_pct, w_pct, h_pct)):
            continue
        labels = val.get("rectanglelabels") or val.get("labels") or []
        label = labels[0] if labels else "unknown"
        cid = _name_to_class_id(str(label), class_names)
        # Percent top-left + size -> normalized centre + size.
        cx = (x_pct + w_pct / 2) / 100.0
        cy = (y_pct + h_pct / 2) / 100.0
        out.append({"class_id": cid, "bbox": (cx, cy, w_pct / 100.0, h_pct / 100.0)})
    return out


def _load_image_annotations(
    batch_dir: Path,
    img_path: Path,
    class_names: dict[int, str],
) -> list[dict[str, Any]]:
    """Load the annotations for one image, preferring YOLO labels.

    If a YOLO label file exists and contains at least one usable box, those
    boxes are returned. Otherwise the Label Studio JSON at
    ``labels/ls_annotations/<task_id>.json`` is parsed when it exists.
    Returns an empty list when neither source yields annotations.
    """
    label_file = _resolve_yolo_label_path(batch_dir, img_path)
    yolo_boxes = _parse_labels(label_file) if label_file else []
    if yolo_boxes:
        return yolo_boxes

    from as_platform.labeling.annotate import _task_id_for_image

    task_id = _task_id_for_image(img_path, batch_dir)
    ls_file = batch_dir / "labels" / "ls_annotations" / f"{task_id}.json"
    if not ls_file.is_file():
        return []
    return _parse_ls_annotations(ls_file, class_names)


def _image_has_labels(batch_dir: Path, img_path: Path, class_names: dict[int, str]) -> bool:
    """Return True when at least one annotation can be loaded for the image."""
    annotations = _load_image_annotations(batch_dir, img_path, class_names)
    return len(annotations) > 0


def _list_review_images(batch_dir: Path) -> list[Path]:
    """Materialize the batch's images, as yielded by ``_iter_batch_images``, into a list."""
    from as_platform.labeling.annotate import _iter_batch_images

    return [*_iter_batch_images(batch_dir)]

# ── Optimized overlay render ──

# RGB colours cycled by class id when drawing boxes and captions.
PALETTE = [(220, 20, 60), (30, 144, 255), (50, 205, 50), (255, 165, 0), (186, 85, 211), (0, 206, 209)]


def render_review_overlay(
    image_path: Path,
    batch_dir: Path,
    class_names: dict[int, str],
    *,
    max_size: int = 800,
    quality: int = 85,
) -> bytes:
    """Render an image with its annotation boxes drawn on top, as JPEG bytes.

    The image is converted to RGB if needed and shrunk so its longest side is
    at most ``max_size`` (a falsy ``max_size`` disables resizing) before any
    drawing happens, so boxes are drawn on the smaller canvas. Each box gets
    an outline and a class caption coloured by ``class_id`` from ``PALETTE``;
    classes without a name are captioned ``cls_<id>``.

    Args:
        image_path: Image file to render.
        batch_dir: Batch directory used to locate the image's annotations.
        class_names: class id -> display name.
        max_size: Maximum length of the longest side in pixels.
        quality: JPEG quality passed to the encoder.

    Returns:
        The encoded JPEG.
    """
    with Image.open(image_path) as source:
        canvas = source if source.mode == "RGB" else source.convert("RGB")
        # Downscale before drawing so the overlay work happens on fewer pixels.
        if max_size and max(canvas.size) > max_size:
            canvas.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
        width, height = canvas.size
        pen = ImageDraw.Draw(canvas)
        # Font size and stroke width scale with the (resized) image width.
        caption_font = _get_font(max(12, min(16, width // 50)))
        stroke = max(1, width // 400)

        for box in _load_image_annotations(batch_dir, image_path, class_names):
            class_id = box["class_id"]
            rgb = PALETTE[class_id % len(PALETTE)]
            left, top, right, bottom = _bbox_to_xyxy(box["bbox"], width, height)
            pen.rectangle((left, top, right, bottom), outline=rgb, width=stroke)
            caption = class_names.get(class_id, f"cls_{class_id}")
            # Caption sits just above the box, kept inside the top edge.
            pen.text((left + 2, max(0, top - 16)), caption, fill=rgb, font=caption_font)

        encoded = io.BytesIO()
        canvas.save(encoded, format="JPEG", quality=quality)
        return encoded.getvalue()


# ── Quality Review Model ──

class LabelingReview(Base):
    """One reviewer verdict for a single image of a labeling campaign."""

    __tablename__ = "labeling_reviews"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Campaign the reviewed image belongs to (indexed for per-campaign lookups).
    campaign_id = Column(String(64), nullable=False, index=True)
    # Image path as submitted by the reviewer; the queue matches it against
    # the path relative to the batch directory.
    image_path = Column(String(512), nullable=False)
    # Verdict: good / fine / bad; rows default to "pending".
    score = Column(String(16), nullable=False, default="pending")
    reviewer_user_id = Column(Integer, ForeignKey("users.id"), nullable=True)
    reviewer_name = Column(String(128), nullable=True)
    comment = Column(Text, nullable=True)
    reviewed_at = Column(DateTime(timezone=True), nullable=True)

    def to_dict(self) -> dict:
        """Serialize the row to a plain dict; ``reviewed_at`` becomes an ISO string or None."""
        plain_fields = (
            "id",
            "campaign_id",
            "image_path",
            "score",
            "reviewer_user_id",
            "reviewer_name",
            "comment",
        )
        payload = {name: getattr(self, name) for name in plain_fields}
        stamp = self.reviewed_at
        payload["reviewed_at"] = stamp.isoformat() if stamp else None
        return payload


# ── Review operations ──

def get_review_queue(campaign_id: str, offset: int = 0, limit: int = 20) -> dict[str, Any]:
    """Return one page of the campaign's images together with their review state.

    Args:
        campaign_id: Campaign whose batch is being reviewed.
        offset: Index of the first image of the page; negative values are
            treated as 0.
        limit: Maximum number of images in the page; negative values are
            treated as 0.

    Returns:
        On success a dict with ``items`` (one entry per image on the page:
        ``id`` / ``image_path`` relative to the batch directory, ``fileName``,
        ``score`` and ``has_label``), ``total`` (all images in the batch),
        the effective ``offset`` / ``limit`` and ``scores`` (good / fine / bad
        counts from the database plus ``pending``). When the campaign, the
        batch directory or the images are missing, a dict with empty
        ``items``, ``total`` 0 and a ``hint`` message is returned instead.
    """
    from as_platform.db.models import LabelingCampaign
    from as_platform.labeling.annotate import resolve_campaign_batch_dir

    with session_scope() as db:
        camp = db.get(LabelingCampaign, campaign_id)
        if not camp:
            return {"items": [], "total": 0, "hint": "Campaign 不存在"}
        batch_dir = resolve_campaign_batch_dir(camp)
        class_names = _class_names_for_campaign(camp)
    if not batch_dir or not batch_dir.is_dir():
        return {"items": [], "total": 0, "hint": "批次目录不存在"}

    all_images = _list_review_images(batch_dir)
    if not all_images:
        return {"items": [], "total": 0, "hint": "无 images 目录"}

    # A negative offset/limit would silently slice from the end of the list.
    offset = max(0, offset)
    limit = max(0, limit)

    # Fetch per-image scores and the aggregate counts in a single session.
    with session_scope() as db:
        reviewed = {
            r.image_path: r.score
            for r in db.query(LabelingReview).filter(LabelingReview.campaign_id == campaign_id).all()
        }
        db_counts = _review_db_counts(db, campaign_id)

    total = len(all_images)
    items = []
    for img in all_images[offset:offset + limit]:
        rel = str(img.relative_to(batch_dir))
        items.append({
            "id": rel, "image_path": rel,
            "fileName": img.name,
            "score": reviewed.get(rel, "pending"),
            "has_label": _image_has_labels(batch_dir, img, class_names),
        })

    good = db_counts.get("good", 0)
    fine = db_counts.get("fine", 0)
    bad = db_counts.get("bad", 0)
    score_counts = {
        "good": good,
        "fine": fine,
        "bad": bad,
        # Only good/fine/bad count as reviewed (same rule as _review_summary);
        # rows stored with any other score are still pending.
        "pending": max(0, total - (good + fine + bad)),
    }

    return {
        "items": items, "total": total,
        "offset": offset, "limit": limit,
        "scores": score_counts,
    }


def get_review_image(campaign_id: str, image_rel_path: str) -> bytes:
    """Render one batch image with its annotation overlay and return JPEG bytes.

    Args:
        campaign_id: Campaign that owns the batch.
        image_rel_path: Image path relative to the campaign's batch directory.

    Returns:
        The JPEG produced by ``render_review_overlay``.

    Raises:
        FileNotFoundError: If ``image_rel_path`` is absolute or contains ``..``
            segments, or if the campaign, its batch directory or the image
            file does not exist.
    """
    # The relative path is caller-supplied: reject absolute paths (which would
    # replace batch_dir when joined) and parent-directory hops so the lookup
    # cannot leave the batch directory. Done first, before any DB access.
    # Symlinks inside the batch are still followed.
    rel = Path(image_rel_path)
    if rel.anchor or ".." in rel.parts:
        raise FileNotFoundError(f"图片不存在: {image_rel_path}")

    from as_platform.db.models import LabelingCampaign
    from as_platform.labeling.annotate import resolve_campaign_batch_dir

    with session_scope() as db:
        camp = db.get(LabelingCampaign, campaign_id)
        if not camp:
            raise FileNotFoundError("Campaign 不存在")
        batch_dir = resolve_campaign_batch_dir(camp)
        class_names = _class_names_for_campaign(camp)
    if not batch_dir:
        raise FileNotFoundError("批次不存在")
    img_path = batch_dir / rel
    if not img_path.is_file():
        raise FileNotFoundError(f"图片不存在: {image_rel_path}")
    return render_review_overlay(img_path, batch_dir, class_names)


def submit_review_scores(
    campaign_id: str,
    scores: list[dict[str, str]],
    reviewer_user_id: int | None = None,
    reviewer_name: str | None = None,
) -> dict[str, Any]:
    """Store review verdicts and advance the campaign once every image is reviewed.

    Each entry of ``scores`` must provide ``"image_path"`` and ``"score"`` and
    may provide ``"comment"``. An existing review for the same campaign and
    image is overwritten; otherwise a new row is inserted.

    After saving, the campaign counts as complete when the number of
    good/fine/bad reviews reaches the number of images in the batch (the same
    image list the review queue and summary use). On completion the campaign
    stage is updated to the outcome of ``_effective_stage_from_review``.

    Args:
        campaign_id: Campaign being reviewed.
        scores: Verdicts to upsert.
        reviewer_user_id: Id of the reviewing user, if known.
        reviewer_name: Display name of the reviewer, if known.

    Returns:
        ``{"ok": True, "updated": <rows written>, "auto_advanced": <bool>,
        "stage": <str | None>}``. ``stage`` is ``None`` unless the review is
        complete; an approved review is reported as ``"review_approved"``
        while the campaign itself is stored as ``"labeling_submitted"``.

    Raises:
        KeyError: If an entry lacks ``"image_path"`` or ``"score"``.
    """
    from as_platform.db.models import LabelingCampaign
    from as_platform.labeling.annotate import resolve_campaign_batch_dir

    now = datetime.now(timezone.utc)
    updated = 0
    with session_scope() as db:
        for item in scores:
            img_path = item["image_path"]
            score = item["score"]
            rec = db.query(LabelingReview).filter(
                LabelingReview.campaign_id == campaign_id,
                LabelingReview.image_path == img_path,
            ).first()
            if rec:
                rec.score = score
                rec.reviewer_user_id = reviewer_user_id
                rec.reviewer_name = reviewer_name
                rec.reviewed_at = now
                rec.comment = item.get("comment")
            else:
                db.add(LabelingReview(
                    campaign_id=campaign_id, image_path=img_path, score=score,
                    reviewer_user_id=reviewer_user_id, reviewer_name=reviewer_name,
                    reviewed_at=now, comment=item.get("comment"),
                ))
            updated += 1
        db.commit()

        # Decide completion from the same inputs as _review_summary: the
        # batch's review image list and the good/fine/bad counts only.
        counts = _review_db_counts(db, campaign_id)
        camp = db.get(LabelingCampaign, campaign_id)
        batch_dir = resolve_campaign_batch_dir(camp) if camp else None
        total_images = 0
        if batch_dir and batch_dir.is_dir():
            total_images = len(_list_review_images(batch_dir))

        stage = _effective_stage_from_review(
            counts.get("good", 0), counts.get("fine", 0), counts.get("bad", 0), total_images,
        )
        # None means "no images"; "in_review" means verdicts are still missing.
        auto_advanced = stage not in (None, "in_review")
        if auto_advanced:
            _update_campaign_stage(db, campaign_id, stage)

    final_stage = None
    if auto_advanced:
        final_stage = "review_approved" if stage == "labeling_submitted" else stage
    return {
        "ok": True,
        "updated": updated,
        "auto_advanced": auto_advanced,
        "stage": final_stage,
    }


def _review_db_counts(db, campaign_id: str) -> dict[str, int]:
    """Count the campaign's review rows per score value (score -> row count)."""
    from sqlalchemy import func

    per_score = (
        db.query(LabelingReview.score, func.count())
        .filter(LabelingReview.campaign_id == campaign_id)
        .group_by(LabelingReview.score)
    )
    tally: dict[str, int] = {}
    for score, row_count in per_score.all():
        tally[score] = row_count
    return tally


# Minimum share of good + fine verdicts (relative to all images) for approval.
PASS_RATE_THRESHOLD = 0.8


def _effective_stage_from_review(good: int, fine: int, bad: int, total: int) -> str | None:
    """Derive the campaign stage from the review counts.

    Returns:
        ``None`` when ``total`` is not positive; ``"in_review"`` while fewer
        than ``total`` images have a good/fine/bad verdict; otherwise
        ``"labeling_submitted"`` if ``(good + fine) / total`` reaches
        ``PASS_RATE_THRESHOLD``, else ``"review_rejected"``.
    """
    if total <= 0:
        return None
    if good + fine + bad < total:
        return "in_review"
    pass_rate = (good + fine) / total
    if pass_rate >= PASS_RATE_THRESHOLD:
        return "labeling_submitted"
    return "review_rejected"


def reconcile_review_stage(campaign_id: str) -> str | None:
    """Bring the stored campaign stage in line with the current review scores.

    While the review is incomplete (or no stage can be derived) nothing is
    written and the currently stored stage is returned. Otherwise the stage
    derived from the scores is written to the campaign and its batch metadata
    when it differs from the stored one.

    Returns:
        The resulting stage, or ``None`` if the campaign no longer exists.
    """
    summary = _review_summary(campaign_id)
    if not summary.get("complete"):
        return summary.get("stage")

    target = _effective_stage_from_review(
        summary["good"], summary["fine"], summary["bad"], summary["total"],
    )
    if not target:
        return summary.get("stage")

    with session_scope() as db:
        from as_platform.db.models import LabelingCampaign

        campaign = db.get(LabelingCampaign, campaign_id)
        if not campaign:
            return None
        if campaign.status != target:
            campaign.status = target
            from as_platform.labeling.batch_stage import update_campaign_batch_meta_stage

            update_campaign_batch_meta_stage(campaign, target)
            db.commit()
        return target


def _update_campaign_stage(db, campaign_id: str, new_stage: str) -> None:
    """Set the campaign's status and mirror it into the batch metadata.

    ``"review_approved"`` is stored as ``"labeling_submitted"``; every other
    stage is stored unchanged. Does nothing if the campaign is not found.
    The session is flushed, not committed, before the metadata update.
    """
    from as_platform.db.models import LabelingCampaign
    from as_platform.labeling.batch_stage import update_campaign_batch_meta_stage

    campaign = db.get(LabelingCampaign, campaign_id)
    if not campaign:
        return
    stored_stage = new_stage
    if new_stage == "review_approved":
        stored_stage = "labeling_submitted"
    campaign.status = stored_stage
    db.flush()
    update_campaign_batch_meta_stage(campaign, stored_stage)


def _review_summary(campaign_id: str) -> dict[str, Any]:
    """Summarize review progress for a campaign.

    Returns a dict with ``good`` / ``fine`` / ``bad`` / ``pending`` counts,
    ``total``, ``reviewed``, ``pass_rate`` (integer percent), ``complete`` and
    the stored ``stage``.

    * Unknown campaign: all zeros, ``complete`` False, empty ``stage``.
    * Batch directory missing: totals come from the database alone —
      ``total`` and ``reviewed`` are the number of review rows and the pass
      rate is relative to that number.
    * Otherwise: ``total`` is the number of batch images, ``reviewed`` is
      good + fine + bad, and the pass rate is relative to ``total``.
    """
    from as_platform.labeling.annotate import resolve_campaign_batch_dir
    from as_platform.db.models import LabelingCampaign

    with session_scope() as db:
        campaign = db.get(LabelingCampaign, campaign_id)
        if not campaign:
            return dict(
                good=0, fine=0, bad=0, pending=0, total=0,
                reviewed=0, pass_rate=0, complete=False, stage="",
            )
        batch_dir = resolve_campaign_batch_dir(campaign)
        stage = campaign.status or ""

        if batch_dir and batch_dir.is_dir():
            images = _list_review_images(batch_dir)
            tally = _review_db_counts(db, campaign_id)
        else:
            # No batch on disk: fall back to what the database knows.
            tally = _review_db_counts(db, campaign_id)
            row_total = sum(tally.values())
            passed = tally.get("good", 0) + tally.get("fine", 0)
            return {
                "good": tally.get("good", 0),
                "fine": tally.get("fine", 0),
                "bad": tally.get("bad", 0),
                "pending": 0,
                "total": row_total,
                "reviewed": row_total,
                "pass_rate": round(passed / max(row_total, 1) * 100),
                "complete": row_total > 0,
                "stage": stage,
            }

    image_total = len(images)
    good, fine, bad = (tally.get(key, 0) for key in ("good", "fine", "bad"))
    done = good + fine + bad
    return {
        "good": good,
        "fine": fine,
        "bad": bad,
        "pending": max(0, image_total - done),
        "total": image_total,
        "reviewed": done,
        "pass_rate": round((good + fine) / max(image_total, 1) * 100),
        "complete": done >= image_total and image_total > 0,
        "stage": stage,
    }


def review_progress(campaign_id: str) -> dict[str, Any]:
    """Return the review summary, reconciling the stored stage once review is complete."""
    summary = _review_summary(campaign_id)
    if not summary.get("complete"):
        return summary
    stage = reconcile_review_stage(campaign_id)
    if stage:
        summary["stage"] = stage
    return summary


def review_progress_batch(campaign_ids: list[str]) -> dict[str, Any]:
    """Return ``{"items": {campaign_id: progress}}`` for up to 50 campaign ids.

    Ids are stripped of surrounding whitespace; empty ones are skipped before
    the 50-id cap is applied.
    """
    cleaned = []
    for raw in campaign_ids:
        if raw and raw.strip():
            cleaned.append(raw.strip())
    return {"items": {cid: review_progress(cid) for cid in cleaned[:50]}}