HSAP/platform/as_platform/audit/review.py

"""标注质检 — 逐张审核标注质量（Good/Fine/Bad 评分 + PIL 优化渲染）。"""
from __future__ import annotations

import io
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

from PIL import Image, ImageDraw, ImageFont
from sqlalchemy import Column, DateTime, ForeignKey, Integer, String, Text

from as_platform.data.batch import IMG_EXTS
from as_platform.db.engine import session_scope
from as_platform.db.models import Base

IMAGE_EXTS = tuple(ext.lower() for ext in IMG_EXTS)

# ── PIL font cache ──
_font_cache: dict[int, ImageFont.FreeTypeFont | ImageFont.ImageFont] = {}

def _get_font(size: int) -> ImageFont.FreeTypeFont | ImageFont.ImageFont:
    if size not in _font_cache:
        try:
            _font_cache[size] = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", size)
        except Exception:
            try:
                _font_cache[size] = ImageFont.truetype("/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc", size)
            except Exception:
                _font_cache[size] = ImageFont.load_default()
    return _font_cache[size]


# ── YOLO bbox utils ──

def _parse_yolo_line(line: str) -> dict[str, Any] | None:
    parts = line.strip().split()
    if len(parts) < 5:
        return None
    try:
        return {"class_id": int(float(parts[0])), "bbox": tuple(map(float, parts[1:5]))}
    except Exception:
        return None


def _bbox_to_xyxy(bbox: tuple[float, ...], w: int, h: int) -> tuple[int, int, int, int]:
    cx, cy, bw, bh = bbox[:4]
    x1 = int((cx - bw / 2) * w)
    y1 = int((cy - bh / 2) * h)
    x2 = int((cx + bw / 2) * w)
    y2 = int((cy + bh / 2) * h)
    return max(0, x1), max(0, y1), min(w, x2), min(h, y2)


def _parse_labels(label_path: Path) -> list[dict[str, Any]]:
    if not label_path or not label_path.is_file():
        return []
    results = []
    for line in label_path.read_text().strip().splitlines():
        ann = _parse_yolo_line(line)
        if ann:
            results.append(ann)
    return results


# ── Optimized overlay render ──

PALETTE = [(220, 20, 60), (30, 144, 255), (50, 205, 50), (255, 165, 0), (186, 85, 211), (0, 206, 209)]


def render_review_overlay(
    image_path: Path,
    label_path: Path | None,
    class_names: dict[int, str],
    *,
    max_size: int = 800,
    quality: int = 85,
) -> bytes:
    """PIL optimized: single pass resize + draw, no copy. Returns JPEG bytes."""
    with Image.open(image_path) as im:
        if im.mode != "RGB":
            im = im.convert("RGB")
        # Resize first for faster drawing
        if max_size and max(im.size) > max_size:
            im.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
        w, h = im.size
        draw = ImageDraw.Draw(im)
        font = _get_font(max(12, min(16, w // 50)))
        line_w = max(1, w // 400)

        anns = _parse_labels(label_path) if label_path else []
        for ann in anns:
            cid = ann["class_id"]
            color = PALETTE[cid % len(PALETTE)]
            x1, y1, x2, y2 = _bbox_to_xyxy(ann["bbox"], w, h)
            draw.rectangle((x1, y1, x2, y2), outline=color, width=line_w)
            label = class_names.get(cid, f"cls_{cid}")
            draw.text((x1 + 2, max(0, y1 - 16)), label, fill=color, font=font)

        buf = io.BytesIO()
        im.save(buf, format="JPEG", quality=quality)
        return buf.getvalue()


# ── Quality Review Model ──

class LabelingReview(Base):
    __tablename__ = "labeling_reviews"
    id = Column(Integer, primary_key=True, autoincrement=True)
    campaign_id = Column(String(64), nullable=False, index=True)
    image_path = Column(String(512), nullable=False)
    score = Column(String(16), nullable=False, default="pending")  # good / fine / bad
    reviewer_user_id = Column(Integer, ForeignKey("users.id"), nullable=True)
    reviewer_name = Column(String(128), nullable=True)
    comment = Column(Text, nullable=True)
    reviewed_at = Column(DateTime(timezone=True), nullable=True)

    def to_dict(self) -> dict:
        return {
            "id": self.id,
            "campaign_id": self.campaign_id,
            "image_path": self.image_path,
            "score": self.score,
            "reviewer_user_id": self.reviewer_user_id,
            "reviewer_name": self.reviewer_name,
            "comment": self.comment,
            "reviewed_at": self.reviewed_at.isoformat() if self.reviewed_at else None,
        }


# ── Review operations ──

def get_review_queue(campaign_id: str, offset: int = 0, limit: int = 20) -> dict[str, Any]:
    from as_platform.labeling.annotate import resolve_campaign_batch_dir
    from as_platform.db.engine import session_scope
    from as_platform.db.models import LabelingCampaign

    with session_scope() as db:
        camp = db.get(LabelingCampaign, campaign_id)
        if not camp:
            return {"items": [], "total": 0, "hint": "Campaign 不存在"}
        batch_dir = resolve_campaign_batch_dir(camp)
    if not batch_dir or not batch_dir.is_dir():
        return {"items": [], "total": 0, "hint": "批次目录不存在"}

    img_dir = batch_dir / "images"
    if not img_dir.is_dir():
        return {"items": [], "total": 0, "hint": "无 images 目录"}

    all_images: list[Path] = []
    for ext in IMAGE_EXTS:
        all_images.extend(sorted(img_dir.rglob(f"*{ext}")))

    # Get existing reviews
    with session_scope() as db:
        reviewed = {
            r.image_path: r.score
            for r in db.query(LabelingReview).filter(LabelingReview.campaign_id == campaign_id).all()
        }

    total = len(all_images)
    page = all_images[offset:offset + limit]
    score_counts = {"good": 0, "fine": 0, "bad": 0, "pending": 0}
    items = []
    for img in page:
        rel = str(img.relative_to(batch_dir))
        score = reviewed.get(rel, "pending")
        score_counts[score] += 1
        label_path = batch_dir / "labels" / (img.stem + ".txt")
        items.append({
            "id": rel, "image_path": rel,
            "fileName": img.name,
            "score": score,
            "has_label": label_path.is_file(),
        })

    # Fill remaining counts
    for img in all_images:
        rel = str(img.relative_to(batch_dir))
        s = reviewed.get(rel, "pending")
        if s not in score_counts:
            score_counts[s] = 0

    return {
        "items": items, "total": total,
        "offset": offset, "limit": limit,
        "scores": score_counts,
    }


def get_review_image(campaign_id: str, image_rel_path: str, class_names: dict[int, str]) -> bytes:
    from as_platform.labeling.annotate import resolve_campaign_batch_dir
    from as_platform.db.engine import session_scope
    from as_platform.db.models import LabelingCampaign
    with session_scope() as db:
        camp = db.get(LabelingCampaign, campaign_id)
        if not camp:
            raise FileNotFoundError("Campaign 不存在")
        batch_dir = resolve_campaign_batch_dir(camp)
    if not batch_dir:
        raise FileNotFoundError("批次不存在")
    img_path = batch_dir / image_rel_path
    lbl_path = batch_dir / "labels" / (img_path.stem + ".txt")
    return render_review_overlay(img_path, lbl_path if lbl_path.is_file() else None, class_names)


def submit_review_scores(
    campaign_id: str,
    scores: list[dict[str, str]],
    reviewer_user_id: int | None = None,
    reviewer_name: str | None = None,
) -> dict[str, Any]:
    now = datetime.now(timezone.utc)
    updated = 0
    with session_scope() as db:
        for item in scores:
            img_path = item["image_path"]
            score = item["score"]
            rec = db.query(LabelingReview).filter(
                LabelingReview.campaign_id == campaign_id,
                LabelingReview.image_path == img_path,
            ).first()
            if rec:
                rec.score = score
                rec.reviewer_user_id = reviewer_user_id
                rec.reviewer_name = reviewer_name
                rec.reviewed_at = now
                rec.comment = item.get("comment")
            else:
                db.add(LabelingReview(
                    campaign_id=campaign_id, image_path=img_path, score=score,
                    reviewer_user_id=reviewer_user_id, reviewer_name=reviewer_name,
                    reviewed_at=now, comment=item.get("comment"),
                ))
            updated += 1
        db.commit()

        # Check if all images are reviewed and auto-advance stage
        counts = _review_db_counts(db, campaign_id)
        from as_platform.labeling.annotate import resolve_campaign_batch_dir
        from as_platform.data.batch import IMG_EXTS
        from as_platform.db.engine import session_scope as _scope
        from as_platform.db.models import LabelingCampaign as _LC
        with _scope() as _db:
            _camp = _db.get(_LC, campaign_id)
            batch_dir = resolve_campaign_batch_dir(_camp) if _camp else None
        total_images = 0
        if batch_dir and (batch_dir / "images").is_dir():
            for ext in IMG_EXTS:
                total_images += len(list((batch_dir / "images").rglob(f"*{ext}")))

        reviewed = sum(counts.values())
        if reviewed >= total_images and total_images > 0:
            pass_rate = counts.get("good", 0) / max(total_images, 1)
            new_stage = "review_approved" if pass_rate >= 0.8 else "review_rejected"
            _update_campaign_stage(db, campaign_id, new_stage)

    return {"ok": True, "updated": updated, "auto_advanced": reviewed >= total_images if total_images > 0 else False}


def _review_db_counts(db, campaign_id: str) -> dict[str, int]:
    from sqlalchemy import func
    from collections import Counter
    rows = db.query(LabelingReview.score, func.count()).filter(
        LabelingReview.campaign_id == campaign_id
    ).group_by(LabelingReview.score).all()
    return {score: cnt for score, cnt in rows}


def _update_campaign_stage(db, campaign_id: str, new_stage: str) -> None:
    from as_platform.db.models import LabelingCampaign
    from as_platform.labeling.batch_stage import update_campaign_batch_meta_stage
    camp = db.get(LabelingCampaign, campaign_id)
    if camp:
        camp.status = new_stage
        db.flush()
        update_campaign_batch_meta_stage(camp, new_stage)


def review_progress(campaign_id: str) -> dict[str, int]:
    with session_scope() as db:
        rows = db.query(LabelingReview).filter(LabelingReview.campaign_id == campaign_id).all()
    counts = {"good": 0, "fine": 0, "bad": 0, "pending": 0}
    for r in rows:
        counts[r.score] = counts.get(r.score, 0) + 1
    return counts