Files
HSAP/platform/as_platform/audit/review.py

490 lines
18 KiB
Python
Raw Normal View History

"""Annotation QA: per-image label quality review (Good/Fine/Bad scoring + PIL-optimized overlay rendering)."""
from __future__ import annotations
import io
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from PIL import Image, ImageDraw, ImageFont
from sqlalchemy import Column, DateTime, ForeignKey, Integer, String, Text
from as_platform.data.batch import IMG_EXTS
from as_platform.db.engine import session_scope
from as_platform.db.models import Base
# Lower-cased copy of the image extensions declared by the batch module (IMG_EXTS).
IMAGE_EXTS = tuple(ext.lower() for ext in IMG_EXTS)
# ── PIL font cache ──
_font_cache: dict[int, ImageFont.FreeTypeFont | ImageFont.ImageFont] = {}


def _get_font(size: int) -> ImageFont.FreeTypeFont | ImageFont.ImageFont:
    """Return a font of the requested pixel size, memoised per size.

    Tries DejaVu Sans first, then Noto Sans CJK, and finally falls back to
    PIL's built-in default font when neither file can be loaded.
    """
    if size in _font_cache:
        return _font_cache[size]

    font = None
    for candidate in (
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
        "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
    ):
        try:
            font = ImageFont.truetype(candidate, size)
        except Exception:
            # Font unavailable on this host; try the next candidate.
            continue
        break
    if font is None:
        font = ImageFont.load_default()

    _font_cache[size] = font
    return font
# ── YOLO bbox utils ──
def _parse_yolo_line(line: str) -> dict[str, Any] | None:
    """Parse one YOLO label line.

    The expected layout is ``<class> <cx> <cy> <w> <h> [extra...]``; any
    fields after the fifth are ignored.

    Returns:
        ``{"class_id": int, "bbox": (cx, cy, w, h)}`` on success, or ``None``
        when the line has fewer than five fields, a field is not numeric, or
        a coordinate is ``nan``/``inf``.
    """
    import math

    fields = line.split()
    if len(fields) < 5:
        return None
    try:
        # The class may be written as "3" or "3.0"; int(float("inf")) raises
        # OverflowError and int(float("nan")) raises ValueError.
        class_id = int(float(fields[0]))
        bbox = tuple(float(tok) for tok in fields[1:5])
    except (ValueError, OverflowError):
        return None
    # Non-finite coordinates cannot be converted to pixel positions later
    # (int(nan) / int(inf) raise), so treat them as a malformed line.
    if not all(math.isfinite(v) for v in bbox):
        return None
    return {"class_id": class_id, "bbox": bbox}
def _bbox_to_xyxy(bbox: tuple[float, ...], w: int, h: int) -> tuple[int, int, int, int]:
    """Convert a normalised centre-format box to clamped pixel corners.

    Args:
        bbox: ``(cx, cy, bw, bh)`` as fractions of the image size; only the
            first four entries are used.
        w: Image width in pixels.
        h: Image height in pixels.

    Returns:
        ``(x1, y1, x2, y2)`` with every coordinate clamped into the image
        (``0..w`` horizontally, ``0..h`` vertically).  Clamping both ends of
        each corner keeps ``x1 <= x2`` and ``y1 <= y2`` for boxes with
        non-negative size even when the box lies outside the image, so the
        result is always safe to hand to a rectangle drawing call.
    """
    cx, cy, bw, bh = bbox[:4]
    half_w = bw / 2
    half_h = bh / 2

    def _clamp(value: float, upper: int) -> int:
        # Truncate toward zero first (as before), then pin into [0, upper].
        return min(max(int(value), 0), upper)

    return (
        _clamp((cx - half_w) * w, w),
        _clamp((cy - half_h) * h, h),
        _clamp((cx + half_w) * w, w),
        _clamp((cy + half_h) * h, h),
    )
def _parse_labels(label_path: Path) -> list[dict[str, Any]]:
    """Read a YOLO label file and return its usable annotations.

    Lines that fail to parse, or whose box width/height is not strictly
    positive, are dropped.  A missing path or file yields an empty list.
    """
    if not (label_path and label_path.is_file()):
        return []
    raw_lines = label_path.read_text().strip().splitlines()
    parsed = (_parse_yolo_line(raw) for raw in raw_lines)
    return [
        ann
        for ann in parsed
        if ann and ann["bbox"][2] > 0 and ann["bbox"][3] > 0
    ]
def _class_names_for_campaign(camp) -> dict[int, str]:
    """Map class ids to class names for the campaign's task.

    Only campaigns of the ``dms`` project are supported; anything else (or a
    missing campaign) yields an empty mapping.  Names are read from the
    project's registry YAML: from the task entry itself, or from the selected
    mode's entry when the task type is ``multi`` and the campaign has a mode.
    """
    import yaml
    from as_platform.data.core import load_wf, proj_root

    if not camp or camp.project != "dms":
        return {}

    workflow = load_wf()
    project_root = proj_root(workflow, "dms")
    registry_file = project_root / workflow["projects"]["dms"]["registry"]
    registry = yaml.safe_load(registry_file.read_text(encoding="utf-8")) or {}
    task_cfg = (registry.get("tasks") or {}).get(camp.task) or {}

    # Multi-mode tasks keep their class names under the selected mode.
    names_holder = task_cfg
    if camp.mode and task_cfg.get("type") == "multi":
        names_holder = (task_cfg.get("modes") or {}).get(camp.mode) or {}
    names = names_holder.get("names")

    if isinstance(names, list):
        # A list is positional: the index is the class id.
        return dict(enumerate(map(str, names)))
    if isinstance(names, dict):
        mapping: dict[int, str] = {}
        for key, value in names.items():
            mapping[int(key)] = str(value)
        return mapping
    return {}
def _name_to_class_id(name: str, class_names: dict[int, str]) -> int:
    """Look up a class id by name, ignoring case.

    Unknown names map to class ``0``.  When several ids share the same
    lower-cased name, the one appearing last in ``class_names`` wins.
    """
    by_lower_name: dict[str, int] = {}
    for class_id, label in class_names.items():
        by_lower_name[label.lower()] = class_id
    wanted = name.lower()
    return by_lower_name[wanted] if wanted in by_lower_name else 0
def _resolve_yolo_label_path(batch_dir: Path, img_path: Path) -> Path | None:
    """Find the YOLO ``.txt`` label file matching an image's stem.

    Searches ``labels/``, ``labels/train/``, ``labels/val/`` and
    ``labels/yolo/`` under ``batch_dir`` in that order and returns the first
    existing file, or ``None`` when there is none.
    """
    label_name = f"{img_path.stem}.txt"
    candidates = (
        batch_dir / f"{subdir}/{label_name}"
        for subdir in ("labels", "labels/train", "labels/val", "labels/yolo")
    )
    return next((path for path in candidates if path.is_file()), None)
def _parse_ls_annotations(path: Path, class_names: dict[int, str]) -> list[dict[str, Any]]:
    """Convert rectangle results from an annotation JSON file to YOLO-style boxes.

    The file's ``result`` list is scanned for ``rectanglelabels`` /
    ``rectangle`` items whose ``value`` gives ``x``, ``y``, ``width`` and
    ``height`` as percentages.  Each becomes
    ``{"class_id": int, "bbox": (cx, cy, w, h)}`` with normalised fractions.
    Unreadable or invalid JSON yields an empty list.
    """
    import json

    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError):
        return []

    boxes: list[dict[str, Any]] = []
    for region in payload.get("result") or []:
        if region.get("type") not in ("rectanglelabels", "rectangle"):
            continue
        value = region.get("value") or {}
        width_pct = float(value.get("width") or 0)
        height_pct = float(value.get("height") or 0)
        if width_pct <= 0 or height_pct <= 0:
            # Degenerate boxes carry no usable area.
            continue
        left_pct = float(value.get("x") or 0)
        top_pct = float(value.get("y") or 0)
        label_names = value.get("rectanglelabels") or value.get("labels") or []
        label_name = label_names[0] if label_names else "unknown"
        # Top-left + size (percent) -> centre + size (fraction).
        centre_x = (left_pct + width_pct / 2) / 100.0
        centre_y = (top_pct + height_pct / 2) / 100.0
        boxes.append(
            {
                "class_id": _name_to_class_id(str(label_name), class_names),
                "bbox": (centre_x, centre_y, width_pct / 100.0, height_pct / 100.0),
            }
        )
    return boxes
def _load_image_annotations(
    batch_dir: Path,
    img_path: Path,
    class_names: dict[int, str],
) -> list[dict[str, Any]]:
    """Load the annotations for one image.

    A YOLO label file is preferred; when it is missing or contains no usable
    boxes, the per-task JSON under ``labels/ls_annotations`` is used instead.
    Returns an empty list when neither source provides annotations.
    """
    yolo_path = _resolve_yolo_label_path(batch_dir, img_path)
    yolo_anns = _parse_labels(yolo_path) if yolo_path else []
    if yolo_anns:
        return yolo_anns

    from as_platform.labeling.annotate import _task_id_for_image

    task_id = _task_id_for_image(img_path, batch_dir)
    ls_file = batch_dir / "labels" / "ls_annotations" / f"{task_id}.json"
    if not ls_file.is_file():
        return []
    return _parse_ls_annotations(ls_file, class_names)
def _image_has_labels(batch_dir: Path, img_path: Path, class_names: dict[int, str]) -> bool:
    """Tell whether at least one annotation can be loaded for the image."""
    annotations = _load_image_annotations(batch_dir, img_path, class_names)
    return len(annotations) > 0
def _list_review_images(batch_dir: Path) -> list[Path]:
    """Materialise the batch's images (as yielded by ``_iter_batch_images``) into a list."""
    from as_platform.labeling.annotate import _iter_batch_images

    return [*_iter_batch_images(batch_dir)]
# ── Optimized overlay render ──
# Box/label colours; a class id picks its colour by modulo over this list.
PALETTE = [(220, 20, 60), (30, 144, 255), (50, 205, 50), (255, 165, 0), (186, 85, 211), (0, 206, 209)]


def render_review_overlay(
    image_path: Path,
    batch_dir: Path,
    class_names: dict[int, str],
    *,
    max_size: int = 800,
    quality: int = 85,
) -> bytes:
    """Render an image with its annotation boxes drawn on top.

    The image is converted to RGB if needed and shrunk so that its longest
    side is at most ``max_size`` (skipped when ``max_size`` is falsy) before
    any drawing happens, so boxes are drawn on the smaller canvas.

    Args:
        image_path: Image file to render.
        batch_dir: Batch directory used to locate the image's annotations.
        class_names: class id -> display name; unknown ids show as ``cls_<id>``.
        max_size: Upper bound for the longest image side, in pixels.
        quality: JPEG quality passed to the encoder.

    Returns:
        The encoded JPEG as bytes.
    """
    with Image.open(image_path) as im:
        if im.mode != "RGB":
            im = im.convert("RGB")
        # Downscale before drawing so the overlay work happens on fewer pixels.
        if max_size and max_size < max(im.size):
            im.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)

        width, height = im.size
        canvas = ImageDraw.Draw(im)
        font = _get_font(max(12, min(16, width // 50)))
        stroke = max(1, width // 400)

        for ann in _load_image_annotations(batch_dir, image_path, class_names):
            class_id = ann["class_id"]
            colour = PALETTE[class_id % len(PALETTE)]
            left, top, right, bottom = _bbox_to_xyxy(ann["bbox"], width, height)
            canvas.rectangle((left, top, right, bottom), outline=colour, width=stroke)
            caption = class_names.get(class_id, f"cls_{class_id}")
            # Caption sits just above the box, pinned to the top edge if needed.
            canvas.text((left + 2, max(0, top - 16)), caption, fill=colour, font=font)

        encoded = io.BytesIO()
        im.save(encoded, format="JPEG", quality=quality)
        return encoded.getvalue()
# ── Quality Review Model ──
class LabelingReview(Base):
    """One reviewer verdict for a single image of a labeling campaign."""

    __tablename__ = "labeling_reviews"

    id = Column(Integer, primary_key=True, autoincrement=True)
    campaign_id = Column(String(64), nullable=False, index=True)
    # Stored as given by the caller; the review queue keys it by batch-relative path.
    image_path = Column(String(512), nullable=False)
    # good / fine / bad; rows created without an explicit score default to "pending".
    score = Column(String(16), nullable=False, default="pending")
    reviewer_user_id = Column(Integer, ForeignKey("users.id"), nullable=True)
    reviewer_name = Column(String(128), nullable=True)
    comment = Column(Text, nullable=True)
    reviewed_at = Column(DateTime(timezone=True), nullable=True)

    def to_dict(self) -> dict:
        """Serialise the row to a plain dict; ``reviewed_at`` becomes ISO-8601 or None."""
        payload = {
            column: getattr(self, column)
            for column in (
                "id",
                "campaign_id",
                "image_path",
                "score",
                "reviewer_user_id",
                "reviewer_name",
                "comment",
            )
        }
        stamp = self.reviewed_at
        payload["reviewed_at"] = stamp.isoformat() if stamp else None
        return payload
# ── Review operations ──
def get_review_queue(campaign_id: str, offset: int = 0, limit: int = 20) -> dict[str, Any]:
    """Return one page of the campaign's images together with review state.

    Args:
        campaign_id: Campaign whose batch is being reviewed.
        offset: Index of the first image of the page; negative values are
            treated as 0 (a negative slice start would otherwise page from
            the end of the list).
        limit: Maximum number of images in the page; negative values are
            treated as 0.

    Returns:
        ``{"items", "total", "offset", "limit", "scores"}`` where each item
        carries ``id``/``image_path`` (path relative to the batch directory),
        ``fileName``, ``score`` (``"pending"`` when unreviewed) and
        ``has_label``.  ``scores`` holds the good/fine/bad/pending totals.
        When the campaign, its batch directory or its images are missing, a
        short ``{"items": [], "total": 0, "hint": ...}`` payload is returned.
    """
    from as_platform.labeling.annotate import resolve_campaign_batch_dir
    from as_platform.db.models import LabelingCampaign

    offset = max(0, offset)
    limit = max(0, limit)

    with session_scope() as db:
        camp = db.get(LabelingCampaign, campaign_id)
        if not camp:
            return {"items": [], "total": 0, "hint": "Campaign 不存在"}
        batch_dir = resolve_campaign_batch_dir(camp)
        class_names = _class_names_for_campaign(camp)

    if not batch_dir or not batch_dir.is_dir():
        return {"items": [], "total": 0, "hint": "批次目录不存在"}
    all_images = _list_review_images(batch_dir)
    if not all_images:
        return {"items": [], "total": 0, "hint": "无 images 目录"}

    # One session serves both the per-image scores and the aggregate counts.
    with session_scope() as db:
        rows = db.query(LabelingReview).filter(LabelingReview.campaign_id == campaign_id).all()
        reviewed = {row.image_path: row.score for row in rows}
        db_counts = _review_db_counts(db, campaign_id)

    total = len(all_images)
    items = []
    for img in all_images[offset:offset + limit]:
        rel = str(img.relative_to(batch_dir))
        items.append({
            "id": rel, "image_path": rel,
            "fileName": img.name,
            "score": reviewed.get(rel, "pending"),
            "has_label": _image_has_labels(batch_dir, img, class_names),
        })

    reviewed_n = sum(db_counts.values())
    score_counts = {
        "good": db_counts.get("good", 0),
        "fine": db_counts.get("fine", 0),
        "bad": db_counts.get("bad", 0),
        # Never negative, even if the DB holds more rows than images on disk.
        "pending": max(0, total - reviewed_n),
    }
    return {
        "items": items, "total": total,
        "offset": offset, "limit": limit,
        "scores": score_counts,
    }
def get_review_image(campaign_id: str, image_rel_path: str) -> bytes:
    """Render one campaign image with its annotation overlay as JPEG bytes.

    Args:
        campaign_id: Campaign the image belongs to.
        image_rel_path: Image path relative to the campaign's batch directory.

    Raises:
        FileNotFoundError: If the campaign or its batch cannot be resolved,
            if ``image_rel_path`` is absolute or contains ``..`` segments
            (it must stay inside the batch directory), or if the file does
            not exist.
    """
    from as_platform.labeling.annotate import resolve_campaign_batch_dir
    from as_platform.db.models import LabelingCampaign

    with session_scope() as db:
        camp = db.get(LabelingCampaign, campaign_id)
        if not camp:
            raise FileNotFoundError("Campaign 不存在")
        batch_dir = resolve_campaign_batch_dir(camp)
        class_names = _class_names_for_campaign(camp)

    if not batch_dir:
        raise FileNotFoundError("批次不存在")

    # The relative path comes from the caller: joining an absolute path would
    # discard batch_dir entirely and ".." would climb out of it.  The check is
    # purely lexical so symlinked image folders inside the batch keep working.
    rel = Path(image_rel_path)
    if rel.is_absolute() or ".." in rel.parts:
        raise FileNotFoundError(f"图片不存在: {image_rel_path}")

    img_path = batch_dir / rel
    if not img_path.is_file():
        raise FileNotFoundError(f"图片不存在: {image_rel_path}")
    return render_review_overlay(img_path, batch_dir, class_names)
def submit_review_scores(
    campaign_id: str,
    scores: list[dict[str, str]],
    reviewer_user_id: int | None = None,
    reviewer_name: str | None = None,
) -> dict[str, Any]:
    """Store review scores and advance the campaign stage once QA is complete.

    Args:
        campaign_id: Campaign being reviewed.
        scores: One dict per image with ``image_path`` and ``score`` keys and
            an optional ``comment``.  An existing review for the same image
            is overwritten; otherwise a new row is inserted.
        reviewer_user_id: Id of the reviewing user, if known.
        reviewer_name: Display name of the reviewer, if known.

    Returns:
        ``{"ok": True, "updated": n, "auto_advanced": bool, "stage": ...}``.
        ``auto_advanced`` is true when the number of stored reviews has
        reached the number of images in the batch; ``stage`` is then the
        resulting stage (``"review_approved"``, ``"review_rejected"`` or
        ``"in_review"``), otherwise ``None``.

    The image total comes from ``_list_review_images`` — the same source the
    review queue and the review summary use — so all three agree on when a
    campaign is fully reviewed.
    """
    from as_platform.labeling.annotate import resolve_campaign_batch_dir
    from as_platform.db.models import LabelingCampaign

    now = datetime.now(timezone.utc)
    updated = 0
    auto_advanced = False
    final_stage = None

    with session_scope() as db:
        for item in scores:
            img_path = item["image_path"]
            score = item["score"]
            comment = item.get("comment")
            rec = db.query(LabelingReview).filter(
                LabelingReview.campaign_id == campaign_id,
                LabelingReview.image_path == img_path,
            ).first()
            if rec:
                rec.score = score
                rec.reviewer_user_id = reviewer_user_id
                rec.reviewer_name = reviewer_name
                rec.reviewed_at = now
                rec.comment = comment
            else:
                db.add(LabelingReview(
                    campaign_id=campaign_id, image_path=img_path, score=score,
                    reviewer_user_id=reviewer_user_id, reviewer_name=reviewer_name,
                    reviewed_at=now, comment=comment,
                ))
            updated += 1
        db.commit()

        # Check whether every image is reviewed and, if so, settle the stage.
        counts = _review_db_counts(db, campaign_id)
        camp = db.get(LabelingCampaign, campaign_id)
        batch_dir = resolve_campaign_batch_dir(camp) if camp else None
        total_images = 0
        if batch_dir and batch_dir.is_dir():
            total_images = len(_list_review_images(batch_dir))

        reviewed = sum(counts.values())
        auto_advanced = total_images > 0 and reviewed >= total_images
        if auto_advanced:
            eff = _effective_stage_from_review(
                counts.get("good", 0), counts.get("fine", 0), counts.get("bad", 0), total_images,
            )
            # Callers see "review_approved" for a passing review; the stage
            # updater translates it back to the stored status.
            final_stage = "review_approved" if eff == "labeling_submitted" else eff
            if final_stage and final_stage != "in_review":
                _update_campaign_stage(db, campaign_id, final_stage)

    return {
        "ok": True,
        "updated": updated,
        "auto_advanced": auto_advanced,
        "stage": final_stage,
    }
def _review_db_counts(db, campaign_id: str) -> dict[str, int]:
    """Count the campaign's review rows per score value.

    Returns a ``{score: row_count}`` mapping containing only the scores that
    actually occur for the campaign.
    """
    from sqlalchemy import func

    grouped = (
        db.query(LabelingReview.score, func.count())
        .filter(LabelingReview.campaign_id == campaign_id)
        .group_by(LabelingReview.score)
    )
    tally: dict[str, int] = {}
    for score, row_count in grouped.all():
        tally[score] = row_count
    return tally
# Minimum share of good+fine images (relative to the image total) for approval.
PASS_RATE_THRESHOLD = 0.8


def _effective_stage_from_review(good: int, fine: int, bad: int, total: int) -> str | None:
    """Derive the campaign status implied by the review tallies.

    Returns:
        ``None`` when ``total`` is not positive; ``"in_review"`` while fewer
        than ``total`` images carry a good/fine/bad score;
        ``"labeling_submitted"`` when the good+fine share of ``total``
        reaches ``PASS_RATE_THRESHOLD``; ``"review_rejected"`` otherwise.
    """
    if total <= 0:
        return None
    if good + fine + bad < total:
        return "in_review"
    if (good + fine) / total >= PASS_RATE_THRESHOLD:
        return "labeling_submitted"
    return "review_rejected"
def reconcile_review_stage(campaign_id: str) -> str | None:
    """Bring the stored campaign status in line with the current review scores.

    Nothing is written while the review is incomplete or no stage can be
    derived; the currently stored stage is returned in those cases.  When the
    derived stage differs from the stored status, the campaign row and the
    batch metadata are updated.

    Returns:
        The stage after reconciliation, or ``None`` if the campaign row is gone.
    """
    summary = _review_summary(campaign_id)
    stored_stage = summary.get("stage")
    if not summary.get("complete"):
        return stored_stage

    expected = _effective_stage_from_review(
        *(summary[key] for key in ("good", "fine", "bad", "total"))
    )
    if not expected:
        return stored_stage

    with session_scope() as db:
        from as_platform.db.models import LabelingCampaign

        camp = db.get(LabelingCampaign, campaign_id)
        if not camp:
            return None
        if camp.status != expected:
            camp.status = expected
            from as_platform.labeling.batch_stage import update_campaign_batch_meta_stage

            update_campaign_batch_meta_stage(camp, expected)
            db.commit()
        return expected
def _update_campaign_stage(db, campaign_id: str, new_stage: str) -> None:
    """Set the campaign's status and mirror it into the batch metadata.

    ``"review_approved"`` is stored as ``"labeling_submitted"``; every other
    stage is stored unchanged.  A missing campaign is silently ignored.  The
    change is flushed but not committed here.
    """
    from as_platform.db.models import LabelingCampaign
    from as_platform.labeling.batch_stage import update_campaign_batch_meta_stage

    camp = db.get(LabelingCampaign, campaign_id)
    if not camp:
        return
    if new_stage == "review_approved":
        effective = "labeling_submitted"
    else:
        effective = new_stage
    camp.status = effective
    db.flush()
    update_campaign_batch_meta_stage(camp, effective)
def _review_summary(campaign_id: str) -> dict[str, Any]:
    """Summarise review progress for a campaign.

    Returns a dict with ``good``/``fine``/``bad``/``pending`` counts,
    ``total``, ``reviewed``, ``pass_rate`` (integer percent), ``complete``
    and the stored ``stage``.  An unknown campaign yields an all-zero,
    incomplete summary.  When the batch directory is unavailable, the number
    of stored reviews stands in for the image total.
    """
    from as_platform.labeling.annotate import resolve_campaign_batch_dir
    from as_platform.db.models import LabelingCampaign

    with session_scope() as db:
        camp = db.get(LabelingCampaign, campaign_id)
        if not camp:
            empty: dict[str, Any] = dict.fromkeys(
                ("good", "fine", "bad", "pending", "total", "reviewed", "pass_rate"), 0
            )
            empty["complete"] = False
            empty["stage"] = ""
            return empty

        batch_dir = resolve_campaign_batch_dir(camp)
        stage = camp.status or ""

        if not batch_dir or not batch_dir.is_dir():
            # No images on disk to count: fall back to what the DB knows.
            counts = _review_db_counts(db, campaign_id)
            reviewed = sum(counts.values())
            good, fine, bad = (counts.get(key, 0) for key in ("good", "fine", "bad"))
            return {
                "good": good,
                "fine": fine,
                "bad": bad,
                "pending": 0,
                "total": reviewed,
                "reviewed": reviewed,
                "pass_rate": round((good + fine) / max(reviewed, 1) * 100),
                "complete": reviewed > 0,
                "stage": stage,
            }

        total = len(_list_review_images(batch_dir))
        counts = _review_db_counts(db, campaign_id)
        good, fine, bad = (counts.get(key, 0) for key in ("good", "fine", "bad"))
        reviewed = good + fine + bad
        return {
            "good": good,
            "fine": fine,
            "bad": bad,
            "pending": max(0, total - reviewed),
            "total": total,
            "reviewed": reviewed,
            "pass_rate": round((good + fine) / max(total, 1) * 100),
            "complete": reviewed >= total and total > 0,
            "stage": stage,
        }
def review_progress(campaign_id: str) -> dict[str, Any]:
    """Return the review summary, reconciling the stored stage once QA is complete.

    For a complete review the campaign stage is reconciled first, and the
    summary's ``stage`` reflects the reconciled value when there is one.
    """
    summary = _review_summary(campaign_id)
    if not summary.get("complete"):
        return summary
    reconciled_stage = reconcile_review_stage(campaign_id)
    if reconciled_stage:
        summary["stage"] = reconciled_stage
    return summary
def review_progress_batch(campaign_ids: list[str]) -> dict[str, Any]:
    """Collect review progress for several campaigns at once.

    Blank or empty ids are skipped, ids are stripped of surrounding
    whitespace, and only the first 50 remaining ids are processed.

    Returns:
        ``{"items": {campaign_id: progress_dict}}``.
    """
    cleaned: list[str] = []
    for raw_id in campaign_ids:
        if raw_id and raw_id.strip():
            cleaned.append(raw_id.strip())
    return {"items": {cid: review_progress(cid) for cid in cleaned[:50]}}