Replace subprocess build with promote_batch SDK, add ADAS cuboid export/fit/validate pipeline, stage normalization, and offline unit tests wired into smoke_labeling_api. Co-authored-by: Cursor <cursoragent@cursor.com>
288 lines
11 KiB
Python
288 lines
11 KiB
Python
"""标注质检 — 逐张审核标注质量(Good/Fine/Bad 评分 + PIL 优化渲染)。"""
|
||
from __future__ import annotations
|
||
|
||
import io
|
||
from dataclasses import dataclass, field
|
||
from datetime import datetime, timezone
|
||
from pathlib import Path
|
||
from typing import Any
|
||
|
||
from PIL import Image, ImageDraw, ImageFont
|
||
from sqlalchemy import Column, DateTime, ForeignKey, Integer, String, Text
|
||
|
||
from as_platform.data.batch import IMG_EXTS
|
||
from as_platform.db.engine import session_scope
|
||
from as_platform.db.models import Base
|
||
|
||
# Lower-cased copy of the IMG_EXTS entries imported from as_platform.data.batch.
# get_review_queue uses each entry as a filename suffix when globbing images.
IMAGE_EXTS = tuple(ext.lower() for ext in IMG_EXTS)
|
||
|
||
# ── PIL font cache ──
|
||
# Fonts already loaded, keyed by point size, so each size is loaded at most once.
_font_cache: dict[int, ImageFont.FreeTypeFont | ImageFont.ImageFont] = {}


def _get_font(size: int) -> ImageFont.FreeTypeFont | ImageFont.ImageFont:
    """Return a cached PIL font for ``size``.

    Tries the DejaVu Sans TrueType file first, then the Noto Sans CJK file,
    and finally falls back to PIL's built-in default font. The result is
    stored in ``_font_cache`` and reused on later calls with the same size.
    """
    cached = _font_cache.get(size)
    if cached is not None:
        return cached

    candidates = (
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
        "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
    )
    for font_file in candidates:
        try:
            loaded = ImageFont.truetype(font_file, size)
        except Exception:
            # Best effort: any failure to load this file moves on to the next.
            continue
        break
    else:
        # None of the TrueType candidates could be loaded.
        loaded = ImageFont.load_default()

    _font_cache[size] = loaded
    return loaded
|
||
|
||
|
||
# ── YOLO bbox utils ──
|
||
|
||
def _parse_yolo_line(line: str) -> dict[str, Any] | None:
|
||
parts = line.strip().split()
|
||
if len(parts) < 5:
|
||
return None
|
||
try:
|
||
return {"class_id": int(float(parts[0])), "bbox": tuple(map(float, parts[1:5]))}
|
||
except Exception:
|
||
return None
|
||
|
||
|
||
def _bbox_to_xyxy(bbox: tuple[float, ...], w: int, h: int) -> tuple[int, int, int, int]:
    """Convert a normalized centre-format box to pixel corner coordinates.

    Args:
        bbox: ``(cx, cy, bw, bh)`` as fractions of the image size; extra
            elements are ignored.
        w: Image width in pixels.
        h: Image height in pixels.

    Returns:
        ``(x1, y1, x2, y2)`` with every coordinate clamped to the image and
        ``x1 <= x2``, ``y1 <= y2``. Boxes lying outside the image or with a
        negative width/height therefore collapse or flip into a valid
        rectangle instead of producing inverted corners.
    """
    cx, cy, bw, bh = bbox[:4]
    half_w = bw / 2
    half_h = bh / 2

    # Clamp BOTH corners on each axis (not just the near side) and order them,
    # so the result is always safe to hand to a rectangle-drawing call.
    xs = sorted(min(max(int(edge * w), 0), w) for edge in (cx - half_w, cx + half_w))
    ys = sorted(min(max(int(edge * h), 0), h) for edge in (cy - half_h, cy + half_h))
    return xs[0], ys[0], xs[1], ys[1]
|
||
|
||
|
||
def _parse_labels(label_path: Path) -> list[dict[str, Any]]:
    """Read a YOLO label file and return its parseable annotations.

    Returns an empty list when ``label_path`` is falsy or is not an existing
    file. Lines that ``_parse_yolo_line`` rejects are skipped.
    """
    if not (label_path and label_path.is_file()):
        return []
    raw_lines = label_path.read_text().strip().splitlines()
    parsed = (_parse_yolo_line(raw) for raw in raw_lines)
    return [ann for ann in parsed if ann]
|
||
|
||
|
||
# ── Optimized overlay render ──
|
||
|
||
# RGB colours for box outlines and label text; render_review_overlay picks
# one per annotation with class_id % len(PALETTE), so colours repeat cyclically.
PALETTE = [(220, 20, 60), (30, 144, 255), (50, 205, 50), (255, 165, 0), (186, 85, 211), (0, 206, 209)]
|
||
|
||
|
||
def render_review_overlay(
    image_path: Path,
    label_path: Path | None,
    class_names: dict[int, str],
    *,
    max_size: int = 800,
    quality: int = 85,
) -> bytes:
    """Draw the YOLO boxes from ``label_path`` onto the image and return JPEG bytes.

    The image is converted to RGB when needed and shrunk in place so its
    longest side is at most ``max_size`` (skipped when ``max_size`` is falsy)
    before anything is drawn, so boxes are rendered at the output resolution.

    Args:
        image_path: Image file to open.
        label_path: YOLO label file, or ``None`` to draw no boxes.
        class_names: Maps class id to display name; unknown ids are shown
            as ``cls_<id>``.
        max_size: Upper bound for the longest image side, in pixels.
        quality: JPEG quality passed to the encoder.
    """
    with Image.open(image_path) as source:
        canvas = source if source.mode == "RGB" else source.convert("RGB")

        # Downscale before drawing so the overlay work happens on fewer pixels.
        if max_size and max(canvas.size) > max_size:
            canvas.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)

        width, height = canvas.size
        painter = ImageDraw.Draw(canvas)
        # Font size and outline width scale with the image width.
        label_font = _get_font(max(12, min(16, width // 50)))
        outline_px = max(1, width // 400)

        annotations = _parse_labels(label_path) if label_path else []
        for annotation in annotations:
            class_id = annotation["class_id"]
            rgb = PALETTE[class_id % len(PALETTE)]
            left, top, right, bottom = _bbox_to_xyxy(annotation["bbox"], width, height)
            painter.rectangle((left, top, right, bottom), outline=rgb, width=outline_px)
            caption = class_names.get(class_id, f"cls_{class_id}")
            # Caption sits just above the box, kept inside the top image edge.
            painter.text((left + 2, max(0, top - 16)), caption, fill=rgb, font=label_font)

        encoded = io.BytesIO()
        canvas.save(encoded, format="JPEG", quality=quality)
        return encoded.getvalue()
|
||
|
||
|
||
# ── Quality Review Model ──
|
||
|
||
class LabelingReview(Base):
    """One reviewer verdict for one image of a labeling campaign."""

    __tablename__ = "labeling_reviews"

    id = Column(Integer, primary_key=True, autoincrement=True)
    campaign_id = Column(String(64), nullable=False, index=True)
    # Stored as text; the review functions in this module use paths relative to the batch dir.
    image_path = Column(String(512), nullable=False)
    # Review verdict: good / fine / bad; rows default to "pending".
    score = Column(String(16), nullable=False, default="pending")
    reviewer_user_id = Column(Integer, ForeignKey("users.id"), nullable=True)
    reviewer_name = Column(String(128), nullable=True)
    comment = Column(Text, nullable=True)
    reviewed_at = Column(DateTime(timezone=True), nullable=True)

    def to_dict(self) -> dict:
        """Return the row as a plain dict, with ``reviewed_at`` as ISO-8601 text or ``None``."""
        plain_fields = (
            "id",
            "campaign_id",
            "image_path",
            "score",
            "reviewer_user_id",
            "reviewer_name",
            "comment",
        )
        payload = {name: getattr(self, name) for name in plain_fields}
        stamp = self.reviewed_at
        payload["reviewed_at"] = stamp.isoformat() if stamp else None
        return payload
|
||
|
||
|
||
# ── Review operations ──
|
||
|
||
def get_review_queue(campaign_id: str, offset: int = 0, limit: int = 20) -> dict[str, Any]:
    """Return one page of a campaign's images together with their review scores.

    Args:
        campaign_id: Primary key of the LabelingCampaign.
        offset: Index of the first image of the page; negative values are
            treated as 0.
        limit: Maximum number of images in the page; negative values are
            treated as 0.

    Returns:
        On success a dict with ``items`` (the page), ``total`` (all images),
        the effective ``offset``/``limit``, and ``scores`` — a tally of review
        scores over ALL images of the campaign (unreviewed images count as
        "pending"). When the campaign, batch directory or ``images``
        directory is missing, ``{"items": [], "total": 0, "hint": ...}``.
    """
    # Function-scope imports are kept as in the original module layout.
    from as_platform.labeling.annotate import resolve_campaign_batch_dir
    from as_platform.db.engine import session_scope
    from as_platform.db.models import LabelingCampaign

    with session_scope() as db:
        camp = db.get(LabelingCampaign, campaign_id)
        if not camp:
            return {"items": [], "total": 0, "hint": "Campaign 不存在"}
        batch_dir = resolve_campaign_batch_dir(camp)

    if not batch_dir or not batch_dir.is_dir():
        return {"items": [], "total": 0, "hint": "批次目录不存在"}

    img_dir = batch_dir / "images"
    if not img_dir.is_dir():
        return {"items": [], "total": 0, "hint": "无 images 目录"}

    # Single directory walk with a case-insensitive suffix match. Globbing
    # once per lower-cased extension would list a file twice whenever two
    # entries lower-case to the same suffix, and would miss "IMG.JPG" on a
    # case-sensitive filesystem. Ordering is unchanged: grouped by extension
    # in IMAGE_EXTS order, sorted by path within each group.
    exts = tuple(dict.fromkeys(IMAGE_EXTS))
    ranked: list[tuple[int, Path]] = []
    for candidate in img_dir.rglob("*"):
        lowered = candidate.name.lower()
        rank = next((i for i, ext in enumerate(exts) if lowered.endswith(ext)), None)
        if rank is not None:
            ranked.append((rank, candidate))
    ranked.sort()
    all_images = [path for _, path in ranked]

    # Existing reviews for this campaign, keyed by batch-relative image path.
    with session_scope() as db:
        reviewed = {
            r.image_path: r.score
            for r in db.query(LabelingReview).filter(LabelingReview.campaign_id == campaign_id).all()
        }

    rel_paths = [str(img.relative_to(batch_dir)) for img in all_images]

    # Tally over every image, not just the current page; .get() keeps an
    # unexpected stored score from raising KeyError.
    score_counts = {"good": 0, "fine": 0, "bad": 0, "pending": 0}
    for rel in rel_paths:
        score = reviewed.get(rel, "pending")
        score_counts[score] = score_counts.get(score, 0) + 1

    start = max(offset, 0)
    size = max(limit, 0)
    items = []
    for img, rel in zip(all_images[start:start + size], rel_paths[start:start + size]):
        label_path = batch_dir / "labels" / (img.stem + ".txt")
        items.append({
            "id": rel, "image_path": rel,
            "fileName": img.name,
            "score": reviewed.get(rel, "pending"),
            "has_label": label_path.is_file(),
        })

    return {
        "items": items, "total": len(all_images),
        "offset": start, "limit": size,
        "scores": score_counts,
    }
|
||
|
||
|
||
def get_review_image(campaign_id: str, image_rel_path: str, class_names: dict[int, str]) -> bytes:
    """Render one campaign image with its label overlay and return JPEG bytes.

    Args:
        campaign_id: Primary key of the LabelingCampaign.
        image_rel_path: Image path relative to the campaign's batch directory.
        class_names: Maps class id to display name for the overlay captions.

    Raises:
        FileNotFoundError: The campaign or its batch directory does not exist,
            or ``image_rel_path`` is absolute / contains ``..`` and would
            therefore point outside the batch directory.
    """
    from as_platform.labeling.annotate import resolve_campaign_batch_dir
    from as_platform.db.engine import session_scope
    from as_platform.db.models import LabelingCampaign

    with session_scope() as db:
        camp = db.get(LabelingCampaign, campaign_id)
        if not camp:
            raise FileNotFoundError("Campaign 不存在")
        batch_dir = resolve_campaign_batch_dir(camp)
    if not batch_dir:
        raise FileNotFoundError("批次不存在")

    # image_rel_path is caller-supplied: refuse anything that could escape the
    # batch directory. The check is lexical on purpose — resolving symlinks
    # would reject batches whose images are symlinked in from elsewhere.
    rel = Path(image_rel_path)
    if rel.is_absolute() or ".." in rel.parts:
        raise FileNotFoundError("图片路径非法")

    img_path = batch_dir / rel
    lbl_path = batch_dir / "labels" / (img_path.stem + ".txt")
    return render_review_overlay(img_path, lbl_path if lbl_path.is_file() else None, class_names)
|
||
|
||
|
||
def submit_review_scores(
    campaign_id: str,
    scores: list[dict[str, str]],
    reviewer_user_id: int | None = None,
    reviewer_name: str | None = None,
) -> dict[str, Any]:
    """Upsert review scores and auto-advance the campaign once every image is reviewed.

    Args:
        campaign_id: Primary key of the LabelingCampaign.
        scores: One dict per image with ``image_path`` and ``score`` keys and
            an optional ``comment``.
        reviewer_user_id: Id of the reviewing user, stored on each row.
        reviewer_name: Display name of the reviewer, stored on each row.

    Returns:
        ``{"ok": True, "updated": <rows written>, "auto_advanced": <bool>}``.
        ``auto_advanced`` is True when the number of non-pending reviews
        reached the number of images in the batch; the campaign then moves to
        "review_approved" if at least 80% of the images are scored "good",
        otherwise to "review_rejected".

    Raises:
        KeyError: An entry of ``scores`` lacks ``image_path`` or ``score``.
    """
    from as_platform.labeling.annotate import resolve_campaign_batch_dir
    from as_platform.db.models import LabelingCampaign

    now = datetime.now(timezone.utc)
    updated = 0
    auto_advanced = False

    with session_scope() as db:
        for item in scores:
            img_path = item["image_path"]
            score = item["score"]
            comment = item.get("comment")
            rec = db.query(LabelingReview).filter(
                LabelingReview.campaign_id == campaign_id,
                LabelingReview.image_path == img_path,
            ).first()
            if rec:
                rec.score = score
                rec.reviewer_user_id = reviewer_user_id
                rec.reviewer_name = reviewer_name
                rec.reviewed_at = now
                rec.comment = comment
            else:
                db.add(LabelingReview(
                    campaign_id=campaign_id, image_path=img_path, score=score,
                    reviewer_user_id=reviewer_user_id, reviewer_name=reviewer_name,
                    reviewed_at=now, comment=comment,
                ))
            updated += 1
        db.commit()

        # Count the batch images using the session that is already open.
        camp = db.get(LabelingCampaign, campaign_id)
        batch_dir = resolve_campaign_batch_dir(camp) if camp else None
        total_images = 0
        if batch_dir:
            images_dir = batch_dir / "images"
            if images_dir.is_dir():
                # One walk, case-insensitive suffix match against the
                # module-level IMAGE_EXTS, so each file is counted once.
                total_images = sum(
                    1 for path in images_dir.rglob("*")
                    if path.name.lower().endswith(IMAGE_EXTS)
                )

        counts = _review_db_counts(db, campaign_id)
        # Rows still marked "pending" are not finished reviews and must not
        # trigger the stage change.
        reviewed = sum(cnt for score, cnt in counts.items() if score != "pending")

        if total_images > 0 and reviewed >= total_images:
            pass_rate = counts.get("good", 0) / total_images
            new_stage = "review_approved" if pass_rate >= 0.8 else "review_rejected"
            _update_campaign_stage(db, campaign_id, new_stage)
            # Persist the stage change explicitly, mirroring the commit above;
            # harmless if session_scope also commits on exit.
            db.commit()
            auto_advanced = True

    return {"ok": True, "updated": updated, "auto_advanced": auto_advanced}
|
||
|
||
|
||
def _review_db_counts(db, campaign_id: str) -> dict[str, int]:
    """Return ``{score: row count}`` for the campaign's reviews, aggregated in SQL.

    Scores with no rows are absent from the result.
    """
    from sqlalchemy import func

    grouped = (
        db.query(LabelingReview.score, func.count())
        .filter(LabelingReview.campaign_id == campaign_id)
        .group_by(LabelingReview.score)
    )
    tallies: dict[str, int] = {}
    for score, cnt in grouped.all():
        tallies[score] = cnt
    return tallies
|
||
|
||
|
||
def _update_campaign_stage(db, campaign_id: str, new_stage: str) -> None:
    """Set the campaign status and mirror it into the batch metadata.

    "review_approved" is stored as "labeling_submitted"; any other stage is
    stored unchanged. Does nothing when the campaign does not exist. The
    change is flushed but not committed here.
    """
    from as_platform.db.models import LabelingCampaign
    from as_platform.labeling.batch_stage import update_campaign_batch_meta_stage

    campaign = db.get(LabelingCampaign, campaign_id)
    if not campaign:
        return

    if new_stage == "review_approved":
        stage_to_store = "labeling_submitted"
    else:
        stage_to_store = new_stage

    campaign.status = stage_to_store
    db.flush()
    update_campaign_batch_meta_stage(campaign, stage_to_store)
|
||
|
||
|
||
def review_progress(campaign_id: str) -> dict[str, int]:
    """Return how many reviews of the campaign carry each score.

    The result always contains "good", "fine", "bad" and "pending" (0 when no
    row has that score); any other stored score value appears as an extra key.
    """
    counts = {"good": 0, "fine": 0, "bad": 0, "pending": 0}
    with session_scope() as db:
        # Aggregate in SQL instead of loading every review row just to tally it.
        counts.update(_review_db_counts(db, campaign_id))
    return counts
|