将 platform + CVAT 合并为单文件 docker-compose.yml,完善 .env 与 init/dev_up 脚本;新增 docs/DEPLOY.md 并更新 README 以支持新机器部署;含数据湖示例、车队地图、紧凑表格 UI、ADAS det_7cls 路径与批次台账等近期改动。 Co-authored-by: Cursor <cursoragent@cursor.com>
490 lines
18 KiB
Python
490 lines
18 KiB
Python
"""标注质检 — 逐张审核标注质量(Good/Fine/Bad 评分 + PIL 优化渲染)。"""
|
||
from __future__ import annotations
|
||
|
||
import io
|
||
from dataclasses import dataclass, field
|
||
from datetime import datetime, timezone
|
||
from pathlib import Path
|
||
from typing import Any
|
||
|
||
from PIL import Image, ImageDraw, ImageFont
|
||
from sqlalchemy import Column, DateTime, ForeignKey, Integer, String, Text
|
||
|
||
from as_platform.data.batch import IMG_EXTS
|
||
from as_platform.db.engine import session_scope
|
||
from as_platform.db.models import Base
|
||
|
||
IMAGE_EXTS = tuple(ext.lower() for ext in IMG_EXTS)
|
||
|
||
# ── PIL font cache ──
|
||
_font_cache: dict[int, ImageFont.FreeTypeFont | ImageFont.ImageFont] = {}


def _get_font(size: int) -> ImageFont.FreeTypeFont | ImageFont.ImageFont:
    """Return a font for the requested size, loading it at most once per size.

    DejaVu Sans is tried first, then Noto Sans CJK; if neither can be loaded
    PIL's built-in default font is used. The result is memoized in
    ``_font_cache`` keyed by ``size``.
    """
    if size in _font_cache:
        return _font_cache[size]

    candidates = (
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
        "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
    )
    for font_file in candidates:
        try:
            loaded = ImageFont.truetype(font_file, size)
        except Exception:
            # Any failure to load this candidate just moves on to the next one.
            continue
        break
    else:
        # No candidate could be loaded.
        loaded = ImageFont.load_default()

    _font_cache[size] = loaded
    return loaded
|
||
|
||
|
||
# ── YOLO bbox utils ──
|
||
|
||
def _parse_yolo_line(line: str) -> dict[str, Any] | None:
    """Parse one YOLO label line into ``{"class_id": int, "bbox": (cx, cy, w, h)}``.

    The first whitespace-separated field is the class id (an int, or a float
    such as ``"3.0"`` which is truncated to int); the next four fields are the
    box values. Any further trailing fields are ignored.

    Returns:
        The parsed annotation, or None when the line has fewer than five
        fields, a field is not numeric, or a box value is not finite.
    """
    import math

    fields = line.split()
    if len(fields) < 5:
        return None
    try:
        # int(float("inf")) raises OverflowError, int(float("nan")) ValueError.
        class_id = int(float(fields[0]))
        bbox = tuple(float(tok) for tok in fields[1:5])
    except (ValueError, OverflowError):
        return None
    # "nan"/"inf" parse as floats but cannot be converted to pixel
    # coordinates later, so treat them as malformed here.
    if not all(math.isfinite(v) for v in bbox):
        return None
    return {"class_id": class_id, "bbox": bbox}
|
||
|
||
|
||
def _bbox_to_xyxy(bbox: tuple[float, ...], w: int, h: int) -> tuple[int, int, int, int]:
    """Convert a normalized ``(cx, cy, bw, bh)`` box to pixel ``(x1, y1, x2, y2)``.

    Args:
        bbox: Center/size box; the first four values are multiplied by the
            image size, so they are expected as fractions of it.
        w: Image width in pixels.
        h: Image height in pixels.

    Returns:
        Integer corners, each clamped into ``[0, w]`` / ``[0, h]``. Clamping
        both corners on both sides guarantees ``x1 <= x2`` and ``y1 <= y2``
        even for a box lying entirely outside the image (it collapses onto
        the nearest edge instead of producing inverted coordinates).
    """
    cx, cy, bw, bh = bbox[:4]

    def _clamp(value: int, upper: int) -> int:
        return min(max(0, value), upper)

    x1 = _clamp(int((cx - bw / 2) * w), w)
    y1 = _clamp(int((cy - bh / 2) * h), h)
    x2 = _clamp(int((cx + bw / 2) * w), w)
    y2 = _clamp(int((cy + bh / 2) * h), h)
    return x1, y1, x2, y2
|
||
|
||
|
||
def _parse_labels(label_path: Path | None) -> list[dict[str, Any]]:
    """Read a YOLO label file and return its usable annotations.

    Args:
        label_path: Path of the ``.txt`` label file; a falsy value or a path
            that is not a regular file yields an empty list.

    Returns:
        One dict per line accepted by ``_parse_yolo_line`` whose box width and
        height are both greater than zero. Unparseable lines are skipped.
    """
    if not label_path or not label_path.is_file():
        return []
    # Explicit encoding so the result does not depend on the process locale
    # (matches how the other text files in this module are read).
    text = label_path.read_text(encoding="utf-8")
    parsed = (_parse_yolo_line(raw) for raw in text.splitlines())
    return [
        ann
        for ann in parsed
        if ann and ann["bbox"][2] > 0 and ann["bbox"][3] > 0
    ]
|
||
|
||
|
||
def _class_names_for_campaign(camp) -> dict[int, str]:
    """Return the ``class_id -> name`` mapping for a campaign's task.

    Only campaigns whose ``project`` is ``"dms"`` are resolved; a missing
    campaign or any other project yields an empty mapping. Names come from the
    registry YAML referenced by the workflow config: from the task entry, or
    from the entry of ``camp.mode`` when the task's ``type`` is ``"multi"``.
    ``names`` may be a list (index becomes the id) or a dict (keys are cast
    to int); anything else yields an empty mapping.
    """
    import yaml
    from as_platform.data.core import load_wf, proj_root

    if not camp or camp.project != "dms":
        return {}

    wf = load_wf()
    registry_file = proj_root(wf, "dms") / wf["projects"]["dms"]["registry"]
    registry = yaml.safe_load(registry_file.read_text(encoding="utf-8")) or {}
    task_cfg = (registry.get("tasks") or {}).get(camp.task) or {}

    if camp.mode and task_cfg.get("type") == "multi":
        source = (task_cfg.get("modes") or {}).get(camp.mode) or {}
    else:
        source = task_cfg
    names = source.get("names")

    if isinstance(names, dict):
        return {int(key): str(value) for key, value in names.items()}
    if isinstance(names, list):
        return {idx: str(value) for idx, value in enumerate(names)}
    return {}
|
||
|
||
|
||
def _name_to_class_id(name: str, class_names: dict[int, str]) -> int:
    """Look up the class id for a label name, ignoring case.

    When several ids share the same lower-cased name the last one in
    ``class_names`` wins. Unknown names map to class id 0.
    """
    wanted = name.lower()
    matched = 0
    for class_id, class_name in class_names.items():
        if class_name.lower() == wanted:
            matched = class_id
    return matched
|
||
|
||
|
||
def _resolve_yolo_label_path(batch_dir: Path, img_path: Path) -> Path | None:
    """Find the YOLO ``.txt`` label file that belongs to an image.

    The file name is the image stem plus ``.txt``; it is searched under
    ``labels/``, ``labels/train/``, ``labels/val/`` and ``labels/yolo/`` of
    the batch directory, in that order.

    Returns:
        The first candidate that exists as a file, or None if there is none.
    """
    stem = img_path.stem
    candidates = (
        f"labels/{stem}.txt",
        f"labels/train/{stem}.txt",
        f"labels/val/{stem}.txt",
        f"labels/yolo/{stem}.txt",
    )
    existing = (batch_dir / rel for rel in candidates if (batch_dir / rel).is_file())
    return next(existing, None)
|
||
|
||
|
||
def _parse_ls_annotations(path: Path, class_names: dict[int, str]) -> list[dict[str, Any]]:
    """Read rectangle annotations from a Label Studio style JSON file.

    The file is expected to be a JSON object with a ``result`` list. Entries
    of type ``rectanglelabels`` / ``rectangle`` carry a ``value`` with ``x``,
    ``y``, ``width``, ``height`` (divided by 100 here, i.e. treated as
    percentages) and a label list under ``rectanglelabels`` or ``labels``.

    This is a best-effort reader: an unreadable or malformed file yields an
    empty list, and malformed individual entries are skipped.

    Args:
        path: JSON file to read.
        class_names: ``class_id -> name`` mapping used to turn the first label
            of each box into a class id (see ``_name_to_class_id``).

    Returns:
        ``{"class_id": int, "bbox": (cx, cy, w, h)}`` dicts with the box
        expressed as fractions of the image size.
    """
    import json

    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return []
    if not isinstance(data, dict):
        return []
    results = data.get("result")
    if not isinstance(results, list):
        return []

    out: list[dict[str, Any]] = []
    for item in results:
        if not isinstance(item, dict):
            continue
        if item.get("type") not in ("rectanglelabels", "rectangle"):
            continue
        val = item.get("value") or {}
        if not isinstance(val, dict):
            continue
        try:
            w_pct = float(val.get("width") or 0)
            h_pct = float(val.get("height") or 0)
            x_pct = float(val.get("x") or 0)
            y_pct = float(val.get("y") or 0)
        except (TypeError, ValueError):
            # Non-numeric geometry: skip this box, keep the rest.
            continue
        if w_pct <= 0 or h_pct <= 0:
            continue
        labels = val.get("rectanglelabels") or val.get("labels") or []
        label = labels[0] if labels else "unknown"
        cid = _name_to_class_id(str(label), class_names)
        # (x, y) is used as the box origin; convert to a center-based box.
        cx = (x_pct + w_pct / 2) / 100.0
        cy = (y_pct + h_pct / 2) / 100.0
        out.append({"class_id": cid, "bbox": (cx, cy, w_pct / 100.0, h_pct / 100.0)})
    return out
|
||
|
||
|
||
def _load_image_annotations(
    batch_dir: Path,
    img_path: Path,
    class_names: dict[int, str],
) -> list[dict[str, Any]]:
    """Load the annotations of one image, preferring YOLO labels.

    If a YOLO label file exists and contains at least one usable box, those
    boxes are returned. Otherwise the Label Studio JSON at
    ``labels/ls_annotations/<task id>.json`` is parsed when present. With no
    source available the result is an empty list.
    """
    yolo_file = _resolve_yolo_label_path(batch_dir, img_path)
    yolo_anns = _parse_labels(yolo_file) if yolo_file else []
    if yolo_anns:
        return yolo_anns

    from as_platform.labeling.annotate import _task_id_for_image

    task_id = _task_id_for_image(img_path, batch_dir)
    ls_file = batch_dir / "labels" / "ls_annotations" / f"{task_id}.json"
    if not ls_file.is_file():
        return []
    return _parse_ls_annotations(ls_file, class_names)
|
||
|
||
|
||
def _image_has_labels(batch_dir: Path, img_path: Path, class_names: dict[int, str]) -> bool:
    """Tell whether at least one annotation can be loaded for the image."""
    annotations = _load_image_annotations(batch_dir, img_path, class_names)
    return True if annotations else False
|
||
|
||
|
||
def _list_review_images(batch_dir: Path) -> list[Path]:
    """Materialize the batch's images (as yielded by ``_iter_batch_images``) into a list."""
    from as_platform.labeling.annotate import _iter_batch_images

    return [*_iter_batch_images(batch_dir)]
|
||
|
||
# ── Optimized overlay render ──
|
||
|
||
PALETTE = [(220, 20, 60), (30, 144, 255), (50, 205, 50), (255, 165, 0), (186, 85, 211), (0, 206, 209)]


def render_review_overlay(
    image_path: Path,
    batch_dir: Path,
    class_names: dict[int, str],
    *,
    max_size: int = 800,
    quality: int = 85,
) -> bytes:
    """Render an image with its annotation boxes drawn on top, as JPEG bytes.

    Args:
        image_path: Image file to open.
        batch_dir: Batch directory used to locate the image's annotations.
        class_names: ``class_id -> name`` mapping for the box captions; ids
            without a name are captioned ``cls_<id>``.
        max_size: Longest side of the output in pixels; larger images are
            downscaled first. A falsy value disables resizing.
        quality: JPEG quality passed to the encoder.

    Returns:
        The encoded JPEG.
    """
    with Image.open(image_path) as im:
        if im.mode != "RGB":
            im = im.convert("RGB")
        # Downscale before drawing so boxes and text are drawn on the small image.
        if max_size and max(im.size) > max_size:
            im.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
        w, h = im.size
        draw = ImageDraw.Draw(im)
        font = _get_font(max(12, min(16, w // 50)))
        line_w = max(1, w // 400)

        for ann in _load_image_annotations(batch_dir, image_path, class_names):
            cid = ann["class_id"]
            color = PALETTE[cid % len(PALETTE)]
            x1, y1, x2, y2 = _bbox_to_xyxy(ann["bbox"], w, h)
            if x2 < x1 or y2 < y1:
                # A box outside the image can come back inverted; recent
                # Pillow versions raise ValueError for such a rectangle, so
                # one bad annotation must not fail the whole render.
                continue
            draw.rectangle((x1, y1, x2, y2), outline=color, width=line_w)
            label = class_names.get(cid, f"cls_{cid}")
            # Caption sits just above the box, kept inside the image at the top.
            draw.text((x1 + 2, max(0, y1 - 16)), label, fill=color, font=font)

        buf = io.BytesIO()
        im.save(buf, format="JPEG", quality=quality)
    return buf.getvalue()
|
||
|
||
|
||
# ── Quality Review Model ──
|
||
|
||
class LabelingReview(Base):
    """One reviewer verdict for one image of a labeling campaign."""

    __tablename__ = "labeling_reviews"

    id = Column(Integer, primary_key=True, autoincrement=True)
    campaign_id = Column(String(64), nullable=False, index=True)
    # Stored by callers in this module as the path relative to the batch directory.
    image_path = Column(String(512), nullable=False)
    # good / fine / bad; "pending" is the column default.
    score = Column(String(16), nullable=False, default="pending")
    reviewer_user_id = Column(Integer, ForeignKey("users.id"), nullable=True)
    reviewer_name = Column(String(128), nullable=True)
    comment = Column(Text, nullable=True)
    reviewed_at = Column(DateTime(timezone=True), nullable=True)

    def to_dict(self) -> dict:
        """Return the row as a plain dict; ``reviewed_at`` is ISO-formatted or None."""
        plain_fields = (
            "id",
            "campaign_id",
            "image_path",
            "score",
            "reviewer_user_id",
            "reviewer_name",
            "comment",
        )
        payload = {name: getattr(self, name) for name in plain_fields}
        stamp = self.reviewed_at
        payload["reviewed_at"] = stamp.isoformat() if stamp else None
        return payload
|
||
|
||
|
||
# ── Review operations ──
|
||
|
||
def get_review_queue(campaign_id: str, offset: int = 0, limit: int = 20) -> dict[str, Any]:
    """Return one page of a campaign's images together with their review state.

    Args:
        campaign_id: Primary key of the labeling campaign.
        offset: Index of the first image of the page.
        limit: Maximum number of images in the page.

    Returns:
        ``items`` (one entry per image with its relative path, file name,
        stored score or ``"pending"``, and whether annotations exist),
        ``total`` (number of images in the batch), the echoed ``offset`` /
        ``limit`` and ``scores`` (per-score counts from the database plus the
        derived pending count). When the campaign, its batch directory or its
        images are missing, only ``items``, ``total`` and a ``hint`` are
        returned.
    """
    from as_platform.labeling.annotate import resolve_campaign_batch_dir
    from as_platform.db.engine import session_scope
    from as_platform.db.models import LabelingCampaign

    with session_scope() as db:
        camp = db.get(LabelingCampaign, campaign_id)
        if not camp:
            return {"items": [], "total": 0, "hint": "Campaign 不存在"}
        batch_dir = resolve_campaign_batch_dir(camp)
        class_names = _class_names_for_campaign(camp)
    if not (batch_dir and batch_dir.is_dir()):
        return {"items": [], "total": 0, "hint": "批次目录不存在"}

    images = _list_review_images(batch_dir)
    if not images:
        return {"items": [], "total": 0, "hint": "无 images 目录"}

    # Scores already stored for this campaign, keyed by relative image path.
    with session_scope() as db:
        stored_rows = db.query(LabelingReview).filter(LabelingReview.campaign_id == campaign_id).all()
        score_by_path = {row.image_path: row.score for row in stored_rows}

    total = len(images)

    def _entry(img: Path) -> dict[str, Any]:
        rel = str(img.relative_to(batch_dir))
        return {
            "id": rel, "image_path": rel,
            "fileName": img.name,
            "score": score_by_path.get(rel, "pending"),
            "has_label": _image_has_labels(batch_dir, img, class_names),
        }

    items = [_entry(img) for img in images[offset:offset + limit]]

    with session_scope() as db:
        db_counts = _review_db_counts(db, campaign_id)
    score_counts = {key: db_counts.get(key, 0) for key in ("good", "fine", "bad")}
    # Pending = images without any stored review row (never negative).
    score_counts["pending"] = max(0, total - sum(db_counts.values()))

    return {
        "items": items, "total": total,
        "offset": offset, "limit": limit,
        "scores": score_counts,
    }
|
||
|
||
|
||
def get_review_image(campaign_id: str, image_rel_path: str) -> bytes:
    """Render one campaign image with its annotation overlay as JPEG bytes.

    Args:
        campaign_id: Primary key of the labeling campaign.
        image_rel_path: Image path relative to the campaign's batch directory.

    Returns:
        JPEG bytes produced by ``render_review_overlay``.

    Raises:
        FileNotFoundError: If the campaign or its batch directory cannot be
            resolved, if ``image_rel_path`` is absolute or contains ``..``
            (it must stay inside the batch directory), or if the image file
            does not exist.
    """
    from as_platform.labeling.annotate import resolve_campaign_batch_dir
    from as_platform.db.engine import session_scope
    from as_platform.db.models import LabelingCampaign

    with session_scope() as db:
        camp = db.get(LabelingCampaign, campaign_id)
        if not camp:
            raise FileNotFoundError("Campaign 不存在")
        batch_dir = resolve_campaign_batch_dir(camp)
        class_names = _class_names_for_campaign(camp)
    if not batch_dir:
        raise FileNotFoundError("批次不存在")

    # The relative path is caller-supplied. Joining an absolute path replaces
    # batch_dir entirely and ".." walks out of it, so reject both. The check
    # is purely lexical on purpose: symlinked images inside the batch still work.
    rel = Path(image_rel_path)
    if rel.anchor or ".." in rel.parts:
        raise FileNotFoundError(f"图片不存在: {image_rel_path}")

    img_path = batch_dir / rel
    if not img_path.is_file():
        raise FileNotFoundError(f"图片不存在: {image_rel_path}")
    return render_review_overlay(img_path, batch_dir, class_names)
|
||
|
||
|
||
def submit_review_scores(
    campaign_id: str,
    scores: list[dict[str, str]],
    reviewer_user_id: int | None = None,
    reviewer_name: str | None = None,
) -> dict[str, Any]:
    """Store review scores and advance the campaign stage once every image is reviewed.

    Args:
        campaign_id: Primary key of the labeling campaign.
        scores: One dict per image with ``image_path`` and ``score`` (both
            required) and an optional ``comment``. An existing review row for
            the same campaign and image is overwritten, otherwise a new row
            is inserted.
        reviewer_user_id: Id of the reviewing user, stored on each row.
        reviewer_name: Display name of the reviewer, stored on each row.

    Returns:
        ``ok`` (always True), ``updated`` (number of submitted items),
        ``auto_advanced`` (True when the number of stored reviews has reached
        the number of images in the batch) and ``stage`` (the resulting stage
        name in that case, with an approval reported as ``"review_approved"``;
        None otherwise).

    Raises:
        KeyError: If an item lacks ``image_path`` or ``score``.
    """
    from as_platform.labeling.annotate import resolve_campaign_batch_dir
    from as_platform.db.models import LabelingCampaign

    now = datetime.now(timezone.utc)
    updated = 0
    auto_advanced = False
    final_stage = None

    with session_scope() as db:
        for item in scores:
            img_path = item["image_path"]
            score = item["score"]
            comment = item.get("comment")
            rec = db.query(LabelingReview).filter(
                LabelingReview.campaign_id == campaign_id,
                LabelingReview.image_path == img_path,
            ).first()
            if rec:
                rec.score = score
                rec.reviewer_user_id = reviewer_user_id
                rec.reviewer_name = reviewer_name
                rec.reviewed_at = now
                rec.comment = comment
            else:
                db.add(LabelingReview(
                    campaign_id=campaign_id, image_path=img_path, score=score,
                    reviewer_user_id=reviewer_user_id, reviewer_name=reviewer_name,
                    reviewed_at=now, comment=comment,
                ))
            updated += 1
        db.commit()

        # Decide whether the review is complete. The image total comes from
        # the same listing the review queue and the summary use, so all three
        # agree on what "every image" means.
        counts = _review_db_counts(db, campaign_id)
        camp = db.get(LabelingCampaign, campaign_id)
        batch_dir = resolve_campaign_batch_dir(camp) if camp else None
        total_images = 0
        if batch_dir and batch_dir.is_dir():
            total_images = len(_list_review_images(batch_dir))

        reviewed = sum(counts.values())
        auto_advanced = total_images > 0 and reviewed >= total_images
        if auto_advanced:
            eff = _effective_stage_from_review(
                counts.get("good", 0), counts.get("fine", 0), counts.get("bad", 0), total_images,
            )
            # Callers are told "review_approved"; _update_campaign_stage maps
            # it back to the stored "labeling_submitted" status.
            final_stage = "review_approved" if eff == "labeling_submitted" else eff
            # "in_review" is returned when rows with other score values make
            # the row count reach the total; the stored stage is left alone then.
            if eff and eff != "in_review":
                _update_campaign_stage(db, campaign_id, final_stage)
                # _update_campaign_stage only flushes; commit explicitly, as
                # reconcile_review_stage does, so the change is persisted
                # regardless of what session_scope does on exit.
                db.commit()

    return {
        "ok": True,
        "updated": updated,
        "auto_advanced": auto_advanced,
        "stage": final_stage,
    }
|
||
|
||
|
||
def _review_db_counts(db, campaign_id: str) -> dict[str, int]:
    """Count a campaign's stored review rows per score value.

    Returns:
        ``score -> row count`` for every score value present in the table for
        this campaign; scores with no rows are absent from the dict.
    """
    from sqlalchemy import func

    grouped = db.query(LabelingReview.score, func.count())
    grouped = grouped.filter(LabelingReview.campaign_id == campaign_id)
    grouped = grouped.group_by(LabelingReview.score)

    tally: dict[str, int] = {}
    for score, cnt in grouped.all():
        tally[score] = cnt
    return tally
|
||
|
||
|
||
PASS_RATE_THRESHOLD = 0.8


def _effective_stage_from_review(good: int, fine: int, bad: int, total: int) -> str | None:
    """Derive the campaign status from the review tallies.

    Returns:
        None when ``total`` is not positive; ``"in_review"`` while fewer than
        ``total`` images carry a good/fine/bad score; otherwise
        ``"labeling_submitted"`` if the share of good + fine images out of
        ``total`` reaches ``PASS_RATE_THRESHOLD``, else ``"review_rejected"``.
    """
    if total <= 0:
        return None

    if good + fine + bad < total:
        return "in_review"

    pass_rate = (good + fine) / total
    if pass_rate >= PASS_RATE_THRESHOLD:
        return "labeling_submitted"
    return "review_rejected"
|
||
|
||
|
||
def reconcile_review_stage(campaign_id: str) -> str | None:
    """Bring the stored campaign status in line with the current review scores.

    While the review is incomplete, or no stage can be derived, the stage from
    the summary is returned unchanged. Otherwise the status computed from the
    tallies is written to the campaign (and to the batch metadata) if it
    differs from the stored one.

    Returns:
        The resulting stage, or None when the campaign row no longer exists.
    """
    summary = _review_summary(campaign_id)
    current = summary.get("stage")
    if not summary.get("complete"):
        return current

    target = _effective_stage_from_review(
        *(summary[key] for key in ("good", "fine", "bad", "total")),
    )
    if not target:
        return current

    with session_scope() as db:
        from as_platform.db.models import LabelingCampaign

        camp = db.get(LabelingCampaign, campaign_id)
        if not camp:
            return None
        # NOTE(review): any stored status that differs from the computed one
        # is overwritten here — confirm that is intended for campaigns whose
        # status is no longer a review stage.
        if camp.status != target:
            camp.status = target
            from as_platform.labeling.batch_stage import update_campaign_batch_meta_stage

            update_campaign_batch_meta_stage(camp, target)
            db.commit()
    return target
|
||
|
||
|
||
def _update_campaign_stage(db, campaign_id: str, new_stage: str) -> None:
    """Set a campaign's status and mirror it into the batch metadata.

    ``"review_approved"`` is stored as ``"labeling_submitted"``; any other
    stage is stored as given. Does nothing when the campaign does not exist.
    The session is flushed but not committed here.
    """
    from as_platform.db.models import LabelingCampaign
    from as_platform.labeling.batch_stage import update_campaign_batch_meta_stage

    camp = db.get(LabelingCampaign, campaign_id)
    if not camp:
        return

    stored_stage = new_stage
    if new_stage == "review_approved":
        stored_stage = "labeling_submitted"
    camp.status = stored_stage
    db.flush()
    update_campaign_batch_meta_stage(camp, stored_stage)
|
||
|
||
|
||
def _review_summary(campaign_id: str) -> dict[str, Any]:
    """Summarize the review progress of a campaign.

    Returns:
        A dict with the ``good`` / ``fine`` / ``bad`` counts, ``pending``,
        ``total``, ``reviewed``, ``pass_rate`` (rounded percentage of good +
        fine), ``complete`` and the stored ``stage``.

        * Unknown campaign: all zeros, ``complete`` False, empty stage.
        * Batch directory unavailable: the number of stored review rows is
          used as both ``total`` and ``reviewed``, and ``complete`` is True as
          soon as at least one row exists.
        * Otherwise ``total`` is the number of images in the batch and
          ``complete`` means every image has a good/fine/bad score.
    """
    from as_platform.labeling.annotate import resolve_campaign_batch_dir
    from as_platform.db.models import LabelingCampaign

    with session_scope() as db:
        camp = db.get(LabelingCampaign, campaign_id)
        if not camp:
            return {"good": 0, "fine": 0, "bad": 0, "pending": 0, "total": 0, "reviewed": 0, "pass_rate": 0, "complete": False, "stage": ""}
        batch_dir = resolve_campaign_batch_dir(camp)
        stage = camp.status or ""
        has_batch_dir = bool(batch_dir) and batch_dir.is_dir()
        images = _list_review_images(batch_dir) if has_batch_dir else None
        counts = _review_db_counts(db, campaign_id)

    good = counts.get("good", 0)
    fine = counts.get("fine", 0)
    bad = counts.get("bad", 0)

    if images is None:
        # No directory to count images in: fall back to the stored rows.
        row_total = sum(counts.values())
        return {
            "good": good,
            "fine": fine,
            "bad": bad,
            "pending": 0,
            "total": row_total,
            "reviewed": row_total,
            "pass_rate": round((good + fine) / max(row_total, 1) * 100),
            "complete": row_total > 0,
            "stage": stage,
        }

    total = len(images)
    reviewed = good + fine + bad
    return {
        "good": good,
        "fine": fine,
        "bad": bad,
        "pending": max(0, total - reviewed),
        "total": total,
        "reviewed": reviewed,
        "pass_rate": round((good + fine) / max(total, 1) * 100),
        "complete": reviewed >= total and total > 0,
        "stage": stage,
    }
|
||
|
||
|
||
def review_progress(campaign_id: str) -> dict[str, Any]:
    """Return the review summary of a campaign, reconciling its stage when complete.

    For a complete review the stored stage is first aligned with the scores
    via ``reconcile_review_stage`` and, if that yields a stage, it replaces
    ``stage`` in the returned summary.
    """
    result = _review_summary(campaign_id)
    if not result.get("complete"):
        return result

    reconciled = reconcile_review_stage(campaign_id)
    if reconciled:
        result["stage"] = reconciled
    return result
|
||
|
||
|
||
def review_progress_batch(campaign_ids: list[str]) -> dict[str, Any]:
    """Return ``review_progress`` for several campaigns at once.

    Ids are stripped of surrounding whitespace, empty ones are dropped, and
    only the first 50 remaining ids are processed.

    Returns:
        ``{"items": {campaign_id: progress_dict}}``.
    """
    cleaned: list[str] = []
    for raw in campaign_ids:
        if raw and raw.strip():
            cleaned.append(raw.strip())
    return {"items": {cid: review_progress(cid) for cid in cleaned[:50]}}
|