Files
HSAP/platform/as_platform/labeling/progress.py

564 lines
19 KiB
Python
Raw Normal View History

"""标注进度统计与按人分包。"""
from __future__ import annotations
import json
from collections import defaultdict
from datetime import datetime, timezone
from typing import Any
from as_platform.db.engine import session_scope
from as_platform.db.models import LabelingCampaign, LabelingTaskAssignment, User
from as_platform.labeling.annotate import (
_annotations_dir,
_iter_batch_images,
_task_id_for_image,
resolve_campaign_batch_dir,
)
# Role codes that user_is_coordinator() accepts as coordinator roles.
# NOTE(review): "labeler" is listed here, so any user holding that role passes
# the coordinator shortcut in assert_can_save_task() — confirm this is intended.
COORDINATOR_ROLES = frozenset({"labeler", "admin", "engineer", "reviewer"})
def _utcnow() -> datetime:
return datetime.now(timezone.utc)
def user_role_codes(user: User) -> set[str]:
    """Collect the distinct role codes attached to *user*.

    A user whose ``roles`` attribute is empty or ``None`` yields an empty set.
    """
    codes: set[str] = set()
    for role in user.roles or ():
        codes.add(role.code)
    return codes
def user_is_coordinator(user: User) -> bool:
    """Tell whether *user* counts as a labeling coordinator.

    True when the user holds any role listed in ``COORDINATOR_ROLES``, or when
    one of the user's roles carries the ``"*"`` or ``"write:labeling_assign"``
    permission code.
    """
    if not COORDINATOR_ROLES.isdisjoint(user_role_codes(user)):
        return True
    # Gather every non-empty permission code reachable through the user's roles.
    granted: set[str] = {
        perm.code
        for role in user.roles or []
        for perm in role.permissions or []
        if perm.code
    }
    return not granted.isdisjoint(("*", "write:labeling_assign"))
def list_campaign_task_ids(campaign_id: str) -> list[str]:
    """Return the task id of every image found in the campaign's batch directory.

    Ids are produced in the order ``_iter_batch_images`` yields the images.

    Raises:
        FileNotFoundError: if no campaign with ``campaign_id`` exists.
    """
    with session_scope() as db:
        campaign = db.get(LabelingCampaign, campaign_id)
        if not campaign:
            raise FileNotFoundError("campaign not found")
        batch_dir = resolve_campaign_batch_dir(campaign)
        task_ids: list[str] = []
        for image in _iter_batch_images(batch_dir):
            task_ids.append(_task_id_for_image(image, batch_dir))
        return task_ids
def _annotation_has_result(path) -> bool:
if not path.is_file():
return False
try:
data = json.loads(path.read_text(encoding="utf-8"))
except (json.JSONDecodeError, OSError):
return False
result = data.get("result")
if result is None:
return False
if isinstance(result, list):
return len(result) > 0
if isinstance(result, dict):
return len(result) > 0
return bool(result)
def count_completed_tasks(batch_dir) -> set[str]:
    """Return the stems of annotation files in *batch_dir* that hold a result.

    Despite the name, the return value is the set of file stems, not a count.
    An absent annotations directory yields an empty set.
    """
    annotations = _annotations_dir(batch_dir)
    if not annotations.is_dir():
        return set()
    return {
        ann_file.stem
        for ann_file in annotations.glob("*.json")
        if _annotation_has_result(ann_file)
    }
def _user_name_map(db, user_ids: set[int]) -> dict[int, str]:
    """Map each id in *user_ids* to a display name, loading users through *db*.

    Users with an empty name are labelled ``user-<id>``; ids without a matching
    ``User`` row are simply absent from the result.  No query is issued when
    *user_ids* is empty.
    """
    name_by_id: dict[int, str] = {}
    if user_ids:
        for user in db.query(User).filter(User.id.in_(user_ids)).all():
            name_by_id[user.id] = user.name if user.name else f"user-{user.id}"
    return name_by_id
def campaign_progress(campaign_id: str) -> dict[str, Any]:
    """Summarise labeling progress for one campaign, overall and per assignee.

    Returns:
        A dict with ``campaign_id``, ``total_tasks`` (images in the batch),
        ``completed_tasks`` (annotation files holding a result),
        ``assigned_tasks``, ``unassigned_tasks``, the overall ``percent`` and a
        ``by_user`` list ordered by display name.

    Raises:
        FileNotFoundError: if no campaign with ``campaign_id`` exists.
    """
    with session_scope() as db:
        campaign = db.get(LabelingCampaign, campaign_id)
        if not campaign:
            raise FileNotFoundError("campaign not found")
        batch_dir = resolve_campaign_batch_dir(campaign)
        all_ids = [
            _task_id_for_image(image, batch_dir)
            for image in _iter_batch_images(batch_dir)
        ]
        completed_ids = count_completed_tasks(batch_dir)
        assignments = (
            db.query(LabelingTaskAssignment)
            .filter(LabelingTaskAssignment.campaign_id == campaign_id)
            .all()
        )
        # Names are needed for assignees and for whoever completed a task.
        involved: set[int] = set()
        for assignment in assignments:
            involved.add(assignment.user_id)
            if assignment.completed_by_user_id:
                involved.add(assignment.completed_by_user_id)
        names = _user_name_map(db, involved)
        # Copy the needed columns into plain tuples before the session closes.
        snapshot = [
            (assignment.user_id, assignment.task_id, assignment.completed_at)
            for assignment in assignments
        ]

    total = len(all_ids)
    completed = len(completed_ids)
    assigned = len(snapshot)

    assigned_per_user: dict[int, int] = defaultdict(int)
    completed_per_user: dict[int, int] = defaultdict(int)
    for uid, task_id, completed_at in snapshot:
        assigned_per_user[uid] += 1
        # A task counts as done if flagged in the DB or annotated on disk.
        if completed_at or task_id in completed_ids:
            completed_per_user[uid] += 1

    by_user = []
    for uid in sorted(assigned_per_user, key=lambda u: names.get(u, "")):
        n_assigned = assigned_per_user[uid]
        n_completed = completed_per_user.get(uid, 0)
        if n_assigned:
            user_percent = round(100.0 * n_completed / n_assigned, 1)
        else:
            user_percent = 0.0
        by_user.append(
            {
                "user_id": uid,
                "name": names.get(uid, f"user-{uid}"),
                "assigned": n_assigned,
                "completed": n_completed,
                "percent": user_percent,
            }
        )

    overall_percent = round(100.0 * completed / total, 1) if total else 0.0
    return {
        "campaign_id": campaign_id,
        "total_tasks": total,
        "completed_tasks": completed,
        "assigned_tasks": assigned,
        "unassigned_tasks": max(0, total - assigned),
        "percent": overall_percent,
        "by_user": by_user,
    }
def campaign_progress_summary(campaign_id: str) -> dict[str, int]:
    """Return only the headline counters of ``campaign_progress``.

    All three counters are zero when ``campaign_progress`` raises
    ``FileNotFoundError``.
    """
    counters = ("total_tasks", "completed_tasks", "assigned_tasks")
    try:
        progress = campaign_progress(campaign_id)
    except FileNotFoundError:
        return dict.fromkeys(counters, 0)
    return {counter: progress[counter] for counter in counters}
def get_assigned_task_ids(campaign_id: str, user_id: int | None = None) -> set[str]:
    """Return the ids of tasks already assigned within a campaign.

    When *user_id* is given, only that user's assignments are returned.
    """
    conditions = [LabelingTaskAssignment.campaign_id == campaign_id]
    if user_id is not None:
        conditions.append(LabelingTaskAssignment.user_id == user_id)
    with session_scope() as db:
        query = db.query(LabelingTaskAssignment.task_id).filter(*conditions)
        assigned_ids: set[str] = set()
        for row in query.all():
            assigned_ids.add(row[0])
        return assigned_ids
def _assign_result(
    campaign_id: str,
    created: int,
    notifications: list[dict[str, Any]] | None = None,
) -> dict[str, Any]:
    """Build the response shared by the ``assign_tasks_*`` functions.

    Combines the number of newly created assignments with a fresh
    ``campaign_progress`` snapshot and the notification results (an empty list
    when none were supplied).
    """
    progress = campaign_progress(campaign_id)
    sent = notifications if notifications else []
    return dict(
        assigned=created,
        by_user=progress["by_user"],
        progress=progress,
        notifications=sent,
    )
def _build_notify_payload(
assigned_counts: dict[int, int],
user_map: dict[int, User],
camp: LabelingCampaign | None,
campaign_id: str,
) -> tuple[str, str, list[dict[str, Any]]]:
"""在 session 内提取通知所需字段,避免 DetachedInstanceError。"""
task = (camp.task if camp else "") or ""
batch = (camp.batch if camp else "") or campaign_id
targets: list[dict[str, Any]] = []
for uid, count in assigned_counts.items():
if count <= 0:
continue
u = user_map.get(uid)
if not u or not u.feishu_open_id:
continue
targets.append({
"open_id": u.feishu_open_id,
"name": u.name or f"user-{uid}",
"count": count,
})
return task, batch, targets
def _notify_assignees(
campaign_id: str,
task: str,
batch: str,
targets: list[dict[str, Any]],
) -> list[dict[str, Any]]:
if not targets:
return []
from as_platform.integrations.feishu_notify import notify_labeling_assignment
results: list[dict[str, Any]] = []
for t in targets:
r = notify_labeling_assignment(
open_id=t["open_id"],
assignee_name=t["name"],
task=task,
batch=batch,
count=t["count"],
campaign_id=campaign_id,
)
results.append(r)
return results
def assign_tasks_even(
    campaign_id: str,
    user_ids: list[int],
    *,
    assigned_by_user_id: int,
) -> dict[str, Any]:
    """Distribute the campaign's unassigned tasks round-robin over *user_ids*.

    Args:
        campaign_id: campaign whose unassigned tasks are distributed.
        user_ids: ids of the assignees.  Duplicate ids are ignored (first
            occurrence wins) so every distinct user gets an equal share; share
            sizes differ by at most one task.
        assigned_by_user_id: stored on every new assignment row.

    Returns:
        The ``_assign_result`` dict: number of new assignments, per-user
        progress, the full progress snapshot and notification results.

    Raises:
        ValueError: if *user_ids* is empty or contains an id without a
            ``User`` row.
        FileNotFoundError: propagated from ``list_campaign_task_ids`` when the
            campaign does not exist.
    """
    if not user_ids:
        raise ValueError("user_ids 不能为空")
    # Validation already tolerates duplicates (it compares against the distinct
    # ids), so the rotation must use distinct ids too; otherwise a repeated id
    # would silently receive a multiple share.
    assignees = list(dict.fromkeys(user_ids))
    now = _utcnow()
    all_ids = list_campaign_task_ids(campaign_id)
    with session_scope() as db:
        # Only the task_id column is needed to know what is already taken.
        existing = {
            row[0]
            for row in db.query(LabelingTaskAssignment.task_id)
            .filter(LabelingTaskAssignment.campaign_id == campaign_id)
            .all()
        }
        unassigned = [tid for tid in all_ids if tid not in existing]
        users = db.query(User).filter(User.id.in_(assignees)).all()
        if len(users) != len(assignees):
            raise ValueError("存在无效 user_id")
        user_map = {u.id: u for u in users}
        camp = db.get(LabelingCampaign, campaign_id)
        assigned_counts: dict[int, int] = defaultdict(int)
        for i, tid in enumerate(unassigned):
            uid = assignees[i % len(assignees)]
            db.add(
                LabelingTaskAssignment(
                    campaign_id=campaign_id,
                    task_id=tid,
                    user_id=uid,
                    assigned_by_user_id=assigned_by_user_id,
                    assigned_at=now,
                )
            )
            assigned_counts[uid] += 1
        db.flush()
        # Pull notification fields out of the ORM objects while the session is
        # still open.
        task, batch, notify_targets = _build_notify_payload(
            dict(assigned_counts), user_map, camp, campaign_id
        )
    notifications = _notify_assignees(campaign_id, task, batch, notify_targets)
    return _assign_result(campaign_id, len(unassigned), notifications)
def assign_tasks_quantized(
    campaign_id: str,
    items: list[dict[str, Any]],
    *,
    assigned_by_user_id: int,
) -> dict[str, Any]:
    """Assign unassigned tasks according to a requested quantity per user.

    ``items`` looks like ``[{"user_id": 1, "count": 10}, ...]``.  Each user
    receives at most ``count`` tasks taken, in order, from the tasks that are
    still unassigned; once the pool is exhausted later items receive nothing.

    Raises:
        ValueError: if *items* is empty or names a user id that does not exist.
    """
    if not items:
        raise ValueError("items 不能为空")
    now = _utcnow()
    all_ids = list_campaign_task_ids(campaign_id)
    with session_scope() as db:
        taken = {
            assignment.task_id
            for assignment in db.query(LabelingTaskAssignment)
            .filter(LabelingTaskAssignment.campaign_id == campaign_id)
            .all()
        }
        unassigned = [tid for tid in all_ids if tid not in taken]

        user_ids = [int(item["user_id"]) for item in items]
        user_map = {
            user.id: user
            for user in db.query(User).filter(User.id.in_(user_ids)).all()
        }
        unknown = next((uid for uid in user_ids if uid not in user_map), None)
        if unknown is not None:
            raise ValueError(f"用户不存在: {unknown}")

        camp = db.get(LabelingCampaign, campaign_id)
        created = 0
        assigned_counts: dict[int, int] = defaultdict(int)
        cursor = 0  # index of the next task that has not been handed out yet
        for item in items:
            uid = int(item["user_id"])
            # A non-positive count hands out nothing.
            quota = max(0, int(item.get("count", 0)))
            share = unassigned[cursor:cursor + quota]
            cursor += len(share)
            for tid in share:
                db.add(
                    LabelingTaskAssignment(
                        campaign_id=campaign_id,
                        task_id=tid,
                        user_id=uid,
                        assigned_by_user_id=assigned_by_user_id,
                        assigned_at=now,
                    )
                )
            created += len(share)
            if share:
                assigned_counts[uid] += len(share)
        db.flush()
        task, batch, notify_targets = _build_notify_payload(
            dict(assigned_counts), user_map, camp, campaign_id
        )
    notifications = _notify_assignees(campaign_id, task, batch, notify_targets)
    return _assign_result(campaign_id, created, notifications)
def assign_tasks_explicit(
    campaign_id: str,
    items: list[dict[str, Any]],
    *,
    assigned_by_user_id: int,
) -> dict[str, Any]:
    """Assign explicitly listed tasks to the given users.

    ``items`` is a list of dicts with a ``user_id`` and an optional
    ``task_ids`` list.  Task ids that already have an assignment in the
    campaign, including ones created earlier in the same call, are skipped.

    Raises:
        ValueError: if a user id does not exist or a task id is not part of
            the campaign.
    """
    now = _utcnow()
    valid_ids = set(list_campaign_task_ids(campaign_id))
    created = 0
    assigned_counts: dict[int, int] = defaultdict(int)
    user_map: dict[int, User] = {}
    camp: LabelingCampaign | None = None
    with session_scope() as db:
        taken = {
            assignment.task_id
            for assignment in db.query(LabelingTaskAssignment)
            .filter(LabelingTaskAssignment.campaign_id == campaign_id)
            .all()
        }
        camp = db.get(LabelingCampaign, campaign_id)
        for item in items:
            uid = int(item["user_id"])
            assignee = db.get(User, uid)
            if not assignee:
                raise ValueError(f"用户不存在: {uid}")
            user_map[uid] = assignee
            requested = item.get("task_ids") or []
            for tid in requested:
                if tid not in valid_ids:
                    raise ValueError(f"无效 task_id: {tid}")
                if tid not in taken:
                    db.add(
                        LabelingTaskAssignment(
                            campaign_id=campaign_id,
                            task_id=tid,
                            user_id=uid,
                            assigned_by_user_id=assigned_by_user_id,
                            assigned_at=now,
                        )
                    )
                    # Remember it so a repeated id in this call is not added twice.
                    taken.add(tid)
                    created += 1
                    assigned_counts[uid] += 1
        db.flush()
        task, batch, notify_targets = _build_notify_payload(
            dict(assigned_counts), user_map, camp, campaign_id
        )
    notifications = _notify_assignees(campaign_id, task, batch, notify_targets)
    return _assign_result(campaign_id, created, notifications)
def reassign_task(campaign_id: str, task_id: str, user_id: int) -> dict[str, Any]:
    """Hand an existing assignment over to another user and return it as a dict.

    Raises:
        FileNotFoundError: if the task has no assignment in the campaign.
        ValueError: if *user_id* does not refer to an existing user.
    """
    with session_scope() as db:
        assignment = (
            db.query(LabelingTaskAssignment)
            .filter(LabelingTaskAssignment.campaign_id == campaign_id)
            .filter(LabelingTaskAssignment.task_id == task_id)
            .first()
        )
        if not assignment:
            raise FileNotFoundError("assignment not found")
        if not db.get(User, user_id):
            raise ValueError(f"用户不存在: {user_id}")
        assignment.user_id = user_id
        db.flush()
        return assignment.to_dict()
def release_task_assignment(campaign_id: str, task_id: str) -> dict[str, Any]:
    """Delete the assignment of a task so that it becomes unassigned again.

    Raises:
        FileNotFoundError: if the task has no assignment in the campaign.
    """
    with session_scope() as db:
        assignment = (
            db.query(LabelingTaskAssignment)
            .filter(LabelingTaskAssignment.campaign_id == campaign_id)
            .filter(LabelingTaskAssignment.task_id == task_id)
            .first()
        )
        if not assignment:
            raise FileNotFoundError("assignment not found")
        db.delete(assignment)
        db.flush()
        outcome: dict[str, Any] = {"ok": True, "released": task_id}
        return outcome
def assert_can_save_task(campaign_id: str, task_id: str, user: User) -> None:
    """Raise ``PermissionError`` unless *user* may save the given task.

    Coordinators and holders of the ``vendor_labeler`` role are always
    allowed.  Anyone else must be the user the task is assigned to.

    Raises:
        PermissionError: if the task is unassigned or assigned to another user.
    """
    exempt = user_is_coordinator(user) or "vendor_labeler" in user_role_codes(user)
    if exempt:
        return
    with session_scope() as db:
        assignment = (
            db.query(LabelingTaskAssignment)
            .filter(LabelingTaskAssignment.campaign_id == campaign_id)
            .filter(LabelingTaskAssignment.task_id == task_id)
            .first()
        )
        if not assignment:
            raise PermissionError("该图未分配给您,请联系协调员分包")
        if assignment.user_id != user.id:
            raise PermissionError("该图已分配给其他标注员")
def mark_task_completed(campaign_id: str, task_id: str, user_id: int) -> None:
    """Stamp the task's assignment as completed by *user_id*.

    Does nothing when the task has no assignment or is already marked
    completed, so the first completion is never overwritten.
    """
    now = _utcnow()
    with session_scope() as db:
        assignment = (
            db.query(LabelingTaskAssignment)
            .filter(LabelingTaskAssignment.campaign_id == campaign_id)
            .filter(LabelingTaskAssignment.task_id == task_id)
            .first()
        )
        if not assignment or assignment.completed_at:
            return
        assignment.completed_at = now
        assignment.completed_by_user_id = user_id
def list_my_assignments(user_id: int) -> dict[str, Any]:
    """Summarise the user's assignments across campaigns (their labeling inbox).

    Returns:
        ``{"items": [...], "total_pending": n}`` with one item per campaign
        that still exists, ordered by most pending tasks first and then by
        batch name.
    """
    from as_platform.config import FRONTEND_URL

    items: list[dict[str, Any]] = []
    base = FRONTEND_URL.rstrip("/")
    with session_scope() as db:
        assignments = (
            db.query(LabelingTaskAssignment)
            .filter(LabelingTaskAssignment.user_id == user_id)
            .all()
        )
        # Group (task_id, completed_at) pairs per campaign.
        grouped: dict[str, list[tuple[Any, Any]]] = defaultdict(list)
        for assignment in assignments:
            grouped[assignment.campaign_id].append(
                (assignment.task_id, assignment.completed_at)
            )
        for cid, pairs in grouped.items():
            camp = db.get(LabelingCampaign, cid)
            if not camp:
                continue
            batch_dir = resolve_campaign_batch_dir(camp)
            completed_ids = count_completed_tasks(batch_dir)
            campaign_ids = [
                _task_id_for_image(image, batch_dir)
                for image in _iter_batch_images(batch_dir)
            ]
            assigned = len(pairs)
            completed = 0
            for task_id, completed_at in pairs:
                # Done if flagged in the DB or annotated on disk.
                if completed_at or task_id in completed_ids:
                    completed += 1
            pending = max(0, assigned - completed)
            items.append(
                {
                    "campaign_id": cid,
                    "batch": camp.batch,
                    "task": camp.task,
                    "project": camp.project,
                    "status": camp.status,
                    "assigned": assigned,
                    "completed": completed,
                    "pending": pending,
                    "campaign_total": len(campaign_ids),
                    "annotate_url": f"{base}/labeling/annotate/{cid}",
                }
            )
    items.sort(key=lambda entry: (-entry["pending"], entry.get("batch") or ""))
    total_pending = sum(entry["pending"] for entry in items)
    return {"items": items, "total_pending": total_pending}
def campaign_my_tasks(campaign_id: str, user_id: int) -> dict[str, Any]:
    """List, image by image, the tasks assigned to a user in one campaign.

    Args:
        campaign_id: campaign to inspect.
        user_id: user whose assignments are listed.

    Returns:
        A dict with ``campaign_id``, ``items`` and the ``assigned`` /
        ``completed`` / ``pending`` counters.  Each item carries ``task_id``,
        ``filename``, ``relative_path``, ``completed`` and ``frame_index``
        (position of the image in the batch, ``-1`` when the task id no longer
        matches any image).  Items are ordered by frame index; entries without
        a matching image always come last.

    Raises:
        FileNotFoundError: if no campaign with ``campaign_id`` exists.
    """
    with session_scope() as db:
        camp = db.get(LabelingCampaign, campaign_id)
        if not camp:
            raise FileNotFoundError("campaign not found")
        batch_dir = resolve_campaign_batch_dir(camp)
        images = list(_iter_batch_images(batch_dir))
        completed_ids = count_completed_tasks(batch_dir)
        task_id_to_index = {
            _task_id_for_image(img, batch_dir): i for i, img in enumerate(images)
        }
        rows = (
            db.query(LabelingTaskAssignment)
            .filter(
                LabelingTaskAssignment.campaign_id == campaign_id,
                LabelingTaskAssignment.user_id == user_id,
            )
            .all()
        )
        # Plain values only, so nothing below touches ORM objects.
        assignment_map = {r.task_id: r.completed_at for r in rows}

    items: list[dict[str, Any]] = []
    for task_id, completed_at in assignment_map.items():
        idx = task_id_to_index.get(task_id)
        filename = ""
        relative_path = ""
        if idx is not None:
            img = images[idx]
            try:
                relative_path = img.relative_to(batch_dir).as_posix()
            except ValueError:
                # Image is not located under batch_dir; fall back to its name.
                relative_path = img.name
            filename = img.name
        items.append({
            "task_id": task_id,
            "filename": filename,
            "relative_path": relative_path,
            # Done if flagged in the DB or annotated on disk.
            "completed": bool(completed_at) or task_id in completed_ids,
            "frame_index": idx if idx is not None else -1,
        })

    # Sort known frames by position and push unknown ones (-1) to the end.
    # Leading with the "unknown" flag avoids a magic sentinel such as 9999,
    # which mis-orders batches holding that many images or more.
    items.sort(key=lambda x: (x["frame_index"] < 0, x["frame_index"], x["filename"]))
    assigned = len(items)
    completed_count = sum(1 for item in items if item["completed"])
    return {
        "campaign_id": campaign_id,
        "items": items,
        "assigned": assigned,
        "completed": completed_count,
        "pending": max(0, assigned - completed_count),
    }