1583 lines
60 KiB
Python
Executable File
1583 lines
60 KiB
Python
Executable File
#!/usr/bin/env python3
|
||
"""Refresh a Feishu issue view and optionally process only incremental issue data."""
|
||
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import json
|
||
import os
|
||
import re
|
||
import shutil
|
||
import subprocess
|
||
import sys
|
||
import tempfile
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
from typing import Any, Iterable
|
||
|
||
|
||
# Repository root: the directory two levels above the directory that contains this file.
ROOT = Path(__file__).resolve().parents[2]
# Default interpreter used to launch the helper scripts (overridable with --python-bin).
# NOTE(review): this is a machine-specific absolute path — confirm it exists on every host.
DEFAULT_PYTHON_BIN = Path("/deeplearning_team/ydong/dongying/miniconda/envs/dev/bin/python")
# Default locations of the helper scripts, all relative to ROOT.
DEFAULT_EXPORT_SCRIPT = ROOT / "tools" / "feishu_project" / "export_feishu_view_issues.py"
DEFAULT_DOWNLOAD_SCRIPT = ROOT / "tools" / "feishu_project" / "download_issue_data.py"
DEFAULT_INFERENCE_SCRIPT = ROOT / "tools" / "feishu_project" / "run_issue_data_inference.py"
DEFAULT_CLIP_FALLBACK_INFERENCE_SCRIPT = ROOT / "tools" / "model_inference" / "core" / "run_two_roi_exported_onnx_infer.py"
DEFAULT_ISSUE_TRACKING_SCRIPT = ROOT / "tools" / "feishu_project" / "run_issue_data_tracking.sh"
# Issue fields that are inspected for data addresses.
DATA_FIELDS = ("问题数据地址", "问题数据地址_PDCL")
# Field values (compared after strip + lower-casing) that count as "no data provided".
PLACEHOLDER_TEXTS = {"", "待填", "待补充", "none", "null", "待提供"}
# Matches references of the form "ADAS_<id>::<suffix>"; neither part may contain
# slashes, backslashes or whitespace, and <id> may not contain ':'.
PDCL_REF_RE = re.compile(r"ADAS_[^:/\\\s]+::[^/\\\s]*")
# Separators between multiple paths inside one field: commas, semicolons and newlines.
# NOTE(review): ',' and ';' each appear twice in the class — possibly one of each was
# originally a full-width character; confirm against the original file.
STANDARD_PATH_SPLIT_RE = re.compile(r"[,,\n;;]+")
# Maps raw status keys to the label shown in logs and reports.
STATUS_LABELS = {
    "OPEN": "待处理",
    "Fi7of4O9X": "分析中(未解决)",
    "IN PROGRESS": "处理中(已解决)",
}
|
||
|
||
|
||
def parse_args() -> argparse.Namespace:
    """Define the command-line interface and parse the process arguments.

    Returns:
        The namespace produced by ``ArgumentParser.parse_args()``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    add = parser.add_argument

    # Which Feishu view to export and where the exported JSON goes.
    add("--project-key", required=True)
    add("--user-key", required=True)
    add("--view-name", required=True)
    add("--work-item-type", default="issue")
    add("--output-json", required=True, help="Path to the latest exported issue JSON.")
    add(
        "--sync-manifest-path",
        default="",
        help="Optional manifest path. Defaults to <output-json>.sync_manifest.json",
    )
    add(
        "--snapshot-dir",
        default="",
        help="Optional directory for timestamped exported JSON snapshots.",
    )

    # Interpreter and helper scripts.
    add("--python-bin", default=str(DEFAULT_PYTHON_BIN), help="Python interpreter used to launch repo scripts.")
    add("--export-script", default=str(DEFAULT_EXPORT_SCRIPT), help="Path to export_feishu_view_issues.py.")
    add("--download-script", default=str(DEFAULT_DOWNLOAD_SCRIPT), help="Path to download_issue_data.py.")
    add("--inference-script", default=str(DEFAULT_INFERENCE_SCRIPT), help="Path to run_issue_data_inference.py.")

    # Download / inference output locations.
    add(
        "--download-root",
        default="",
        help="Optional download root. Required when --run-download is used.",
    )
    add("--download-manifest-path", default="", help="Optional explicit download manifest path.")
    add(
        "--inference-root",
        default="",
        help="Optional inference root. Required when --run-inference is used.",
    )
    add("--inference-manifest-path", default="", help="Optional explicit inference manifest path.")
    add(
        "--exported-model",
        default="",
        help="Optional exported model path forwarded to run_issue_data_inference.py.",
    )

    # Issue-frame window options forwarded to the inference script.
    add(
        "--use-issue-frame-window",
        action="store_true",
        help="Forward issue-frame window inference options to run_issue_data_inference.py.",
    )
    add("--frame-before", type=int, default=100)
    add("--frame-after", type=int, default=100)
    add(
        "--missing-issue-frame-policy",
        choices=("full", "skip"),
        default="full",
        help="How to handle cases without usable 问题发生frameid when issue-frame window inference is enabled.",
    )

    # Tracking.
    add(
        "--issue-tracking-script",
        default=str(DEFAULT_ISSUE_TRACKING_SCRIPT),
        help="Path to run_issue_data_tracking.sh.",
    )
    add(
        "--tracking-model-version",
        default="",
        help="Optional model version override forwarded to run_issue_data_tracking.sh.",
    )

    # Stage switches.
    add("--run-download", action="store_true", help="Download issue data for newly actionable issues.")
    add(
        "--run-inference",
        action="store_true",
        help="Run inference for downloaded cases belonging to actionable issues.",
    )
    add("--run-tracking", action="store_true", help="Run issue-data tracking after inference.")
    add(
        "--refresh-changed-issues",
        action="store_true",
        help=(
            "Allow destructive refresh for issues whose data-address fields changed after already "
            "having data. This removes issue-specific download/inference outputs before reprocessing."
        ),
    )
    add(
        "--skip-existing-inference",
        action="store_true",
        help="Forward --skip-existing to run_issue_data_inference.py.",
    )
    add(
        "--save-snapshot",
        action="store_true",
        help="Write a timestamped exported JSON snapshot under --snapshot-dir.",
    )
    add(
        "--dry-run",
        action="store_true",
        help="Refresh the live view and plan actions without mutating local outputs.",
    )

    # Issue filters.
    add(
        "--issue-id",
        action="append",
        type=int,
        default=[],
        help="Only process the specified issue id. Can be repeated.",
    )
    add(
        "--issue-id-min",
        type=int,
        default=None,
        help="Only process issue ids greater than or equal to this value.",
    )
    add(
        "--issue-id-max",
        type=int,
        default=None,
        help="Only process issue ids less than or equal to this value.",
    )
    add(
        "--issue-name-keyword",
        action="append",
        default=[],
        help=(
            "Optional issue-name keyword filter. Matches issues whose name contains any provided "
            "keyword. Can be repeated."
        ),
    )

    # Pass-through arguments for the child scripts.
    add(
        "--download-arg",
        action="append",
        default=[],
        help="Extra argument forwarded to download_issue_data.py. Can be repeated.",
    )
    add(
        "--inference-arg",
        action="append",
        default=[],
        help="Extra argument forwarded to run_issue_data_inference.py. Can be repeated.",
    )
    return parser.parse_args()
|
||
|
||
|
||
def canonicalize(value: Any) -> str:
    """Serialize ``value`` to JSON text with sorted keys and non-ASCII kept as-is.

    Used to compare two values for equality independent of dict key order.
    """
    serialized = json.dumps(value, sort_keys=True, ensure_ascii=False)
    return serialized
|
||
|
||
|
||
def load_json(path: Path) -> dict:
    """Read ``path`` as UTF-8 text and return the parsed JSON document."""
    with path.open(encoding="utf-8") as handle:
        return json.load(handle)
|
||
|
||
|
||
def save_json(path: Path, payload: dict) -> None:
    """Write ``payload`` to ``path`` as indented UTF-8 JSON with a trailing newline.

    Missing parent directories are created first.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    text = json.dumps(payload, ensure_ascii=False, indent=2)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(text + "\n")
|
||
|
||
|
||
def log_progress(message: str) -> None:
    """Print ``message`` to stdout prefixed with a local-time timestamp, flushing immediately."""
    now = datetime.now().astimezone()
    stamp = now.strftime("%Y-%m-%d %H:%M:%S")
    print(f"[sync_issue_data {stamp}] {message}", flush=True)
|
||
|
||
|
||
def compact_text(value: Any, max_len: int = 80) -> str:
    """Collapse whitespace in ``value`` and truncate it to at most ``max_len`` characters.

    Args:
        value: Any object; ``None`` is treated as an empty string, everything
            else is converted with ``str()`` and stripped.
        max_len: Maximum length of the returned text.

    Returns:
        The text with every whitespace run replaced by one space. Text longer
        than ``max_len`` is cut and ends in ``"..."``; when ``max_len`` is too
        small to hold the ellipsis, the text is hard-cut with no ellipsis.
    """
    text = "" if value is None else str(value).strip()
    text = re.sub(r"\s+", " ", text)
    if len(text) <= max_len:
        return text
    if max_len < 3:
        # No room for "...": a negative slice bound here would return text
        # longer than max_len, so hard-cut instead.
        return text[: max(max_len, 0)]
    return f"{text[: max_len - 3]}..."
|
||
|
||
|
||
def normalize_issue_name_keyword(value: Any) -> str:
    """Return ``value`` whitespace-compacted (cap of 10,000 characters) and case-folded."""
    return compact_text(value, max_len=10_000).casefold()
|
||
|
||
|
||
def sanitize_issue_name_keywords(keywords: Iterable[object]) -> list[str]:
    """Clean up user-supplied keywords and drop blanks and case-insensitive duplicates.

    Returns:
        The compacted display text of each distinct keyword, in first-seen
        order. Two keywords are duplicates when their case-folded forms match;
        the first spelling wins.
    """
    first_spelling: dict[str, str] = {}
    for raw in keywords:
        shown = compact_text(raw, max_len=10_000)
        folded = shown.casefold()
        if folded:
            # setdefault keeps the earliest spelling and the insertion order.
            first_spelling.setdefault(folded, shown)
    return list(first_spelling.values())
|
||
|
||
|
||
def build_issue_lookup(payload: dict) -> dict[int, dict]:
    """Index the records under ``payload["items"]`` by their integer ``id``.

    A missing ``items`` key yields an empty mapping; a later record with the
    same id replaces an earlier one.
    """
    lookup: dict[int, dict] = {}
    for record in payload.get("items", []):
        lookup[int(record["id"])] = record
    return lookup
|
||
|
||
|
||
def issue_matches_name_keywords(item: dict, keywords: list[str]) -> bool:
    """Tell whether the issue name contains at least one keyword (case-insensitive).

    An empty keyword list matches every issue.
    """
    if not keywords:
        return True
    haystack = normalize_issue_name_keyword(item.get("name"))
    for keyword in keywords:
        if normalize_issue_name_keyword(keyword) in haystack:
            return True
    return False
|
||
|
||
|
||
def collect_issue_ids_by_name_keywords(payload: dict, keywords: list[str]) -> list[int]:
    """Return the sorted ids of all issues whose name matches any keyword.

    With no keywords the result is empty (the filter is inactive), not "all issues".
    """
    if not keywords:
        return []
    matched = [
        int(record["id"])
        for record in payload.get("items", [])
        if issue_matches_name_keywords(record, keywords)
    ]
    matched.sort()
    return matched
|
||
|
||
|
||
def issue_matches_id_range(issue_id: int, issue_id_min: int | None, issue_id_max: int | None) -> bool:
    """Tell whether ``issue_id`` lies inside the inclusive range; a ``None`` bound is open."""
    below_min = issue_id_min is not None and issue_id < issue_id_min
    above_max = issue_id_max is not None and issue_id > issue_id_max
    return not (below_min or above_max)
|
||
|
||
|
||
def collect_issue_ids_by_id_range(
    payload: dict,
    issue_id_min: int | None,
    issue_id_max: int | None,
) -> list[int]:
    """Return the sorted ids of all issues inside the inclusive id range.

    When both bounds are ``None`` the result is empty (the filter is inactive).
    """
    if issue_id_min is None and issue_id_max is None:
        return []
    in_range: list[int] = []
    for record in payload.get("items", []):
        if issue_matches_id_range(int(record["id"]), issue_id_min, issue_id_max):
            in_range.append(int(record["id"]))
    in_range.sort()
    return in_range
|
||
|
||
|
||
def format_issue_label(item: dict) -> str:
    """Build a short label of the form ``issue_<id>(<status label>): <name>``.

    The status part is omitted when the record has no status, and the name
    part is omitted when the compacted name (at most 48 characters) is empty.
    """
    issue_id = int(item["id"])
    short_name = compact_text(item.get("name"), max_len=48)
    status = item.get("status")
    status_part = "" if status is None else f"({status_label(status)})"
    name_part = f": {short_name}" if short_name else ""
    return f"issue_{issue_id}{status_part}{name_part}"
|
||
|
||
|
||
def describe_issue_ids(payload: dict, issue_ids: Iterable[int], limit: int = 5) -> str:
    """Summarize a set of issue ids as one log-friendly line.

    The ids are de-duplicated and sorted; the first ``limit`` are rendered as
    labels (a bare ``issue_<id>`` when the id is not in ``payload``) and the
    rest are counted in a ``(+N more)`` suffix.
    """
    ordered_ids = sorted(set(issue_ids))
    total = len(ordered_ids)
    if total == 0:
        return "0 issues"

    by_id = build_issue_lookup(payload)
    shown: list[str] = []
    for issue_id in ordered_ids[:limit]:
        record = by_id.get(issue_id)
        shown.append(f"issue_{issue_id}" if record is None else format_issue_label(record))

    hidden = total - len(shown)
    tail = f"; ... (+{hidden} more)" if hidden > 0 else ""
    joined = "; ".join(shown)
    return f"{total} issues -> {joined}{tail}"
|
||
|
||
|
||
def summarize_status_counts(summary: dict[str, Any]) -> str:
    """Render ``summary`` as ``key=value`` pairs sorted by key, or ``"no summary"`` when empty."""
    if not summary:
        return "no summary"
    pairs = [f"{key}={summary[key]}" for key in sorted(summary)]
    return ", ".join(pairs)
|
||
|
||
|
||
def run_command(
    command: list[str],
    cwd: Path | None = None,
    env: dict[str, str] | None = None,
    stream_output: bool = False,
) -> subprocess.CompletedProcess[str]:
    """Run ``command`` and return its result, raising on a non-zero exit code.

    Args:
        command: Argument vector; executed without a shell.
        cwd: Optional working directory for the child process.
        env: Environment for the child. In streaming mode it is merged on top
            of ``os.environ``; in captured mode it is passed to
            ``subprocess.run`` unchanged.
        stream_output: When true, stderr is folded into stdout and every line
            is echoed to this process's stdout as it arrives, while still
            being collected. ``PYTHONUNBUFFERED=1`` is set for the child
            unless the variable is already present.

    Returns:
        A ``CompletedProcess`` with text output. In streaming mode ``stderr``
        is always the empty string because it was merged into ``stdout``.

    Raises:
        RuntimeError: The command exited with a non-zero status. The message
            contains the exit code, the command line and the captured output.
    """
    working_dir = None if cwd is None else str(cwd)

    if stream_output:
        effective_env = os.environ.copy()
        if env is not None:
            effective_env.update(env)
        effective_env.setdefault("PYTHONUNBUFFERED", "1")

        stdout_chunks: list[str] = []
        # The context manager closes the pipe and reaps the child even when the
        # read/print loop is interrupted by an exception.
        with subprocess.Popen(
            command,
            cwd=working_dir,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            encoding="utf-8",
            env=effective_env,
            bufsize=1,
        ) as process:
            assert process.stdout is not None  # guaranteed by stdout=PIPE
            for line in process.stdout:
                print(line, end="", flush=True)
                stdout_chunks.append(line)
        completed = subprocess.CompletedProcess(
            command,
            process.returncode,
            stdout="".join(stdout_chunks),
            stderr="",
        )
    else:
        completed = subprocess.run(
            command,
            cwd=working_dir,
            check=False,
            capture_output=True,
            text=True,
            encoding="utf-8",
            env=env,
        )

    if completed.returncode != 0:
        # Prefer stderr; in streaming mode it is empty, so stdout is used.
        detail = completed.stderr.strip() or completed.stdout.strip() or "unknown error"
        raise RuntimeError(f"command failed ({completed.returncode}): {' '.join(command)}\n{detail}")
    return completed
|
||
|
||
|
||
def export_latest_view(args: argparse.Namespace) -> tuple[dict, Path]:
    """Export the Feishu view to a temporary JSON file and load it.

    The export script is launched with the configured interpreter and writes
    to a fresh temporary file. Unless ``args.dry_run`` is set, that file is
    copied to ``args.output_json`` and, when ``args.save_snapshot`` and
    ``args.snapshot_dir`` are both set, also into the snapshot directory.

    Returns:
        ``(payload, tmp_output)``: the parsed export and the path of the
        temporary file. This function does not delete the file on success.

    Raises:
        RuntimeError: Propagated from ``run_command`` when the export fails.
        Any error raised while loading or copying the export is propagated
        too; in every failure case the temporary file is removed first,
        because the caller never learns its path.
    """
    output_path = Path(args.output_json).resolve()
    # NOTE(review): resolve() follows symlinks; confirm that is intended for
    # interpreters that live behind a symlink.
    python_bin = str(Path(args.python_bin).resolve())
    export_script = str(Path(args.export_script).resolve())
    log_progress(f"[stage=export] refreshing Feishu view '{args.view_name}'")
    with tempfile.NamedTemporaryFile(
        prefix="sync_issue_data_",
        suffix=".json",
        delete=False,
    ) as tmp_file:
        tmp_output = Path(tmp_file.name)

    succeeded = False
    try:
        command = [
            python_bin,
            export_script,
            "--project-key",
            args.project_key,
            "--user-key",
            args.user_key,
            "--view-name",
            args.view_name,
            "--work-item-type",
            args.work_item_type,
            "--output",
            str(tmp_output),
        ]
        run_command(command, cwd=ROOT)
        payload = load_json(tmp_output)
        log_progress(
            f"[stage=export] fetched {len(payload.get('items', []))} issues from '{args.view_name}'"
        )
        if not args.dry_run:
            output_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(tmp_output, output_path)
            if args.save_snapshot and args.snapshot_dir:
                snapshot_dir = Path(args.snapshot_dir).resolve()
                snapshot_name = build_snapshot_name(payload)
                snapshot_path = snapshot_dir / snapshot_name
                snapshot_dir.mkdir(parents=True, exist_ok=True)
                shutil.copy2(tmp_output, snapshot_path)
        succeeded = True
    finally:
        if not succeeded:
            # The path is only handed out on success, so clean up here.
            tmp_output.unlink(missing_ok=True)
    return payload, tmp_output
|
||
|
||
|
||
def build_snapshot_name(payload: dict) -> str:
    """Derive a ``.json`` snapshot file name from ``payload["exported_at"]``.

    ``:`` and ``-`` are dropped and ``+``, ``T`` and ``/`` become ``_``. When
    the field is missing or empty, the current local time formatted as
    ``YYYYmmdd_HHMMSS`` is used instead.
    """
    replacements = str.maketrans({":": None, "-": None, "+": "_", "T": "_", "/": "_"})
    stem = str(payload.get("exported_at") or "").translate(replacements)
    if not stem:
        stem = datetime.now().astimezone().strftime("%Y%m%d_%H%M%S")
    return f"{stem}.json"
|
||
|
||
|
||
def meaningful_text(value: Any) -> bool:
    """Tell whether ``value`` holds real content.

    ``None``, blank strings and the placeholders in ``PLACEHOLDER_TEXTS``
    (compared after stripping and lower-casing) are not meaningful.
    """
    text = "" if value is None else str(value).strip()
    return bool(text) and text.lower() not in PLACEHOLDER_TEXTS
|
||
|
||
|
||
def has_downloadable_data(item: dict) -> bool:
    """Tell whether at least one of the ``DATA_FIELDS`` on ``item`` holds meaningful text."""
    for field_name in DATA_FIELDS:
        if meaningful_text(item.get(field_name)):
            return True
    return False
|
||
|
||
|
||
def iter_issue_fields(item: dict) -> Iterable[tuple[str, object]]:
    """Yield ``(field_name, value)`` for each of ``DATA_FIELDS``; absent fields yield ``None``."""
    yield from ((field_name, item.get(field_name)) for field_name in DATA_FIELDS)
|
||
|
||
|
||
def extract_pdcl_refs(raw_value: object) -> list[str]:
    """Return the distinct ``PDCL_REF_RE`` matches found in ``raw_value``, in first-seen order.

    ``None`` and blank input yield an empty list.
    """
    text = "" if raw_value is None else str(raw_value).strip()
    if not text:
        return []
    unique: list[str] = []
    seen: set[str] = set()
    for match in PDCL_REF_RE.finditer(text):
        ref = match.group(0)
        if ref not in seen:
            seen.add(ref)
            unique.append(ref)
    return unique
|
||
|
||
|
||
def extract_standard_paths(raw_value: object) -> list[str]:
    """Split a data-address field into individual filesystem-style paths.

    Returns an empty list when the value is ``None``, blank, a placeholder,
    contains any PDCL reference, or contains neither ``/`` nor ``\\``.
    Otherwise the text is split on ``STANDARD_PATH_SPLIT_RE`` and the
    non-empty, stripped pieces are returned.
    """
    if raw_value is None:
        return []
    text = str(raw_value).strip()
    if not text or text.lower() in PLACEHOLDER_TEXTS:
        return []
    # A field holding PDCL references is handled by the PDCL path, not here.
    if extract_pdcl_refs(text):
        return []
    looks_like_path = "/" in text or "\\" in text
    if not looks_like_path:
        return []
    pieces = (chunk.strip() for chunk in STANDARD_PATH_SPLIT_RE.split(text))
    return [piece for piece in pieces if piece]
|
||
|
||
|
||
def expected_download_refs(item: dict) -> list[dict]:
    """List the distinct data references that the issue's data fields point to.

    For every data field, PDCL references come first and standard paths
    second. Each reference is reported once, keyed by ``(kind, reference)``,
    as a dict with ``source_field``, ``source_kind`` and ``normalized_ref``.
    """
    refs: list[dict] = []
    seen: set[tuple[str, str]] = set()
    for field_name, raw_value in iter_issue_fields(item):
        candidates = [("pdcl_mdi_download", ref) for ref in extract_pdcl_refs(raw_value)]
        candidates += [("standard_path", path.strip()) for path in extract_standard_paths(raw_value)]
        for key in candidates:
            if key in seen:
                continue
            seen.add(key)
            source_kind, normalized_ref = key
            refs.append(
                {
                    "source_field": field_name,
                    "source_kind": source_kind,
                    "normalized_ref": normalized_ref,
                }
            )
    return refs
|
||
|
||
|
||
def data_signature(item: dict) -> str:
    """Return a canonical JSON string of the issue's ``DATA_FIELDS`` values for change detection."""
    relevant: dict = {}
    for field_name in DATA_FIELDS:
        relevant[field_name] = item.get(field_name)
    return canonicalize(relevant)
|
||
|
||
|
||
def top_level_changed_fields(previous: dict, current: dict) -> list[str]:
    """Return the sorted top-level keys whose canonical value differs between the two records.

    A key present in only one record is compared against ``None``.
    """
    all_keys = sorted(set(previous).union(current))
    return [
        key
        for key in all_keys
        if canonicalize(previous.get(key)) != canonicalize(current.get(key))
    ]
|
||
|
||
|
||
def status_label(status: Any) -> str:
    """Map a raw status key to its display label, falling back to the key's own text.

    ``None`` is treated as the empty string.
    """
    key = str(status) if status is not None else ""
    try:
        return STATUS_LABELS[key]
    except KeyError:
        return key
|
||
|
||
|
||
def summarize_issue(item: dict) -> dict:
    """Reduce an issue record to the fields reported in diffs and checks.

    The result holds ``id`` (as int), ``name``, ``status``, ``status_label``,
    ``created_at``, ``updated_at`` and ``has_downloadable_data``, in that order.
    """
    status = item.get("status")
    summary: dict = {
        "id": int(item["id"]),
        "name": item.get("name"),
        "status": status,
        "status_label": status_label(status),
    }
    summary.update((key, item.get(key)) for key in ("created_at", "updated_at"))
    summary["has_downloadable_data"] = has_downloadable_data(item)
    return summary
|
||
|
||
|
||
def summarize_removed_issue(issue_id: int, item: dict) -> dict:
    """Summarize an issue that left the view, forcing ``id`` to the given ``issue_id``."""
    # Overriding an existing key in a dict display keeps its original position.
    return {**summarize_issue(item), "id": issue_id}
|
||
|
||
|
||
def format_issue_set(items_by_id: dict[int, dict], issue_ids: Iterable[int]) -> list[dict]:
    """Return issue summaries for the distinct ``issue_ids``, ordered by id.

    Every id must be a key of ``items_by_id``; a missing id raises ``KeyError``.
    """
    summaries: list[dict] = []
    for issue_id in sorted(set(issue_ids)):
        summaries.append(summarize_issue(items_by_id[issue_id]))
    return summaries
|
||
|
||
|
||
def build_diff(previous_payload: dict | None, current_payload: dict, refresh_changed_issues: bool) -> dict:
    """Compare two exports of the view and classify what changed per issue.

    Args:
        previous_payload: The previous export, or ``None``/empty on the first
            run, in which case every current issue counts as new.
        current_payload: The export that was just fetched.
        refresh_changed_issues: When true, issues whose data fields changed
            while they already had data are scheduled for refresh and
            download; otherwise they are flagged for manual review.

    Returns:
        A dict with the totals, the per-category issue lists (new, removed,
        status changed, data added / changed / removed, metadata changed,
        updated-only) and the sorted id lists ``download_issue_ids``,
        ``refresh_issue_ids`` and ``manual_review_issue_ids``.
    """
    previous_items = previous_payload.get("items", []) if previous_payload else []
    current_items = current_payload.get("items", [])
    previous_by_id = {int(record["id"]): record for record in previous_items}
    current_by_id = {int(record["id"]): record for record in current_items}

    # Fields that have their own category and therefore never count as "metadata".
    non_metadata_fields = {"updated_at", "status", *DATA_FIELDS}

    def data_snapshot(record: dict) -> dict:
        """Return the record's values for the data-address fields."""
        return {field_name: record.get(field_name) for field_name in DATA_FIELDS}

    new_issue_ids: list[int] = []
    status_changed: list[dict] = []
    data_added: list[dict] = []
    data_changed_requires_refresh: list[dict] = []
    data_removed: list[dict] = []
    metadata_changed: list[dict] = []
    updated_only: list[dict] = []

    download_issue_ids: set[int] = set()
    refresh_issue_ids: set[int] = set()
    manual_review_issue_ids: set[int] = set()

    for issue_id in sorted(current_by_id):
        current_item = current_by_id[issue_id]
        previous_item = previous_by_id.get(issue_id)

        # Brand-new issue: download only if it already carries data.
        if previous_item is None:
            new_issue_ids.append(issue_id)
            if has_downloadable_data(current_item):
                download_issue_ids.add(issue_id)
            continue

        changed_fields = top_level_changed_fields(previous_item, current_item)
        if not changed_fields:
            continue

        issue_name = current_item.get("name")
        previous_updated_at = previous_item.get("updated_at")
        current_updated_at = current_item.get("updated_at")

        def data_entry() -> dict:
            """Build the before/after record shared by all data-change categories."""
            return {
                "id": issue_id,
                "name": issue_name,
                "previous_data": data_snapshot(previous_item),
                "current_data": data_snapshot(current_item),
            }

        previous_status = previous_item.get("status")
        current_status = current_item.get("status")
        if previous_status != current_status:
            status_changed.append(
                {
                    "id": issue_id,
                    "name": issue_name,
                    "previous_status": previous_status,
                    "previous_status_label": status_label(previous_status),
                    "current_status": current_status,
                    "current_status_label": status_label(current_status),
                    "previous_updated_at": previous_updated_at,
                    "current_updated_at": current_updated_at,
                }
            )

        if data_signature(previous_item) != data_signature(current_item):
            had_data = has_downloadable_data(previous_item)
            has_data = has_downloadable_data(current_item)
            if has_data and not had_data:
                data_added.append(data_entry())
                download_issue_ids.add(issue_id)
            elif has_data and had_data:
                data_changed_requires_refresh.append(data_entry())
                if refresh_changed_issues:
                    refresh_issue_ids.add(issue_id)
                    download_issue_ids.add(issue_id)
                else:
                    manual_review_issue_ids.add(issue_id)
            elif had_data:
                data_removed.append(data_entry())
                manual_review_issue_ids.add(issue_id)

        remaining_fields = [
            field_name for field_name in changed_fields if field_name not in non_metadata_fields
        ]
        if remaining_fields:
            metadata_changed.append(
                {
                    "id": issue_id,
                    "name": issue_name,
                    "changed_fields": remaining_fields,
                    "previous_updated_at": previous_updated_at,
                    "current_updated_at": current_updated_at,
                }
            )
        elif changed_fields == ["updated_at"]:
            updated_only.append(
                {
                    "id": issue_id,
                    "name": issue_name,
                    "previous_updated_at": previous_updated_at,
                    "current_updated_at": current_updated_at,
                }
            )

    removed_issue_ids = [issue_id for issue_id in sorted(previous_by_id) if issue_id not in current_by_id]

    return {
        "previous_total": len(previous_by_id),
        "current_total": len(current_by_id),
        "new_issues": format_issue_set(current_by_id, new_issue_ids),
        "removed_issues": [
            summarize_removed_issue(issue_id, previous_by_id[issue_id]) for issue_id in removed_issue_ids
        ],
        "status_changed_issues": status_changed,
        "data_added_issues": data_added,
        "data_changed_requires_refresh": data_changed_requires_refresh,
        "data_removed_issues": data_removed,
        "metadata_changed_issues": metadata_changed,
        "updated_only_issues": updated_only,
        "download_issue_ids": sorted(download_issue_ids),
        "refresh_issue_ids": sorted(refresh_issue_ids),
        "manual_review_issue_ids": sorted(manual_review_issue_ids),
    }
|
||
|
||
|
||
def remove_path(path: Path, dry_run: bool) -> dict:
    """Delete ``path`` (file, directory tree or symlink) and report what happened.

    Args:
        path: The filesystem entry to remove.
        dry_run: When true, nothing is deleted and the removal is only reported.

    Returns:
        ``{"path": str(path), "status": ...}`` where status is ``"missing"``,
        ``"planned_remove"`` (dry run) or ``"removed"``.

    A symlink is removed as a link: its target is never followed or deleted,
    and a dangling symlink still counts as existing.
    """
    is_link = path.is_symlink()
    # exists() follows symlinks, so a dangling link must be detected separately.
    if not is_link and not path.exists():
        return {"path": str(path), "status": "missing"}
    if dry_run:
        return {"path": str(path), "status": "planned_remove"}
    if path.is_dir() and not is_link:
        shutil.rmtree(path)
    else:
        # shutil.rmtree refuses symlinks; unlink removes only the link itself.
        path.unlink()
    return {"path": str(path), "status": "removed"}
|
||
|
||
|
||
def issue_root(path_root: Path, issue_id: int) -> Path:
    """Return the per-issue directory ``<path_root>/issue_<issue_id>``."""
    return path_root.joinpath(f"issue_{issue_id}")
|
||
|
||
|
||
def count_local_video_cases(download_root: Path, issue_id: int) -> int:
    """Count ``camera4.bin`` files that sit directly inside a ``sigmastar.1`` directory.

    The search is recursive below the issue's download directory; a missing
    directory counts as zero.
    """
    issue_dir = issue_root(download_root, issue_id)
    if not issue_dir.is_dir():
        return 0
    matches = [
        candidate
        for candidate in issue_dir.rglob("camera4.bin")
        if candidate.parent.name == "sigmastar.1"
    ]
    return len(matches)
|
||
|
||
|
||
def count_local_clip_export_cases(download_root: Path, issue_id: int) -> int:
    """Count complete ``clip_export_*`` directories directly under the issue's download directory.

    A directory counts only when it contains an ``images`` directory and the
    file ``calib/L2_calib/camera4.json``. A missing issue directory counts as zero.
    """
    issue_dir = issue_root(download_root, issue_id)
    if not issue_dir.is_dir():
        return 0
    total = 0
    for entry in issue_dir.iterdir():
        if not entry.is_dir():
            continue
        if not entry.name.startswith("clip_export_"):
            continue
        if not (entry / "images").is_dir():
            continue
        if (entry / "calib" / "L2_calib" / "camera4.json").is_file():
            total += 1
    return total
|
||
|
||
|
||
def count_local_downloaded_cases(download_root: Path, issue_id: int) -> int:
    """Return the number of locally present cases for the issue: video cases plus clip exports."""
    video_cases = count_local_video_cases(download_root, issue_id)
    clip_cases = count_local_clip_export_cases(download_root, issue_id)
    return video_cases + clip_cases
|
||
|
||
|
||
def build_local_download_check(current_payload: dict, download_root: Path | None) -> dict:
    """Compare the references each issue expects with what exists under ``download_root``.

    Args:
        current_payload: The current export.
        download_root: Root of the local downloads, or ``None`` to disable the
            check (an all-empty report with ``enabled=False`` is returned).

    Returns:
        A report with one summary per issue that has downloadable data, plus
        id lists for issues with no parseable reference (``unparsed``), no
        local case at all (``missing``) or fewer local cases than expected
        (``incomplete``). ``actionable_issue_ids`` is the sorted union of
        missing and incomplete.
    """
    summaries: list[dict] = []
    missing: list[int] = []
    incomplete: list[int] = []
    unparsed: list[int] = []
    report = {
        "enabled": download_root is not None,
        "download_root": "" if download_root is None else str(download_root),
        "download_root_exists": False,
        "issues_with_downloadable_data": summaries,
        "missing_issue_ids": missing,
        "incomplete_issue_ids": incomplete,
        "unparsed_issue_ids": unparsed,
        "actionable_issue_ids": [],
    }
    if download_root is None:
        return report

    report["download_root_exists"] = download_root.exists()
    ordered_items = sorted(current_payload.get("items", []), key=lambda issue: int(issue["id"]))
    for record in ordered_items:
        if not has_downloadable_data(record):
            continue

        issue_id = int(record["id"])
        expected_refs = expected_download_refs(record)
        expected_count = len(expected_refs)
        local_count = count_local_downloaded_cases(download_root, issue_id)

        summary = summarize_issue(record)
        summary["expected_case_count"] = expected_count
        summary["local_case_count"] = local_count
        summary["expected_refs"] = expected_refs
        summaries.append(summary)

        # An issue lands in at most one bucket; order matters.
        if expected_count == 0:
            unparsed.append(issue_id)
        elif local_count == 0:
            missing.append(issue_id)
        elif local_count < expected_count:
            incomplete.append(issue_id)

    report["actionable_issue_ids"] = sorted(set(missing) | set(incomplete))
    return report
|
||
|
||
|
||
def prepare_refresh_outputs(
    download_root: Path,
    inference_root: Path | None,
    refresh_issue_ids: Iterable[int],
    dry_run: bool,
) -> dict:
    """Remove the per-issue download (and, if configured, inference) directories.

    Issues are handled in ascending id order, download directory first. With
    ``dry_run`` the removals are only reported.

    Returns:
        ``{"download_cleanup": [...], "inference_cleanup": [...]}`` holding one
        ``remove_path`` result per processed directory.
    """
    result: dict = {"download_cleanup": [], "inference_cleanup": []}
    for issue_id in sorted(set(refresh_issue_ids)):
        download_dir = issue_root(download_root, issue_id)
        result["download_cleanup"].append(remove_path(download_dir, dry_run=dry_run))
        if inference_root is None:
            continue
        inference_dir = issue_root(inference_root, issue_id)
        result["inference_cleanup"].append(remove_path(inference_dir, dry_run=dry_run))
    return result
|
||
|
||
|
||
def run_download(
    args: argparse.Namespace,
    issue_ids: list[int],
    output_json: Path,
) -> dict:
    """Launch the download script for the given issues and report its outcome.

    Args:
        args: Parsed CLI options (download root, manifest path, interpreter,
            script path, dry-run flag and pass-through arguments are read).
        issue_ids: Issues to download; an empty list is a no-op.
        output_json: Exported issue JSON passed to the script as ``--input-json``.

    Returns:
        A dict with ``ran``, ``issue_ids``, ``manifest_path`` and ``summary``;
        when the script ran it also holds its stripped ``stdout``. The summary
        is read from the manifest file unless this is a dry run or the file is
        absent.

    Raises:
        ValueError: ``--download-root`` was not provided.
        RuntimeError: Propagated from ``run_command`` when the script fails.
    """
    if not args.download_root:
        raise ValueError("--download-root is required when --run-download is used")
    if not issue_ids:
        return {"ran": False, "issue_ids": [], "manifest_path": None, "summary": {}}

    download_root = Path(args.download_root).resolve()
    if args.download_manifest_path:
        manifest_path = Path(args.download_manifest_path).resolve()
    else:
        manifest_path = download_root / "download_manifest.json"

    command = [
        str(Path(args.python_bin).resolve()),
        str(Path(args.download_script).resolve()),
        "--input-json",
        str(output_json),
        "--output-root",
        str(download_root),
        "--manifest-path",
        str(manifest_path),
    ]
    if args.dry_run:
        command.append("--dry-run")
    command.extend(token for issue_id in issue_ids for token in ("--issue-id", str(issue_id)))
    command.extend(args.download_arg)

    completed = run_command(command, cwd=ROOT, stream_output=True)

    manifest_readable = not args.dry_run and manifest_path.is_file()
    manifest = load_json(manifest_path) if manifest_readable else {}
    return {
        "ran": True,
        "issue_ids": issue_ids,
        "manifest_path": str(manifest_path),
        "summary": manifest.get("summary", {}),
        "stdout": completed.stdout.strip(),
    }
|
||
|
||
|
||
def issue_has_downloaded_cases(download_root: Path, issue_id: int) -> bool:
    """Tell whether at least one case (video or clip export) exists locally for the issue."""
    case_count = count_local_downloaded_cases(download_root, issue_id)
    return case_count > 0
|
||
|
||
|
||
def issue_has_video_cases(download_root: Path, issue_id: int) -> bool:
    """Tell whether at least one video case exists locally for the issue."""
    video_count = count_local_video_cases(download_root, issue_id)
    return video_count > 0
|
||
|
||
|
||
def resolve_pdcl_auth_env() -> dict[str, str]:
    """Resolve ``STS_UID`` and ``STS_SECRET_KEY`` for PDCL access.

    Non-empty values in the process environment win. Anything still missing
    is extracted by regex from the text of
    ``tools/pdcl_inference/get_clips_of_aeb.py`` under ``ROOT``.

    Raises:
        ValueError: A credential is neither in the environment nor found in
            that file.
    """
    wanted = ("STS_UID", "STS_SECRET_KEY")
    resolved: dict[str, str] = {}
    for key in wanted:
        value = os.environ.get(key)
        if value:
            resolved[key] = value
    if len(resolved) == 2:
        return resolved

    auth_source = ROOT / "tools" / "pdcl_inference" / "get_clips_of_aeb.py"
    text = auth_source.read_text(encoding="utf-8")
    # Each pattern accepts either a `KEY", "value"` or a `KEY'] = 'value'` spelling.
    patterns = {
        "STS_UID": re.compile(r'STS_UID"?,\s*"([^"]+)"|STS_UID\']\s*=\s*\'([^\']+)\''),
        "STS_SECRET_KEY": re.compile(r'STS_SECRET_KEY"?,\s*"([^"]+)"|STS_SECRET_KEY\']\s*=\s*\'([^\']+)\''),
    }
    for key, pattern in patterns.items():
        if key in resolved:
            continue
        match = pattern.search(text)
        if match is None:
            raise ValueError(f"Failed to resolve {key} from {auth_source}")
        # Only the alternative that matched has a non-empty group.
        resolved[key] = next(filter(None, match.groups()))
    return resolved
|
||
|
||
|
||
def extract_pdcl_raw_ids(item: dict) -> list[str]:
    """Return the distinct raw ids (the part before ``::``) of all PDCL references on the issue.

    Order follows first appearance across the data fields.
    """
    ordered: dict[str, None] = {}
    for _, raw_value in iter_issue_fields(item):
        for ref in extract_pdcl_refs(raw_value):
            raw_id = ref.partition("::")[0].strip()
            if raw_id:
                # A dict used as an insertion-ordered set.
                ordered.setdefault(raw_id, None)
    return list(ordered)
|
||
|
||
|
||
def resolve_clip_ids_for_issue(item: dict, pdcl_auth_env: dict[str, str]) -> tuple[list[str], list[str]]:
    """Look up the clip ids behind every PDCL raw id referenced by the issue.

    ``ROOT`` is appended to ``sys.path`` if absent so the project helper can
    be imported. The credentials in ``pdcl_auth_env`` are placed in
    ``os.environ`` for the duration of the import and lookup, and the previous
    values are restored afterwards, even on error.

    Returns:
        ``(raw_ids, clip_ids)``; clip ids are stripped, non-empty and
        de-duplicated in first-seen order.
    """
    root_entry = str(ROOT)
    if root_entry not in sys.path:
        sys.path.append(root_entry)

    saved_env = {key: os.environ.get(key) for key in pdcl_auth_env}
    os.environ.update(pdcl_auth_env)
    try:
        # Imported while the credentials are already in the environment.
        from tools.pdcl_inference.get_clips_of_aeb import get_clip_ukeys_from_raw

        raw_ids = extract_pdcl_raw_ids(item)
        unique_clip_ids: dict[str, None] = {}
        for raw_id in raw_ids:
            for candidate in get_clip_ukeys_from_raw(raw_id):
                clip_id = str(candidate).strip()
                if clip_id:
                    unique_clip_ids.setdefault(clip_id, None)
        return raw_ids, list(unique_clip_ids)
    finally:
        for key, old_value in saved_env.items():
            if old_value is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = old_value
|
||
|
||
|
||
def run_clip_fallback_inference(
    args: argparse.Namespace,
    issue_ids: list[int],
    issue_json_path: Path,
) -> dict:
    """Run clip-based inference for the given issues.

    For each issue the PDCL raw ids are extracted, resolved to clip ids,
    written to a clip list file under ``<inference_root>/_clip_fallback`` and
    passed to ``DEFAULT_CLIP_FALLBACK_INFERENCE_SCRIPT``. With
    ``args.dry_run`` set, nothing is resolved or executed; the issue is only
    recorded as planned.

    Args:
        args: Parsed CLI options. Reads ``inference_root``, ``download_root``,
            ``python_bin``, ``dry_run``, ``exported_model``,
            ``skip_existing_inference`` and ``inference_arg``.
        issue_ids: Issues to process.
        issue_json_path: Exported issue JSON used to look up issue records.

    Returns:
        A dict with ``ran``, ``issue_ids`` (planned or successfully processed
        issues), ``summary`` (count per status), ``detail`` and
        ``fallback_results`` (one record per processed issue).
    """
    if not issue_ids:
        return {
            "ran": False,
            "issue_ids": [],
            "summary": {},
            "detail": "no issues require clip fallback inference",
            "fallback_results": [],
        }

    issue_lookup = build_issue_lookup(load_json(issue_json_path))
    pdcl_auth_env = resolve_pdcl_auth_env()
    clip_list_dir = Path(args.inference_root).resolve() / "_clip_fallback"
    if not args.dry_run:
        clip_list_dir.mkdir(parents=True, exist_ok=True)

    summary: dict[str, int] = {}
    fallback_results: list[dict[str, Any]] = []
    successful_issue_ids: list[int] = []

    def count(status: str) -> None:
        # Increment the per-status counter in the summary.
        summary[status] = summary.get(status, 0) + 1

    for issue_id in issue_ids:
        item = issue_lookup.get(issue_id)
        if item is None:
            count("skipped_missing_issue_record")
            fallback_results.append(
                {
                    "issue_id": issue_id,
                    "status": "skipped_missing_issue_record",
                    "detail": "issue id not found in exported issue json",
                }
            )
            continue

        raw_ids = extract_pdcl_raw_ids(item)
        if not raw_ids:
            count("skipped_no_pdcl_raw")
            fallback_results.append(
                {
                    "issue_id": issue_id,
                    "status": "skipped_no_pdcl_raw",
                    "detail": "no PDCL raw ids found in issue data fields",
                }
            )
            continue

        if args.dry_run:
            # Dry-run counts the issue as handled without resolving clips.
            count("planned_clip_fallback")
            fallback_results.append(
                {
                    "issue_id": issue_id,
                    "status": "planned_clip_fallback",
                    "detail": f"would resolve clips from raw ids: {raw_ids}",
                    "raw_ids": raw_ids,
                }
            )
            successful_issue_ids.append(issue_id)
            continue

        try:
            raw_ids, clip_ids = resolve_clip_ids_for_issue(item, pdcl_auth_env)
            if not clip_ids:
                count("failed_no_clip_ids")
                fallback_results.append(
                    {
                        "issue_id": issue_id,
                        "status": "failed_no_clip_ids",
                        "detail": f"resolved raw ids but found no clip ids: {raw_ids}",
                        "raw_ids": raw_ids,
                    }
                )
                continue

            clip_list_path = clip_list_dir / f"issue_{issue_id}.clip_fallback.txt"
            clip_list_path.write_text("\n".join(clip_ids) + "\n", encoding="utf-8")

            command = [
                str(Path(args.python_bin).resolve()),
                str(DEFAULT_CLIP_FALLBACK_INFERENCE_SCRIPT.resolve()),
                "--clip-list-file",
                str(clip_list_path),
                "--export-root",
                str(issue_root(Path(args.download_root).resolve(), issue_id)),
                "--output-dir",
                str(issue_root(Path(args.inference_root).resolve(), issue_id)),
                "--output-prefix",
                "clip_export",
                "--camera-topic",
                "camera4",
                "--save-aggregate-predictions",
            ]
            if args.exported_model:
                command += ["--exported-model", args.exported_model]
            if args.skip_existing_inference:
                command.append("--skip-done")
            # Extra arguments are forwarded verbatim to the clip script.
            command.extend(args.inference_arg)

            child_env = {**os.environ, **pdcl_auth_env}
            completed = run_command(command, cwd=ROOT, env=child_env, stream_output=True)
            count("clip_fallback_success")
            fallback_results.append(
                {
                    "issue_id": issue_id,
                    "status": "clip_fallback_success",
                    "detail": f"resolved {len(clip_ids)} clips via {len(raw_ids)} raw ids",
                    "raw_ids": raw_ids,
                    "clip_ids": clip_ids,
                    "clip_list_file": str(clip_list_path),
                    "stdout": completed.stdout.strip(),
                }
            )
            successful_issue_ids.append(issue_id)
        except Exception as exc:
            # One failing issue is recorded and must not stop the others.
            count("clip_fallback_failed")
            fallback_results.append(
                {
                    "issue_id": issue_id,
                    "status": "clip_fallback_failed",
                    "detail": f"{type(exc).__name__}: {exc}",
                    "raw_ids": raw_ids,
                }
            )

    return {
        "ran": bool(fallback_results),
        "issue_ids": successful_issue_ids,
        "summary": summary,
        "detail": "" if successful_issue_ids else "clip fallback produced no runnable issues",
        "fallback_results": fallback_results,
    }
|
||
|
||
|
||
def resolve_tracking_model_version(tracking_model_version: str, exported_model: str) -> str:
    """Pick the model version string used for tracking.

    Args:
        tracking_model_version: Explicit version; returned as-is when truthy.
        exported_model: Exported-model string searched for a fallback version.

    Returns:
        The explicit version, otherwise the first run of eight consecutive
        digits found in ``exported_model``, otherwise ``""``.
    """
    if tracking_model_version:
        return tracking_model_version
    if not exported_model:
        return ""
    digits = re.search(r"([0-9]{8})", exported_model)
    return digits.group(1) if digits else ""
|
||
|
||
|
||
def run_inference(
    args: argparse.Namespace,
    issue_ids: list[int],
    issue_json_path: Path,
) -> dict:
    """Run inference for the selected issues.

    Issues for which ``issue_has_video_cases`` is true are processed in one
    invocation of ``args.inference_script``. All remaining issues are handed
    to ``run_clip_fallback_inference``.

    Args:
        args: Parsed CLI options.
        issue_ids: Issues selected for inference.
        issue_json_path: Exported issue JSON passed on to the child script
            and the clip fallback.

    Returns:
        A dict with ``ran``, ``issue_ids``, ``manifest_path``, ``summary``
        and ``fallback``, plus ``stdout`` when something ran or ``detail``
        when nothing did.

    Raises:
        ValueError: If ``args.inference_root`` or ``args.download_root`` is
            not set.
    """
    if not args.inference_root:
        raise ValueError("--inference-root is required when --run-inference is used")
    if not args.download_root:
        raise ValueError("--download-root is required when --run-inference is used")

    def empty_fallback() -> dict:
        # Fresh placeholder describing a clip fallback that did not run.
        return {
            "ran": False,
            "issue_ids": [],
            "summary": {},
            "detail": "",
            "fallback_results": [],
        }

    if not issue_ids:
        return {
            "ran": False,
            "issue_ids": [],
            "manifest_path": None,
            "summary": {},
            "detail": "no issue ids selected for inference",
            "fallback": empty_fallback(),
        }

    download_root = Path(args.download_root).resolve()
    inference_root = Path(args.inference_root).resolve()

    # Split the issues into directly runnable ones and clip-fallback ones.
    runnable_issue_ids = [
        candidate for candidate in issue_ids if issue_has_video_cases(download_root, candidate)
    ]
    runnable_lookup = set(runnable_issue_ids)
    fallback_issue_ids = [candidate for candidate in issue_ids if candidate not in runnable_lookup]

    if args.inference_manifest_path:
        manifest_path = Path(args.inference_manifest_path).resolve()
    else:
        manifest_path = inference_root / "inference_manifest.json"

    summary: dict[str, int] = {}
    successful_issue_ids: list[int] = []
    combined_stdout_parts: list[str] = []
    fallback_result = empty_fallback()

    if runnable_issue_ids:
        command = [
            str(Path(args.python_bin).resolve()),
            str(Path(args.inference_script).resolve()),
            "--download-root",
            str(download_root),
            "--output-root",
            str(inference_root),
            "--manifest-path",
            str(manifest_path),
        ]
        if args.use_issue_frame_window:
            command += [
                "--issue-json",
                str(issue_json_path),
                "--use-issue-frame-window",
                "--frame-before",
                str(args.frame_before),
                "--frame-after",
                str(args.frame_after),
                "--missing-issue-frame-policy",
                args.missing_issue_frame_policy,
            ]
        if args.exported_model:
            command += ["--exported-model", args.exported_model]
        if args.skip_existing_inference:
            command.append("--skip-existing")
        if args.dry_run:
            command.append("--dry-run")
        for runnable_id in runnable_issue_ids:
            command += ["--issue-id", str(runnable_id)]
        # Extra arguments are wrapped so the issue-level script can forward them.
        command.extend(f"--inference-arg={extra_arg}" for extra_arg in args.inference_arg)

        completed = run_command(command, cwd=ROOT, stream_output=True)
        # The child manifest is only read for real runs.
        child_manifest = {}
        if not args.dry_run and manifest_path.is_file():
            child_manifest = load_json(manifest_path)
        summary.update(child_manifest.get("summary", {}))
        successful_issue_ids.extend(runnable_issue_ids)
        combined_stdout_parts.append(completed.stdout.strip())

    if fallback_issue_ids:
        log_progress(
            "[stage=inference] clip fallback required: "
            + describe_issue_ids(load_json(issue_json_path), fallback_issue_ids)
        )
        fallback_result = run_clip_fallback_inference(args, fallback_issue_ids, issue_json_path)
        # Fold the fallback status counts into the combined summary.
        for status, amount in fallback_result.get("summary", {}).items():
            summary[status] = summary.get(status, 0) + amount
        successful_issue_ids.extend(fallback_result.get("issue_ids", []))

    fallback_ran = fallback_result.get("ran")
    if not runnable_issue_ids and not fallback_ran:
        return {
            "ran": False,
            "issue_ids": [],
            "manifest_path": None,
            "summary": {},
            "detail": "no downloaded camera4.bin cases found and no clip fallback issues were runnable",
            "fallback": fallback_result,
        }

    return {
        "ran": bool(runnable_issue_ids) or bool(fallback_ran),
        "issue_ids": sorted(set(successful_issue_ids)),
        "manifest_path": str(manifest_path),
        "summary": summary,
        "stdout": "\n".join(part for part in combined_stdout_parts if part),
        "fallback": fallback_result,
    }
|
||
|
||
|
||
def run_tracking(
    args: argparse.Namespace,
    issue_ids: list[int],
) -> dict:
    """Run the issue tracking script for the given issues.

    The script is started through ``bash`` with the resolved inference root
    and one ``--issue-id`` pair per issue. ``PYTHON_BIN`` and, when a version
    could be resolved, ``TRACKING_MODEL_VERSION`` are set in its environment.

    Args:
        args: Parsed CLI options. Reads ``inference_root``, ``dry_run``,
            ``issue_tracking_script``, ``tracking_model_version``,
            ``exported_model`` and ``python_bin``.
        issue_ids: Issues to track.

    Returns:
        A dict with ``ran``, ``issue_ids``, ``script`` and
        ``tracking_model_version``, plus ``stdout`` when the script ran or
        ``detail`` when tracking was skipped.

    Raises:
        ValueError: If ``args.inference_root`` is not set.
    """
    if not args.inference_root:
        raise ValueError("--inference-root is required when --run-tracking is used")

    tracking_script = Path(args.issue_tracking_script).resolve()

    # Dry-run takes precedence over the empty-selection skip.
    skip_reason = ""
    if args.dry_run:
        skip_reason = "tracking skipped because dry-run is enabled"
    elif not issue_ids:
        skip_reason = "no inferred issue ids available for tracking"
    if skip_reason:
        return {
            "ran": False,
            "issue_ids": [],
            "script": str(tracking_script),
            "tracking_model_version": "",
            "detail": skip_reason,
        }

    tracking_model_version = resolve_tracking_model_version(
        args.tracking_model_version,
        args.exported_model,
    )

    command = ["bash", str(tracking_script), str(Path(args.inference_root).resolve())]
    for tracked_id in issue_ids:
        command += ["--issue-id", str(tracked_id)]

    child_env = dict(os.environ)
    child_env["PYTHON_BIN"] = str(Path(args.python_bin).resolve())
    if tracking_model_version:
        child_env["TRACKING_MODEL_VERSION"] = tracking_model_version

    completed = run_command(command, cwd=ROOT, env=child_env, stream_output=True)
    return {
        "ran": True,
        "issue_ids": issue_ids,
        "script": str(tracking_script),
        "tracking_model_version": tracking_model_version,
        "stdout": completed.stdout.strip(),
    }
|
||
|
||
|
||
def build_manifest(
    args: argparse.Namespace,
    previous_payload: dict | None,
    current_payload: dict,
    diff: dict,
    local_download_check: dict,
    requested_issue_ids: list[int],
    issue_name_keywords: list[str],
    matched_issue_ids: list[int],
    issue_id_min: int | None,
    issue_id_max: int | None,
    matched_issue_ids_by_range: list[int],
    matched_issue_ids_without_downloadable_data: list[int],
    planned_download_issue_ids: list[int],
    planned_inference_issue_ids: list[int],
    planned_refresh_issue_ids: list[int],
    planned_manual_review_issue_ids: list[int],
    refresh_cleanup: dict,
    download_result: dict,
    inference_result: dict,
    tracking_result: dict,
) -> dict:
    """Assemble the sync manifest describing one refresh run.

    The manifest combines the CLI options, the issue selections, the diff
    against the previous export and the results of the individual stages.
    The manifest path is ``args.sync_manifest_path`` when given, otherwise
    ``<output_json name>.sync_manifest.json`` next to the output JSON.

    Args:
        args: Parsed CLI options copied into the manifest.
        previous_payload: Previous export, or ``None`` if there was none.
        current_payload: Current export.
        diff: Change report; must contain ``previous_total`` and
            ``current_total``. Stored as a whole under ``changes``.
        local_download_check: Stored as-is.
        requested_issue_ids: Stored as-is.
        issue_name_keywords: Stored as-is.
        matched_issue_ids: Stored as-is.
        issue_id_min: Stored as-is.
        issue_id_max: Stored as-is.
        matched_issue_ids_by_range: Stored as-is.
        matched_issue_ids_without_downloadable_data: Stored as-is.
        planned_download_issue_ids: Stored under its own key and also under
            ``actionable_download_issue_ids``.
        planned_inference_issue_ids: Stored as-is.
        planned_refresh_issue_ids: Stored as-is.
        planned_manual_review_issue_ids: Stored as-is.
        refresh_cleanup: Stored as-is.
        download_result: Stored under ``download``.
        inference_result: Stored under ``inference``.
        tracking_result: Stored under ``tracking``.

    Returns:
        The manifest dict. Keys are inserted in a fixed order.
    """
    output_json = Path(args.output_json).resolve()
    if args.sync_manifest_path:
        manifest_path = Path(args.sync_manifest_path).resolve()
    else:
        manifest_path = output_json.with_name(f"{output_json.name}.sync_manifest.json")
    snapshot_dir = str(Path(args.snapshot_dir).resolve()) if args.snapshot_dir else ""
    previous_exported_at = previous_payload.get("exported_at") if previous_payload is not None else None

    # Keys are inserted section by section so the serialized order is stable.
    manifest: dict = {
        "generated_at": datetime.now().astimezone().isoformat(timespec="seconds"),
    }
    for option in ("project_key", "user_key", "view_name", "work_item_type"):
        manifest[option] = getattr(args, option)
    manifest["output_json"] = str(output_json)
    manifest["sync_manifest_path"] = str(manifest_path)
    manifest["snapshot_dir"] = snapshot_dir
    for option in (
        "dry_run",
        "refresh_changed_issues",
        "run_download",
        "run_inference",
        "run_tracking",
        "use_issue_frame_window",
        "frame_before",
        "frame_after",
        "missing_issue_frame_policy",
        "skip_existing_inference",
    ):
        manifest[option] = getattr(args, option)
    manifest.update(
        requested_issue_ids=requested_issue_ids,
        issue_name_keywords=issue_name_keywords,
        matched_issue_ids=matched_issue_ids,
        issue_id_min=issue_id_min,
        issue_id_max=issue_id_max,
        matched_issue_ids_by_range=matched_issue_ids_by_range,
        matched_issue_ids_without_downloadable_data=matched_issue_ids_without_downloadable_data,
        planned_download_issue_ids=planned_download_issue_ids,
        planned_inference_issue_ids=planned_inference_issue_ids,
        planned_refresh_issue_ids=planned_refresh_issue_ids,
        planned_manual_review_issue_ids=planned_manual_review_issue_ids,
        exported_model=args.exported_model,
        tracking_model_version=args.tracking_model_version,
        previous_exported_at=previous_exported_at,
        current_exported_at=current_payload.get("exported_at"),
        previous_total=diff["previous_total"],
        current_total=diff["current_total"],
        changes=diff,
        local_download_check=local_download_check,
        actionable_download_issue_ids=planned_download_issue_ids,
        refresh_cleanup=refresh_cleanup,
        download=download_result,
        inference=inference_result,
        tracking=tracking_result,
    )
    return manifest
|
||
|
||
|
||
def print_summary(manifest: dict) -> None:
    """Print a line-oriented summary of a sync manifest to stdout.

    Each line has the form ``<label>: <value>``. Optional sections are
    printed only when the corresponding manifest entry is set.

    Args:
        manifest: Sync manifest as produced by ``build_manifest``.
    """
    changes = manifest["changes"]

    for key in ("view_name", "output_json", "dry_run"):
        print(f"{key}: {manifest[key]}")

    if manifest.get("exported_model"):
        print(f"exported_model: {manifest['exported_model']}")
    if manifest.get("use_issue_frame_window"):
        print(
            f"issue_frame_window: before={manifest['frame_before']} "
            f"after={manifest['frame_after']} "
            f"missing_policy={manifest['missing_issue_frame_policy']}"
        )
    if manifest.get("run_tracking"):
        # Prefer the version the tracking stage reported over the CLI value.
        tracking_version = manifest["tracking"].get("tracking_model_version") or manifest.get(
            "tracking_model_version"
        )
        if tracking_version:
            print(f"tracking_model_version: {tracking_version}")

    for key in ("previous_total", "current_total"):
        print(f"{key}: {manifest[key]}")

    # Change categories are reported as counts only.
    for category in (
        "new_issues",
        "status_changed_issues",
        "data_added_issues",
        "data_changed_requires_refresh",
        "data_removed_issues",
        "metadata_changed_issues",
        "updated_only_issues",
        "removed_issues",
    ):
        print(f"{category}: {len(changes[category])}")

    if manifest.get("requested_issue_ids"):
        print(f"requested_issue_ids: {manifest['requested_issue_ids']}")
    if manifest.get("issue_name_keywords"):
        for key in (
            "issue_name_keywords",
            "matched_issue_ids",
            "matched_issue_ids_without_downloadable_data",
        ):
            print(f"{key}: {manifest[key]}")
    if manifest.get("issue_id_min") is not None or manifest.get("issue_id_max") is not None:
        for key in ("issue_id_min", "issue_id_max", "matched_issue_ids_by_range"):
            print(f"{key}: {manifest[key]}")

    for key in ("download_issue_ids", "refresh_issue_ids", "manual_review_issue_ids"):
        print(f"{key}: {changes[key]}")

    local_check = manifest["local_download_check"]
    if local_check.get("enabled"):
        downloadable = local_check.get("issues_with_downloadable_data", [])
        print(f"local_downloadable_issues: {len(downloadable)}")
        print(f"local_missing_issue_ids: {local_check.get('missing_issue_ids', [])}")
        print(f"local_incomplete_issue_ids: {local_check.get('incomplete_issue_ids', [])}")
        print(f"local_unparsed_issue_ids: {local_check.get('unparsed_issue_ids', [])}")

    for key in (
        "planned_download_issue_ids",
        "planned_inference_issue_ids",
        "planned_refresh_issue_ids",
        "planned_manual_review_issue_ids",
    ):
        print(f"{key}: {manifest[key]}")

    download = manifest["download"]
    if download.get("ran"):
        print(f"download_summary: {download.get('summary', {})}")

    inference = manifest["inference"]
    if inference.get("ran"):
        print(f"inference_summary: {inference.get('summary', {})}")
    elif inference.get("detail"):
        print(f"inference_detail: {inference['detail']}")

    tracking = manifest["tracking"]
    if tracking.get("ran"):
        print(f"tracking_issue_ids: {tracking.get('issue_ids', [])}")
    elif tracking.get("detail"):
        print(f"tracking_detail: {tracking['detail']}")
|
||
|
||
|
||
def main() -> int:
    """Export the view, plan the work and run the enabled stages.

    The latest view is exported and diffed against the previous export. The
    download, inference, refresh and manual-review selections are derived
    from that diff and narrowed by the keyword, id-range and explicit-id
    filters. The refresh cleanup, download, inference and tracking stages
    then run according to the CLI flags. The sync manifest is written
    unless dry-run is enabled, and a summary is printed.

    Returns:
        ``0`` on completion.

    Raises:
        ValueError: If ``--issue-id-min`` is greater than ``--issue-id-max``.
    """
    args = parse_args()
    id_min, id_max = args.issue_id_min, args.issue_id_max
    if id_min is not None and id_max is not None and id_min > id_max:
        raise ValueError(
            f"--issue-id-min must be <= --issue-id-max: {args.issue_id_min} > {args.issue_id_max}"
        )

    def restrict(candidates: list, allowed) -> list:
        # Keep the candidates, in order, that also appear in ``allowed``.
        allowed_ids = set(allowed)
        return [candidate for candidate in candidates if candidate in allowed_ids]

    output_json = Path(args.output_json).resolve()
    requested_issue_ids = sorted(set(args.issue_id))
    issue_name_keywords = sanitize_issue_name_keywords(args.issue_name_keyword)
    previous_payload = load_json(output_json) if output_json.is_file() else None
    current_payload, live_export_path = export_latest_view(args)
    try:
        diff = build_diff(previous_payload, current_payload, refresh_changed_issues=args.refresh_changed_issues)
        issue_lookup = build_issue_lookup(current_payload)
        matched_issue_ids = collect_issue_ids_by_name_keywords(current_payload, issue_name_keywords)
        matched_issue_ids_without_downloadable_data = [
            matched_id
            for matched_id in matched_issue_ids
            if not has_downloadable_data(issue_lookup[matched_id])
        ]
        matched_issue_ids_by_range = collect_issue_ids_by_id_range(current_payload, id_min, id_max)
        download_root = Path(args.download_root).resolve() if args.download_root else None
        local_download_check = build_local_download_check(current_payload, download_root)

        # Default plan: everything the diff or the local check marks actionable.
        default_actionable_download_issue_ids = sorted(
            set(diff["download_issue_ids"]) | set(local_download_check["actionable_issue_ids"])
        )
        planned_download_issue_ids = list(default_actionable_download_issue_ids)
        planned_inference_issue_ids = list(default_actionable_download_issue_ids)
        planned_refresh_issue_ids = list(diff["refresh_issue_ids"])
        planned_manual_review_issue_ids = list(diff["manual_review_issue_ids"])

        if issue_name_keywords:
            planned_download_issue_ids = sorted(
                set(default_actionable_download_issue_ids) & set(matched_issue_ids)
            )
            # Inference covers every matched issue that has downloadable data.
            planned_inference_issue_ids = [
                matched_id
                for matched_id in matched_issue_ids
                if has_downloadable_data(issue_lookup[matched_id])
            ]
            planned_refresh_issue_ids = restrict(planned_refresh_issue_ids, matched_issue_ids)
            planned_manual_review_issue_ids = restrict(planned_manual_review_issue_ids, matched_issue_ids)
            log_progress(
                "[stage=plan] issue-name keyword filter enabled: "
                f"{issue_name_keywords} -> {describe_issue_ids(current_payload, matched_issue_ids)}"
            )
            if matched_issue_ids_without_downloadable_data:
                log_progress(
                    "[stage=plan] matched issues without downloadable data: "
                    + describe_issue_ids(
                        current_payload,
                        matched_issue_ids_without_downloadable_data,
                    )
                )

        if id_min is not None or id_max is not None:
            planned_download_issue_ids = restrict(planned_download_issue_ids, matched_issue_ids_by_range)
            planned_inference_issue_ids = restrict(planned_inference_issue_ids, matched_issue_ids_by_range)
            planned_refresh_issue_ids = restrict(planned_refresh_issue_ids, matched_issue_ids_by_range)
            planned_manual_review_issue_ids = restrict(
                planned_manual_review_issue_ids, matched_issue_ids_by_range
            )
            log_progress(
                "[stage=plan] issue-id range filter enabled: "
                f"min={args.issue_id_min} max={args.issue_id_max} -> "
                + describe_issue_ids(current_payload, matched_issue_ids_by_range)
            )

        if requested_issue_ids:
            # Explicit ids replace the download and inference plans outright.
            planned_download_issue_ids = requested_issue_ids
            planned_inference_issue_ids = requested_issue_ids
            planned_refresh_issue_ids = restrict(planned_refresh_issue_ids, requested_issue_ids)
            planned_manual_review_issue_ids = restrict(planned_manual_review_issue_ids, requested_issue_ids)
            log_progress(
                "[stage=plan] explicit issue filter enabled: "
                + describe_issue_ids(current_payload, requested_issue_ids)
            )
            if issue_name_keywords:
                log_progress(
                    "[stage=plan] explicit issue filter overrides the issue-name keyword plan"
                )

        log_progress(
            "[stage=plan] planned download set: "
            + describe_issue_ids(current_payload, planned_download_issue_ids)
        )
        log_progress(
            "[stage=plan] planned inference set: "
            + describe_issue_ids(current_payload, planned_inference_issue_ids)
        )
        if planned_manual_review_issue_ids:
            log_progress(
                "[stage=plan] manual review required: "
                + describe_issue_ids(current_payload, planned_manual_review_issue_ids)
            )

        refresh_cleanup = {"download_cleanup": [], "inference_cleanup": []}
        if args.refresh_changed_issues and planned_refresh_issue_ids and download_root is not None:
            inference_root = Path(args.inference_root).resolve() if args.inference_root else None
            log_progress(
                "[stage=refresh] cleaning outputs for changed issues: "
                + describe_issue_ids(current_payload, planned_refresh_issue_ids)
            )
            refresh_cleanup = prepare_refresh_outputs(
                download_root=download_root,
                inference_root=inference_root,
                refresh_issue_ids=planned_refresh_issue_ids,
                dry_run=args.dry_run,
            )

        # Dry-run stages read the temporary live export instead of output_json.
        effective_output_json = live_export_path if args.dry_run else output_json

        download_result = {"ran": False, "issue_ids": [], "manifest_path": None, "summary": {}}
        if args.run_download:
            if planned_download_issue_ids:
                log_progress(
                    "[stage=download] start: "
                    + describe_issue_ids(current_payload, planned_download_issue_ids)
                )
            else:
                log_progress("[stage=download] skipped: no actionable issues")
            download_result = run_download(args, planned_download_issue_ids, effective_output_json)
            if download_result.get("ran"):
                log_progress(
                    "[stage=download] done: "
                    + summarize_status_counts(download_result.get("summary", {}))
                )

        inference_result = {"ran": False, "issue_ids": [], "manifest_path": None, "summary": {}}
        if args.run_inference:
            log_progress(
                "[stage=inference] start: "
                + describe_issue_ids(current_payload, planned_inference_issue_ids)
            )
            inference_result = run_inference(args, planned_inference_issue_ids, effective_output_json)
            if inference_result.get("ran"):
                log_progress(
                    "[stage=inference] done: "
                    + summarize_status_counts(inference_result.get("summary", {}))
                )
            elif inference_result.get("detail"):
                log_progress(f"[stage=inference] skipped: {inference_result['detail']}")

        tracking_result = {
            "ran": False,
            "issue_ids": [],
            "script": str(Path(args.issue_tracking_script).resolve()),
            "tracking_model_version": "",
        }
        if args.run_tracking:
            # Tracking only covers issues reported by this run's inference stage.
            tracking_issue_ids = inference_result.get("issue_ids", []) if args.run_inference else []
            log_progress(
                "[stage=tracking] start: "
                + describe_issue_ids(current_payload, tracking_issue_ids)
            )
            tracking_result = run_tracking(args, tracking_issue_ids)
            if tracking_result.get("ran"):
                log_progress(
                    "[stage=tracking] done: "
                    + describe_issue_ids(current_payload, tracking_result.get("issue_ids", []))
                )
            elif tracking_result.get("detail"):
                log_progress(f"[stage=tracking] skipped: {tracking_result['detail']}")

        manifest = build_manifest(
            args=args,
            previous_payload=previous_payload,
            current_payload=current_payload,
            diff=diff,
            local_download_check=local_download_check,
            requested_issue_ids=requested_issue_ids,
            issue_name_keywords=issue_name_keywords,
            matched_issue_ids=matched_issue_ids,
            issue_id_min=args.issue_id_min,
            issue_id_max=args.issue_id_max,
            matched_issue_ids_by_range=matched_issue_ids_by_range,
            matched_issue_ids_without_downloadable_data=matched_issue_ids_without_downloadable_data,
            planned_download_issue_ids=planned_download_issue_ids,
            planned_inference_issue_ids=planned_inference_issue_ids,
            planned_refresh_issue_ids=planned_refresh_issue_ids,
            planned_manual_review_issue_ids=planned_manual_review_issue_ids,
            refresh_cleanup=refresh_cleanup,
            download_result=download_result,
            inference_result=inference_result,
            tracking_result=tracking_result,
        )
        manifest_path = Path(manifest["sync_manifest_path"])
        if not args.dry_run:
            save_json(manifest_path, manifest)

        print_summary(manifest)
        manifest_label = "sync_manifest (not written in dry-run)" if args.dry_run else "sync_manifest"
        print(f"{manifest_label}: {manifest_path}")
        return 0
    finally:
        # The live export is removed on success and on failure.
        live_export_path.unlink(missing_ok=True)
|
||
|
||
|
||
# Script entry point: the return value of main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|