#!/usr/bin/env python3
"""
Convert a comparison_report.json file into a Chinese-language Markdown
evaluation report.

Usage:
    python generate_comparison_report.py <path to comparison_report.json>
    python generate_comparison_report.py <path to comparison_report.json> --output <output file path>
    python generate_comparison_report.py <path to comparison_report.json> --title "custom title"
    python generate_comparison_report.py <path to comparison_report.json> --background "background text"
    python generate_comparison_report.py <path to comparison_report.json> --date 2026-03-01

Examples:
    python generate_comparison_report.py \
        evaluation_results/eval_results_common_match_comparison_cncap_yolov5s_20260228_roi0/comparison_common_matches_20260228_102849/comparison_report.json

    python generate_comparison_report.py \
        evaluation_results/.../comparison_report.json \
        --output my_report.md \
        --title "ROI1 模型对比报告"
"""
|
||
|
||
import json
|
||
import re
|
||
import argparse
|
||
import sys
|
||
from datetime import date
|
||
from pathlib import Path
|
||
|
||
# Allow importing class_config from the eval_tools root
|
||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||
from class_config import REPORT_3D_CLASS_LABELS
|
||
|
||
# ── Tie thresholds ───────────────────────────────────────────────────────────
# An absolute AP difference below this value is reported as a tie ("持平");
# only larger differences mark one model as the winner ("优").
AP_TIE_THRESHOLD: float = 0.005  # 0.5%
# Absolute tie threshold used when judging precision / recall / F1.
METRIC_TIE_THRESHOLD: float = 0.005
# A 3D error whose relative change (in %) is below this value counts as a tie.
ERROR_TIE_THRESHOLD_REL: float = 2.0
|
||
|
||
|
||
def fmt(v: float, decimals: int = 4) -> str:
    """Render ``v`` in fixed-point notation with ``decimals`` fractional digits."""
    return format(v, f".{decimals}f")
|
||
|
||
|
||
def fmt_pct(v: float) -> str:
    """Format ``v`` as a signed percentage with two decimals, e.g. ``+1.23%``.

    The sign comes from the ``+`` format flag rather than a hand-built
    prefix, so negative zero renders as ``-0.00%`` instead of the malformed
    ``+-0.00%``.
    """
    return f"{v:+.2f}%"
|
||
|
||
|
||
def fmt_diff(v: float) -> str:
    """Format ``v`` as a signed difference with four decimals, e.g. ``+0.0123``.

    The sign comes from the ``+`` format flag rather than a hand-built
    prefix, so negative zero renders as ``-0.0000`` instead of the malformed
    ``+-0.0000``.
    """
    return f"{v:+.4f}"
|
||
|
||
|
||
def judge(diff: float, rel: float, higher_is_better: bool = True,
          abs_thr: float = AP_TIE_THRESHOLD, rel_thr: float = None,
          model1_name: str = "model1", model2_name: str = "model2") -> str:
    """Return the verdict cell for one metric comparison.

    ``diff`` is ``model2 - model1`` and ``rel`` is the relative change in
    percent.  When ``rel_thr`` is given, the comparison is a tie if
    ``abs(rel) < rel_thr``; otherwise it is a tie if ``abs(diff) < abs_thr``.

    With ``higher_is_better=True`` a positive ``diff`` favours model2; with
    ``higher_is_better=False`` (error metrics) a negative ``diff`` favours
    model2.  The winner is named by the last ``-``-separated segment of its
    model name.
    """
    # Pick the yardstick: relative change if a relative threshold was given,
    # absolute difference otherwise.
    if rel_thr is None:
        magnitude, limit = abs(diff), abs_thr
    else:
        magnitude, limit = abs(rel), rel_thr
    if magnitude < limit:
        return "⚖️ 持平"

    if higher_is_better:
        model2_wins = diff > 0
    else:
        model2_wins = diff < 0

    short_names = {
        True: model2_name.split("-")[-1],   # e.g. "cncap"
        False: model1_name.split("-")[-1],  # e.g. "newdata", "mono3d"
    }
    return f"✅ {short_names[model2_wins]}优"
|
||
|
||
|
||
# A Markdown hard line break is two trailing spaces.  Spelling it as a named
# constant keeps the otherwise invisible whitespace from being silently lost.
_MD_HARD_BREAK = "  "

# (JSON key, table label) for the rows of each per-class 3D overall table.
_REPORT_3D_ROWS = (
    ("lateral_error", "Lateral Error"),
    ("longitudinal_error", "Longitudinal Error"),
    ("longitudinal_relative_error", "Longitudinal Relative Error"),
    ("heading_error", "Heading Error"),
    ("heading_error_relaxed", "Heading Error Relaxed"),
)

# Metrics shown (in column order) in the longitudinal-range table.
_REPORT_RANGE_METRICS = (
    "lateral_error",
    "longitudinal_error",
    "longitudinal_relative_error",
    "heading_error",
)


def _report_count(value) -> str:
    """Format a sample count with thousands separators, or "N/A" if missing."""
    return "N/A" if value is None else f"{value:,}"


def _report_long_range_key(range_key: str) -> float:
    """Sort key for ``long_*`` buckets: the numeric lower bound, else +inf."""
    stripped = range_key[len("long_"):].replace("m", "")
    # The separator is the first '-' that follows a digit, so a leading minus
    # sign on a negative lower bound is not mistaken for it.
    sep = re.search(r"(?<=\d)-", stripped)
    if sep:
        try:
            return float(stripped[:sep.start()])
        except ValueError:
            pass
    return float("inf")


def _report_header(stats: dict, model1: str, model2: str, report_date: str,
                   title: str = None, background: str = None) -> list:
    """Build the title, metadata lines and background paragraph."""
    heading = title or f"模型对比Overall指标总结 ({model1} vs {model2} - 通用数据集评测)"
    out = [
        f"# {heading}",
        "",
        f"**对比模型**: {model1} vs {model2}{_MD_HARD_BREAK}",
        f"**评测日期**: {report_date}{_MD_HARD_BREAK}",
        f"**数据集**: 通用数据集 (Common Match Cases){_MD_HARD_BREAK}",
    ]

    total_common = stats.get("common")
    m1_total = stats.get("model1_total")
    m2_total = stats.get("model2_total")
    if total_common is None or m1_total is None or m2_total is None:
        out.append("**匹配样本**: N/A")
    else:
        out.append(f"**匹配样本**: {total_common:,} ({model1}: {m1_total:,} | {model2}: {m2_total:,})")
    out.append("")

    if background:
        out.append(f"> **背景说明**: {background}")
    else:
        out.append(f"> **背景说明**: 本次评测对比了 {model1} 与 {model2},评估两者在通用数据集上的2D/3D检测性能差异。")
    out += ["", "---", ""]
    return out


def _report_2d_overall(ov: dict, summary: dict, model1: str, model2: str) -> list:
    """Build the overall 2D metrics table and the key-findings bullets."""
    m1_tag = model1.split("-")[-1]
    m2_tag = model2.split("-")[-1]

    out = [
        "## 📊 2D检测指标 (Overall)",
        "",
        "### 总体性能对比",
        "",
        f"| 指标 | {model1} | {model2} | 差异 | 相对变化 | 结果 |",
        "|------|" + "---|" * 5,
    ]
    for key, label in (("precision", "PRECISION"), ("recall", "RECALL"),
                       ("f1_score", "F1-Score"), ("map", "mAP")):
        entry = ov[key]
        diff = entry["diff"]
        rel = entry["relative_change_%"]
        verdict = judge(diff, rel, True, abs_thr=METRIC_TIE_THRESHOLD,
                        model1_name=model1, model2_name=model2)
        out.append(
            f"| **{label}** | {fmt(entry[model1])} | {fmt(entry[model2])} "
            f"| {fmt_diff(diff)} | {fmt_pct(rel)} | {verdict} |"
        )
    out.append("")

    # Key findings are phrased from the relative changes (model2 vs model1).
    prec_rel = ov["precision"]["relative_change_%"]
    rec_rel = ov["recall"]["relative_change_%"]
    map_rel = ov["map"]["relative_change_%"]
    f1_rel = ov["f1_score"]["relative_change_%"]

    ap_summary = summary.get("2d", {}).get("ap", {})
    ap_wins = ap_summary.get("wins", "?")
    ap_losses = ap_summary.get("losses", "?")
    ap_ties = ap_summary.get("ties", "?")

    if abs(f1_rel) < 1:
        f1_text = "两模型基本持平"
    else:
        f1_text = (model2 if f1_rel > 0 else model1) + "更优"

    out += [
        "### 关键发现",
        "",
        f"- 📊 **Precision**: {model2}{'领先' if prec_rel > 0 else '落后'}{fmt_pct(abs(prec_rel))},{'误检率略低' if prec_rel > 0 else '误检率略高'}",
        # The model named as leading is by definition the one with the higher
        # recall, so the qualifier does not depend on which model leads.
        f"- 📊 **Recall**: {model1 if rec_rel < 0 else model2}领先{fmt_pct(abs(rec_rel))},检出率更高",
        f"- 📊 **mAP**: {model2 if map_rel > 0 else model1}领先{fmt_pct(abs(map_rel))}({'极小差异,基本持平' if abs(map_rel) < 2 else '有一定差距'})",
        f"- 📊 **F1-Score**: {f1_text}(差距{fmt_pct(abs(f1_rel))})",
        f"- ⚖️ **类别赢负统计 (AP)**: {m2_tag}赢{ap_wins}类, {m1_tag}赢{ap_losses}类, 平局{ap_ties}类",
        "",
        "---",
        "",
    ]
    return out


def _report_2d_per_class(per_class: dict, model1: str, model2: str) -> tuple:
    """Build the per-class 2D table; return ``(lines, adv_m2, adv_m1)``.

    ``adv_m2`` / ``adv_m1`` list ``(cls, ap1, ap2, rel)`` for classes whose AP
    relative change is at least 2% in favour of model2 / model1.
    """
    m1_tag = model1.split("-")[-1]
    m2_tag = model2.split("-")[-1]

    out = [
        "## 📋 2D检测指标 (Per Class)",
        "",
        "### 各类别性能对比",
        "",
        f"| 类别 | Precision ({m1_tag}) | Precision ({m2_tag}) | Recall ({m1_tag}) | Recall ({m2_tag}) | F1 ({m1_tag}) | F1 ({m2_tag}) | AP ({m1_tag}) | AP ({m2_tag}) | AP差异 | 结果 |",
        "|------|" + "---|" * 10,
    ]

    adv_m2 = []  # classes where model2 is clearly better
    adv_m1 = []  # classes where model1 is clearly better

    for cls, cd in per_class.items():
        prec1, prec2 = cd["precision"][model1], cd["precision"][model2]
        rec1, rec2 = cd["recall"][model1], cd["recall"][model2]
        f1_1, f1_2 = cd["f1_score"][model1], cd["f1_score"][model2]
        ap1, ap2 = cd["ap"][model1], cd["ap"][model2]
        ap_d = cd["ap"]["diff"]
        ap_r = cd["ap"]["relative_change_%"]
        verdict = judge(ap_d, ap_r, True, abs_thr=AP_TIE_THRESHOLD,
                        model1_name=model1, model2_name=model2)
        out.append(
            f"| **{cls}** | {fmt(prec1)} | {fmt(prec2)} | {fmt(rec1)} | {fmt(rec2)} "
            f"| {fmt(f1_1)} | {fmt(f1_2)} | {fmt(ap1)} | {fmt(ap2)} | {fmt_diff(ap_d)} | {verdict} |"
        )
        if abs(ap_r) >= 2.0:  # only a relative change of >= 2% counts as significant
            if ap_d > 0:
                adv_m2.append((cls, ap1, ap2, ap_r))
            elif ap_d < 0:
                adv_m1.append((cls, ap1, ap2, ap_r))

    out += ["", "### 类别分析", ""]

    if adv_m2:
        out.append(f"**{model2} 优势类别** (AP更高):")
        # Largest improvement first.
        for cls, ap1, ap2, rel in sorted(adv_m2, key=lambda item: -item[3]):
            mark = "**大幅领先**" if rel > 8 else "领先"
            out.append(f"- {cls}: {m2_tag} {fmt(ap2)} > {m1_tag} {fmt(ap1)}({mark}{fmt_pct(rel)})")
        out.append("")

    if adv_m1:
        out.append(f"**{model1} 优势类别** (AP更高):")
        # Most negative relative change (largest model1 lead) first.
        for cls, ap1, ap2, rel in sorted(adv_m1, key=lambda item: item[3]):
            mark = "**大幅领先**" if abs(rel) > 8 else "领先"
            out.append(f"- {cls}: {m1_tag} {fmt(ap1)} > {m2_tag} {fmt(ap2)}({mark}{fmt_pct(abs(rel))})")
        out.append("")

    out += ["---", ""]
    return out, adv_m2, adv_m1


def _report_3d_range_table(cd: dict, model1: str, model2: str) -> list:
    """Build the longitudinal-range table for one class (empty if no ranges)."""
    long_keys = sorted((k for k in cd if k.startswith("long_")),
                       key=_report_long_range_key)
    if not long_keys:
        return []

    m1_tag = model1.split("-")[-1]
    m2_tag = model2.split("-")[-1]
    out = [
        "#### 纵向区间对比",
        "",
        (
            f"| 区间 | 样本数 "
            f"| Lat ({m1_tag}) | Lat ({m2_tag}) | Lat Δ% "
            f"| Long ({m1_tag}) | Long ({m2_tag}) | Long Δ% "
            f"| LongRel ({m1_tag}) | LongRel ({m2_tag}) | LongRel Δ% "
            f"| Head ({m1_tag}) | Head ({m2_tag}) | Head Δ% |"
        ),
        "|------|" + "---|" * 13,
    ]

    for range_key in long_keys:
        bucket = cd[range_key]
        if not bucket:
            continue

        # Sample count: taken from the first metric that reports one for model1.
        n_range = "-"
        for metric in ("lateral_error", "longitudinal_error", "heading_error"):
            entry = bucket.get(metric, {})
            if model1 in entry and "samples" in entry[model1]:
                n_range = f"{entry[model1]['samples']:,}"
                break

        # Range label: the bucket key with only the "long_" prefix removed.
        cells = [f"**{range_key[len('long_'):]}**", n_range]
        for metric in _REPORT_RANGE_METRICS:
            entry = bucket.get(metric, {})
            for model in (model1, model2):
                cells.append(fmt(entry[model]["mean"]) if model in entry else "-")
            rel = entry.get("relative_change_%")
            cells.append("-" if rel is None else fmt_pct(rel))
        out.append("| " + " | ".join(cells) + " |")

    out.append("")
    return out


def _report_3d_section(m3d: dict, model1: str, model2: str) -> list:
    """Build the 3D metrics section: one overall table (plus ranges) per class."""
    if not m3d:
        return []

    m1_tag = model1.split("-")[-1]
    m2_tag = model2.split("-")[-1]
    out = ["## 🎯 3D检测指标", ""]

    for cls_key, cls_label in REPORT_3D_CLASS_LABELS.items():
        if cls_key not in m3d:
            continue
        cd = m3d[cls_key]
        ov3 = cd.get("overall", {})
        if not ov3:
            continue

        n = cd.get("common_samples")
        n_str = f"{n:,} 个样本" if n is not None else "N/A 个样本"
        out += [
            f"### {cls_label} - {n_str}",
            "",
            f"| 指标 | {model1} | {model2} | 差异 | 相对变化 | 结果 |",
            "|------|" + "---|" * 5,
        ]

        for key, label in _REPORT_3D_ROWS:
            if key not in ov3:
                continue
            entry = ov3[key]
            diff = entry["diff"]
            rel = entry["relative_change_%"]
            # These are error metrics: lower is better, ties judged on relative change.
            verdict = judge(diff, rel, False, rel_thr=ERROR_TIE_THRESHOLD_REL,
                            model1_name=model1, model2_name=model2)
            out.append(
                f"| **{label}** | {fmt(entry[model1]['mean'])} | {fmt(entry[model2]['mean'])} "
                f"| {fmt_diff(diff)} | {fmt_pct(rel)} | {verdict} |"
            )

        if "reversal_info" in ov3:
            rev1 = ov3["reversal_info"][model1]
            rev2 = ov3["reversal_info"][model2]
            if abs(rev1["percentage"] - rev2["percentage"]) < 0.5:
                rev_verdict = "⚖️ 持平"
            else:
                # The model with the lower reversal percentage wins.
                rev_winner = m2_tag if rev2["percentage"] < rev1["percentage"] else m1_tag
                rev_verdict = "✅ " + rev_winner + "优"
            out.append(
                f"| **Reversal Cases** | {rev1['count']:,} ({rev1['percentage']:.2f}%) "
                f"| {rev2['count']:,} ({rev2['percentage']:.2f}%) | - | - | {rev_verdict} |"
            )
        out.append("")

        out += _report_3d_range_table(cd, model1, model2)

    out += ["---", ""]
    return out


def _report_match_stats(stats: dict, model1: str, model2: str) -> list:
    """Build the sample-matching statistics section (empty if no stats)."""
    if not stats:
        return []

    m1_tag = model1.split("-")[-1]
    m2_tag = model2.split("-")[-1]

    # Totals may be absent from a partial stats dict; render "N/A" instead of
    # failing on the thousands-separator format.
    common = _report_count(stats.get("common"))
    m1_total = _report_count(stats.get("model1_total"))
    m2_total = _report_count(stats.get("model2_total"))
    m1_pct = stats.get("common_percentage_of_model1", 0)
    m2_pct = stats.get("common_percentage_of_model2", 0)
    m1_uniq = stats.get("model1_unique", 0)
    m2_uniq = stats.get("model2_unique", 0)

    out = [
        "## 📊 样本匹配统计",
        "",
        "### 整体匹配情况",
        "",
        "| 模型 | 总样本数 | 公共样本 | 独有样本 | 公共占比 |",
        "|------|----------|----------|----------|---------|",
        f"| **{model1}** | {m1_total} | {common} | {m1_uniq:,} | {m1_pct:.2f}% |",
        f"| **{model2}** | {m2_total} | {common} | {m2_uniq:,} | {m2_pct:.2f}% |",
        "",
    ]

    per_cls_stats = stats.get("per_class", {})
    if per_cls_stats:
        out += [
            "### 各类别匹配情况 (3D)",
            "",
            f"| 类别 | {m1_tag}总数 | {m2_tag}总数 | 公共样本 | {m1_tag}占比 | {m2_tag}占比 |",
            "|------|" + "---|" * 5,
        ]
        for cls, cs in per_cls_stats.items():
            out.append(
                f"| **{cls}** | {cs['model1_total']:,} | {cs['model2_total']:,} "
                f"| {cs['common']:,} | {cs['common_percentage_of_model1']:.2f}% "
                f"| {cs['common_percentage_of_model2']:.2f}% |"
            )
        out.append("")

    out += ["---", ""]
    return out


def _report_conclusions(ov: dict, summary: dict, has_3d: bool,
                        adv_m2: list, adv_m1: list,
                        model1: str, model2: str) -> list:
    """Build the closing summary: win/loss counts and the overall verdict."""
    m1_tag = model1.split("-")[-1]
    m2_tag = model2.split("-")[-1]

    sum2d = summary.get("2d", {})
    ap = sum2d.get("ap", {})
    f1 = sum2d.get("f1_score", {})
    out = [
        "## 🎯 结论与建议",
        "",
        "### 2D检测汇总",
        "",
        f"- **AP 类别统计**: {m2_tag}赢{ap.get('wins', 0)}类 / {m1_tag}赢{ap.get('losses', 0)}类 / 平局{ap.get('ties', 0)}类",
        f"- **F1 类别统计**: {m2_tag}赢{f1.get('wins', 0)}类 / {m1_tag}赢{f1.get('losses', 0)}类 / 平局{f1.get('ties', 0)}类",
        f"- **整体mAP**: {model1}={fmt(ov['map'][model1])} vs {model2}={fmt(ov['map'][model2])} ({fmt_pct(ov['map']['relative_change_%'])})",
        "",
    ]

    if has_3d:
        sum3d = summary.get("3d", {})
        lat = sum3d.get("lateral", {})
        lon = sum3d.get("longitudinal", {})
        head = sum3d.get("heading", {})
        out += [
            "### 3D检测汇总",
            "",
            f"- **横向误差 (Lateral)**: {m2_tag}优{lat.get('wins', 0)}类 / {m1_tag}优{lat.get('losses', 0)}类",
            f"- **纵向误差 (Longitudinal)**: {m2_tag}优{lon.get('wins', 0)}类 / {m1_tag}优{lon.get('losses', 0)}类",
            f"- **航向误差 (Heading)**: {m2_tag}优{head.get('wins', 0)}类 / {m1_tag}优{head.get('losses', 0)}类",
            "",
        ]

    out += ["### 综合建议", ""]

    # Overall winner: decided by the mAP relative change, with a +/-2% dead band.
    map_rel = ov["map"]["relative_change_%"]
    if map_rel > 2:
        overall_winner = model2
    elif map_rel < -2:
        overall_winner = model1
    else:
        overall_winner = None

    if overall_winner:
        out.append(f"- 🏆 **综合mAP**: {overall_winner} 整体占优({fmt_pct(abs(map_rel))})")
    else:
        out.append(f"- ⚖️ **综合mAP**: 两模型基本持平(差距{fmt_pct(abs(map_rel))})")

    if adv_m2:
        cls_str = "、".join(item[0] for item in adv_m2)
        out.append(f"- ✅ **{model2} 改善**: {cls_str} 类别AP有所提升")
    if adv_m1:
        cls_str = "、".join(item[0] for item in adv_m1)
        out.append(f"- ⚠️ **{model2} 退化**: {cls_str} 类别AP有所下降")

    out.append("")
    return out


def build_report(data: dict, model1: str, model2: str,
                 report_date: str, title: str = None, background: str = None) -> str:
    """Generate the complete Markdown report as a single string.

    Args:
        data: Parsed comparison report.  ``data["2d_metrics"]["overall"]`` is
            required; ``"3d_metrics"``, ``"match_statistics"`` and
            ``"summary"`` are optional and their sections are skipped or
            filled with placeholders when absent.
        model1: Name of the baseline model, used as a key into the metric dicts.
        model2: Name of the compared model; ``diff`` values are model2 - model1.
        report_date: Evaluation date shown in the header.
        title: Optional heading; an automatic one is generated when falsy.
        background: Optional background paragraph; a default is used when falsy.

    Returns:
        The report, with lines joined by ``"\\n"``.

    Raises:
        KeyError: If a required 2D metric entry is missing from ``data``.
    """
    m2d = data["2d_metrics"]
    m3d = data.get("3d_metrics", {})
    stats = data.get("match_statistics", {})
    summary = data.get("summary", {})
    ov = m2d["overall"]

    lines = []
    lines += _report_header(stats, model1, model2, report_date, title, background)
    lines += _report_2d_overall(ov, summary, model1, model2)
    per_class_lines, adv_m2, adv_m1 = _report_2d_per_class(
        m2d.get("per_class", {}), model1, model2)
    lines += per_class_lines
    lines += _report_3d_section(m3d, model1, model2)
    lines += _report_match_stats(stats, model1, model2)
    lines += _report_conclusions(ov, summary, bool(m3d), adv_m2, adv_m1,
                                 model1, model2)
    return "\n".join(lines)
|
||
|
||
|
||
def main():
    """Command-line entry point: read the comparison JSON, write the report.

    Parses the command line, loads the JSON file, detects the two model names
    from the keys of ``2d_metrics.overall.precision``, builds the report with
    ``build_report`` and writes it as UTF-8 to ``--output`` (default:
    ``COMPARISON_REPORT.md`` next to the JSON file).

    Exits with status 1 and a message on stderr when the input file is
    missing or unreadable, is not valid JSON, or does not expose two model
    names.
    """
    parser = argparse.ArgumentParser(
        description="将 comparison_report.json 转换为中文 Markdown 评测报告"
    )
    parser.add_argument("json_path", help="comparison_report.json 的路径")
    parser.add_argument("--output", "-o", default=None,
                        help="输出 Markdown 文件路径(默认与 JSON 同目录,文件名 COMPARISON_REPORT.md)")
    parser.add_argument("--title", default=None,
                        help="自定义报告标题")
    parser.add_argument("--background", default=None,
                        help="背景说明文字")
    parser.add_argument("--date", default=str(date.today()),
                        help="评测日期 (默认今天,格式 YYYY-MM-DD)")
    args = parser.parse_args()

    json_path = Path(args.json_path)
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"错误: 文件不存在: {json_path}", file=sys.stderr)
        sys.exit(1)
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"错误: 无法读取 JSON 文件: {json_path} ({exc})", file=sys.stderr)
        sys.exit(1)

    # Detect the model names from the JSON: every key of the precision entry
    # that is not one of the derived fields is a model name.
    skip = {"diff", "relative_change_%"}
    try:
        models = [m for m in data["2d_metrics"]["overall"]["precision"]
                  if m not in skip]
    except (KeyError, TypeError):
        # Missing or wrongly-shaped section: report it through the same
        # friendly message instead of a raw traceback.
        models = []
    if len(models) < 2:
        print("错误: 无法从 JSON 中自动识别模型名称,请检查文件格式。", file=sys.stderr)
        sys.exit(1)
    model1, model2 = models[0], models[1]
    print(f"模型1: {model1}")
    print(f"模型2: {model2}")

    report = build_report(data, model1, model2,
                          report_date=args.date,
                          title=args.title,
                          background=args.background)

    # Output path: explicit --output, else next to the input JSON.
    if args.output:
        out_path = Path(args.output)
    else:
        out_path = json_path.parent / "COMPARISON_REPORT.md"

    # Create the target directory so a fresh --output location does not fail.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(report, encoding="utf-8")
    print(f"报告已生成: {out_path}")


if __name__ == "__main__":
    main()
|