build-summary.py - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

build-summary.py (8388B)
      1 #!/usr/bin/env python3
      2 """
      3 Aggregate all scan.json results into a summary artifact.
      4 
      5 Produces analysis/summary.json (machine-readable) and analysis/summary.md
      6 (human-readable). These are static artifacts for working on the narrative
      7 sections of the paper before the LaTeX build runs.
      8 
      9 Usage:
     10     python scripts/build-summary.py
     11 """
     12 
     13 import json
     14 from collections import Counter, defaultdict
     15 from pathlib import Path
     16 
# Repository root: the parent of the directory that contains this script
# (the module docstring runs it as scripts/build-summary.py).
ROOT = Path(__file__).resolve().parent.parent
# JSON Lines registry read by load_registry().
REGISTRY_PATH = ROOT / "registry.jsonl"
# Per-paper directories; load_scans() reads <paper_id>/scan.json below this.
PAPERS_DIR = ROOT / "papers"
# Output directory for summary.json and summary.md (created by main()).
ANALYSIS_DIR = ROOT / "analysis"

# Rubric dimensions aggregated from each scan's "rubric" object. The order
# here is also the row order of the rendered score table.
RUBRIC_DIMENSIONS = [
    "artifacts_reproducibility",
    "statistical_rigor",
    "benchmark_quality",
    "claim_to_evidence",
    "setup_transparency",
    "limitations_discussion",
]

# Label used for each integer rubric score in the per-dimension distribution.
SCORE_LABELS = {0: "absent", 1: "weak", 2: "adequate", 3: "strong"}
     32 
     33 
     34 def load_registry():
     35     entries = {}
     36     with open(REGISTRY_PATH) as f:
     37         for line in f:
     38             line = line.strip()
     39             if line:
     40                 entry = json.loads(line)
     41                 entries[entry["id"]] = entry
     42     return entries
     43 
     44 
     45 def load_scans():
     46     scans = {}
     47     for scan_path in PAPERS_DIR.glob("*/scan.json"):
     48         paper_id = scan_path.parent.name
     49         with open(scan_path) as f:
     50             scans[paper_id] = json.load(f)
     51     return scans
     52 
     53 
     54 def compute_summary(registry, scans):
     55     summary = {
     56         "total_registry": len(registry),
     57         "total_scanned": len(scans),
     58         "status_counts": Counter(e["status"] for e in registry.values()),
     59     }
     60 
     61     if not scans:
     62         summary["note"] = "No scans completed yet."
     63         return summary
     64 
     65     # Score distributions per dimension
     66     dimension_scores = {d: [] for d in RUBRIC_DIMENSIONS}
     67     for scan in scans.values():
     68         rubric = scan.get("rubric", {})
     69         for dim in RUBRIC_DIMENSIONS:
     70             if dim in rubric:
     71                 dimension_scores[dim].append(rubric[dim]["score"])
     72 
     73     summary["dimensions"] = {}
     74     for dim, scores in dimension_scores.items():
     75         if not scores:
     76             continue
     77         dist = Counter(scores)
     78         summary["dimensions"][dim] = {
     79             "mean": round(sum(scores) / len(scores), 2),
     80             "distribution": {SCORE_LABELS[k]: dist.get(k, 0) for k in range(4)},
     81             "n": len(scores),
     82         }
     83 
     84     # Overall scores per paper
     85     paper_scores = {}
     86     for paper_id, scan in scans.items():
     87         rubric = scan.get("rubric", {})
     88         scores = [rubric[d]["score"] for d in RUBRIC_DIMENSIONS if d in rubric]
     89         if scores:
     90             paper_scores[paper_id] = {
     91                 "total": sum(scores),
     92                 "mean": round(sum(scores) / len(scores), 2),
     93                 "title": scan.get("paper", {}).get("title", registry.get(paper_id, {}).get("title", "?")),
     94             }
     95 
     96     ranked = sorted(paper_scores.items(), key=lambda x: x[1]["total"])
     97     summary["bottom_10"] = [
     98         {"id": pid, "title": ps["title"], "total": ps["total"], "mean": ps["mean"]}
     99         for pid, ps in ranked[:10]
    100     ]
    101     summary["top_10"] = [
    102         {"id": pid, "title": ps["title"], "total": ps["total"], "mean": ps["mean"]}
    103         for pid, ps in ranked[-10:]
    104     ][::-1]
    105 
    106     # Red flags
    107     all_flags = []
    108     for paper_id, scan in scans.items():
    109         for rf in scan.get("red_flags", []):
    110             all_flags.append(rf["flag"])
    111     summary["red_flag_counts"] = dict(Counter(all_flags).most_common(20))
    112 
    113     # Methodology tag distribution
    114     all_method_tags = []
    115     for scan in scans.values():
    116         all_method_tags.extend(scan.get("methodology_tags", []))
    117     summary["methodology_tag_counts"] = dict(Counter(all_method_tags).most_common())
    118 
    119     # Topic tag distribution (from registry)
    120     all_topic_tags = []
    121     for paper_id in scans:
    122         if paper_id in registry:
    123             all_topic_tags.extend(registry[paper_id].get("tags", []))
    124     summary["topic_tag_counts"] = dict(Counter(all_topic_tags).most_common())
    125 
    126     # Claims support breakdown
    127     support_counts = Counter()
    128     for scan in scans.values():
    129         for claim in scan.get("claims", []):
    130             support_counts[claim.get("supported", "unknown")] += 1
    131     summary["claim_support"] = dict(support_counts)
    132 
    133     # Year breakdown of scores
    134     year_scores = defaultdict(list)
    135     for paper_id, ps in paper_scores.items():
    136         year = registry.get(paper_id, {}).get("year") or scans[paper_id].get("paper", {}).get("year")
    137         if year:
    138             year_scores[year].append(ps["mean"])
    139     summary["mean_score_by_year"] = {
    140         str(y): round(sum(s) / len(s), 2) for y, s in sorted(year_scores.items())
    141     }
    142 
    143     # Citation-chased papers count
    144     total_cited = 0
    145     for scan in scans.values():
    146         total_cited += len(scan.get("cited_papers", []))
    147     summary["total_cited_papers_extracted"] = total_cited
    148 
    149     return summary
    150 
    151 
    152 def render_markdown(summary):
    153     lines = ["# Survey Summary\n"]
    154     lines.append(f"Registry: {summary['total_registry']} papers")
    155     lines.append(f"Scanned: {summary['total_scanned']} papers\n")
    156 
    157     if summary.get("note"):
    158         lines.append(f"*{summary['note']}*\n")
    159         return "\n".join(lines)
    160 
    161     status = summary.get("status_counts", {})
    162     lines.append("## Pipeline Status\n")
    163     for s, c in sorted(status.items()):
    164         lines.append(f"- {s}: {c}")
    165     lines.append("")
    166 
    167     dims = summary.get("dimensions", {})
    168     if dims:
    169         lines.append("## Rubric Score Distributions\n")
    170         lines.append("| Dimension | Mean | Absent | Weak | Adequate | Strong | N |")
    171         lines.append("|-----------|------|--------|------|----------|--------|---|")
    172         for dim in RUBRIC_DIMENSIONS:
    173             if dim not in dims:
    174                 continue
    175             d = dims[dim]
    176             dist = d["distribution"]
    177             label = dim.replace("_", " ").title()
    178             lines.append(
    179                 f"| {label} | {d['mean']} | {dist['absent']} | "
    180                 f"{dist['weak']} | {dist['adequate']} | {dist['strong']} | {d['n']} |"
    181             )
    182         lines.append("")
    183 
    184     if summary.get("bottom_10"):
    185         lines.append("## Bottom 10 (Lowest Total Scores)\n")
    186         for p in summary["bottom_10"]:
    187             lines.append(f"1. **{p['title']}** ({p['id']}) — {p['total']}/18, mean {p['mean']}")
    188         lines.append("")
    189 
    190     if summary.get("top_10"):
    191         lines.append("## Top 10 (Highest Total Scores)\n")
    192         for p in summary["top_10"]:
    193             lines.append(f"1. **{p['title']}** ({p['id']}) — {p['total']}/18, mean {p['mean']}")
    194         lines.append("")
    195 
    196     if summary.get("red_flag_counts"):
    197         lines.append("## Most Common Red Flags\n")
    198         for flag, count in summary["red_flag_counts"].items():
    199             lines.append(f"- {flag}: {count}")
    200         lines.append("")
    201 
    202     if summary.get("claim_support"):
    203         lines.append("## Claim Support Breakdown\n")
    204         for level, count in summary["claim_support"].items():
    205             lines.append(f"- {level}: {count}")
    206         lines.append("")
    207 
    208     if summary.get("methodology_tag_counts"):
    209         lines.append("## Methodology Types\n")
    210         for tag, count in summary["methodology_tag_counts"].items():
    211             lines.append(f"- {tag}: {count}")
    212         lines.append("")
    213 
    214     if summary.get("mean_score_by_year"):
    215         lines.append("## Mean Score by Year\n")
    216         for year, score in summary["mean_score_by_year"].items():
    217             lines.append(f"- {year}: {score}")
    218         lines.append("")
    219 
    220     return "\n".join(lines)
    221 
    222 
    223 def main():
    224     ANALYSIS_DIR.mkdir(exist_ok=True)
    225 
    226     registry = load_registry()
    227     scans = load_scans()
    228     summary = compute_summary(registry, scans)
    229 
    230     json_path = ANALYSIS_DIR / "summary.json"
    231     md_path = ANALYSIS_DIR / "summary.md"
    232 
    233     with open(json_path, "w") as f:
    234         json.dump(summary, f, indent=2, ensure_ascii=False)
    235     print(f"Wrote {json_path}")
    236 
    237     md = render_markdown(summary)
    238     with open(md_path, "w") as f:
    239         f.write(md)
    240     print(f"Wrote {md_path}")
    241 
    242     # Print a quick overview
    243     print(f"\n{summary['total_scanned']}/{summary['total_registry']} papers scanned")
    244     if summary.get("dimensions"):
    245         print("Mean scores:")
    246         for dim in RUBRIC_DIMENSIONS:
    247             if dim in summary["dimensions"]:
    248                 label = dim.replace("_", " ").title()
    249                 print(f"  {label}: {summary['dimensions'][dim]['mean']}")
    250 
    251 
    252 if __name__ == "__main__":
    253     main()
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs