build-summary.py (8388B)
#!/usr/bin/env python3
"""
Aggregate all scan.json results into a summary artifact.

Produces analysis/summary.json (machine-readable) and analysis/summary.md
(human-readable). These are static artifacts for working on the narrative
sections of the paper before the LaTeX build runs.

Usage:
    python scripts/build-summary.py
"""

import json
from collections import Counter, defaultdict
from pathlib import Path

ROOT = Path(__file__).resolve().parent.parent
REGISTRY_PATH = ROOT / "registry.jsonl"
PAPERS_DIR = ROOT / "papers"
ANALYSIS_DIR = ROOT / "analysis"

RUBRIC_DIMENSIONS = [
    "artifacts_reproducibility",
    "statistical_rigor",
    "benchmark_quality",
    "claim_to_evidence",
    "setup_transparency",
    "limitations_discussion",
]

SCORE_LABELS = {0: "absent", 1: "weak", 2: "adequate", 3: "strong"}

# Highest total a paper can reach when every rubric dimension gets the top
# score; derived from the constants above instead of being hard-coded.
MAX_TOTAL_SCORE = max(SCORE_LABELS) * len(RUBRIC_DIMENSIONS)


def load_registry() -> dict:
    """Read REGISTRY_PATH (JSON Lines) into a dict keyed by each entry's "id".

    Blank lines are skipped. If the same id appears more than once, the last
    occurrence wins.
    """
    entries = {}
    # Explicit UTF-8: the platform default encoding is locale-dependent.
    with open(REGISTRY_PATH, encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue
            entry = json.loads(line)
            entries[entry["id"]] = entry
    return entries


def load_scans() -> dict:
    """Load every ``<paper>/scan.json`` under PAPERS_DIR.

    Returns a dict mapping the name of the directory containing the scan file
    to the parsed JSON content.
    """
    scans = {}
    # Sorted so that insertion order (and therefore tie-breaking in rankings
    # and counters downstream) does not depend on filesystem listing order.
    for scan_path in sorted(PAPERS_DIR.glob("*/scan.json")):
        with open(scan_path, encoding="utf-8") as f:
            scans[scan_path.parent.name] = json.load(f)
    return scans


def compute_summary(registry, scans) -> dict:
    """Aggregate registry entries and scan results into one summary dict.

    Args:
        registry: mapping of paper id -> registry entry. Each entry must have
            a "status" key; "title", "year" and "tags" are read when present.
        scans: mapping of paper id -> scan dict. The keys read (all optional)
            are "rubric", "paper", "red_flags", "claims", "methodology_tags"
            and "cited_papers".

    Returns:
        A JSON-serializable dict. When ``scans`` is empty it contains only the
        totals, the status counts and a "note"; otherwise it also contains the
        per-dimension statistics, top/bottom rankings and the tag, flag,
        claim, year and citation aggregates.
    """
    summary = {
        "total_registry": len(registry),
        "total_scanned": len(scans),
        "status_counts": Counter(e["status"] for e in registry.values()),
    }

    if not scans:
        summary["note"] = "No scans completed yet."
        return summary

    # Score distributions per dimension
    dimension_scores = {dim: [] for dim in RUBRIC_DIMENSIONS}
    for scan in scans.values():
        rubric = scan.get("rubric", {})
        for dim in RUBRIC_DIMENSIONS:
            if dim in rubric:
                dimension_scores[dim].append(rubric[dim]["score"])

    summary["dimensions"] = {}
    for dim, scores in dimension_scores.items():
        if not scores:
            continue
        dist = Counter(scores)
        summary["dimensions"][dim] = {
            "mean": round(sum(scores) / len(scores), 2),
            "distribution": {
                label: dist.get(value, 0) for value, label in SCORE_LABELS.items()
            },
            "n": len(scores),
        }

    # Overall scores per paper (papers with no scored dimension are left out)
    paper_scores = {}
    for paper_id, scan in scans.items():
        rubric = scan.get("rubric", {})
        scores = [rubric[d]["score"] for d in RUBRIC_DIMENSIONS if d in rubric]
        if not scores:
            continue
        # Prefer the title recorded in the scan, then the registry, then "?".
        fallback_title = registry.get(paper_id, {}).get("title", "?")
        paper_scores[paper_id] = {
            "total": sum(scores),
            "mean": round(sum(scores) / len(scores), 2),
            "title": scan.get("paper", {}).get("title", fallback_title),
        }

    def _ranking_rows(items):
        return [
            {"id": pid, "title": ps["title"], "total": ps["total"], "mean": ps["mean"]}
            for pid, ps in items
        ]

    # Ascending by total; the sort is stable, so ties keep insertion order.
    ranked = sorted(paper_scores.items(), key=lambda item: item[1]["total"])
    summary["bottom_10"] = _ranking_rows(ranked[:10])
    summary["top_10"] = _ranking_rows(ranked[-10:])[::-1]

    # Red flags (only the 20 most frequent are kept)
    all_flags = [
        rf["flag"] for scan in scans.values() for rf in scan.get("red_flags", [])
    ]
    summary["red_flag_counts"] = dict(Counter(all_flags).most_common(20))

    # Methodology tag distribution
    all_method_tags = []
    for scan in scans.values():
        all_method_tags.extend(scan.get("methodology_tags", []))
    summary["methodology_tag_counts"] = dict(Counter(all_method_tags).most_common())

    # Topic tag distribution (from registry, restricted to scanned papers)
    all_topic_tags = []
    for paper_id in scans:
        if paper_id in registry:
            all_topic_tags.extend(registry[paper_id].get("tags", []))
    summary["topic_tag_counts"] = dict(Counter(all_topic_tags).most_common())

    # Claims support breakdown
    support_counts = Counter(
        claim.get("supported", "unknown")
        for scan in scans.values()
        for claim in scan.get("claims", [])
    )
    summary["claim_support"] = dict(support_counts)

    # Year breakdown of scores
    year_scores = defaultdict(list)
    for paper_id, ps in paper_scores.items():
        year = (
            registry.get(paper_id, {}).get("year")
            or scans[paper_id].get("paper", {}).get("year")
        )
        if year:
            # Group on the string form: the registry and the scan may store
            # the year with different types (e.g. 2020 vs "2020"). Mixing them
            # would make sorted() raise TypeError, or silently drop a bucket
            # once the keys are rendered as strings below.
            year_scores[str(year)].append(ps["mean"])
    summary["mean_score_by_year"] = {
        y: round(sum(means) / len(means), 2)
        for y, means in sorted(year_scores.items())
    }

    # Citation-chased papers count
    summary["total_cited_papers_extracted"] = sum(
        len(scan.get("cited_papers", [])) for scan in scans.values()
    )

    return summary


def render_markdown(summary) -> str:
    """Render the dict produced by compute_summary() as a Markdown document.

    When the summary carries a "note" (set when no scans exist), only the
    header, the totals and the note are emitted. Otherwise each section is
    emitted only if its data is present and non-empty, except the pipeline
    status heading, which is always written.
    """
    lines = ["# Survey Summary\n"]
    lines.append(f"Registry: {summary['total_registry']} papers")
    lines.append(f"Scanned: {summary['total_scanned']} papers\n")

    if summary.get("note"):
        lines.append(f"*{summary['note']}*\n")
        return "\n".join(lines)

    def _bullet_section(heading, key):
        # Emit "## heading" followed by one "- name: count" bullet per item.
        data = summary.get(key)
        if not data:
            return
        lines.append(f"## {heading}\n")
        lines.extend(f"- {name}: {value}" for name, value in data.items())
        lines.append("")

    def _ranking_section(heading, key):
        # Every item uses "1."; Markdown renderers renumber ordered lists.
        papers = summary.get(key)
        if not papers:
            return
        lines.append(f"## {heading}\n")
        for p in papers:
            lines.append(
                f"1. **{p['title']}** ({p['id']}) — "
                f"{p['total']}/{MAX_TOTAL_SCORE}, mean {p['mean']}"
            )
        lines.append("")

    status = summary.get("status_counts", {})
    lines.append("## Pipeline Status\n")
    for s, c in sorted(status.items()):
        lines.append(f"- {s}: {c}")
    lines.append("")

    dims = summary.get("dimensions", {})
    if dims:
        lines.append("## Rubric Score Distributions\n")
        lines.append("| Dimension | Mean | Absent | Weak | Adequate | Strong | N |")
        lines.append("|-----------|------|--------|------|----------|--------|---|")
        for dim in RUBRIC_DIMENSIONS:
            if dim not in dims:
                continue
            d = dims[dim]
            dist = d["distribution"]
            label = dim.replace("_", " ").title()
            lines.append(
                f"| {label} | {d['mean']} | {dist['absent']} | "
                f"{dist['weak']} | {dist['adequate']} | {dist['strong']} | {d['n']} |"
            )
        lines.append("")

    _ranking_section("Bottom 10 (Lowest Total Scores)", "bottom_10")
    _ranking_section("Top 10 (Highest Total Scores)", "top_10")
    _bullet_section("Most Common Red Flags", "red_flag_counts")
    _bullet_section("Claim Support Breakdown", "claim_support")
    _bullet_section("Methodology Types", "methodology_tag_counts")
    _bullet_section("Mean Score by Year", "mean_score_by_year")

    return "\n".join(lines)


def main() -> None:
    """Build the summary and write summary.json and summary.md to ANALYSIS_DIR."""
    ANALYSIS_DIR.mkdir(exist_ok=True)

    registry = load_registry()
    scans = load_scans()
    summary = compute_summary(registry, scans)

    json_path = ANALYSIS_DIR / "summary.json"
    md_path = ANALYSIS_DIR / "summary.md"

    # ensure_ascii=False writes non-ASCII characters verbatim, so the file
    # must be opened as UTF-8 rather than in the locale's default encoding.
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(summary, f, indent=2, ensure_ascii=False)
    print(f"Wrote {json_path}")

    md = render_markdown(summary)
    with open(md_path, "w", encoding="utf-8") as f:
        f.write(md)
    print(f"Wrote {md_path}")

    # Print a quick overview
    print(f"\n{summary['total_scanned']}/{summary['total_registry']} papers scanned")
    if summary.get("dimensions"):
        print("Mean scores:")
        for dim in RUBRIC_DIMENSIONS:
            if dim in summary["dimensions"]:
                label = dim.replace("_", " ").title()
                print(f"  {label}: {summary['dimensions'][dim]['mean']}")


if __name__ == "__main__":
    main()