paper_type.json (291B)
1 { 2 "paper_type": "empirical", 3 "reason": "Measures AI model performance on long software tasks, reports quantitative results on task completion times and trends (50% horizon doubling ~every 7 months), and analyzes capability improvements across multiple models and task characteristics." 4 }