ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan-v5.json (19417B)


      1 {
      2   "scan_version": 5,
      3   "paper_type": "theoretical",
      4   "paper": {
      5     "title": "Energy-Aware Routing to Large Reasoning Models",
      6     "authors": [
      7       "Austin R. Ellis-Mohr",
      8       "Max Hartman",
      9       "Lav R. Varshney"
     10     ],
     11     "year": 2025,
     12     "venue": "arXiv.org",
     13     "arxiv_id": "2601.00823",
     14     "doi": "10.48550/arXiv.2601.00823"
     15   },
     16   "checklist": {
     17     "claims_and_evidence": {
     18       "abstract_claims_supported": {
     19         "applies": true,
     20         "answer": true,
     21         "justification": "Claims about critical regime, variance-driven performance, and scaling laws are supported by Theorems 1–2 and the formal analysis in Sections III–IV. The mathematical framework substantiates each abstract assertion.",
     22         "source": "haiku"
     23       },
     24       "causal_claims_justified": {
     25         "applies": true,
     26         "answer": true,
     27         "justification": "Causal claims (e.g., 'routing errors dominate fluctuation costs') are derived from formal theory. Figure 4 demonstrates the predicted regime transition from √T to linear scaling as prediction errors accumulate, validating the causal structure.",
     28         "source": "haiku"
     29       },
     30       "generalization_bounded": {
     31         "applies": true,
     32         "answer": true,
     33         "justification": "Scope is bounded by Assumption 1 (i.i.d. arrivals, harvests), oracle stopping, unlimited parallelism, and energy as sole constraint. Discussion (Section V) explicitly lists relaxations as future work, not gaps in current applicability.",
     34         "source": "haiku"
     35       },
     36       "alternative_explanations_discussed": {
     37         "applies": false,
     38         "answer": false,
     39         "justification": "Pure theoretical paper with no empirical claims; criterion does not apply per instructions.",
     40         "source": "haiku"
     41       },
     42       "proxy_outcome_distinction": {
     43         "applies": true,
     44         "answer": true,
     45         "justification": "Auxiliary energy consumption D_T (Eq. 7) is precisely defined and clearly distinguished from the goal (minimize D_T while meeting task/deadline constraints). No conflation of measured quantity with intended objective.",
     46         "source": "haiku"
     47       }
     48     },
     49     "limitations_and_scope": {
     50       "limitations_section_present": {
     51         "applies": true,
     52         "answer": false,
     53         "justification": "Discussion (Section V) lists future extensions but contains no dedicated limitations or threats-to-validity section. Design choices (oracle stopping, i.i.d. assumptions, task independence) are explained as modeling choices, not discussed as limitations.",
     54         "source": "haiku"
     55       },
     56       "threats_to_validity_specific": {
     57         "applies": true,
     58         "answer": false,
     59         "justification": "The oracle stopping assumption, i.i.d. assumptions, task independence, and deterministic success model are not analyzed for their specific impact on validity. Threats are mentioned in future work rather than evaluated as limitations of the current analysis.",
     60         "source": "haiku"
     61       },
     62       "scope_boundaries_stated": {
     63         "applies": true,
     64         "answer": false,
     65         "justification": "Model assumptions (unlimited parallelism, energy as only constraint, Poisson arrivals) are stated but framed as modeling choices rather than explicit scope boundaries. What the results do NOT apply to is not clearly delineated.",
     66         "source": "haiku"
     67       }
     68     },
     69     "conflicts_of_interest": {
     70       "funding_disclosed": {
     71         "applies": true,
     72         "answer": false,
     73         "justification": "No funding source is explicitly mentioned in the paper. Author affiliations (UIUC, Stony Brook) are listed but no grant numbers or funding acknowledgment.",
     74         "source": "haiku"
     75       },
     76       "affiliations_disclosed": {
     77         "applies": true,
     78         "answer": true,
     79         "justification": "Author affiliations with UIUC Department of Electrical and Computer Engineering and Stony Brook's AI Innovation Institute are clearly stated with email addresses.",
     80         "source": "haiku"
     81       },
     82       "funder_independent_of_outcome": {
     83         "applies": false,
     84         "answer": false,
     85         "justification": "No funder identified, so criterion cannot be assessed.",
     86         "source": "haiku"
     87       },
     88       "financial_interests_declared": {
     89         "applies": true,
     90         "answer": false,
     91         "justification": "No competing interests statement or declaration of financial interests (patents, equity, consulting relationships). Standard disclosure is absent.",
     92         "source": "haiku"
     93       }
     94     },
     95     "scope_and_framing": {
     96       "key_terms_defined": {
     97         "applies": true,
     98         "answer": false,
     99         "justification": "'Large reasoning models' is used throughout but never formally defined; the paper assumes familiarity with [1], [2]. Other key terms (auxiliary energy, critical regime, variance-aware routing) are defined mathematically, but 'LRM' lacks an operational definition.",
    100         "source": "haiku"
    101       },
    102       "intended_contribution_clear": {
    103         "applies": true,
    104         "answer": true,
    105         "justification": "Explicitly stated: 'introduce a mathematically principled formulation of the energy-aware model routing problem' with first/second-order characterizations and connections to scaling laws. Contribution is unambiguous.",
    106         "source": "haiku"
    107       },
    108       "engagement_with_prior_work": {
    109         "applies": true,
    110         "answer": true,
    111         "justification": "Introduction engages with energy harvesting (Varaiya et al.), system design (Clover, EcoServe, FrugalGPT), information theory (Yang & Ulukus), and scaling laws (Kaplan, Hoffmann). Prior work is contextualized, not just listed.",
    112         "source": "haiku"
    113       }
    114     }
    115   },
    116   "type_checklist": {
    117     "theoretical": {
    118       "formal_quality": {
    119         "assumptions_stated_explicitly": {
    120           "applies": true,
    121           "answer": true,
    122           "justification": "Assumption 1 (page 4) explicitly specifies Poisson arrivals, energy distributions, task distributions, and initial conditions. Oracle stopping, unlimited parallelism, and binary success model are stated as design choices.",
    123           "source": "haiku"
    124         },
    125         "proofs_complete_or_sketched": {
    126           "applies": true,
    127           "answer": true,
    128           "justification": "Theorem 1 (Appendix A, Eqs. 29–43) is fully proved via case analysis. The proofs of Theorem 2 and Lemmas 1–3 are complete; Lemma 3 references Borodin & Salminen [29] for standard Brownian motion results. No 'proof omitted' claims.",
    129           "source": "haiku"
    130         },
    131         "bounds_tight_or_discussed": {
    132           "applies": true,
    133           "answer": false,
    134           "justification": "Theorem 2 provides exact closed-form expressions for E[D_T], not bounds. The paper does not discuss whether these are tight or whether proposed routing policies achieve them. Optimality of greedy policy is not proved.",
    135           "source": "haiku"
    136         },
    137         "counterexamples_explored": {
    138           "applies": true,
    139           "answer": false,
    140           "justification": "Numerical simulation (Appendix B, Fig. 4) verifies main predictions but does not explore edge cases, failure modes, or counterexamples to the theory. Testing is confirmatory, not adversarial.",
    141           "source": "haiku"
    142         },
    143         "notation_consistent": {
    144           "applies": true,
    145           "answer": true,
    146           "justification": "Notation (x for tasks, i for models, τ for thinking time, R_t, G_t, B_t, ψ for success, ε for tolerance) is used consistently throughout. No overloading or conflicts detected.",
    147           "source": "haiku"
    148         },
    149         "constructive_vs_existence_noted": {
    150           "applies": true,
    151           "answer": false,
    152           "justification": "Theorem 1 provides constructive greedy policy (Eq. 13) for optimal auxiliary energy, and Section IV integrates scaling laws. However, the paper does not propose or analyze actual routing algorithms, only characterizes optimal behavior abstractly.",
    153           "source": "haiku"
    154         }
    155       },
    156       "connections": {
    157         "connection_to_practice_discussed": {
    158           "applies": true,
    159           "answer": false,
    160           "justification": "Practical motivation (renewable energy, AI data centers) is stated in introduction. Section IV gestures at scaling laws for practitioners. But no empirical validation on real systems, no implementation, no deployment case study; connection remains conceptual.",
    161           "source": "haiku"
    162         },
    163         "relationship_to_prior_work_clear": {
    164           "applies": true,
    165           "answer": true,
    166           "justification": "Paper clearly differentiates from energy-harvesting communications (first/second-order analysis via Brownian motion vs. prior information-theoretic work) and from prior system approaches (jointly considers renewable energy, inference heterogeneity, and deadlines where prior work studied them in isolation).",
    167           "source": "haiku"
    168         },
    169         "computational_complexity_discussed": {
    170           "applies": true,
    171           "answer": false,
    172           "justification": "Paper describes myopic dispatcher as 'tractable' and discusses dispatcher overhead informally, but provides no Big-O analysis, NP-hardness results, or formal complexity treatment. Tractability is asserted, not proved.",
    173           "source": "haiku"
    174         },
    175         "limitations_of_formal_model_stated": {
    176           "applies": true,
    177           "answer": false,
    178           "justification": "Oracle stopping assumption, binary success model, unlimited parallelism, and i.i.d. assumptions are explained as design choices but not characterized as limitations. Paper does not explicitly state what the model fails to capture about reality.",
    179           "source": "haiku"
    180         }
    181       }
    182     }
    183   },
    184   "claims": [
    185     {
    186       "claim": "At critical operating point (CLB ≈ R), auxiliary energy deficit scales as σ√(2T/π) with time, governed by Brownian motion fluctuations.",
    187       "evidence": "Theorem 2 (Eq. 28), diffusion approximation analysis (Section III.B), Figure 4 numerical validation.",
    188       "supported": "strong"
    189     },
    190     {
    191       "claim": "System performance exhibits three regimes: linear growth for persistent deficit (CLB > R), bounded deficit for persistent surplus (CLB < R), and √T scaling at criticality (CLB ≈ R).",
    192       "evidence": "Theorem 2 with case analysis µ<0, µ>0, µ=0. Figure 4 confirms regime transitions.",
    193       "supported": "strong"
    194     },
    195     {
    196       "claim": "Routing errors accumulate as first-order drift, eventually dominating second-order fluctuation effects.",
    197       "evidence": "Section III.C analysis of excess energy ΔE, Figure 2 mean-variance tradeoff ratio κ, Figure 4 transition point detection.",
    198       "supported": "strong"
    199     },
    200     {
    201       "claim": "Training-compute and inference-compute scaling laws can guide lightweight energy-aware routing without heavy computation at dispatch time.",
    202       "evidence": "Section IV parametric framework (Eqs. for e_i, T_i) integrated with Hoffmann et al. scaling laws. No empirical deployment validation.",
    203       "supported": "moderate"
    204     },
    205     {
    206       "claim": "The greedy auxiliary energy injection policy (Eq. 13) minimizes cumulative injections, proven via Theorem 1 connection to running minimum of unconstrained battery trajectory.",
    207       "evidence": "Theorem 1 proof (Appendix A). The greedy policy is optimal by construction for the stated problem.",
    208       "supported": "strong"
    209     },
    210     {
    211       "claim": "Energy-aware routing requires matching task difficulty and requirements to model capability; misrouting incurs significant excess energy.",
    212       "evidence": "Figure 3 demonstrates energy-latency crossover point where small model becomes infeasible. Figure 4 shows prediction error accumulation.",
    213       "supported": "strong"
    214     }
    215   ],
    216   "methodology_tags": [
    217     "theoretical"
    218   ],
    219   "key_findings": "The paper provides first- and second-order characterizations of energy-aware routing to large reasoning models under renewable energy constraints. The critical operating regime (where renewable energy rate equals expected consumption) exhibits √T deficit scaling governed by Brownian motion fluctuations. Three distinct regimes emerge: persistent deficit yields linear growth in auxiliary energy, persistent surplus yields bounded deficit, and criticality yields √T scaling. Routing accuracy is a first-order effect that eventually dominates second-order fluctuation costs; the interplay between mean drift and variance determines whether dispatch accuracy or robustness is the binding constraint. Integration with empirical scaling laws enables lightweight routing policies without heavy real-time computation.",
    220   "red_flags": [
    221     {
    222       "flag": "No empirical validation",
    223       "detail": "Despite practical motivation (AI data centers, renewable energy), the paper provides no experiments on real systems, no traces from production workloads, and no comparison to baseline routing policies. Validation is limited to numerical simulation of the theoretical model."
    224     },
    225     {
    226       "flag": "Oracle stopping assumption unrealistic",
    227       "detail": "Model assumes router can perfectly halt computation at chosen time τ. Real systems exhibit variability in actual execution time, early-stopping heuristics, and stragglers. This significantly simplifies the control problem."
    228     },
    229     {
    230       "flag": "Task independence assumption oversimplified",
    231       "detail": "Tasks are modeled as independent; in practice, user sessions have request dependencies, priority levels, and chaining. This limits applicability to batch workloads."
    232     },
    233     {
    234       "flag": "No algorithmic contribution",
    235       "detail": "Paper characterizes the optimal routing problem but proposes no concrete routing algorithms beyond myopic dispatcher. Practitioners would need substantial translation work to implement results."
    236     },
    237     {
    238       "flag": "Scaling law integration is loose",
    239       "detail": "Section IV adds parametric energy/latency scaling laws but does not deeply integrate them into the main Brownian motion analysis. Connection feels post-hoc rather than foundational."
    240     },
    241     {
    242       "flag": "Oversimplified success model",
    243       "detail": "Binary success probability ψ_i(θ;τ) with monotonic relationship to compute ignores quality degradation, saturation effects, and the empirical reality that reasoning models exhibit diminishing returns past optimal thinking time."
    244     },
    245     {
    246       "flag": "No comparison to alternative theoretical frameworks",
    247       "detail": "Paper does not justify why Brownian motion diffusion is the right lens; other stochastic models (queueing theory, optimal control, network flow) are not discussed."
    248     }
    249   ],
    250   "cited_papers": [
    251     {
    252       "title": "Multi-step reasoning with large language models, a survey",
    253       "authors": "Plaat et al.",
    254       "year": 2025,
    255       "relevance": "Surveys reasoning model capabilities and recent advances; motivates heterogeneity of inference costs in LRMs."
    256     },
    257     {
    258       "title": "A Theory of Inference Compute Scaling: Reasoning through Directed Stochastic Skill Search",
    259       "authors": "Ellis-Mohr, Nayak, Varshney",
    260       "year": 2026,
    261       "relevance": "Theoretical framework for inference-compute scaling; directly integrated into Section IV to model task success probability and token scaling."
    262     },
    263     {
    264       "title": "Redesigning data centers for renewable energy",
    265       "authors": "Agarwal et al.",
    266       "year": 2021,
    267       "relevance": "Addresses operational challenges of renewable-powered data centers; establishes motivation for energy-aware scheduling."
    268     },
    269     {
    270       "title": "Training compute-optimal large language models",
    271       "authors": "Hoffmann et al.",
    272       "year": 2022,
    273       "relevance": "Empirical scaling laws for training compute; parameterized in Section IV as sigmoid success probability as a function of model size."
    274     },
    275     {
    276       "title": "Clover: Toward sustainable AI with carbon-aware machine learning inference service",
    277       "authors": "Li et al.",
    278       "year": 2023,
    279       "relevance": "Practical system for carbon-aware routing; demonstrates the real-world need for energy-efficient dispatch."
    280     },
    281     {
    282       "title": "EcoServe: Designing carbon-aware AI inference systems",
    283       "authors": "Li et al.",
    284       "year": 2025,
    285       "relevance": "Carbon-aware AI inference system design; provides context for practical deployment of energy-aware routing."
    286     },
    287     {
    288       "title": "Optimal packet scheduling in an energy harvesting communication system",
    289       "authors": "Yang & Ulukus",
    290       "year": 2012,
    291       "relevance": "Foundational work on scheduling under energy harvesting constraints; paper extends these ideas to LRM routing with deadline constraints."
    292     },
    293     {
    294       "title": "Energy harvesting wireless communications: A review of recent advances",
    295       "authors": "Ulukus et al.",
    296       "year": 2015,
    297       "relevance": "Survey of energy-harvesting communication systems; provides theoretical precedents for diffusion-based analysis."
    298     },
    299     {
    300       "title": "FrugalGPT: How to use large language models while reducing cost and improving performance",
    301       "authors": "Chen et al.",
    302       "year": 2024,
    303       "relevance": "Practical model-selection strategies (cascade, prompt adaptation) for cost reduction; complements theoretical routing framework."
    304     }
    305   ],
    306   "engagement_factors": {
    307     "practical_relevance": {
    308       "score": 2,
    309       "justification": "Problem is well-motivated (energy costs in AI data centers, renewable variability) but theoretical results are abstract. No implementation or deployment guidance provided; practitioners would struggle to operationalize the framework."
    310     },
    311     "surprise_contrarian": {
    312       "score": 2,
    313       "justification": "The √T scaling at criticality (vs. linear or constant growth) is moderately surprising. The three-regime characterization provides new structure. However, the Brownian motion analysis is standard technique; the novelty lies in problem formulation rather than analytical method."
    314     },
    315     "fear_safety": {
    316       "score": 0,
    317       "justification": "Paper focuses on energy efficiency and cost minimization. No connection to AI safety, alignment, robustness, or failure modes. No risk or threat narrative engaged."
    318     },
    319     "drama_conflict": {
    320       "score": 1,
    321       "justification": "Could frame as environmental/sustainability concern (reducing AI energy consumption) but the paper does not engage with this narrative. Presentation is dry and technical rather than provocative or timely."
    322     },
    323     "demo_ability": {
    324       "score": 1,
    325       "justification": "Numerical simulation in Appendix B demonstrates √T scaling; could be replicated as a self-contained Python script. However, no code released, no reproducible artifact, and simulations use abstract parameters, not real models or workloads."
    326     },
    327     "brand_recognition": {
    328       "score": 2,
    329       "justification": "Authors affiliated with UIUC and Stony Brook (respectable but not FAANG labs). Varshney is known for information-theoretic approaches. arXiv preprint; venue acceptance status unknown. Not yet high-visibility research."
    330     }
    331   },
    332   "hn_data": {
    333     "threads": [],
    334     "top_points": 0,
    335     "total_points": 0,
    336     "total_comments": 0
    337   }
    338 }

Impressum · Datenschutz