citation-graph.json (349556B)
1 { 2 "node_count": 572, 3 "edge_count": 2030, 4 "nodes": [ 5 { 6 "id": "2025-ai-agent-2026", 7 "title": "The 2025 AI Agent Index" 8 }, 9 { 10 "id": "3dshape2vecset-3d-shape-2023", 11 "title": "3DShape2VecSet: A 3D Shape Representation for Neural Fields and Generative Diffusion Models" 12 }, 13 { 14 "id": "a2hcoder-llmdriven-coding-2025", 15 "title": "A2H-MAS: An Algorithm-to-HLS Multi-Agent System for Automated and Reliable FPGA Implementation" 16 }, 17 { 18 "id": "aart-aiassisted-redteaming-2023", 19 "title": "AART: AI-Assisted Red-Teaming with Diverse Data Generation for New LLM-powered Applications" 20 }, 21 { 22 "id": "acar-adaptive-complexity-2026", 23 "title": "ACAR: Adaptive Complexity Routing for Multi-Model Ensembles with Auditable Decision Traces" 24 }, 25 { 26 "id": "accelerating-automatic-program-2025", 27 "title": "Accelerating Automatic Program Repair with Dual Retrieval-Augmented Fine-Tuning and Patch Generation on Large Language Models" 28 }, 29 { 30 "id": "accelerating-large-language-2023", 31 "title": "Accelerating Large Language Model Decoding with Speculative Sampling" 32 }, 33 { 34 "id": "across-programming-language-2025", 35 "title": "Across Programming Language Silos: A Study on Cross-Lingual Retrieval-augmented Code Generation" 36 }, 37 { 38 "id": "adafuse-adaptive-ensemble-2026", 39 "title": "AdaFuse: Adaptive Ensemble Decoding with Test-Time Scaling for LLMs" 40 }, 41 { 42 "id": "adaplanner-adaptive-planning-2023", 43 "title": "AdaPlanner: Adaptive Planning from Feedback with Language Models" 44 }, 45 { 46 "id": "adaptevolve-improving-efficiency-2026", 47 "title": "AdaptEvolve: Improving Efficiency of Evolutionary AI Agents through Adaptive Model Selection" 48 }, 49 { 50 "id": "adapting-knowledge-prompt-2025", 51 "title": "Adapting Knowledge Prompt Tuning for Enhanced Automated Program Repair" 52 }, 53 { 54 "id": "adaptive-attacks-break-2025", 55 "title": "Adaptive Attacks Break Defenses Against Indirect Prompt Injection Attacks on LLM Agents" 56 }, 57 { 58 "id": "adaptive-attacks-bypass-defenses-2025", 59 "title": "The Attacker Moves Second: Stronger Adaptive Attacks Bypass Defenses Against LLM Jailbreaks and Prompt Injections" 60 }, 61 { 62 "id": "adaptive-data-augmentation-2026", 63 "title": "Adaptive Data Augmentation with Multi-armed Bandit: Sample-Efficient Embedding Calibration for Implicit Pattern Recognition" 64 }, 65 { 66 "id": "adaptive-selftriggered-control-2023", 67 "title": "Adaptive Self-Triggered Control for Multi-Agent Systems with Actuator Failures and Time-Varying State Constraints" 68 }, 69 { 70 "id": "adaptive-test-generation-2023", 71 "title": "An Empirical Evaluation of Using Large Language Models for Automated Unit Test Generation" 72 }, 73 { 74 "id": "adaptive-visionbased-coverage-2025", 75 "title": "Adaptive Vision-Based Coverage Optimization in Mobile Wireless Sensor Networks: A Multi-Agent Deep Reinforcement Learning Approach" 76 }, 77 { 78 "id": "adaptrack-constrained-decoding-2025", 79 "title": "AdapTrack: Constrained Decoding without Distorting LLM's Output Intent" 80 }, 81 { 82 "id": "adas-automated-design-2024", 83 "title": "Automated Design of Agentic Systems" 84 }, 85 { 86 "id": "adoption-generative-artificial-2026", 87 "title": "Adoption of Generative Artificial Intelligence in the German Software Engineering Industry: An Empirical Study" 88 }, 89 { 90 "id": "advancements-generative-ai-2023", 91 "title": "Advancements in Generative AI: A Comprehensive Review of GANs, GPT, Autoencoders, Diffusion Model, and Transformers" 92 }, 93 { 94 "id": "advancements-software-engineering-2024", 95 "title": "Advancements in software engineering using AI" 96 }, 97 { 98 "id": "advancing-code-generation-2025", 99 "title": "Towards Advancing Code Generation with Large Language Models: A Research Roadmap" 100 }, 101 { 102 "id": "advancing-engineering-research-2025", 103 "title": "Advancing engineering research through context-aware and knowledge graph–based retrieval-augmented generation" 104 }, 105 { 106 "id": "advancing-language-model-2025", 107 "title": "T1: Advancing Language Model Reasoning through Reinforcement Learning and Inference Scaling" 108 }, 109 { 110 "id": "advancing-largemolecule-discovery-2025", 111 "title": "Advancing large-molecule discovery with a unified digital platform for data analysis and workflow management" 112 }, 113 { 114 "id": "advancing-llm-safe-2025", 115 "title": "Advancing LLM Safe Alignment with Safety Representation Ranking" 116 }, 117 { 118 "id": "advancing-methodological-development-2025", 119 "title": "Advancing methodological development of artificial intelligence in patient-centered comparative clinical effectiveness research: Patient-Centered Outcomes Research Institute's unique contribution to research done differently" 120 }, 121 { 122 "id": "advancing-nursing-regulation-2025", 123 "title": "Advancing nursing regulation in the digital era: Harnessing AI to bridge workforce gaps and strengthen practice competency and safety" 124 }, 125 { 126 "id": "advancing-software-quality-2025", 127 "title": "Advancing Software Quality: A Standards-Focused Review of LLM-Based Assurance Techniques" 128 }, 129 { 130 "id": "adversarial-bug-reports-2025", 131 "title": "Adversarial Bug Reports as a Security Risk in Language Model-Based Automated Program Repair" 132 }, 133 { 134 "id": "adversarial-threat-vectors-2025", 135 "title": "Adversarial Threat Vectors and Risk Mitigation for Retrieval-Augmented Generation Systems" 136 }, 137 { 138 "id": "advevomarl-shaping-internalized-2025", 139 "title": "AdvEvo-MARL: Shaping Internalized Safety Through Adversarial Co-Evolution in Multi-Agent Reinforcement Learning" 140 }, 141 { 142 "id": "aegaeon-effective-gpu-2025", 143 "title": "Aegaeon: Effective GPU Pooling for Concurrent LLM Serving on the Market" 144 }, 145 { 146 "id": "aegis-automated-coevolutionary-2025", 147 "title": "AEGIS: Automated Co-Evolutionary Framework for Guarding Prompt Injection" 148 }, 149 { 150 "id": "aegis20-diverse-ai-2025", 151 "title": "AEGIS2.0: A Diverse AI Safety Dataset and Risks Taxonomy for Alignment of LLM Guardrails" 152 }, 153 { 154 "id": "aegisagent-autonomous-defense-2025", 155 "title": "AegisAgent: An Autonomous Defense Agent Against Prompt Injection Attacks in LLM-HARs" 156 }, 157 { 158 "id": "african-woman-rhythmic-2024", 159 "title": "The African Woman is Rhythmic and Soulful: An Investigation of Implicit Biases in LLM Open-ended Text Generation" 160 }, 161 { 162 "id": "agent-contracts-formal-2026", 163 "title": "Agent Contracts: A Formal Framework for Resource-Bounded Autonomous AI Systems" 164 }, 165 { 166 "id": "agent-developer-practices-2025", 167 "title": "An Empirical Study of Agent Developer Practices in AI Agent Frameworks" 168 }, 169 { 170 "id": "agent-error-taxonomy-2025", 171 "title": "Where LLM Agents Fail and How They Can Learn from Failures" 172 }, 173 { 174 "id": "agent-security-bench-2024", 175 "title": "Agent Security Bench (ASB): Formalizing and Benchmarking Attacks and Defenses in LLM-Based Agents" 176 }, 177 { 178 "id": "agentasajudge-evaluate-agents-2024", 179 "title": "Agent-as-a-Judge: Evaluate Agents with Agents" 180 }, 181 { 182 "id": "agentask-multiagent-systems-2025", 183 "title": "AGENTASK: Multi-Agent Systems Need to Ask" 184 }, 185 { 186 "id": "agentbased-evaluation-framework-2025", 187 "title": "An Agent-based Evaluation Framework for Complex Code Generation" 188 }, 189 { 190 "id": "agentbench-evaluating-llms-2023", 191 "title": "AgentBench: Evaluating LLMs as Agents" 192 }, 193 { 194 "id": "agentdojo-dynamic-environment-2024", 195 "title": "AgentDojo: A Dynamic Environment to Evaluate Prompt Injection Attacks and Defenses for LLM Agents" 196 }, 197 { 198 "id": "agentfm-roleaware-failure-2025", 199 "title": "AgentFM: Role-Aware Failure Management for Distributed Databases with LLM-Driven Multi-Agents" 200 }, 201 { 202 "id": "agentfuzzer-generic-blackbox-2025", 203 "title": "AGENTVIGIL: Generic Black-Box Red-teaming for Indirect Prompt Injection against LLM Agents" 204 }, 205 { 206 "id": "agentic-adoption-github-2026", 207 "title": "Agentic Much? Adoption of Coding Agents on GitHub" 208 }, 209 { 210 "id": "agentic-ai-architectures-2026", 211 "title": "Agentic Artificial Intelligence (AI): Architectures, Taxonomies, and Evaluation of Large Language Model Agents" 212 }, 213 { 214 "id": "agentic-ai-assessment-framework-2025", 215 "title": "Beyond Task Completion: An Assessment Framework for Evaluating Agentic AI Systems" 216 }, 217 { 218 "id": "agentic-ai-modernization-2026", 219 "title": "Agentic AI Modernization: Transforming Institutional Infrastructure Through Orchestrated Multi-Agent LLM Framework" 220 }, 221 { 222 "id": "agentic-ai-security-survey-2025", 223 "title": "Agentic AI Security: Threats, Defenses, Evaluation, and Open Challenges" 224 }, 225 { 226 "id": "agentic-ai-software-2025", 227 "title": "Agentic AI for Software: thoughts from Software Engineering community" 228 }, 229 { 230 "id": "agentic-ai-software-2025-2", 231 "title": "Agentic AI Software Engineers: Programming with Trust" 232 }, 233 { 234 "id": "agentic-bug-reproduction-2025", 235 "title": "Agentic Bug Reproduction for Effective Automated Program Repair at Google" 236 }, 237 { 238 "id": "agentic-memory-learning-2026", 239 "title": "Agentic Memory: Learning Unified Long-Term and Short-Term Memory Management for Large Language Model Agents" 240 }, 241 { 242 "id": "agentic-programming-survey-2025", 243 "title": "AI Agentic Programming: A Survey of Techniques, Challenges, and Opportunities" 244 }, 245 { 246 "id": "agentic-refactoring-empirical-2025", 247 "title": "Agentic Refactoring: An Empirical Study of AI Coding Agents" 248 }, 249 { 250 "id": "agentic-software-engineering-2025", 251 "title": "Toward Agentic Software Engineering Beyond Code: Framing Vision, Values, and Vocabulary" 252 }, 253 { 254 "id": "agentless-2024", 255 "title": "AGENTLESS: Demystifying LLM-based Software Engineering Agents" 256 }, 257 { 258 "id": "agentmesh-cooperative-multiagent-2025", 259 "title": "AgentMesh: A Cooperative Multi-Agent Generative AI Framework for Software Development Automation" 260 }, 261 { 262 "id": "agents-of-chaos-2026", 263 "title": "Agents of Chaos" 264 }, 265 { 266 "id": "agents4plc-automating-closedloop-2024", 267 "title": "Agents4PLC: Automating Closed-loop PLC Code Generation and Verification in Industrial Control Systems using LLM-based Agents" 268 }, 269 { 270 "id": "agentsllm-augmentative-generation-2025", 271 "title": "AGENTS-LLM: Augmentative GENeration of Challenging Traffic Scenarios with an Agentic LLM Framework" 272 }, 273 { 274 "id": "agentsnet-coordination-collaborative-2025", 275 "title": "AGENTSNET: Coordination and Collaborative Reasoning in Multi-Agent LLMs" 276 }, 277 { 278 "id": "agentspawn-adaptive-multiagent-2026", 279 "title": "AgentSpawn: Adaptive Multi-Agent Collaboration Through Dynamic Spawning for Long-Horizon Code Generation" 280 }, 281 { 282 "id": "agenttypo-adaptive-typographic-2025", 283 "title": "AgentTypo: Adaptive Typographic Prompt Injection Attacks against Black-box Multimodal Agents" 284 }, 285 { 286 "id": "agentvigil-generic-blackbox-2025", 287 "title": "AGENTVIGIL: Generic Black-Box Red-teaming for Indirect Prompt Injection against LLM Agents" 288 }, 289 { 290 "id": "agint-agentic-graph-2025", 291 "title": "Agint: Agentic Graph Compilation for Software Engineering Agents" 292 }, 293 { 294 "id": "ai-agents-software-2025", 295 "title": "AI Agents in Software Engineering Optimizing Software Development Processes and Enhancing Security Management in Learning Management Systems" 296 }, 297 { 298 "id": "ai-alignment-contemporary-2025", 299 "title": "AI Alignment: A Comprehensive Survey" 300 }, 301 { 302 "id": "ai-alignment-strategies-2025", 303 "title": "AI Alignment Strategies from a Risk Perspective: Independent Safety Mechanisms or Shared Failures?" 304 }, 305 { 306 "id": "ai-as-cognitive-2025", 307 "title": "AI as Cognitive Amplifier: Rethinking Human Judgment in the Age of Generative AI" 308 }, 309 { 310 "id": "ai-assistance-legal-2023", 311 "title": "AI Assistance in Legal Analysis: An Empirical Study" 312 }, 313 { 314 "id": "ai-code-generators-2024", 315 "title": "AI Code Generators for Security: Friend or Foe?" 316 }, 317 { 318 "id": "ai-code-maintainability-registered-report-2024", 319 "title": "Does Co-Development with AI Assistants Lead to More Maintainable Code? A Registered Report" 320 }, 321 { 322 "id": "ai-code-not-reproducible-2025", 323 "title": "AI-Generated Code Is Not Reproducible (Yet): An Empirical Study of Dependency Gaps in LLM-Based Coding Agents" 324 }, 325 { 326 "id": "ai-code-review-2025", 327 "title": "AI Code Review Assistant: A Modern Web Based Solution for Automated Code Analysis and Developer Productivity Enhancement" 328 }, 329 { 330 "id": "ai-code-survival-open-source-2026", 331 "title": "Will It Survive? Deciphering the Fate of AI-Generated Code in Open Source" 332 }, 333 { 334 "id": "ai-code-wild-2025", 335 "title": "AI Code in the Wild: Measuring Security Risks and Ecosystem Shifts of AI-Generated Code in Modern Software" 336 }, 337 { 338 "id": "ai-ides-vs-agents-impact-2026", 339 "title": "AI IDEs or Autonomous Agents? Measuring the Impact of Coding Agents on Software Development" 340 }, 341 { 342 "id": "ai-inference-falling-costs-2025", 343 "title": "The Price of Progress: Algorithmic Efficiency and the Falling Cost of AI Inference" 344 }, 345 { 346 "id": "ai-nonmem-coding-2025", 347 "title": "AI for NONMEM Coding in Pharmacometrics Research and Education: Shortcut or Pitfall?" 348 }, 349 { 350 "id": "ai-productivity-index-2025", 351 "title": "The AI Productivity Index: APEX-v1-extended" 352 }, 353 { 354 "id": "ai-prs-code-quality-reuse-2026", 355 "title": "More Code, Less Reuse: Investigating Code Quality and Reviewer Sentiment towards AI-generated Pull Requests" 356 }, 357 { 358 "id": "ai-safety-subproblems-2023", 359 "title": "AI Safety Subproblems for Software Engineering Researchers" 360 }, 361 { 362 "id": "ai-scientist-fully-2024", 363 "title": "The AI Scientist: Towards Fully Automated Open-Ended Scientific Discovery" 364 }, 365 { 366 "id": "ai-scientistv2-workshoplevel-2025", 367 "title": "The AI Scientist-v2: Workshop-Level Automated Scientific Discovery via Agentic Tree Search" 368 }, 369 { 370 "id": "ai-software-engineering-2023", 371 "title": "AI in Software Engineering: A Survey on Project Management Applications" 372 }, 373 { 374 "id": "ai-software-engineering-2023-2", 375 "title": "AI in Software Engineering: Case Studies and Prospects" 376 }, 377 { 378 "id": "ai-software-engineering-2025", 379 "title": "AI in Software Engineering: Perceived Roles and Their Impact on Adoption" 380 }, 381 { 382 "id": "ai-testing-should-2025", 383 "title": "AI Testing Should Account for Sophisticated Strategic Behaviour" 384 }, 385 { 386 "id": "aiassisted-assessment-coding-2024", 387 "title": "AI-Assisted Assessment of Coding Practices in Modern Code Review" 388 }, 389 { 390 "id": "aiassisted-code-editors-2025", 391 "title": "AI-Assisted Code Editors with Real-Time Collaboration: A Comprehensive Review" 392 }, 393 { 394 "id": "aiassisted-fixes-code-2025", 395 "title": "AI-Assisted Fixes to Code Review Comments at Scale" 396 }, 397 { 398 "id": "aiassisted-programming-decreases-2025", 399 "title": "AI-Assisted Programming Decreases the Productivity of Experienced Developers by Increasing the Technical Debt and Maintenance Burden" 400 }, 401 { 402 "id": "aiaugmented-devops-paradigm-2023", 403 "title": "AI-Augmented DevOps: A Paradigm Shift in Scalable Software Engineering and IT Operations" 404 }, 405 { 406 "id": "aiaugmented-software-engineering-2025", 407 "title": "Towards AI-Augmented Software Engineering: A Theoretical Framework" 408 }, 409 { 410 "id": "aidriven-scholarly-peer-2025", 411 "title": "AI-Driven Scholarly Peer Review via Persistent Workflow Prompting, Meta-Prompting, and Meta-Reasoning" 412 }, 413 { 414 "id": "aidriven-software-engineering-2023", 415 "title": "AI-driven software engineering" 416 }, 417 { 418 "id": "aidriven-software-engineering-2024", 419 "title": "AI-Driven Software Engineering – The Role of Conceptual Modeling" 420 }, 421 { 422 "id": "aiguided-modeldriven-embedded-2022", 423 "title": "AI-guided Model-Driven Embedded Software Engineering" 424 }, 425 { 426 "id": "aiintegrated-software-engineering-2025", 427 "title": "AI-Integrated Software Engineering: Developing Systems that Evolve with Learning Capabilities" 428 }, 429 { 430 "id": "aime-ai-system-2024", 431 "title": "AIME: AI System Optimization via Multiple LLM Evaluators" 432 }, 433 { 434 "id": "ainative-software-engineering-2024", 435 "title": "Towards AI-Native Software Engineering (SE 3.0): A Vision and a Challenge Roadmap" 436 }, 437 { 438 "id": "ainstein-assessing-feasibility-2025", 439 "title": "AInstein: Assessing the Feasibility of AI-Generated Approaches to Research Problems" 440 }, 441 { 442 "id": "aipowered-code-review-2023", 443 "title": "AI-Powered Code Review Enhancing Software Quality with Intelligent Agents" 444 }, 445 { 446 "id": "aipowered-code-review-2024", 447 "title": "AI-powered Code Review with LLMs: Early Results" 448 }, 449 { 450 "id": "aipowered-peer-review-2023", 451 "title": "AI-powered peer review process: An approach to enhance computer science students' engagement with code review in industry-based subjects" 452 }, 453 { 454 "id": "aipowered-software-development-2025-2", 455 "title": "AI-Powered Software Development Life Cycle: From Requirements to Maintenance" 456 }, 457 { 458 "id": "aipowered-solutions-computer-2025", 459 "title": "AI-Powered Solutions in Computer Science: A Comprehensive COPRAS Evaluation" 460 }, 461 { 462 "id": "ais-environmental-cost-2025", 463 "title": "AI's Environmental Cost: Comparing Resource Consumption Between SLMs and LLMs Across Queries" 464 }, 465 { 466 "id": "aitutoring-software-engineering-2024", 467 "title": "AI-Tutoring in Software Engineering Education: Experiences with Large Language Models in Programming Assessments" 468 }, 469 { 470 "id": "aixamine-simplified-llm-2025", 471 "title": "aiXamine: Simplified LLM Safety and Security" 472 }, 473 { 474 "id": "aladdin-joint-placement-2024", 475 "title": "Aladdin: Joint Placement and Scaling for SLO-Aware LLM Serving" 476 }, 477 { 478 "id": "aligned-query-expansion-2025", 479 "title": "Aligned Query Expansion: Efficient Query Expansion for Information Retrieval through LLM Alignment" 480 }, 481 { 482 "id": "aligning-objective-llmbased-2024", 483 "title": "Aligning the Objective of LLM-based Program Repair" 484 }, 485 { 486 "id": "alignment-faking-2024", 487 "title": "Alignment Faking in Large Language Models" 488 }, 489 { 490 "id": "alignment-safety-llm-survey-2025", 491 "title": "Alignment and Safety in Large Language Models: Safety Mechanisms, Training Paradigms, and Emerging Challenges" 492 }, 493 { 494 "id": "alignpro-principled-approach-2025", 495 "title": "Align-Pro: A Principled Approach to Prompt Optimization for LLM Alignment" 496 }, 497 { 498 "id": "alleviating-fear-losing-2025", 499 "title": "Alleviating the Fear of Losing Alignment in LLM Fine-tuning" 500 }, 501 { 502 "id": "alphacode-competition-level-2022", 503 "title": "Competition-Level Code Generation with AlphaCode" 504 }, 505 { 506 "id": "alphapo-reward-shape-2025", 507 "title": "AlphaPO: Reward Shape Matters for LLM Alignment" 508 }, 509 { 510 "id": "ambigswe-interactive-agents-2025", 511 "title": "Ambig-SWE: Interactive Agents to Overcome Underspecificity in Software Engineering" 512 }, 513 { 514 "id": "among-us-measuring-2026", 515 "title": "Among Us: Measuring and Mitigating Malicious Contributions in Model Collaboration Systems" 516 }, 517 { 518 "id": "among-us-sandbox-2025", 519 "title": "Among Us: A Sandbox for Measuring and Detecting Agentic Deception" 520 }, 521 { 522 "id": "analysis-evaluation-synthetic-2025", 523 "title": "Analysis and Evaluation of Synthetic Data Generation in Speech Dysfluency Detection" 524 }, 525 { 526 "id": "analysis-research-status-2025", 527 "title": "Analysis of Research Status in the Field of Automated Program Repair" 528 }, 529 { 530 "id": "analysis-studentllm-interaction-2025", 531 "title": "Analysis of Student-LLM Interaction in a Software Engineering Project" 532 }, 533 { 534 "id": "anatomy-capability-emergence-2026", 535 "title": "Anatomy of Capability Emergence: Scale-Invariant Representation Collapse and Top-Down Reorganization in Neural Networks" 536 }, 537 { 538 "id": "ancoder-anchored-code-2026", 539 "title": "AnCoder: Anchored Code Generation via Discrete Diffusion Models" 540 }, 541 { 542 "id": "animagents-coordinating-multistage-2025", 543 "title": "AnimAgents: Coordinating Multi-Stage Animation Pre-Production with Human–Multi-Agent Collaboration" 544 }, 545 { 546 "id": "annotation-alignment-comparing-2024", 547 "title": "Annotation alignment: Comparing LLM and human annotations of conversational safety" 548 }, 549 { 550 "id": "antiregulatory-ai-how-2025", 551 "title": "Anti-Regulatory AI: How \"AI Safety\" is Leveraged Against Regulatory Oversight" 552 }, 553 { 554 "id": "appatch-automated-adaptive-2024", 555 "title": "APPATCH: Automated Adaptive Prompting Large Language Models for Real-World Software Vulnerability Patching" 556 }, 557 { 558 "id": "applying-rlaif-code-2024", 559 "title": "Applying RLAIF for Code Generation with API-usage in Lightweight LLMs" 560 }, 561 { 562 "id": "appworld-controllable-world-2024", 563 "title": "AppWorld: A Controllable World of Apps and People for Benchmarking Interactive Coding Agents" 564 }, 565 { 566 "id": "apr-llm-survey-2025", 567 "title": "A Survey of LLM-based Software Repair: Taxonomies, Design Paradigms, and Applications" 568 }, 569 { 570 "id": "april-api-synthesis-2025", 571 "title": "APRIL: API Synthesis with Automatic Prompt Optimization and Reinforcement Learning" 572 }, 573 { 574 "id": "aptserve-adaptive-request-2025", 575 "title": "Apt-Serve: Adaptive Request Scheduling on Hybrid Cache for Scalable LLM Inference Serving" 576 }, 577 { 578 "id": "aquallm-evaluating-accuracy-2025", 579 "title": "AQUA-LLM: Evaluating Accuracy, Quantization, and Adversarial Robustness Trade-offs in LLMs for Cybersecurity Question Answering" 580 }, 581 { 582 "id": "arc-measure-intelligence-2019", 583 "title": "On the Measure of Intelligence" 584 }, 585 { 586 "id": "arcmemo-abstract-reasoning-2025", 587 "title": "ArcMemo: Abstract Reasoning Composition with Lifelong LLM Memory" 588 }, 589 { 590 "id": "arcs-agentic-retrievalaugmented-2025", 591 "title": "ARCS: Agentic Retrieval-Augmented Code Synthesis with Iterative Refinement" 592 }, 593 { 594 "id": "arena-hard-auto-2024", 595 "title": "From Crowdsourced Data to High-Quality Benchmarks: Arena-Hard and BenchBuilder Pipeline" 596 }, 597 { 598 "id": "ares-automated-evaluation-2023", 599 "title": "ARES: An Automated Evaluation Framework for Retrieval-Augmented Generation Systems" 600 }, 601 { 602 "id": "argus-defending-against-2025", 603 "title": "ARGUS: Defending Against Multimodal Indirect Prompt Injection via Steering Instruction-Following Behavior" 604 }, 605 { 606 "id": "arks-active-retrieval-2024", 607 "title": "EVOR: Evolving Retrieval for Code Generation" 608 }, 609 { 610 "id": "art-adaptive-response-2025", 611 "title": "ART: Adaptive Response Tuning Framework — A Multi-Agent Tournament-Based Approach to LLM Response Optimization" 612 }, 613 { 614 "id": "art-repair-optimizing-2025", 615 "title": "The Art of Repair: Optimizing Iterative Program Repair with Instruction-Tuned Models" 616 }, 617 { 618 "id": "art-scaling-test-time-compute-2025", 619 "title": "The Art of Scaling Test-Time Compute for Large Language Models" 620 }, 621 { 622 "id": "artifactsbench-bridging-visualinteractive-2025", 623 "title": "ArtifactsBench: Bridging the Visual-Interactive Gap in LLM Code Generation Evaluation" 624 }, 625 { 626 "id": "artificial-brain-neuroscience-2026", 627 "title": "The Artificial Brain: A Neuroscience Inspired Architecture for Multimodal AI Systems" 628 }, 629 { 630 "id": "artificial-human-intelligence-2025", 631 "title": "Artificial or Human Intelligence?" 632 }, 633 { 634 "id": "artificial-intelligence-assistance-2026", 635 "title": "Artificial intelligence assistance in foresight research: Enhancing technology assessment through data-driven methods" 636 }, 637 { 638 "id": "artificial-intelligence-health-2022", 639 "title": "Artificial Intelligence for Health Message Generation: Theory, Method, and an Empirical Study Using Prompt Engineering" 640 }, 641 { 642 "id": "artificial-just-artful-2025", 643 "title": "Artificial or Just Artful? Do LLMs Bend the Rules in Programming?" 644 }, 645 { 646 "id": "artificial-organisations-2026", 647 "title": "Artificial Organisations" 648 }, 649 { 650 "id": "ask-me-anything-2022", 651 "title": "Ask Me Anything: A Simple Strategy for Prompting Language Models" 652 }, 653 { 654 "id": "askeda-design-assistant-2024", 655 "title": "Ask-EDA: A Design Assistant Empowered by LLM, Hybrid RAG and Abbreviation De-hallucination" 656 }, 657 { 658 "id": "assessing-answerability-queries-2024", 659 "title": "Assessing the Answerability of Queries in Retrieval-Augmented Code Generation" 660 }, 661 { 662 "id": "assessing-correctness-llmbased-2025", 663 "title": "Assessing Correctness in LLM-Based Code Generation via Uncertainty Estimation" 664 }, 665 { 666 "id": "assessing-data-extraction-2026", 667 "title": "Assessing data extraction in randomized clinical trials with large language models" 668 }, 669 { 670 "id": "assessing-domainlevel-susceptibility-2026", 671 "title": "Assessing Domain-Level Susceptibility to Emergent Misalignment from Narrow Finetuning" 672 }, 673 { 674 "id": "assessing-impact-code-2025", 675 "title": "Assessing the Impact of Code Changes on the Fault Localizability of Large Language Models" 676 }, 677 { 678 "id": "assessing-latent-automated-2024", 679 "title": "Assessing the Latent Automated Program Repair Capabilities of Large Language Models using Round-Trip Translation" 680 }, 681 { 682 "id": "assessing-verifying-task-2024", 683 "title": "Assessing and Verifying Task Utility in LLM-Powered Applications" 684 }, 685 { 686 "id": "assignment-incentives-reduce-2023", 687 "title": "Using Assignment Incentives to Reduce Student Procrastination and Encourage Code Review Interactions" 688 }, 689 { 690 "id": "astrovisbench-code-benchmark-2025", 691 "title": "ASTROVISBENCH: A Code Benchmark for Scientific Computing and Visualization in Astronomy" 692 }, 693 { 694 "id": "asymptotic-study-incontext-2025", 695 "title": "Asymptotic Study of In-Context Learning with Random Transformers Through Equivalent Models" 696 }, 697 { 698 "id": "atlas-artifact-generation-2025", 699 "title": "ATLAS: Artifact Generation Through Layered Constraints and LLM × MDE Synergy" 700 }, 701 { 702 "id": "atom-thoughts-markov-2025", 703 "title": "Atom of Thoughts for Markov LLM Test-Time Scaling" 704 }, 705 { 706 "id": "attacking-llms-ai-2025", 707 "title": "Attacking LLMs and AI Agents: Advertisement Embedding Attacks Against Large Language Models" 708 }, 709 { 710 "id": "attacks-by-content-2025", 711 "title": "Attacks by Content: Automated Fact-checking is an AI Security Issue" 712 }, 713 { 714 "id": "attention-all-you-2025", 715 "title": "Attention is All You Need to Defend Against Indirect Prompt Injection Attacks in LLMs" 716 }, 717 { 718 "id": "attention-is-all-you-need-2017", 719 "title": "Attention Is All You Need" 720 }, 721 { 722 "id": "attention-pruning-automated-2025", 723 "title": "Attention Pruning: Automated Fairness Repair of Language Models via Surrogate Simulated Annealing" 724 }, 725 { 726 "id": "attention-tracker-detecting-2024", 727 "title": "Attention Tracker: Detecting Prompt Injection Attacks in LLMs" 728 }, 729 { 730 "id": "audit-trails-accountability-2026", 731 "title": "Audit Trails for Accountability in Large Language Models" 732 }, 733 { 734 "id": "auditing-fairness-under-2026", 735 "title": "Auditing Fairness under Model Updates: Fundamental Complexity and Property-Preserving Updates" 736 }, 737 { 738 "id": "augmented-language-models-2023", 739 "title": "Augmented Language Models: a Survey" 740 }, 741 { 742 "id": "autocodebench-large-language-2025", 743 "title": "AutoCodeBench: Large Language Models are Automatic Code Benchmark Generators" 744 }, 745 { 746 "id": "autocypher-improving-llms-2024", 747 "title": "Auto-Cypher: Improving LLMs on Cypher generation via LLM-supervised generation-verification framework" 748 }, 749 { 750 "id": "autoflow-automated-workflow-2024", 751 "title": "AutoFlow: Automated Workflow Generation for Large Language Model Agents" 752 }, 753 { 754 "id": "autogen-enabling-nextgen-2023", 755 "title": "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation" 756 }, 757 { 758 "id": "autogen-multi-agent-2023", 759 "title": "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation" 760 }, 761 { 762 "id": "autogenics-automated-generation-2024", 763 "title": "AUTOGENICS: Automated Generation of Context-Aware Inline Comments for Code Snippets on Programming Q&A Sites Using LLM" 764 }, 765 { 766 "id": "autokaggle-multiagent-framework-2024", 767 "title": "AutoKaggle: A Multi-Agent Framework for Autonomous Data Science Competitions" 768 }, 769 { 770 "id": "automated-bug-detection-2025", 771 "title": "Automated Bug Detection and Correction in Software Development using Machine Learning" 772 }, 773 { 774 "id": "automated-cc-program-2024", 775 "title": "Automated C/C++ Program Repair for High-Level Synthesis via Large Language Models" 776 }, 777 { 778 "id": "automated-code-generation-2025", 779 "title": "Automated Code Generation and Validation for Software Components of Microcontrollers" 780 }, 781 { 782 "id": "automated-code-review-practice-2024", 783 "title": "Automated Code Review In Practice" 784 }, 785 { 786 "id": "automated-discovery-test-2025", 787 "title": "Automated Discovery of Test Oracles for Database Management Systems Using LLMs" 788 }, 789 { 790 "id": "automated-extraction-mechanical-2026", 791 "title": "Automated Extraction of Mechanical Constitutive Models from Scientific Literature using Large Language Models: Applications in Cultural Heritage Conservation" 792 }, 793 { 794 "id": "automated-formalization-conceptual-2025", 795 "title": "Automated Formalization via Conceptual Retrieval-Augmented LLMs" 796 }, 797 { 798 "id": "automated-knowledge-component-2025", 799 "title": "Automated Knowledge Component Generation for Interpretable Knowledge Tracing in Coding Problems" 800 }, 801 { 802 "id": "automated-program-repair-2022", 803 "title": "Practical Program Repair in the Era of Large Pre-trained Language Models" 804 }, 805 { 806 "id": "automated-program-repair-2023", 807 "title": "Keep the Conversation Going: Fixing 162 out of 337 bugs for $0.42 each using ChatGPT" 808 }, 809 { 810 "id": "automated-program-repair-2023-2", 811 "title": "Enhancing Automated Program Repair through Fine-tuning and Prompt Engineering" 812 }, 813 { 814 "id": "automated-program-repair-2024", 815 "title": "Automated Program Repair: Emerging trends pose and expose problems for benchmarks" 816 }, 817 { 818 "id": "automated-program-repair-2025", 819 "title": "Automated Program Repair Based on REST API Specifications Using Large Language Models" 820 }, 821 { 822 "id": "automated-program-repair-2025-2", 823 "title": "Automated Program Repair of Uncompilable Student Code" 824 }, 825 { 826 "id": "automated-repair-ai-2024", 827 "title": "Automated Repair of AI Code with Large Language Models and Formal Verification" 828 }, 829 { 830 "id": "automated-repair-c-2025", 831 "title": "Automated Repair of C Programs Using Large Language Models" 832 }, 833 { 834 "id": "automated-smart-contract-2025", 835 "title": "Towards Automated Smart Contract Generation: Evaluation, Benchmarking, and Retrieval-Augmented Repair" 836 }, 837 { 838 "id": "automated-structural-testing-2026", 839 "title": "Automated structural testing of LLM-based agents: methods, framework, and case studies" 840 }, 841 { 842 "id": "automated-test-case-2024", 843 "title": "Automated Test Case Repair Using Language Models" 844 }, 845 { 846 "id": "automated-test-generation-2024", 847 "title": "Automated test generation to evaluate tool-augmented LLMs as conversational AI agents" 848 }, 849 { 850 "id": "automated-unit-test-2024", 851 "title": "Automated Unit Test Improvement using Large Language Models at Meta" 852 }, 853 { 854 "id": "automatic-generation-benchmarks-2024", 855 "title": "Automatic Generation of Benchmarks and Reliable LLM Judgment for Code Tasks" 856 }, 857 { 858 "id": "automatic-universal-prompt-2024", 859 "title": "Automatic and Universal Prompt Injection Attacks against Large Language Models" 860 }, 861 { 862 "id": "automatically-benchmarking-code-agents-2025", 863 "title": "Automatically Benchmarking LLM Code Agents through Agent-driven Annotation and Evaluation" 864 }, 865 { 866 "id": "automatically-generating-web-2025", 867 "title": "Automatically Generating Web Applications from Requirements Via Multi-Agent Test-Driven Development" 868 }, 869 { 870 "id": "automatically-surfacing-opportunities-2025", 871 "title": "Automatically Surfacing Opportunities for Improvements In Internet-Scale Applications" 872 }, 873 { 874 "id": "automating-deception-scalable-2025", 875 "title": "Automating Deception: Scalable Multi-Turn LLM Jailbreaks" 876 }, 877 { 878 "id": "automating-rest-api-2024", 879 "title": "Automating REST API Postman Test Cases Using LLM" 880 }, 881 { 882 "id": "automating-structural-engineering-2025", 883 "title": "Automating Structural Engineering Workflows with Large Language Model Agents" 884 }, 885 { 886 "id": "automation-ai-intergenerational-2025", 887 "title": "Automation, AI, and the Intergenerational Transmission of Knowledge" 888 }, 889 { 890 "id": "autonomous-normative-multiagent-2025", 891 "title": "Towards autonomous normative multi-agent systems for Human-AI software engineering teams" 892 }, 893 { 894 "id": "autonomous-supplier-evaluation-2025", 895 "title": "Autonomous Supplier Evaluation and Data Stewardship with AI: Building Transparent and Resilient Supply Chains" 896 }, 897 { 898 "id": "autop2c-llmbased-agent-2025", 899 "title": "AutoP2C: An LLM-Based Agent Framework for Code Repository Generation from Multimodal Content in Academic Papers" 900 }, 901 { 902 "id": "autostreampipe-llm-assisted-2025", 903 "title": "AutoStreamPipe: LLM Assisted Automatic Generation of Data Stream Processing Pipelines" 904 }, 905 { 906 "id": "autotom-scaling-modelbased-2025", 907 "title": "AutoToM: Scaling Model-based Mental Inference via Automated Agent Modeling" 908 }, 909 { 910 "id": "autovcoder-systematic-framework-2024", 911 "title": "AutoVCoder: A Systematic Framework for Automated Verilog Code Generation using LLMs" 912 }, 913 { 914 "id": "autoverus-automated-proof-2024", 915 "title": "AutoVerus: Automated Proof Generation for Rust Code" 916 }, 917 { 918 "id": "avicuna-audiovisual-llm-2024", 919 "title": "Empowering LLMs with Pseudo-Untrimmed Videos for Audio-Visual Temporal Understanding" 920 }, 921 { 922 "id": "awcp-workspace-delegation-2026", 923 "title": "AWCP: A Workspace Delegation Protocol for Deep-Engagement Collaboration across Remote Agents" 924 }, 925 { 926 "id": "backdoor-attribution-elucidating-2025", 927 "title": "Backdoor Attribution: Elucidating and Controlling Backdoors in Language Models" 928 }, 929 { 930 "id": "backdoor-samples-detection-2025", 931 "title": "Backdoor Samples Detection Based on Perturbation Discrepancy Consistency in Pre-trained Language Models" 932 }, 933 { 934 "id": "backdoored-retrievers-prompt-2024", 935 "title": "Backdoored Retrievers for Prompt Injection Attacks on Retrieval Augmented Generation of Large Language Models" 936 }, 937 { 938 "id": "backdooring-bias-large-2026", 939 "title": "Backdooring Bias in Large Language Models" 940 }, 941 { 942 "id": "backdoorpowered-prompt-injection-2025", 943 "title": "Backdoor-Powered Prompt Injection Attacks Nullify Defense Methods" 944 }, 945 { 946 "id": "backportbench-multilingual-benchmark-2025", 947 "title": "BackportBench: A Multilingual Benchmark for Automated Backporting of Patches" 948 }, 949 { 950 "id": "bamas-structuring-budgetaware-2025", 951 "title": "BAMAS: Structuring Budget-Aware Multi-Agent Systems" 952 }, 953 { 954 "id": "bamboo-comprehensive-benchmark-2023", 955 "title": "BAMBOO: A Comprehensive Benchmark for Evaluating Long Text Modeling Capacities of Large Language Models" 956 }, 957 { 958 "id": "banglaforge-llm-collaboration-2025", 959 "title": "BanglaForge: LLM Collaboration with Self-Refinement for Bangla Code Generation" 960 }, 961 { 962 "id": "bashexplainer-retrievalaugmented-bash-2022", 963 "title": "BASHEXPLAINER: Retrieval-Augmented Bash Code Comment Generation based on Fine-tuned CodeBERT" 964 }, 965 { 966 "id": "basics-binary-analysis-2025", 967 "title": "BASICS: Binary Analysis and Stack Integrity Checker System for Buffer Overflow Mitigation" 968 }, 969 { 970 "id": "battleagentbench-benchmark-evaluating-2024", 971 "title": "BattleAgentBench: A Benchmark for Evaluating Cooperation and Competition Capabilities of Language Models in Multi-Agent Systems" 972 }, 973 { 974 "id": "bayesian-reward-models-2024", 975 "title": "Bayesian Reward Models for LLM Alignment" 976 }, 977 { 978 "id": "beavertails-improved-safety-2023", 979 "title": "BeaverTails: Towards Improved Safety Alignment of LLM via a Human-Preference Dataset" 980 }, 981 { 982 "id": "behavior-alignment-new-2024", 983 "title": "Behavior Alignment: A New Perspective of Evaluating LLM-based Conversational Recommender Systems" 984 }, 985 { 986 "id": "bench-benchmark-toolagentuser-2024", 987 "title": "τ-bench: A Benchmark for Tool-Agent-User Interaction in Real-World Domains" 988 }, 989 { 990 "id": "benchmark-contamination-survey-2024", 991 "title": "Benchmark Data Contamination of Large Language Models: A Survey" 992 }, 993 { 994 "id": "benchmark-expertlevel-academic-2025", 995 "title": "Humanity's Last Exam" 996 }, 997 { 998 "id": "benchmark-test-time-scaling-agents-2026", 999 "title": "Benchmark Test-Time Scaling of General LLM Agents" 1000 }, 1001 { 1002 "id": "benchmarking-ai-models-2025", 1003 "title": "Benchmarking AI Models in Software Engineering: A Review, Search Tool, and Unified Approach for Elevating Benchmark Quality" 1004 }, 1005 { 1006 "id": "benchmarking-ai-models-2025-2", 1007 "title": "Benchmarking AI Models in Software Engineering: A Review, Search Tool, and Unified Approach for Elevating Benchmark Quality" 1008 }, 1009 { 1010 "id": "benchmarking-epistemology-construct-2025", 1011 "title": "The Benchmarking Epistemology: Construct Validity for Evaluating Machine Learning Models" 1012 }, 1013 { 1014 "id": "benchmarking-hallucination-large-2024", 1015 "title": "Benchmarking Hallucination in Large Language Models based on Unanswerable Math Word Problem" 1016 }, 1017 { 1018 "id": "benchmarking-large-language-2022", 1019 "title": "Benchmarking Large Language Models for Automated Verilog RTL Code Generation" 1020 }, 1021 { 1022 "id": "benchmarking-large-language-2024", 1023 "title": "Benchmarking Large Language Models with Integer Sequence Generation Tasks" 1024 }, 1025 { 1026 "id": "benchmarking-llms-unit-2025", 1027 "title": "Benchmarking LLMs for Unit Test Generation from Real-World Functions" 1028 }, 1029 { 1030 "id": "benchmarks-automated-commonsense-2023", 1031 "title": "Benchmarks for Automated Commonsense Reasoning: A Survey" 1032 }, 1033 { 1034 "id": "benchmarl-benchmarking-multiagent-2023", 1035 "title": "BenchMARL: Benchmarking Multi-Agent Reinforcement Learning" 1036 }, 1037 { 1038 "id": "bert-pretraining-deep-2018", 1039 "title": "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding" 1040 }, 1041 { 1042 "id": "best-practices-ai-2023", 1043 "title": "Best Practices for Using AI Tools as an Author, Peer Reviewer, or Editor" 1044 }, 1045 { 1046 "id": "beyond-automation-job-redesign-2025", 1047 "title": "Beyond Automation: Redesigning Jobs with LLMs to Enhance Productivity" 1048 }, 1049 { 1050 "id": "beyond-benchmark-innovative-2025", 1051 "title": "Beyond the Benchmark: Innovative Defenses Against Prompt Injection Attacks" 1052 }, 1053 { 1054 "id": "beyond-chinchillaoptimal-accounting-2023", 1055 "title": "Beyond Chinchilla-Optimal: Accounting for Inference in Language Model Scaling Laws" 1056 }, 1057 { 1058 "id": "beyond-chunks-graphs-2025", 1059 "title": "Beyond Chunks and Graphs: Retrieval-Augmented Generation through Triplet-Driven Thinking" 1060 }, 1061 { 1062 "id": "beyond-commit-developer-perspectives-2026", 1063 "title": "Beyond the Commit: Developer Perspectives on Productivity with AI Coding Assistants" 1064 }, 1065 { 1066 "id": "beyond-correctness-benchmarking-2024", 1067 "title": "Beyond Correctness: Benchmarking Multi-dimensional Code Generation for Large Language Models" 1068 }, 1069 { 1070 "id": "beyond-correctness-rewarding-2025", 1071 "title": "Beyond Correctness: Rewarding Faithful Reasoning in Retrieval-Augmented Generation" 1072 }, 1073 { 1074 "id": "beyond-functional-correctness-2024", 1075 "title": "Beyond Functional Correctness: Exploring Hallucinations in LLM-Generated Code" 1076 }, 1077 { 1078 "id": "beyond-hype-comprehensive-2024", 1079 "title": "Beyond the Hype: A Comprehensive Review of Current Trends in Generative AI Research, Teaching Practices, and Tools" 1080 }, 1081 { 1082 "id": "beyond-imitation-game-2022", 1083 "title": "Beyond the Imitation Game: Quantifying and extrapolating the capabilities of language models" 1084 }, 1085 { 1086 "id": "beyond-mimicry-preference-2025", 1087 "title": "Beyond Mimicry: Testing Preference Coherence in Large Language Models Through AI-Specific Trade-Off Scenarios" 1088 }, 1089 { 1090 "id": "beyond-promptinduced-lies-2025", 1091 "title": "Beyond Prompt-Induced Lies: Investigating LLM Deception on Benign Prompts" 1092 }, 1093 { 1094 "id": "beyond-quantity-trajectory-2026", 1095 "title": "Beyond Quantity: Trajectory Diversity Scaling for Code Agents" 1096 }, 1097 { 1098 "id": "beyond-semantic-entropy-2025", 1099 "title": "Beyond Semantic Entropy: Boosting LLM Uncertainty Quantification with Pairwise Semantic Similarity" 1100 }, 1101 { 1102 "id": "beyond-singleagent-safety-2025", 1103 "title": "Beyond Single-Agent Safety: A Taxonomy of Risks in LLM-to-LLM Interactions" 1104 }, 1105 { 1106 "id": "beyond-static-datasets-2023", 1107 "title": "Beyond Static Datasets: A Deep Interaction Approach to LLM Evaluation" 1108 }, 1109 { 1110 "id": "beyond-static-pattern-2025", 1111 "title": "Exploring Automatic Cryptographic API Misuse Detection in the Era of LLMs" 1112 }, 1113 { 1114 "id": "beyond-synthetic-benchmarks-2025", 1115 "title": "Beyond Synthetic Benchmarks: Evaluating LLM Performance on Real-World Class-Level Code Generation" 1116 }, 1117 { 1118 "id": "beyond-textual-context-2025", 1119 "title": "Beyond Textual Context: Structural Graph Encoding with Adaptive Space Alignment to alleviate the hallucination of LLMs" 1120 }, 1121 { 1122 "id": "beyond-token-probes-2025", 1123 "title": "Beyond Token Probes: Hallucination Detection via Activation Tensors with ACT-ViT" 1124 }, 1125 { 1126 "id": "bias-assessment-mitigation-2023", 1127 "title": "Bias Testing and Mitigation in LLM-based Code Generation" 1128 }, 1129 { 1130 "id": "bias-unveiled-investigating-2024", 1131 "title": "Bias Unveiled: Investigating Social Bias in LLM-Generated Code" 1132 }, 1133 { 1134 "id": "biasalert-plugandplay-tool-2024", 1135 "title": "BiasAlert: A Plug-and-play Tool for Social Bias Detection in LLMs" 1136 }, 1137 { 1138 "id": "bigcodebench-2024", 1139 "title": "BigCodeBench: Benchmarking Code Generation with Diverse Function Calls and Complex Instructions" 1140 }, 1141 { 1142 "id": "bigrpo-bidirectional-optimization-2025", 1143 "title": "BI-GRPO: Bidirectional Optimization for Jailbreak Backdoor Injection on LLMs" 1144 }, 1145 { 1146 "id": "bioplanner-automatic-evaluation-2023", 1147 "title": "BioPlanner: Automatic Evaluation of LLMs on Protocol Planning in Biology" 1148 }, 1149 { 1150 "id": "bioragent-retrievalaugmented-generation-2024", 1151 "title": "BioRAGent: A Retrieval-Augmented Generation System for Showcasing Generative Query Expansion and Domain-Specific Search for Scientific Q&A" 1152 }, 1153 { 1154 "id": "biotrouble-multiagent-workflow-2026", 1155 "title": "BioTrouble: A Multi-Agent Workflow for Troubleshooting Molecular Biology Techniques" 1156 }, 1157 { 1158 "id": "bitsaicr-automated-code-2025", 1159 "title": "BitsAI-CR: Automated Code Review via LLM in Practice" 1160 }, 1161 { 1162 "id": "blockdialect-blockwise-finegrained-2025", 1163 "title": "BlockDialect: Block-wise Fine-grained Mixed Format Quantization for Energy-Efficient LLM Inference" 1164 }, 1165 { 1166 "id": "boosting-llm-reasoning-2025", 1167 "title": "Boosting LLM Reasoning via Spontaneous Self-Correction" 1168 }, 1169 { 1170 "id": "boosting-redundancybased-automated-2023", 1171 "title": "Boosting Redundancy-based Automated Program Repair by Fine-grained Pattern Mining" 1172 }, 1173 { 1174 "id": "bottomup-domainspecific-superintelligence-2025", 1175 "title": "Bottom-up Domain-specific Superintelligence: A Reliable Knowledge Graph is What We Need" 1176 }, 1177 { 1178 "id": "boute-costefficient-llm-2026", 1179 "title": "BOUTE: Cost-Efficient LLM Serving with Heterogeneous LLMs and GPUs via Multi-Objective Bayesian Optimization" 1180 }, 1181 { 1182 "id": "bpo-staying-close-2024", 1183 "title": "BPO: Staying Close to the Behavior LLM Creates Better Online LLM Alignment" 1184 }, 1185 { 1186 "id": "break-sequential-dependency-2024", 1187 "title": "Break the Sequential Dependency of LLM Inference Using Lookahead Decoding" 1188 }, 1189 { 1190 "id": "breaking-prompt-wall-2025", 1191 "title": "Breaking the Prompt Wall (I): A Real-World Case Study of Attacking ChatGPT via Lightweight Prompt Injection" 1192 }, 1193 { 1194 "id": "bridging-human-interpretation-2026", 1195 "title": "Bridging Human Interpretation and Machine Representation: A Landscape of Qualitative Data Analysis in the LLM Era" 1196 }, 1197 { 1198 "id": "bridging-llmgenerated-code-2025", 1199 "title": "Bridging LLM-Generated Code and Requirements: Reverse Generation technique and SBC Metric for Developer Insights" 1200 }, 1201 { 1202 "id": "bridging-mde-ai-2023", 1203 "title": "Bridging MDE and AI: A Systematic Review of Domain-Specific Languages and Model-Driven Practices in AI Software Systems Engineering" 1204 }, 1205 { 1206 "id": "broken-neural-scaling-2022", 1207 "title": "Broken Neural Scaling Laws" 1208 }, 1209 { 1210 "id": "browserarena-web-agents-2025", 1211 "title": "BrowserArena: Evaluating LLM Agents on Real-World Web Navigation Tasks" 1212 }, 1213 { 1214 "id": "browsesafe-understanding-preventing-2025", 1215 "title": "BrowseSafe: Understanding and Preventing Prompt Injection Within AI Browser Agents" 1216 }, 1217 { 1218 "id": "budgetaware-agentic-routing-2026", 1219 "title": "Budget-Aware Agentic Routing via Boundary-Guided Training" 1220 }, 1221 { 1222 "id": "bugdar-aiaugmented-secure-2025", 1223 "title": "Bugdar: AI-Augmented Secure Code Review for GitHub Pull Requests" 1224 }, 1225 { 1226 "id": "bugs-large-language-2024", 1227 "title": "Bugs in Large Language Models Generated Code: An Empirical Study" 1228 }, 1229 { 1230 "id": "bugsphp-dataset-automated-2024", 1231 "title": "BugsPHP: A dataset for Automated Program Repair in PHP" 1232 }, 1233 { 1234 "id": "build-your-personalized-2025", 1235 "title": "Build Your Personalized Research Group: A Multiagent Framework for Continual and Interactive Science Automation" 1236 }, 1237 { 1238 "id": "building-coding-assistant-2024", 1239 "title": "Building A Coding Assistant via the Retrieval-Augmented Language Model" 1240 }, 1241 { 1242 "id": "building-cooperative-embodied-2023", 1243 "title": "Building Cooperative Embodied Agents Modularly with Large Language Models" 1244 }, 1245 { 1246 "id": "building-understandable-messaging-2024", 1247 "title": "Building Understandable Messaging for Policy and Evidence Review (BUMPER) with AI" 1248 }, 1249 { 1250 "id": "bypassing-llm-guardrails-2025", 1251 "title": "Bypassing LLM Guardrails: An Empirical Analysis of Evasion Attacks against Prompt Injection and Jailbreak Detection Systems" 1252 }, 1253 { 1254 "id": "bytesized32refactored-extensible-interactive-2025", 1255 "title": "ByteSized32Refactored: Towards an Extensible Interactive Text Games Corpus for LLM World Modeling and Evaluation" 1256 }, 1257 { 1258 "id": "c3po-optimized-large-2025", 1259 "title": "C3PO: Optimized Large Language Model Cascades with Probabilistic Cost Constraints for Reasoning" 1260 }, 1261 { 1262 "id": "cacheprune-neuralbased-attribution-2025", 1263 "title": "CachePrune: Neural-Based Attribution Defense Against Indirect Prompt Injection Attacks" 1264 }, 1265 { 1266 "id": "calibrating-llm-judges-2025", 1267 "title": "Calibrating LLM Judges: Linear Probes for Fast and Reliable Uncertainty Estimation" 1268 }, 1269 { 1270 "id": "calibration-large-language-2026", 1271 "title": "On Calibration of Large Language Models: From Response To Capability" 1272 }, 1273 { 1274 "id": "camel-communicative-agents-2023", 1275 "title": "CAMEL: Communicative Agents for \"Mind\" Exploration of Large Language Model Society" 1276 }, 1277 { 1278 "id": "can-1b-llm-2025", 1279 "title": "Can 1B LLM Surpass 405B LLM? Rethinking Compute-Optimal Test-Time Scaling" 1280 }, 1281 { 1282 "id": "can-ai-serve-2023", 1283 "title": "Can AI Serve as a Substitute for Human Subjects in Software Engineering Research?" 1284 }, 1285 { 1286 "id": "can-chatgpt-support-2024", 1287 "title": "Can ChatGPT Support Developers? An Empirical Evaluation of Large Language Models for Code Generation" 1288 }, 1289 { 1290 "id": "can-indirect-prompt-2025", 1291 "title": "Can Indirect Prompt Injection Attacks Be Detected and Removed?" 1292 }, 1293 { 1294 "id": "can-large-language-2025", 1295 "title": "Can Large Language Models Develop Gambling Addiction?" 1296 }, 1297 { 1298 "id": "can-llm-replace-2023", 1299 "title": "Can LLM Replace Stack Overflow? A Study on Robustness and Reliability of Large Language Model Code Generation" 1300 }, 1301 { 1302 "id": "can-llms-replace-2025", 1303 "title": "Can LLMs Replace Human Evaluators? An Empirical Study of LLM-as-a-Judge in Software Engineering" 1304 }, 1305 { 1306 "id": "can-reasoning-models-2025", 1307 "title": "Can Reasoning Models Obfuscate Reasoning? Stress-Testing Chain-of-Thought Monitorability" 1308 }, 1309 { 1310 "id": "can-vibe-coding-2025", 1311 "title": "Can Vibe Coding Beat Graduate CS Students? An LLM vs. Human Coding Tournament on Market-driven Strategic Planning" 1312 }, 1313 { 1314 "id": "can-we-automatically-2022", 1315 "title": "Can We Automatically Fix Bugs by Learning Edit Operations?" 1316 }, 1317 { 1318 "id": "canaries-coal-mine-2025", 1319 "title": "Canaries in the Coal Mine? Six Facts about the Recent Employment Effects of Artificial Intelligence" 1320 }, 1321 { 1322 "id": "capability-ceilings-autoregressive-2025", 1323 "title": "Capability Ceilings in Autoregressive Language Models: Empirical Evidence from Knowledge-Intensive Tasks" 1324 }, 1325 { 1326 "id": "capabilityoriented-training-induced-2026", 1327 "title": "Capability-Oriented Training Induced Alignment Risk" 1328 }, 1329 { 1330 "id": "capture-contextaware-prompt-2025", 1331 "title": "CAPTURE: Context-Aware Prompt Injection Testing and Robustness Enhancement" 1332 }, 1333 { 1334 "id": "carbon-footprint-evaluation-2025", 1335 "title": "Carbon Footprint Evaluation of Code Generation through LLM as a Service" 1336 }, 1337 { 1338 "id": "caredio-cultural-alignment-2025", 1339 "title": "CAReDiO: Cultural Alignment of LLM via Representativeness and Distinctiveness Guided Data Optimization" 1340 }, 1341 { 1342 "id": "case-4bit-precision-2022", 1343 "title": "The case for 4-bit precision: k-bit Inference Scaling Laws" 1344 }, 1345 { 1346 "id": "case-learned-cloud-2025", 1347 "title": "A Case for Learned Cloud Emulators" 1348 }, 1349 { 1350 "id": "case-study-transformative-2025", 1351 "title": "A case study on the transformative potential of AI in software engineering on LeetCode and ChatGPT" 1352 }, 1353 { 1354 "id": "cast-enhancing-code-2025", 1355 "title": "CAST: Enhancing Code Retrieval-Augmented Generation with Structural Chunking via Abstract Syntax Tree" 1356 }, 1357 { 1358 "id": "caster-breaking-costperformance-2026", 1359 "title": "CASTER: Breaking the Cost-Performance Barrier in Multi-Agent Orchestration via Context-Aware Strategy for Task Efficient Routing" 1360 }, 1361 { 1362 "id": "catarena-evaluating-evolutionary-2025", 1363 "title": "CATArena: Evaluating Evolutionary Capabilities of Code Agents via Iterative Tournaments" 1364 }, 1365 { 1366 "id": "catch-me-if-2025", 1367 "title": "Catch Me If You Can: Rogue AI Detection and Correction at Scale" 1368 }, 1369 { 1370 "id": "catdb-datacatalogguided-llmbased-2025", 1371 "title": "CatDB: Data-catalog-guided, LLM-based Generation of Data-centric ML Pipelines" 1372 }, 1373 { 1374 "id": "causalarmor-efficient-indirect-2026", 1375 "title": "CausalArmor: Efficient Indirect Prompt Injection Guardrails via Causal Attribution" 1376 }, 1377 { 1378 "id": "cbfllm-safe-control-2024", 1379 "title": "CBF-LLM: Safe Control for LLM Alignment" 1380 }, 1381 { 1382 "id": "ccfc-core-corefullcore-2025", 1383 "title": "CCFC: Core & Core–Full–Core Dual-Track Defense for LLM Jailbreak Protection" 1384 }, 1385 { 1386 "id": "cctest-testing-repairing-2022", 1387 "title": "CCTEST: Testing and Repairing Code Completion Systems" 1388 }, 1389 { 1390 "id": "chain-of-thought-prompting-2022", 1391 "title": "Chain-of-Thought Prompting Elicits Reasoning in Large Language Models" 1392 }, 1393 { 1394 "id": "chainofthought-prompting-obscures-2025", 1395 "title": "Chain-of-Thought Prompting Obscures Hallucination Cues in Large Language Models: An Empirical Evaluation" 1396 }, 1397 { 1398 "id": "chainpoll-high-efficacy-2023", 1399 "title": "ChainPoll: A High Efficacy Method for LLM Hallucination Detection" 1400 }, 1401 { 1402 "id": "challenge-optimization-context-2025", 1403 "title": "Challenge on Optimization of Context Collection for Code Completion" 1404 }, 1405 { 1406 "id": "challenges-humanagent-communication-2024", 1407 "title": "Challenges in Human-Agent Communication" 1408 }, 1409 { 1410 "id": "challenges-paths-ai-2025", 1411 "title": "Challenges and Paths Towards AI for Software Engineering" 1412 }, 1413 { 1414 "id": "changes-coding-behavior-2026", 1415 "title": "Changes in Coding Behavior and Performance Since the Introduction of LLMs" 1416 }, 1417 { 1418 "id": "chaos-engineering-20-2025", 1419 "title": "Chaos Engineering 2.0: A Review of AI-Driven, Policy-Guided Resilience for Multi-Cloud Systems" 1420 }, 1421 { 1422 "id": "characterizing-llm-inference-2025", 1423 "title": "Characterizing LLM Inference Energy-Performance Tradeoffs across Workloads and GPU Scaling" 1424 }, 1425 { 1426 "id": "chasing-progress-not-2024", 1427 "title": "Chasing Progress, Not Perfection: Revisiting Strategies for End-to-End LLM Plan Generation" 1428 }, 1429 { 1430 "id": "chat-bankmanfried-exploration-2024", 1431 "title": "Chat Bankman-Fried: an Exploration of LLM Alignment in Finance" 1432 }, 1433 { 1434 "id": "chatassert-llmbased-test-2025", 1435 "title": "CHATASSERT: LLM-Based Test Oracle Generation With External Tools Assistance" 1436 }, 1437 { 1438 "id": "chatbot-arena-open-2024", 1439 "title": "Chatbot Arena: An Open Platform for Evaluating LLMs by Human Preference" 1440 }, 1441 { 1442 "id": "chatdev-communicative-agents-2023", 1443 "title": "ChatDev: Communicative Agents for Software Development" 1444 }, 1445 { 1446 "id": "chateval-better-llmbased-2023", 1447 "title": "ChatEval: Towards Better LLM-based Evaluators through Multi-Agent Debate" 1448 }, 1449 { 1450 "id": "chatgpt-agent-system-2025", 1451 "title": "ChatGPT Agent System Card" 1452 }, 1453 { 1454 "id": "chatgpt-not-all-2023", 1455 "title": "ChatGPT is not all you need. A State of the Art Review of large Generative AI models" 1456 }, 1457 { 1458 "id": "chatinject-abusing-chat-2025", 1459 "title": "ChatInject: Abusing Chat Templates for Prompt Injection in LLM Agents" 1460 }, 1461 { 1462 "id": "chatofthought-collaborative-multiagent-2025", 1463 "title": "Chat-of-Thought: Collaborative Multi-Agent System for Generating Domain Specific Information" 1464 }, 1465 { 1466 "id": "chatunitest-framework-llmbased-2023", 1467 "title": "ChatUniTest: A Framework for LLM-Based Test Generation" 1468 }, 1469 { 1470 "id": "check-your-facts-2023", 1471 "title": "Check Your Facts and Try Again: Improving Large Language Models with External Knowledge and Automated Feedback" 1472 }, 1473 { 1474 "id": "checkpointgcg-auditing-attacking-2025", 1475 "title": "Checkpoint-GCG: Auditing and Attacking Fine-Tuning-Based Prompt Injection Defenses" 1476 }, 1477 { 1478 "id": "chinchilla-compute-optimal-2022", 1479 "title": "Training Compute-Optimal Large Language Models" 1480 }, 1481 { 1482 "id": "chipbench-nextstep-benchmark-2026", 1483 "title": "ChipBench: A Next-Step Benchmark for Evaluating LLM Performance in AI-Aided Chip Design" 1484 }, 1485 { 1486 "id": "chopchop-programmable-framework-2025", 1487 "title": "ChopChop: A Programmable Framework for Semantically Constraining the Output of Language Models" 1488 }, 1489 { 1490 "id": "chorus-zeroshot-hierarchical-2025", 1491 "title": "CHORUS: Zero-shot Hierarchical Retrieval and Orchestration for Generating Linear Programming Code" 1492 }, 1493 { 1494 "id": "ciata-risk-assessment-2025", 1495 "title": "CIA+TA Risk Assessment for AI Reasoning Vulnerabilities" 1496 }, 1497 { 1498 "id": "cigar-costefficient-program-2024", 1499 "title": "CigaR: Cost-efficient Program Repair with LLMs" 1500 }, 1501 { 1502 "id": "citationenhanced-generation-llmbased-2024", 1503 "title": "Citation-Enhanced Generation for LLM-based Chatbots" 1504 }, 1505 { 1506 "id": "citationgrounded-code-comprehension-2025", 1507 "title": "Citation-Grounded Code Comprehension: Preventing LLM Hallucination Through Hybrid Retrieval and Graph-Augmented Context" 1508 }, 1509 { 1510 "id": "citywalk-enhancing-llmbased-2025", 1511 "title": "CITYWALK: Enhancing LLM-Based C++ Unit Test Generation via Project-Dependency Awareness and Language-Specific Knowledge" 1512 }, 1513 { 1514 "id": "ckgfuzzer-llmbased-fuzz-2025", 1515 "title": "CKGFuzzer: LLM-Based Fuzz Driver Generation Enhanced By Code Knowledge Graph" 1516 }, 1517 { 1518 "id": "clarifygpt-empowering-llmbased-2023", 1519 "title": "ClarifyGPT: Empowering LLM-based Code Generation with Intention Clarification" 1520 }, 1521 { 1522 "id": "clarifygpt-framework-enhancing-2024", 1523 "title": "ClarifyGPT: Empowering LLM-based Code Generation with Intention Clarification" 1524 }, 1525 { 1526 "id": "classeval-manuallycrafted-benchmark-2023", 1527 "title": "ClassEval: A Manually-Crafted Benchmark for Evaluating LLMs on Class-level Code Generation" 1528 }, 1529 { 1530 "id": "classifying-addressing-diversity-2025", 1531 "title": "Classifying and Addressing the Diversity of Errors in Retrieval-Augmented Generation Systems" 1532 }, 1533 { 1534 "id": "classit-conversational-lecturealigned-2025", 1535 "title": "CLASS-IT: Conversational and Lecture-Aligned Small-Scale Instruction Tuning for BabyLMs" 1536 }, 1537 { 1538 "id": "claude-sonnet-45-2025", 1539 "title": "System Card: Claude Sonnet 4.5" 1540 }, 1541 { 1542 "id": "clinnoteagents-llm-multiagent-2025", 1543 "title": "ClinNoteAgents: An LLM Multi-Agent System for Predicting and Interpreting Heart Failure 30-Day Readmission from Clinical Notes" 1544 }, 1545 { 1546 "id": "cloud-platforms-developing-2024", 1547 "title": "Cloud Platforms for Developing Generative AI Solutions: A Scoping Review of Tools and Services" 1548 }, 1549 { 1550 "id": "cloudevalyaml-practical-benchmark-2023", 1551 "title": "CloudEval-YAML: A Practical Benchmark for Cloud Configuration Generation" 1552 }, 1553 { 1554 "id": "cloudfix-automated-policy-2025", 1555 "title": "CloudFix: Automated Policy Repair for Cloud Access Control Policies Using Large Language Models" 1556 }, 1557 { 1558 "id": "cmoe-converting-mixtureofexperts-2025", 1559 "title": "CMoE: Converting Mixture-of-Experts from Dense to Accelerate LLM Inference" 1560 }, 1561 { 1562 "id": "cocomic-code-completion-2022", 1563 "title": "COCOMIC: Code Completion By Jointly Modeling In-file and Cross-file Context" 1564 }, 1565 { 1566 "id": "cocreating-automated-mhealth-2023", 1567 "title": "Cocreating an Automated mHealth Apps Systematic Review Process With Generative AI: Design Science Research Approach" 1568 }, 1569 { 1570 "id": "codamosa-escaping-coverage-2023", 1571 "title": "CODAMOSA: Escaping Coverage Plateaus in Test Generation with Pre-trained Large Language Models" 1572 }, 1573 { 1574 "id": "code-aesthetics-agentic-2025", 1575 "title": "Code Aesthetics with Agentic Reward Feedback" 1576 }, 1577 { 1578 "id": "code-hallucination-2024", 1579 "title": "Code Hallucination" 1580 }, 1581 { 1582 "id": "code-hallucinations-slr-2025", 1583 "title": "A Systematic Literature Review of Code Hallucinations in LLMs: Characterization, Mitigation Methods, Challenges, and Future Directions for Reliable AI" 1584 }, 1585 { 1586 "id": "code-less-align-2024", 1587 "title": "Code Less, Align More: Efficient LLM Fine-tuning for Code Generation with Data Pruning" 1588 }, 1589 { 1590 "id": "code-llama-2023", 1591 "title": "Code Llama: Open Foundation Models for Code" 1592 }, 1593 { 1594 "id": "code-me-me-2025", 1595 "title": "Code with Me or for Me? How Increasing AI Automation Transforms Developer Workflows" 1596 }, 1597 { 1598 "id": "code-ownership-opensource-2023", 1599 "title": "Code Ownership in Open-Source AI Software Security" 1600 }, 1601 { 1602 "id": "code-review-automation-2025", 1603 "title": "Code Review Automation using Retrieval Augmented Generation" 1604 }, 1605 { 1606 "id": "code-review-survey-pre-post-llm-2026", 1607 "title": "A Survey of Code Review Benchmarks and Evaluation Practices in Pre-LLM and LLM Era" 1608 }, 1609 { 1610 "id": "codearena-collective-evaluation-2025", 1611 "title": "CodeArena: A Collective Evaluation Platform for LLM Code Generation" 1612 }, 1613 { 1614 "id": "codeaware-prompting-study-2024", 1615 "title": "Code-Aware Prompting: A Study of Coverage-Guided Test Generation in Regression Setting using LLM" 1616 }, 1617 { 1618 "id": "codebenchgen-creating-scalable-2024", 1619 "title": "CodeBenchGen: Creating Scalable Execution-Based Code Generation Benchmarks" 1620 }, 1621 { 1622 "id": "codebert-pretrained-model-2020", 1623 "title": "CodeBERT: A Pre-Trained Model for Programming and Natural Languages" 1624 }, 1625 { 1626 "id": "codecontests-highquality-test-2025", 1627 "title": "CodeContests+: High-Quality Test Case Generation for Competitive Programming" 1628 }, 1629 { 1630 "id": "codecor-llmbased-selfreflective-2025", 1631 "title": "CodeCoR: An LLM-Based Self-Reflective Multi-Agent Framework for Code Generation" 1632 }, 1633 { 1634 "id": "codecriticbench-holistic-code-2025", 1635 "title": "CodeCriticBench: A Holistic Code Critique Benchmark for Large Language Models" 1636 }, 1637 { 1638 "id": "codediting-reasoningbased-metric-2025", 1639 "title": "CODE-DITING: A Reasoning-Based Metric for Functional Alignment in Code Evaluation" 1640 }, 1641 { 1642 "id": "codeelo-benchmarking-competitionlevel-2025", 1643 "title": "CODEELO: Benchmarking Competition-level Code Generation of LLMs with Human-comparable Elo Ratings" 1644 }, 1645 { 1646 "id": "codefill-multitoken-code-2022", 1647 "title": "CodeFill: Multi-token Code Completion by Jointly Learning from Structure and Naming Sequences" 1648 }, 1649 { 1650 "id": "codegrag-bridging-gap-2024", 1651 "title": "CodeGRAG: Bridging the Gap between Natural Language and Programming Language via Graphical Retrieval Augmented Generation" 1652 }, 1653 { 1654 "id": "codeinsight-curated-dataset-2024", 1655 "title": "CodeInsight: A Curated Dataset of Practical Coding Solutions from Stack Overflow" 1656 }, 1657 { 1658 "id": "codejudge-evaluating-code-2024", 1659 "title": "CodeJudge: Evaluating Code Generation with Large Language Models" 1660 }, 1661 { 1662 "id": "codejudgebench-benchmarking-llmasajudge-2025", 1663 "title": "CodeJudgeBench: Benchmarking LLM-as-a-Judge for Coding Tasks" 1664 }, 1665 { 1666 "id": "codemark-imperceptible-watermarking-2023", 1667 "title": "CodeMark: Imperceptible Watermarking for Code Datasets against Neural Code Completion Models" 1668 }, 1669 { 1670 "id": "codemirage-hallucinations-code-2024", 1671 "title": "CodeMirage: Hallucinations in Code Generated by Large Language Models" 1672 }, 1673 { 1674 "id": "codemmlu-multitask-benchmark-2024", 1675 "title": "CodeMMLU: A Multi-Task Benchmark for Assessing Code Understanding & Reasoning Capabilities of CodeLLMs" 1676 }, 1677 { 1678 "id": "codemmlu-multitask-benchmark-2024-2", 1679 "title": "CodeMMLU: A Multi-Task Benchmark for Assessing Code Understanding & Reasoning Capabilities of CodeLLMs" 1680 }, 1681 { 1682 "id": "codemorph-mitigating-data-2025", 1683 "title": "CODEMORPH: Mitigating Data Leakage in Large Language Model Assessment" 1684 }, 1685 { 1686 "id": "codepde-inference-framework-2025", 1687 "title": "CodePDE: An Inference Framework for LLM-driven PDE Solver Generation" 1688 }, 1689 { 1690 "id": "codepromptzip-codespecific-prompt-2025", 1691 "title": "CODEPROMPTZIP: Code-specific Prompt Compression for Retrieval-Augmented Generation in Coding Tasks with LMs" 1692 }, 1693 { 1694 "id": "coderagbench-can-retrieval-2024", 1695 "title": "CODERAG-BENCH: Can Retrieval Augment Code Generation?" 1696 }, 1697 { 1698 "id": "codereviewqa-code-review-2025", 1699 "title": "CodeReviewQA: The Code Review Comprehension Assessment for Large Language Models" 1700 }, 1701 { 1702 "id": "coderl-improving-code-2025", 1703 "title": "CODERL+: Improving Code Generation via Reinforcement with Execution Semantics Alignment" 1704 }, 1705 { 1706 "id": "codescope-executionbased-multilingual-2023", 1707 "title": "CodeScope: An Execution-based Multilingual Multitask Multidimensional Benchmark for Evaluating LLMs on Code Understanding and Generation" 1708 }, 1709 { 1710 "id": "codescore-evaluating-code-2023", 1711 "title": "CodeScore: Evaluating Code Generation by Learning Code Execution" 1712 }, 1713 { 1714 "id": "codesift-llmbased-referenceless-2024", 1715 "title": "CodeSift: An LLM-Based Reference-Less Framework for Automatic Code Validation" 1716 }, 1717 { 1718 "id": "codetm4-detecting-machinegenerated-2025", 1719 "title": "CoDet-M4: Detecting Machine-Generated Code in Multi-Lingual, Multi-Generator and Multi-Domain Settings" 1720 }, 1721 { 1722 "id": "codex-humaneval-2021", 1723 "title": "Evaluating Large Language Models Trained on Code" 1724 }, 1725 { 1726 "id": "codexity-secure-aiassisted-2024", 1727 "title": "Codexity: Secure AI-assisted Code Generation" 1728 }, 1729 { 1730 "id": "codified-context-infrastructure-2026", 1731 "title": "Codified Context: Infrastructure for AI Agents in a Complex Codebase" 1732 }, 1733 { 1734 "id": "coding-agents-generating-2026", 1735 "title": "Are Coding Agents Generating Over-Mocked Tests? An Empirical Study" 1736 }, 1737 { 1738 "id": "coevolving-llm-coder-2025", 1739 "title": "Co-Evolving LLM Coder and Unit Tester via Reinforcement Learning" 1740 }, 1741 { 1742 "id": "coffe-code-efficiency-2025", 1743 "title": "COFFE: A Code Efficiency Benchmark for Code Generation" 1744 }, 1745 { 1746 "id": "cognitive-control-architecture-2025", 1747 "title": "Cognitive Control Architecture (CCA): A Lifecycle Supervision Framework for Robustly Aligned AI Agents" 1748 }, 1749 { 1750 "id": "cognitive-models-ai-2026", 1751 "title": "Cognitive Models and AI Algorithms Provide Templates for Designing Language Agents" 1752 }, 1753 { 1754 "id": "cognitive-overload-attackprompt-2024", 1755 "title": "Cognitive Overload Attack: Prompt Injection for Long Context" 1756 }, 1757 { 1758 "id": "coladder-supporting-programmers-2023", 1759 "title": "CoLadder: Supporting Programmers with Hierarchical Code Generation in Multi-Level Abstraction" 1760 }, 1761 { 1762 "id": "collab-controlled-decoding-2025", 1763 "title": "Collab: Controlled Decoding using Mixture of Agents for LLM Alignment" 1764 }, 1765 { 1766 "id": "collaborating-genai-incentives-2025", 1767 "title": "Collaborating with GenAI: Incentives and Replacements" 1768 }, 1769 { 1770 "id": "collaboration-all-you-2025", 1771 "title": "Collaboration is all you need: LLM Assisted Safe Code Translation" 1772 }, 1773 { 1774 "id": "collaborative-agents-automated-2025", 1775 "title": "Collaborative Agents for Automated Program Repair in Ruby" 1776 }, 1777 { 1778 "id": "collubench-benchmark-predicting-2024", 1779 "title": "Collu-Bench: A Benchmark for Predicting Language Model Hallucinations in Code" 1780 }, 1781 { 1782 "id": "colm-collaborative-large-2025", 1783 "title": "CoLM: Collaborative Large Models via A Client-Server Paradigm" 1784 }, 1785 { 1786 "id": "colt-lightweight-multillm-2026", 1787 "title": "COLT: Lightweight Multi-LLM Collaboration through Shared MCTS Reasoning for Model Compilation" 1788 }, 1789 { 1790 "id": "comback-versatile-dataset-2024", 1791 "title": "ComBack: A Versatile Dataset for Enhancing Compiler Backend Development Efficiency" 1792 }, 1793 { 1794 "id": "combined-approach-program-2024", 1795 "title": "A Combined Approach of Program Analysis and Deep Learning for Code Completion" 1796 }, 1797 { 1798 "id": "combining-costconstrained-runtime-2025", 1799 "title": "Combining Cost-Constrained Runtime Monitors for AI Safety" 1800 }, 1801 { 1802 "id": "combining-large-language-2025", 1803 "title": "Combining Large Language Models with Static Analyzers for Code Review Generation" 1804 }, 1805 { 1806 "id": "comparative-analysis-pretrained-2025", 1807 "title": "Comparative Analysis of Pre-trained Code Language Models for Automated Program Repair via Code Infill Generation" 1808 }, 1809 { 1810 "id": "comparative-review-ai-2024", 1811 "title": "A Comparative Review of AI Techniques for Automated Code Generation in Software Development: Advancements, Challenges, and Future Directions" 1812 }, 1813 { 1814 "id": "comparative-study-ai-2025", 1815 "title": "A Comparative Study of AI and Human Evaluation for Student Website Projects" 1816 }, 1817 { 1818 "id": "comparative-study-dsl-2024", 1819 "title": "A Comparative Study of DSL Code Generation: Fine-Tuning vs. Optimized Retrieval Augmentation" 1820 }, 1821 { 1822 "id": "comparative-study-large-2025", 1823 "title": "A comparative study of large language models with chain-of-thought prompting for automated program repair" 1824 }, 1825 { 1826 "id": "comparing-codefree-bespoke-2024", 1827 "title": "Comparing code-free and bespoke deep learning approaches in ophthalmology" 1828 }, 1829 { 1830 "id": "compass-contrastive-learning-2026", 1831 "title": "ComPass: Contrastive Learning for Automated Patch Correctness Assessment in Program Repair" 1832 }, 1833 { 1834 "id": "competitive-programming-reasoning-models-2025", 1835 "title": "Competitive Programming with Large Reasoning Models" 1836 }, 1837 { 1838 "id": "compilable-neural-code-2022", 1839 "title": "Compilable Neural Code Generation with Compiler Feedback" 1840 }, 1841 { 1842 "id": "compilation-quotient-cq-2024", 1843 "title": "Compilation Quotient (CQ): A Metric for the Compilation Hardness of Programming Languages" 1844 }, 1845 { 1846 "id": "compiler-feedback-loops-2025", 1847 "title": "Feedback Loops and Code Perturbations in LLM-based Software Engineering: A Case Study on a C-to-Rust Translation System" 1848 }, 1849 { 1850 "id": "compilernext-searchbased-compiler-2025", 1851 "title": "Compiler.next: A Search-Based Compiler to Power the AI-Native Future of Software Engineering" 1852 }, 1853 { 1854 "id": "completion-by-comprehension-2025", 1855 "title": "Completion by Comprehension: Guiding Code Generation with Multi-Granularity Understanding" 1856 }, 1857 { 1858 "id": "complexcodeeval-benchmark-evaluating-2024", 1859 "title": "ComplexCodeEval: A Benchmark for Evaluating Large Code Models on More Complex Code" 1860 }, 1861 { 1862 "id": "compounding-reliability-2025", 1863 "title": "The Illusion of Diminishing Returns: Measuring Long Horizon Execution in LLMs" 1864 }, 1865 { 1866 "id": "comprehensive-analysis-machine-2025", 1867 "title": "Comprehensive Analysis of Machine Learning and Deep Learning models on Prompt Injection Classification using Natural Language Processing techniques" 1868 }, 1869 { 1870 "id": "comprehensive-llm-secure-code-2025", 1871 "title": "Rethinking the Evaluation of Secure Code Generation" 1872 }, 1873 { 1874 "id": "comprehensive-study-posttraining-2023", 1875 "title": "ZeroQuant-V2: Exploring Post-training Quantization in LLMs from Comprehensive Study to Low Rank Compensation" 1876 }, 1877 { 1878 "id": "comprehensive-survey-aidriven-2024", 1879 "title": "A Comprehensive Survey of AI-Driven Advancements and Techniques in Automated Program Repair and Code Generation" 1880 }, 1881 { 1882 "id": "comprehensive-survey-llm-2024", 1883 "title": "A Comprehensive Survey of LLM Alignment Techniques: RLHF, RLAIF, PPO, DPO and More" 1884 }, 1885 { 1886 "id": "comprehensive-survey-trustworthiness-2025", 1887 "title": "A Comprehensive Survey on Trustworthiness in Reasoning with Large Language Models" 1888 }, 1889 { 1890 "id": "comprehensive-taxonomy-hallucinations-2025", 1891 "title": "A comprehensive taxonomy of hallucinations in Large Language Models" 1892 }, 1893 { 1894 "id": "comprehensive-verilog-design-2025", 1895 "title": "Comprehensive Verilog Design Problems: A Next-Generation Benchmark Dataset for Evaluating Large Language Models and Agents on RTL Design and Verification" 1896 }, 1897 { 1898 "id": "compute-optimal-inference-2024", 1899 "title": "Scaling LLM Test-Time Compute Optimally can be More Effective than Scaling Model Parameters" 1900 }, 1901 { 1902 "id": "concept-influence-leveraging-2026", 1903 "title": "Concept Influence: Leveraging Interpretability to Improve Performance and Efficiency in Training Data Attribution" 1904 }, 1905 { 1906 "id": "conceptguard-neurosymbolic-safety-2025", 1907 "title": "ConceptGuard: Neuro-Symbolic Safety Guardrails via Sparse Interpretable Jailbreak Concepts" 1908 }, 1909 { 1910 "id": "concerned-data-contamination-2024", 1911 "title": "Concerned with Data Contamination? Assessing Countermeasures in Code Language Model" 1912 }, 1913 { 1914 "id": "conco-optimizing-compilation-2025", 1915 "title": "ConCo: Optimizing Compilation of Concurrent Tensor Programs on Shared GPU" 1916 }, 1917 { 1918 "id": "concordance-randomised-controlled-2024", 1919 "title": "Concordance of randomised controlled trials for artificial intelligence interventions with the CONSORT-AI reporting guidelines" 1920 }, 1921 { 1922 "id": "concrete-roadmap-safety-2025", 1923 "title": "A Concrete Roadmap towards Safety Cases based on Chain-of-Thought Monitoring" 1924 }, 1925 { 1926 "id": "condor-enhance-llm-2025", 1927 "title": "Condor: Enhance LLM Alignment with Knowledge-Driven Data Synthesis and Refinement" 1928 }, 1929 { 1930 "id": "confidencedriven-multiscale-model-2026", 1931 "title": "Confidence-Driven Multi-Scale Model Selection for Cost-Efficient Inference" 1932 }, 1933 { 1934 "id": "confidenceguided-stepwise-model-2025", 1935 "title": "Confidence-Guided Stepwise Model Routing for Cost-Efficient Reasoning" 1936 }, 1937 { 1938 "id": "configuring-agentic-coding-tools-2026", 1939 "title": "Configuring Agentic AI Coding Tools: An Exploratory Study" 1940 }, 1941 { 1942 "id": "conformal-constrained-policy-2025", 1943 "title": "Conformal Constrained Policy Optimization for Cost-Effective LLM Agents" 1944 }, 1945 { 1946 "id": "consistency-key-detecting-2025", 1947 "title": "Consistency Is the Key: Detecting Hallucinations in LLM Generated Text By Checking Inconsistencies About Key Facts" 1948 }, 1949 { 1950 "id": "constitutional-ai-2022", 1951 "title": "Constitutional AI: Harmlessness from AI Feedback" 1952 }, 1953 { 1954 "id": "constrained-decoding-diffusion-2025", 1955 "title": "Constrained Decoding of Diffusion LLMs with Context-Free Grammars" 1956 }, 1957 { 1958 "id": "constrained-decoding-fillinthemiddle-2024", 1959 "title": "Constrained Decoding for Fill-in-the-Middle Code Language Models via Efficient Left and Right Quotienting of Context-Sensitive Grammars" 1960 }, 1961 { 1962 "id": "context-composing-full-2024", 1963 "title": "Context Composing for Full Line Code Completion" 1964 }, 1965 { 1966 "id": "context-engineering-ai-2025", 1967 "title": "Context Engineering for AI Agents in Open-Source Software" 1968 }, 1969 { 1970 "id": "contextalignment-activating-enhancing-2025", 1971 "title": "Context-Alignment: Activating and Enhancing LLM Capabilities in Time Series" 1972 }, 1973 { 1974 "id": "contextaugmented-code-generation-2024", 1975 "title": "Context-Augmented Code Generation Using Programming Knowledge Graphs" 1976 }, 1977 { 1978 "id": "continuous-software-engineering-2024", 1979 "title": "Continuous Software Engineering Practices in AI/ML Development Past the Narrow Lens of MLOps: Adoption Challenges" 1980 }, 1981 { 1982 "id": "contrastrepair-enhancing-conversationbased-2024", 1983 "title": "ContrastRepair: Enhancing Conversation-Based Automated Program Repair via Contrastive Test Case Pairs" 1984 }, 1985 { 1986 "id": "control-models-inide-2026", 1987 "title": "Control Models for In-IDE Code Completion: Saving Inference Costs While Improving Completion Quality Metrics" 1988 }, 1989 { 1990 "id": "controlled-selfevolution-algorithmic-2026", 1991 "title": "Controlled Self-Evolution for Algorithmic Code Optimization" 1992 }, 1993 { 1994 "id": "convergence-dynamics-agenttoagent-2025", 1995 "title": "Convergence Dynamics of Agent-to-Agent Interactions with Misaligned Objectives" 1996 }, 1997 { 1998 "id": "cooperbench-why-coding-2026", 1999 "title": "CooperBench: Why Coding Agents Cannot be Your Teammates Yet" 2000 }, 2001 { 2002 "id": "copilot-arena-platform-2025", 2003 "title": "Copilot Arena: A Platform for Code LLM Evaluation in the Wild" 2004 }, 2005 { 2006 "id": "copilot-code-quality-empirical-2023", 2007 "title": "Evaluating the Code Quality of AI-Assisted Code Generation Tools: An Empirical Study on GitHub Copilot, Amazon CodeWhisperer, and ChatGPT" 2008 }, 2009 { 2010 "id": "copilot-efficiency-real-world-2024", 2011 "title": "Transforming Software Development: Evaluating the Efficiency and Challenges of GitHub Copilot in Real-World Projects" 2012 }, 2013 { 2014 "id": "copilot-evaluation-harness-2024", 2015 "title": "Copilot Evaluation Harness: Evaluating LLM-Guided Software Programming" 2016 }, 2017 { 2018 "id": "copilot-longitudinal-case-study-2025", 2019 "title": "Developer Productivity With and Without GitHub Copilot: A Longitudinal Mixed-Methods Case Study" 2020 }, 2021 { 2022 "id": "copilot-productivity-controlled-2023", 2023 "title": "The Impact of AI on Developer Productivity: Evidence from GitHub Copilot" 2024 }, 2025 { 2026 "id": "copilot-security-weaknesses-2023", 2027 "title": "Security Weaknesses of Copilot-Generated Code in GitHub Projects: An Empirical Study" 2028 }, 2029 { 2030 "id": "copilot-zoominfo-productivity-2025", 2031 "title": "Experience with GitHub Copilot for Developer Productivity at Zoominfo" 2032 }, 2033 { 2034 "id": "copiloting-copilots-fusing-2023", 2035 "title": "Copiloting the Copilots: Fusing Large Language Models with Completion Engines for Automated Program Repair" 2036 }, 2037 { 2038 "id": "coprompter-usercentric-evaluation-2024", 2039 "title": "CoPrompter: User-Centric Evaluation of LLM Instruction Alignment for Improved Prompt Engineering" 2040 }, 2041 { 2042 "id": "core-bench-computational-2024", 2043 "title": "CORE-Bench: Fostering the Credibility of Published Research Through a Computational Reproducibility Agent Benchmark" 2044 }, 2045 { 2046 "id": "core-comprehensive-ontological-2026", 2047 "title": "CORE: Comprehensive Ontological Relation Evaluation for Large Language Models" 2048 }, 2049 { 2050 "id": "corecodebench-decoupling-code-2025", 2051 "title": "CORECODEBENCH: Decoupling Code Intelligence via Fine-Grained Repository-Level Tasks" 2052 }, 2053 { 2054 "id": "correctnessguaranteed-code-generation-2025", 2055 "title": "Correctness-Guaranteed Code Generation via Constrained Decoding" 2056 }, 2057 { 2058 "id": "cosight-enhancing-llmbased-2025", 2059 "title": "Co-Sight: Enhancing LLM-Based Agents via Conflict-Aware Meta-Verification and Trustworthy Reasoning with Structured Facts" 2060 }, 2061 { 2062 "id": "cost-accuracy-longterm-2026", 2063 "title": "Cost and accuracy of long-term memory in Distributed Multi-Agent Systems based on Large Language Models" 2064 }, 2065 { 2066 "id": "cost-dynamic-reasoning-2025", 2067 "title": "The Cost of Dynamic Reasoning: Demystifying AI Agents and Test-Time Scaling from an AI Infrastructure Perspective" 2068 }, 2069 { 2070 "id": "cotbased-synthesizer-enhancing-2025", 2071 "title": "CoT-based Synthesizer: Enhancing LLM Performance through Answer Synthesis" 2072 }, 2073 { 2074 "id": "cotdeceptoradversarial-code-obfuscation-2025", 2075 "title": "CoTDeceptor: Adversarial Code Obfuscation Against CoT-Enhanced LLM Code Agents" 2076 }, 2077 { 2078 "id": "cotrag-integrating-chain-2025", 2079 "title": "CoT-RAG: Integrating Chain of Thought and Retrieval-Augmented Generation to Enhance Reasoning in Large Language Models" 2080 }, 2081 { 2082 "id": "cotran-llmbased-code-2023", 2083 "title": "CoTran: An LLM-based Code Translator using Reinforcement Learning with Feedback from Compiler and Symbolic Execution" 2084 }, 2085 { 2086 "id": "courtguard-local-multiagent-2025", 2087 "title": "CourtGuard: A Local, Multiagent Prompt Injection Classifier" 2088 }, 2089 { 2090 "id": "coverup-effective-high-2024", 2091 "title": "CoverUp: Effective High Coverage Test Generation for Python" 2092 }, 2093 { 2094 "id": "cracking-code-hallucination-2024", 2095 "title": "Cracking the Code of Hallucination in LVLMs with Vision-aware Head Divergence" 2096 }, 2097 { 2098 "id": "cracking-code-scoping-2024", 2099 "title": "Cracking the code: a scoping review to unite disciplines in tackling legal issues in health artificial intelligence" 2100 }, 2101 { 2102 "id": "cracking-sql-barriers-2025", 2103 "title": "Cracking SQL Barriers: An LLM-based Dialect Translation System" 2104 }, 2105 { 2106 "id": "creativeval-evaluating-creativity-2024", 2107 "title": "CreativEval: Evaluating Creativity of LLM-Based Hardware Code Generation" 2108 }, 2109 { 2110 "id": "critical-evaluation-defenses-2025", 2111 "title": "A Critical Evaluation of Defenses against Prompt Injection Attacks" 2112 }, 2113 { 2114 "id": "critical-review-large-2023", 2115 "title": "A Critical Review of Large Language Model on Software Engineering: An Example from ChatGPT and Automated Program Repair" 2116 }, 2117 { 2118 "id": "crosscodeeval-diverse-multilingual-2023", 2119 "title": "CROSSCODEEVAL: A Diverse and Multilingual Benchmark for Cross-File Code Completion" 2120 }, 2121 { 2122 "id": "crossllm-generalization-behavioral-2025", 2123 "title": "Cross-LLM Generalization of Behavioral Backdoor Detection in AI Agent Supply Chains" 2124 }, 2125 { 2126 "id": "crossmodal-memory-compression-2026", 2127 "title": "Cross-Modal Memory Compression for Efficient Multi-Agent Debate" 2128 }, 2129 { 2130 "id": "crossplatform-evaluation-large-2025", 2131 "title": "Cross-Platform Evaluation of Large Language Model Safety in Pediatric Consultations: Evolution of Adversarial Robustness and the Scale Paradox" 2132 }, 2133 { 2134 "id": "crqbench-benchmark-code-2024", 2135 "title": "CRQBench: A Benchmark of Code Reasoning Questions" 2136 }, 2137 { 2138 "id": "crscore-reinforcement-learning-2025", 2139 "title": "CRScore++: Reinforcement Learning with Verifiable Tool and AI Feedback for Code Review" 2140 }, 2141 { 2142 "id": "cruxeval-benchmark-code-2024", 2143 "title": "CRUXEval: A Benchmark for Code Reasoning, Understanding and Execution" 2144 }, 2145 { 2146 "id": "cruxevalx-benchmark-multilingual-2024", 2147 "title": "CRUXEVAL-X: A Benchmark for Multilingual Code Reasoning, Understanding and Execution" 2148 }, 2149 { 2150 "id": "crystalyse-multitool-agent-2025", 2151 "title": "Crystalyse: a multi-tool agent for materials design" 2152 }, 2153 { 2154 "id": "cuckoo-attack-stealthy-2025", 2155 "title": "Cuckoo Attack: Stealthy and Persistent Attacks Against AI-IDE" 2156 }, 2157 { 2158 "id": "cudaforge-agent-framework-2025", 2159 "title": "CudaForge: An Agent Framework with Hardware Feedback for CUDA Kernel Optimization" 2160 }, 2161 { 2162 "id": "cumo-scaling-multimodal-2024", 2163 "title": "CuMo: Scaling Multimodal LLM with Co-Upcycled Mixture-of-Experts" 2164 }, 2165 { 2166 "id": "curious-critical-thinker-2025", 2167 "title": "Curious, Critical Thinker, Empathetic, and Ethically Responsible: Essential Soft Skills for Data Scientists in Software Engineering" 2168 }, 2169 { 2170 "id": "current-challenges-software-2024", 2171 "title": "The Current Challenges of Software Engineering in the Era of Large Language Models" 2172 }, 2173 { 2174 "id": "curriculum-guided-massive-2025", 2175 "title": "Curriculum Guided Massive Multi Agent System Solving for Robust Long Horizon Tasks" 2176 }, 2177 { 2178 "id": "cweval-outcomedriven-evaluation-2025", 2179 "title": "CWEVAL: Outcome-driven Evaluation on Functionality and Security of LLM Code Generation" 2180 }, 2181 { 2182 "id": "cyberbot-ontologygrounded-retrieval-2025", 2183 "title": "CyberBOT: Towards Reliable Cybersecurity Education via Ontology-Grounded Retrieval Augmented Generation" 2184 }, 2185 { 2186 "id": "cyberphysical-system-defense-2025", 2187 "title": "Cyber-Physical System Defense Against Structured False Data Injection Attacks Using an Adaptive Security Framework with Passivity Enhancement" 2188 }, 2189 { 2190 "id": "cybersecurity-ai-hacking-2025", 2191 "title": "Cybersecurity AI: Hacking the AI Hackers via Prompt Injection" 2192 }, 2193 { 2194 "id": "dacode-agent-data-2024", 2195 "title": "DA-Code: Agent Data Science Code Generation Benchmark for Large Language Models" 2196 }, 2197 { 2198 "id": "dancing-critiques-enhancing-2025", 2199 "title": "Dancing with Critiques: Enhancing LLM Reasoning with Stepwise Natural Language Self-Critique" 2200 }, 2201 { 2202 "id": "dangers-poisoned-llms-2025", 2203 "title": "On The Dangers of Poisoned LLMs In Security Automation" 2204 }, 2205 { 2206 "id": "dapo-opensource-llm-2025", 2207 "title": "DAPO: An Open-Source LLM Reinforcement Learning System at Scale" 2208 }, 2209 { 2210 "id": "deep-rl-matters-2018", 2211 "title": "Deep Reinforcement Learning that Matters" 2212 }, 2213 { 2214 "id": "gans-created-equal-2018", 2215 "title": "Are GANs Created Equal? A Large-Scale Study" 2216 }, 2217 { 2218 "id": "leakage-reproducibility-crisis-2023", 2219 "title": "Leakage and the Reproducibility Crisis in ML-based Science" 2220 }, 2221 { 2222 "id": "lost-middle-how-2023", 2223 "title": "Lost in the Middle: How Language Models Use Long Contexts" 2224 }, 2225 { 2226 "id": "lost-mix-evaluating-2025", 2227 "title": "Lost in the Mix: Evaluating LLM Understanding of Code-Switched Text" 2228 }, 2229 { 2230 "id": "lpcd-unified-framework-2025", 2231 "title": "LPCD: Unified Framework from Layer-Wise to Submodule Quantization" 2232 }, 2233 { 2234 "id": "lutllm-efficient-large-2025", 2235 "title": "LUT-LLM: Efficient Large Language Model Inference with Memory-based Computations on FPGAs" 2236 }, 2237 { 2238 "id": "lynx-open-source-2024", 2239 "title": "Lynx: An Open Source Hallucination Evaluation Model" 2240 }, 2241 { 2242 "id": "mactg-multiagent-collaborative-2024", 2243 "title": "MaCTG: Multi-Agent Collaborative Thought Graph for Automatic Programming" 2244 }, 2245 { 2246 "id": "madspear-conformitydriven-prompt-2025", 2247 "title": "MAD-SPEAR: A Conformity-Driven Prompt Injection Attack on Multi-Agent Debate Systems" 2248 }, 2249 { 2250 "id": "maestro-multiagent-evaluation-2026", 2251 "title": "MAESTRO: Multi-Agent Evaluation Suite for Testing, Reliability, and Observability" 2252 }, 2253 { 2254 "id": "magentic-marketplace-opensource-2025", 2255 "title": "Magentic Marketplace: An Open-Source Environment for Studying Agentic Markets" 2256 }, 2257 { 2258 "id": "magenticone-generalist-multiagent-2024", 2259 "title": "Magentic-One: A Generalist Multi-Agent System for Solving Complex Tasks" 2260 }, 2261 { 2262 "id": "neurips-reproducibility-2021", 2263 "title": "Improving Reproducibility in Machine Learning Research (A Report from the NeurIPS 2019 Reproducibility Program)" 2264 }, 2265 { 2266 "id": "questionable-practices-ml-2024", 2267 "title": "Questionable practices in machine learning" 2268 }, 2269 { 2270 "id": "reforms-consensus-ml-2024", 2271 "title": "Determination of the force transmission error in a single-sinker magnetic suspension densimeter due to the fluid-specific effect and its correction for use with gas mixtures containing oxygen" 2272 }, 2273 { 2274 "id": "reproducibility-ml-overview-2025", 2275 "title": "Reproducibility in Machine Learning-based Research: Overview, Barriers and Drivers" 2276 }, 2277 { 2278 "id": "show-your-work-2019", 2279 "title": "Show Your Work: Improved Reporting of Experimental Results" 2280 }, 2281 { 2282 "id": "troubling-trends-ml-2018", 2283 "title": "Troubling Trends in Machine Learning Scholarship" 2284 }, 2285 { 2286 "id": "trust-ai-benchmarks-2025", 2287 "title": "Can We Trust AI Benchmarks? An Interdisciplinary Review of Current Issues in AI Evaluation" 2288 }, 2289 { 2290 "id": "wakefield-ileal-lymphoid-1998", 2291 "title": "Ileal-lymphoid-nodular hyperplasia, non-specific colitis, and pervasive developmental disorder in children" 2292 } 2293 ], 2294 "edges": [ 2295 { 2296 "source": "2025-ai-agent-2026", 2297 "target": "remote-labor-index-2025" 2298 }, 2299 { 2300 "source": "3dshape2vecset-3d-shape-2023", 2301 "target": "attention-is-all-you-need-2017" 2302 }, 2303 { 2304 "source": "3dshape2vecset-3d-shape-2023", 2305 "target": "bert-pretraining-deep-2018" 2306 }, 2307 { 2308 "source": "3dshape2vecset-3d-shape-2023", 2309 "target": "diffusionsdf-conditional-generative-2022" 2310 }, 2311 { 2312 "source": "a2hcoder-llmdriven-coding-2025", 2313 "target": "chatdev-communicative-agents-2023" 2314 }, 2315 { 2316 "source": "a2hcoder-llmdriven-coding-2025", 2317 "target": "metagpt-multi-agent-framework-2023" 2318 }, 2319 { 2320 "source": "a2hcoder-llmdriven-coding-2025", 2321 "target": "verimind-agentic-llm-2025" 2322 }, 2323 { 2324 "source": "a2hcoder-llmdriven-coding-2025", 2325 "target": "chateval-better-llmbased-2023" 2326 }, 2327 { 2328 "source": "a2hcoder-llmdriven-coding-2025", 2329 "target": "generative-agents-interactive-2023" 2330 }, 2331 { 2332 "source": "a2hcoder-llmdriven-coding-2025", 2333 "target": "gpt4-technical-report-2023" 2334 }, 2335 { 2336 "source": "aart-aiassisted-redteaming-2023", 2337 "target": "constitutional-ai-2022" 2338 }, 2339 { 2340 "source": "aart-aiassisted-redteaming-2023", 2341 "target": "chain-of-thought-prompting-2022" 2342 }, 2343 { 2344 "source": "aart-aiassisted-redteaming-2023", 2345 "target": "selfconsistency-improves-chain-2022" 2346 }, 2347 { 2348 "source": "aart-aiassisted-redteaming-2023", 2349 "target": "self-instruct-aligning-language-2022" 2350 }, 2351 { 2352 "source": "acar-adaptive-complexity-2026", 2353 "target": "scaffolded-model-capability-2023" 2354 }, 2355 { 2356 "source": "acar-adaptive-complexity-2026", 2357 "target": "react-synergizing-reasoning-2022" 2358 }, 2359 { 2360 "source": "accelerating-automatic-program-2025", 2361 "target": "rapgen-retrievalaugmented-patch-2023" 2362 }, 2363 { 2364 "source": "accelerating-automatic-program-2025", 2365 "target": "code-llama-2023" 2366 }, 2367 { 2368 "source": "accelerating-automatic-program-2025", 2369 "target": "starcoder2-2024" 2370 }, 2371 { 2372 "source": "accelerating-automatic-program-2025", 2373 "target": "empirical-study-finetuning-2023" 2374 }, 2375 { 2376 "source": "accelerating-automatic-program-2025", 2377 "target": "agentless-2024" 2378 }, 2379 { 2380 "source": "accelerating-automatic-program-2025", 2381 "target": "impact-code-language-2023" 2382 }, 2383 { 2384 "source": "accelerating-automatic-program-2025", 2385 "target": "repairllama-efficient-representations-2023" 2386 }, 2387 { 2388 "source": "accelerating-large-language-2023", 2389 "target": "fast-inference-from-2022" 2390 }, 2391 { 2392 "source": "accelerating-large-language-2023", 2393 "target": "codex-humaneval-2021" 2394 }, 2395 { 2396 "source": "accelerating-large-language-2023", 2397 "target": "chinchilla-compute-optimal-2022" 2398 }, 2399 { 2400 "source": "across-programming-language-2025", 2401 "target": "codex-humaneval-2021" 2402 }, 2403 { 2404 "source": "across-programming-language-2025", 2405 "target": "code-llama-2023" 2406 }, 2407 { 2408 "source": "across-programming-language-2025", 2409 "target": "coderagbench-can-retrieval-2024" 2410 }, 2411 { 2412 "source": "across-programming-language-2025", 2413 "target": "deepseek-coder-2024" 2414 }, 2415 { 2416 "source": "across-programming-language-2025", 2417 "target": "qwen25coder-technical-report-2024" 2418 }, 2419 { 2420 "source": "across-programming-language-2025", 2421 "target": "multiple-scalable-polyglot-2023" 2422 }, 2423 { 2424 "source": "across-programming-language-2025", 2425 "target": "empirical-study-retrievalaugmented-2025" 2426 }, 2427 { 2428 "source": "across-programming-language-2025", 2429 "target": "how-should-i-2025" 2430 }, 2431 { 2432 "source": "across-programming-language-2025", 2433 "target": "swe-bench-2023" 2434 }, 2435 { 2436 "source": "adafuse-adaptive-ensemble-2026", 2437 "target": "llama-3-herd-2024" 2438 }, 2439 { 2440 "source": "adafuse-adaptive-ensemble-2026", 2441 "target": "selfconsistency-improves-chain-2022" 2442 }, 2443 { 2444 "source": "adafuse-adaptive-ensemble-2026", 2445 "target": "scaffolded-model-capability-2023" 2446 }, 2447 { 2448 "source": "adafuse-adaptive-ensemble-2026", 2449 "target": "tree-thoughts-deliberate-2023" 2450 }, 2451 { 2452 "source": "adaplanner-adaptive-planning-2023", 2453 "target": "react-synergizing-reasoning-2022" 2454 }, 2455 { 2456 "source": "adaplanner-adaptive-planning-2023", 2457 "target": "reflexion-language-agents-2023" 2458 }, 2459 { 2460 "source": "adaplanner-adaptive-planning-2023", 2461 "target": "chain-of-thought-prompting-2022" 2462 }, 2463 { 2464 "source": "adaptevolve-improving-efficiency-2026", 2465 "target": "livecodebench-2024" 2466 }, 2467 { 2468 "source": "adaptevolve-improving-efficiency-2026", 2469 "target": "scaffolded-model-capability-2023" 2470 }, 2471 { 2472 "source": "adaptevolve-improving-efficiency-2026", 2473 "target": "selfconsistency-improves-chain-2022" 2474 }, 2475 { 2476 "source": "adapting-knowledge-prompt-2025", 2477 "target": "automated-program-repair-2022" 2478 }, 2479 { 2480 "source": "adapting-knowledge-prompt-2025", 2481 "target": "impact-code-language-2023" 2482 }, 2483 { 2484 "source": "adapting-knowledge-prompt-2025", 2485 "target": "empirical-study-finetuning-2023" 2486 }, 2487 { 2488 "source": "adapting-knowledge-prompt-2025", 2489 "target": "rapgen-retrievalaugmented-patch-2023" 2490 }, 2491 { 2492 "source": "adapting-knowledge-prompt-2025", 2493 "target": "how-effective-neural-2023" 2494 }, 2495 { 2496 "source": "adaptive-attacks-break-2025", 2497 "target": "agentdojo-dynamic-environment-2024" 2498 }, 2499 { 2500 "source": "adaptive-attacks-break-2025", 2501 "target": "jatmo-prompt-injection-2023" 2502 }, 2503 { 2504 "source": "adaptive-attacks-break-2025", 2505 "target": "not-what-youve-2023" 2506 }, 2507 { 2508 "source": "adaptive-attacks-break-2025", 2509 "target": "react-synergizing-reasoning-2022" 2510 }, 2511 { 2512 "source": "adaptive-attacks-bypass-defenses-2025", 2513 "target": "agentdojo-dynamic-environment-2024" 2514 }, 2515 { 2516 "source": "adaptive-attacks-bypass-defenses-2025", 2517 "target": "not-what-youve-2023" 2518 }, 2519 { 2520 "source": "adaptive-attacks-bypass-defenses-2025", 2521 "target": "jailbreaking-safety-aligned-llms-2024" 2522 }, 2523 { 2524 "source": "adaptive-attacks-bypass-defenses-2025", 2525 "target": "datasentinel-gametheoretic-detection-2025" 2526 }, 2527 { 2528 "source": "adaptive-attacks-bypass-defenses-2025", 2529 "target": "melon-provable-defense-2025" 2530 }, 2531 { 2532 "source": "adaptive-test-generation-2023", 2533 "target": "codamosa-escaping-coverage-2023" 2534 }, 2535 { 2536 "source": "adaptive-test-generation-2023", 2537 "target": "codex-humaneval-2021" 2538 }, 2539 { 2540 "source": "adaptrack-constrained-decoding-2025", 2541 "target": "codex-humaneval-2021" 2542 }, 2543 { 2544 "source": "adaptrack-constrained-decoding-2025", 2545 "target": "monitorguided-decoding-code-2023" 2546 }, 2547 { 2548 "source": "adaptrack-constrained-decoding-2025", 2549 "target": "copiloting-copilots-fusing-2023" 2550 }, 2551 { 2552 "source": "adaptrack-constrained-decoding-2025", 2553 "target": "swe-bench-2023" 2554 }, 2555 { 2556 "source": "adaptrack-constrained-decoding-2025", 2557 "target": "deepseek-coder-2024" 2558 }, 2559 { 2560 "source": "adas-automated-design-2024", 2561 "target": "swe-bench-2023" 2562 }, 2563 { 2564 "source": "adas-automated-design-2024", 2565 "target": "chain-of-thought-prompting-2022" 2566 }, 2567 { 2568 "source": "adas-automated-design-2024", 2569 "target": "reflexion-language-agents-2023" 2570 }, 2571 { 2572 "source": "adas-automated-design-2024", 2573 "target": "ai-scientist-fully-2024" 2574 }, 2575 { 2576 "source": "adas-automated-design-2024", 2577 "target": "improving-factuality-reasoning-2023" 2578 }, 2579 { 2580 "source": "adas-automated-design-2024", 2581 "target": "voyager-open-ended-2023" 2582 }, 2583 { 2584 "source": "adoption-generative-artificial-2026", 2585 "target": "metr-rct-2025" 2586 }, 2587 { 2588 "source": "adoption-generative-artificial-2026", 2589 "target": "copilot-productivity-controlled-2023" 2590 }, 2591 { 2592 "source": "adoption-generative-artificial-2026", 2593 "target": "navigating-complexity-generative-2023" 2594 }, 2595 { 2596 "source": "advancements-generative-ai-2023", 2597 "target": "attention-is-all-you-need-2017" 2598 }, 2599 { 2600 "source": "advancements-generative-ai-2023", 2601 "target": "gpt4-technical-report-2023" 2602 }, 2603 { 2604 "source": "advancements-generative-ai-2023", 2605 "target": "codex-humaneval-2021" 2606 }, 2607 { 2608 "source": "advancements-generative-ai-2023", 2609 "target": "alphacode-competition-level-2022" 2610 }, 2611 { 2612 "source": "advancing-code-generation-2025", 2613 "target": "codex-humaneval-2021" 2614 }, 2615 { 2616 "source": "advancing-code-generation-2025", 2617 "target": "your-code-generated-2023" 2618 }, 2619 { 2620 "source": "advancing-code-generation-2025", 2621 "target": "metagpt-multi-agent-framework-2023" 2622 }, 2623 { 2624 "source": "advancing-code-generation-2025", 2625 "target": "chatdev-communicative-agents-2023" 2626 }, 2627 { 2628 "source": "advancing-code-generation-2025", 2629 "target": "survey-llm-code-generation-2025" 2630 }, 2631 { 2632 "source": "advancing-code-generation-2025", 2633 "target": "rise-potential-large-2023" 2634 }, 2635 { 2636 "source": "advancing-code-generation-2025", 2637 "target": "clarifygpt-empowering-llmbased-2023" 2638 }, 2639 { 2640 "source": "advancing-language-model-2025", 2641 "target": "compute-optimal-inference-2024" 2642 }, 2643 { 2644 "source": "advancing-language-model-2025", 2645 "target": "chain-of-thought-prompting-2022" 2646 }, 2647 { 2648 "source": "advancing-language-model-2025", 2649 "target": "qwen25-technical-report-2024" 2650 }, 2651 { 2652 "source": "advancing-language-model-2025", 2653 "target": "gpqa-graduatelevel-googleproof-2023" 2654 }, 2655 { 2656 "source": "advancing-largemolecule-discovery-2025", 2657 "target": "attention-is-all-you-need-2017" 2658 }, 2659 { 2660 "source": "advancing-methodological-development-2025", 2661 "target": "emergent-abilities-large-2022" 2662 }, 2663 { 2664 "source": "advancing-methodological-development-2025", 2665 "target": "emergent-abilities-mirage-2023" 2666 }, 2667 { 2668 "source": "advancing-nursing-regulation-2025", 2669 "target": "metr-rct-2025" 2670 }, 2671 { 2672 "source": "advancing-software-quality-2025", 2673 "target": "codex-humaneval-2021" 2674 }, 2675 { 2676 "source": "advancing-software-quality-2025", 2677 "target": "test-driven-interactive-code-gen-2024" 2678 }, 2679 { 2680 "source": "advancing-software-quality-2025", 2681 "target": "aipowered-code-review-2024" 2682 }, 2683 { 2684 "source": "advancing-software-quality-2025", 2685 "target": "llmassisted-static-analysis-2024" 2686 }, 2687 { 2688 "source": "adversarial-bug-reports-2025", 2689 "target": "swe-bench-2023" 2690 }, 2691 { 2692 "source": "adversarial-bug-reports-2025", 2693 "target": "swe-agent-2024" 2694 }, 2695 { 2696 "source": "adversarial-bug-reports-2025", 2697 "target": "repairagent-llm-bug-repair-2024" 2698 }, 2699 { 2700 "source": "adversarial-bug-reports-2025", 2701 "target": "openhands-ai-sw-agent-2024" 2702 }, 2703 { 2704 "source": "adversarial-bug-reports-2025", 2705 "target": "not-what-youve-2023" 2706 }, 2707 { 2708 "source": "adversarial-threat-vectors-2025", 2709 "target": "attention-is-all-you-need-2017" 2710 }, 2711 { 2712 "source": "advevomarl-shaping-internalized-2025", 2713 "target": "livecodebench-2024" 2714 }, 2715 { 2716 "source": "advevomarl-shaping-internalized-2025", 2717 "target": "gpqa-graduatelevel-googleproof-2023" 2718 }, 2719 { 2720 "source": "advevomarl-shaping-internalized-2025", 2721 "target": "evomarl-coevolutionary-multiagent-2025" 2722 }, 2723 { 2724 "source": "aegis-automated-coevolutionary-2025", 2725 "target": "defending-against-indirect-2024" 2726 }, 2727 { 2728 "source": "aegis-automated-coevolutionary-2025", 2729 "target": "promptarmor-simple-yet-2025" 2730 }, 2731 { 2732 "source": "aegis20-diverse-ai-2025", 2733 "target": "beavertails-improved-safety-2023" 2734 }, 2735 { 2736 "source": "aegis20-diverse-ai-2025", 2737 "target": "aart-aiassisted-redteaming-2023" 2738 }, 2739 { 2740 "source": "aegisagent-autonomous-defense-2025", 2741 "target": "chain-of-thought-prompting-2022" 2742 }, 2743 { 2744 "source": "aegisagent-autonomous-defense-2025", 2745 "target": "selfconsistency-improves-chain-2022" 2746 }, 2747 { 2748 "source": "aegisagent-autonomous-defense-2025", 2749 "target": "prompt-injection-llm-apps-2023" 2750 }, 2751 { 2752 "source": "aegisagent-autonomous-defense-2025", 2753 "target": "gemma-open-models-2024" 2754 }, 2755 { 2756 "source": "agent-contracts-formal-2026", 2757 "target": "uc-berkeley-mast-2025" 2758 }, 2759 { 2760 "source": "agent-contracts-formal-2026", 2761 "target": "livecodebench-2024" 2762 }, 2763 { 2764 "source": "agent-contracts-formal-2026", 2765 "target": "metagpt-multi-agent-framework-2023" 2766 }, 2767 { 2768 "source": "agent-contracts-formal-2026", 2769 "target": "autogen-multi-agent-2023" 2770 }, 2771 { 2772 "source": "agent-contracts-formal-2026", 2773 "target": "react-synergizing-reasoning-2022" 2774 }, 2775 { 2776 "source": "agent-contracts-formal-2026", 2777 "target": "multi-agent-collaboration-survey-2025" 2778 }, 2779 { 2780 "source": "agent-developer-practices-2025", 2781 "target": "uc-berkeley-mast-2025" 2782 }, 2783 { 2784 "source": "agent-developer-practices-2025", 2785 "target": "metagpt-multi-agent-framework-2023" 2786 }, 2787 { 2788 "source": "agent-developer-practices-2025", 2789 "target": "agentless-2024" 2790 }, 2791 { 2792 "source": "agent-developer-practices-2025", 2793 "target": "react-synergizing-reasoning-2022" 2794 }, 2795 { 2796 "source": "agent-error-taxonomy-2025", 2797 "target": "react-synergizing-reasoning-2022" 2798 }, 2799 { 2800 "source": "agent-error-taxonomy-2025", 2801 "target": "tree-thoughts-deliberate-2023" 2802 }, 2803 { 2804 "source": "agent-error-taxonomy-2025", 2805 "target": "reflexion-language-agents-2023" 2806 }, 2807 { 2808 "source": "agent-error-taxonomy-2025", 2809 "target": "gaia-benchmark-general-2023" 2810 }, 2811 { 2812 "source": "agent-error-taxonomy-2025", 2813 "target": "uc-berkeley-mast-2025" 2814 }, 2815 { 2816 "source": "agent-error-taxonomy-2025", 2817 "target": "which-agent-causes-2025" 2818 }, 2819 { 2820 "source": "agent-error-taxonomy-2025", 2821 "target": "toolllm-facilitating-large-2023" 2822 }, 2823 { 2824 "source": "agent-security-bench-2024", 2825 "target": "agentdojo-dynamic-environment-2024" 2826 }, 2827 { 2828 "source": "agent-security-bench-2024", 2829 "target": "formalizing-benchmarking-prompt-2023" 2830 }, 2831 { 2832 "source": "agent-security-bench-2024", 2833 "target": "not-what-youve-2023" 2834 }, 2835 { 2836 "source": "agent-security-bench-2024", 2837 "target": "react-synergizing-reasoning-2022" 2838 }, 2839 { 2840 "source": "agent-security-bench-2024", 2841 "target": "sleeper-agents-2024" 2842 }, 2843 { 2844 "source": "agentasajudge-evaluate-agents-2024", 2845 "target": "judging-llmasajudge-mtbench-2023" 2846 }, 2847 { 2848 "source": "agentasajudge-evaluate-agents-2024", 2849 "target": "swe-bench-2023" 2850 }, 2851 { 2852 "source": "agentasajudge-evaluate-agents-2024", 2853 "target": "openhands-ai-sw-agent-2024" 2854 }, 2855 { 2856 "source": "agentasajudge-evaluate-agents-2024", 2857 "target": "metagpt-multi-agent-framework-2023" 2858 }, 2859 { 2860 "source": "agentasajudge-evaluate-agents-2024", 2861 "target": "mlebench-evaluating-machine-2024" 2862 }, 2863 { 2864 "source": "agentasajudge-evaluate-agents-2024", 2865 "target": "swe-agent-2024" 2866 }, 2867 { 2868 "source": "agentasajudge-evaluate-agents-2024", 2869 "target": "codex-humaneval-2021" 2870 }, 2871 { 2872 "source": "agentasajudge-evaluate-agents-2024", 2873 "target": "chateval-better-llmbased-2023" 2874 }, 2875 { 2876 "source": "agentasajudge-evaluate-agents-2024", 2877 "target": "agentless-2024" 2878 }, 2879 { 2880 "source": "agentask-multiagent-systems-2025", 2881 "target": "metagpt-multi-agent-framework-2023" 2882 }, 2883 { 2884 "source": "agentask-multiagent-systems-2025", 2885 "target": "chatdev-communicative-agents-2023" 2886 }, 2887 { 2888 "source": "agentask-multiagent-systems-2025", 2889 "target": "reflexion-language-agents-2023" 2890 }, 2891 { 2892 "source": "agentask-multiagent-systems-2025", 2893 "target": "agentbench-evaluating-llms-2023" 2894 }, 2895 { 2896 "source": "agentask-multiagent-systems-2025", 2897 "target": "which-agent-causes-2025" 2898 }, 2899 { 2900 "source": "agentask-multiagent-systems-2025", 2901 "target": "deepseek-r1-2025" 2902 }, 2903 { 2904 "source": "agentbased-evaluation-framework-2025", 2905 "target": "codejudge-evaluating-code-2024" 2906 }, 2907 { 2908 "source": "agentbased-evaluation-framework-2025", 2909 "target": "codex-humaneval-2021" 2910 }, 2911 { 2912 "source": "agentbased-evaluation-framework-2025", 2913 "target": "judging-llmasajudge-mtbench-2023" 2914 }, 2915 { 2916 "source": "agentbased-evaluation-framework-2025", 2917 "target": "rise-potential-large-2023" 2918 }, 2919 { 2920 "source": "agentbased-evaluation-framework-2025", 2921 "target": "llm-agents-se-survey-2024" 2922 }, 2923 { 2924 "source": "agentbased-evaluation-framework-2025", 2925 "target": "chatdev-communicative-agents-2023" 2926 }, 2927 { 2928 "source": "agentbased-evaluation-framework-2025", 2929 "target": "can-llms-replace-2025" 2930 }, 2931 { 2932 "source": "agentbench-evaluating-llms-2023", 2933 "target": "react-synergizing-reasoning-2022" 2934 }, 2935 { 2936 "source": "agentbench-evaluating-llms-2023", 2937 "target": "mind2web-generalist-agent-2023" 2938 }, 2939 { 2940 "source": "agentbench-evaluating-llms-2023", 2941 "target": "chain-of-thought-prompting-2022" 2942 }, 2943 { 2944 "source": "agentbench-evaluating-llms-2023", 2945 "target": "reflexion-language-agents-2023" 2946 }, 2947 { 2948 "source": "agentbench-evaluating-llms-2023", 2949 "target": "generative-agents-interactive-2023" 2950 }, 2951 { 2952 "source": "agentbench-evaluating-llms-2023", 2953 "target": "codex-humaneval-2021" 2954 }, 2955 { 2956 "source": "agentbench-evaluating-llms-2023", 2957 "target": "metagpt-multi-agent-framework-2023" 2958 }, 2959 { 2960 "source": "agentbench-evaluating-llms-2023", 2961 "target": "autogen-multi-agent-2023" 2962 }, 2963 { 2964 "source": "agentdojo-dynamic-environment-2024", 2965 "target": "agentbench-evaluating-llms-2023" 2966 }, 2967 { 2968 "source": "agentdojo-dynamic-environment-2024", 2969 "target": "webarena-autonomous-agents-2023" 2970 }, 2971 { 2972 "source": "agentdojo-dynamic-environment-2024", 2973 "target": "not-what-youve-2023" 2974 }, 2975 { 2976 "source": "agentdojo-dynamic-environment-2024", 2977 "target": "react-synergizing-reasoning-2022" 2978 }, 2979 { 2980 "source": "agentdojo-dynamic-environment-2024", 2981 "target": "defending-against-indirect-2024" 2982 }, 2983 { 2984 "source": "agentfuzzer-generic-blackbox-2025", 2985 "target": "agentdojo-dynamic-environment-2024" 2986 }, 2987 { 2988 "source": "agentfuzzer-generic-blackbox-2025", 2989 "target": "formalizing-benchmarking-prompt-2023" 2990 }, 2991 { 2992 "source": "agentfuzzer-generic-blackbox-2025", 2993 "target": "defending-against-indirect-2024" 2994 }, 2995 { 2996 "source": "agentfuzzer-generic-blackbox-2025", 2997 "target": "webarena-autonomous-agents-2023" 2998 }, 2999 { 3000 "source": "agentic-adoption-github-2026", 3001 "target": "metr-rct-2025" 3002 }, 3003 { 3004 "source": "agentic-adoption-github-2026", 3005 "target": "swe-bench-2023" 3006 }, 3007 { 3008 "source": "agentic-adoption-github-2026", 3009 "target": "cursor-speed-quality-tradeoff-2025" 3010 }, 3011 { 3012 "source": "agentic-adoption-github-2026", 3013 "target": "understanding-software-engineering-2025" 3014 }, 3015 { 3016 "source": "agentic-adoption-github-2026", 3017 "target": "copilot-productivity-controlled-2023" 3018 }, 3019 { 3020 "source": "agentic-adoption-github-2026", 3021 "target": "context-engineering-ai-2025" 3022 }, 3023 { 3024 "source": "agentic-adoption-github-2026", 3025 "target": "agentless-2024" 3026 }, 3027 { 3028 "source": "agentic-adoption-github-2026", 3029 "target": "promises-perils-timely-2026" 3030 }, 3031 { 3032 "source": "agentic-adoption-github-2026", 3033 "target": "measuring-ai-ability-2025" 3034 }, 3035 { 3036 "source": "agentic-ai-architectures-2026", 3037 "target": "react-synergizing-reasoning-2022" 3038 }, 3039 { 3040 "source": "agentic-ai-architectures-2026", 3041 "target": "swe-agent-2024" 3042 }, 3043 { 3044 "source": "agentic-ai-architectures-2026", 3045 "target": "survey-autonomous-llm-agents-2023" 3046 }, 3047 { 3048 "source": "agentic-ai-architectures-2026", 3049 "target": "metagpt-multi-agent-framework-2023" 3050 }, 3051 { 3052 "source": "agentic-ai-architectures-2026", 3053 "target": "voyager-open-ended-2023" 3054 }, 3055 { 3056 "source": "agentic-ai-architectures-2026", 3057 "target": "reflexion-language-agents-2023" 3058 }, 3059 { 3060 "source": "agentic-ai-architectures-2026", 3061 "target": "tree-thoughts-deliberate-2023" 3062 }, 3063 { 3064 "source": "agentic-ai-architectures-2026", 3065 "target": "autogen-enabling-nextgen-2023" 3066 }, 3067 { 3068 "source": "agentic-ai-architectures-2026", 3069 "target": "osworld-benchmarking-multimodal-2024" 3070 }, 3071 { 3072 "source": "agentic-ai-architectures-2026", 3073 "target": "gaia-benchmark-general-2023" 3074 }, 3075 { 3076 "source": "agentic-ai-architectures-2026", 3077 "target": "formalizing-benchmarking-prompt-2023" 3078 }, 3079 { 3080 "source": "agentic-ai-architectures-2026", 3081 "target": "agentbench-evaluating-llms-2023" 3082 }, 3083 { 3084 "source": "agentic-ai-assessment-framework-2025", 3085 "target": "uc-berkeley-mast-2025" 3086 }, 3087 { 3088 "source": "agentic-ai-assessment-framework-2025", 3089 "target": "which-agent-causes-2025" 3090 }, 3091 { 3092 "source": "agentic-ai-assessment-framework-2025", 3093 "target": "llm-long-term-memory-eval-2024" 3094 }, 3095 { 3096 "source": "agentic-ai-modernization-2026", 3097 "target": "autogen-multi-agent-2023" 3098 }, 3099 { 3100 "source": "agentic-ai-modernization-2026", 3101 "target": "agentmesh-cooperative-multiagent-2025" 3102 }, 3103 { 3104 "source": "agentic-ai-modernization-2026", 3105 "target": "codex-humaneval-2021" 3106 }, 3107 { 3108 "source": "agentic-ai-modernization-2026", 3109 "target": "survey-code-gen-llm-agents-2025" 3110 }, 3111 { 3112 "source": "agentic-ai-security-survey-2025", 3113 "target": "agentdojo-dynamic-environment-2024" 3114 }, 3115 { 3116 "source": "agentic-ai-security-survey-2025", 3117 "target": "webarena-autonomous-agents-2023" 3118 }, 3119 { 3120 "source": "agentic-ai-security-survey-2025", 3121 "target": "adaptive-attacks-break-2025" 3122 }, 3123 { 3124 "source": "agentic-ai-security-survey-2025", 3125 "target": "not-what-youve-2023" 3126 }, 3127 { 3128 "source": "agentic-ai-security-survey-2025", 3129 "target": "bench-benchmark-toolagentuser-2024" 3130 }, 3131 { 3132 "source": "agentic-ai-security-survey-2025", 3133 "target": "osworld-benchmarking-multimodal-2024" 3134 }, 3135 { 3136 "source": "agentic-ai-software-2025", 3137 "target": "swe-bench-2023" 3138 }, 3139 { 3140 "source": "agentic-ai-software-2025", 3141 "target": "swe-agent-2024" 3142 }, 3143 { 3144 "source": "agentic-ai-software-2025", 3145 "target": "openhands-ai-sw-agent-2024" 3146 }, 3147 { 3148 "source": "agentic-ai-software-2025", 3149 "target": "repoaudit-autonomous-llmagent-2025" 3150 }, 3151 { 3152 "source": "agentic-ai-software-2025-2", 3153 "target": "repairagent-llm-bug-repair-2024" 3154 }, 3155 { 3156 "source": "agentic-ai-software-2025-2", 3157 "target": "swelancer-can-frontier-2025" 3158 }, 3159 { 3160 "source": "agentic-ai-software-2025-2", 3161 "target": "codeaware-prompting-study-2024" 3162 }, 3163 { 3164 "source": "agentic-ai-software-2025-2", 3165 "target": "swe-agent-2024" 3166 }, 3167 { 3168 "source": "agentic-bug-reproduction-2025", 3169 "target": "swtbench-testing-validating-2024" 3170 }, 3171 { 3172 "source": "agentic-bug-reproduction-2025", 3173 "target": "swe-bench-2023" 3174 }, 3175 { 3176 "source": "agentic-bug-reproduction-2025", 3177 "target": "swe-agent-2024" 3178 }, 3179 { 3180 "source": "agentic-bug-reproduction-2025", 3181 "target": "react-synergizing-reasoning-2022" 3182 }, 3183 { 3184 "source": "agentic-bug-reproduction-2025", 3185 "target": "codex-humaneval-2021" 3186 }, 3187 { 3188 "source": "agentic-bug-reproduction-2025", 3189 "target": "llms-se-systematic-review-2023" 3190 }, 3191 { 3192 "source": "agentic-programming-survey-2025", 3193 "target": "swe-bench-2023" 3194 }, 3195 { 3196 "source": "agentic-programming-survey-2025", 3197 "target": "swe-agent-2024" 3198 }, 3199 { 3200 "source": "agentic-programming-survey-2025", 3201 "target": "chatdev-communicative-agents-2023" 3202 }, 3203 { 3204 "source": "agentic-programming-survey-2025", 3205 "target": "llms-se-systematic-review-2023" 3206 }, 3207 { 3208 "source": "agentic-programming-survey-2025", 3209 "target": "llm-agents-se-survey-2024" 3210 }, 3211 { 3212 "source": "agentic-programming-survey-2025", 3213 "target": "react-synergizing-reasoning-2022" 3214 }, 3215 { 3216 "source": "agentic-programming-survey-2025", 3217 "target": "voyager-open-ended-2023" 3218 }, 3219 { 3220 "source": "agentic-programming-survey-2025", 3221 "target": "openhands-ai-sw-agent-2024" 3222 }, 3223 { 3224 "source": "agentic-programming-survey-2025", 3225 "target": "livecodebench-2024" 3226 }, 3227 { 3228 "source": "agentic-programming-survey-2025", 3229 "target": "reflexion-language-agents-2023" 3230 }, 3231 { 3232 "source": "agentic-refactoring-empirical-2025", 3233 "target": "rise-ai-teammates-2025" 3234 }, 3235 { 3236 "source": "agentic-software-engineering-2025", 3237 "target": "software-engineering-by-2025" 3238 }, 3239 { 3240 "source": "agentic-software-engineering-2025", 3241 "target": "llms-se-systematic-review-2023" 3242 }, 3243 { 3244 "source": "agentic-software-engineering-2025", 3245 "target": "rise-ai-teammates-2025" 3246 }, 3247 { 3248 "source": "agentic-software-engineering-2025", 3249 "target": "agentic-ai-software-2025-2" 3250 }, 3251 { 3252 "source": "agentic-software-engineering-2025", 3253 "target": "agentic-programming-survey-2025" 3254 }, 3255 { 3256 "source": "agentless-2024", 3257 "target": "swe-bench-2023" 3258 }, 3259 { 3260 "source": "agentless-2024", 3261 "target": "swe-agent-2024" 3262 }, 3263 { 3264 "source": "agentless-2024", 3265 "target": "llm-agents-se-survey-2024" 3266 }, 3267 { 3268 "source": "agentless-2024", 3269 "target": "masai-modular-architecture-2024" 3270 }, 3271 { 3272 "source": "agentless-2024", 3273 "target": "automated-program-repair-2022" 3274 }, 3275 { 3276 "source": "agentless-2024", 3277 "target": "systematic-literature-review-2024" 3278 }, 3279 { 3280 "source": "agentless-2024", 3281 "target": "rise-potential-large-2023" 3282 }, 3283 { 3284 "source": "agentmesh-cooperative-multiagent-2025", 3285 "target": "chatdev-communicative-agents-2023" 3286 }, 3287 { 3288 "source": "agents-of-chaos-2026", 3289 "target": "not-what-youve-2023" 3290 }, 3291 { 3292 "source": "agents-of-chaos-2026", 3293 "target": "frontier-models-in-context-scheming-2024" 3294 }, 3295 { 3296 "source": "agents4plc-automating-closedloop-2024", 3297 "target": "chatdev-communicative-agents-2023" 3298 }, 3299 { 3300 "source": "agents4plc-automating-closedloop-2024", 3301 "target": "metagpt-multi-agent-framework-2023" 3302 }, 3303 { 3304 "source": "agents4plc-automating-closedloop-2024", 3305 "target": "mapcoder-multiagent-code-2024" 3306 }, 3307 { 3308 "source": "agents4plc-automating-closedloop-2024", 3309 "target": "autogen-enabling-nextgen-2023" 3310 }, 3311 { 3312 "source": "agents4plc-automating-closedloop-2024", 3313 "target": "llm-agents-se-survey-2024" 3314 }, 3315 { 3316 "source": "agents4plc-automating-closedloop-2024", 3317 "target": "code-llama-2023" 3318 }, 3319 { 3320 "source": "agents4plc-automating-closedloop-2024", 3321 "target": "automated-program-repair-2023" 3322 }, 3323 { 3324 "source": "agents4plc-automating-closedloop-2024", 3325 "target": "survey-autonomous-llm-agents-2023" 3326 }, 3327 { 3328 "source": "agents4plc-automating-closedloop-2024", 3329 "target": "llms-se-systematic-review-2023" 3330 }, 3331 { 3332 "source": "agentsllm-augmentative-generation-2025", 3333 "target": "chatbot-arena-open-2024" 3334 }, 3335 { 3336 "source": "agentsllm-augmentative-generation-2025", 3337 "target": "react-synergizing-reasoning-2022" 3338 }, 3339 { 3340 "source": "agentsllm-augmentative-generation-2025", 3341 "target": "autogen-multi-agent-2023" 3342 }, 3343 { 3344 "source": "agentsllm-augmentative-generation-2025", 3345 "target": "chain-of-thought-prompting-2022" 3346 }, 3347 { 3348 "source": "agentsllm-augmentative-generation-2025", 3349 "target": "toolformer-language-models-2023" 3350 }, 3351 { 3352 "source": "agentsnet-coordination-collaborative-2025", 3353 "target": "agentbench-evaluating-llms-2023" 3354 }, 3355 { 3356 "source": "agentsnet-coordination-collaborative-2025", 3357 "target": "llmcoordination-evaluating-analyzing-2023" 3358 }, 3359 { 3360 "source": "agentsnet-coordination-collaborative-2025", 3361 "target": "improving-factuality-reasoning-2023" 3362 }, 3363 { 3364 "source": "agentsnet-coordination-collaborative-2025", 3365 "target": "emergent-abilities-mirage-2023" 3366 }, 3367 { 3368 "source": "agentsnet-coordination-collaborative-2025", 3369 "target": "generative-agents-interactive-2023" 3370 }, 3371 { 3372 "source": "agentsnet-coordination-collaborative-2025", 3373 "target": "bench-benchmark-toolagentuser-2024" 3374 }, 3375 { 3376 "source": "agentspawn-adaptive-multiagent-2026", 3377 "target": "survey-code-gen-llm-agents-2025" 3378 }, 3379 { 3380 "source": "agentspawn-adaptive-multiagent-2026", 3381 "target": "metagpt-multi-agent-framework-2023" 3382 }, 3383 { 3384 "source": "agentspawn-adaptive-multiagent-2026", 3385 "target": "chatdev-communicative-agents-2023" 3386 }, 3387 { 3388 "source": "agentspawn-adaptive-multiagent-2026", 3389 "target": "memgpt-llms-as-2023" 3390 }, 3391 { 3392 "source": "agentspawn-adaptive-multiagent-2026", 3393 "target": "multi-agent-collaboration-survey-2025" 3394 }, 3395 { 3396 "source": "agentspawn-adaptive-multiagent-2026", 3397 "target": "plan-and-act-long-horizon-2025" 3398 }, 3399 { 3400 "source": "agentspawn-adaptive-multiagent-2026", 3401 "target": "agentic-programming-survey-2025" 3402 }, 3403 { 3404 "source": "agentspawn-adaptive-multiagent-2026", 3405 "target": "when-singleagent-skills-2026" 3406 }, 3407 { 3408 "source": "agenttypo-adaptive-typographic-2025", 3409 "target": "visualwebarena-evaluating-multimodal-2024" 3410 }, 3411 { 3412 "source": "agenttypo-adaptive-typographic-2025", 3413 "target": "not-what-youve-2023" 3414 }, 3415 { 3416 "source": "agentvigil-generic-blackbox-2025", 3417 "target": "agentdojo-dynamic-environment-2024" 3418 }, 3419 { 3420 "source": "agentvigil-generic-blackbox-2025", 3421 "target": "not-what-youve-2023" 3422 }, 3423 { 3424 "source": "agentvigil-generic-blackbox-2025", 3425 "target": "formalizing-benchmarking-prompt-2023" 3426 }, 3427 { 3428 "source": "agentvigil-generic-blackbox-2025", 3429 "target": "defending-against-indirect-2024" 3430 }, 3431 { 3432 "source": "agentvigil-generic-blackbox-2025", 3433 "target": "webarena-autonomous-agents-2023" 3434 }, 3435 { 3436 "source": "agentvigil-generic-blackbox-2025", 3437 "target": "visualwebarena-evaluating-multimodal-2024" 3438 }, 3439 { 3440 "source": "agint-agentic-graph-2025", 3441 "target": "masai-modular-architecture-2024" 3442 }, 3443 { 3444 "source": "agint-agentic-graph-2025", 3445 "target": "codex-humaneval-2021" 3446 }, 3447 { 3448 "source": "agint-agentic-graph-2025", 3449 "target": "challenges-paths-ai-2025" 3450 }, 3451 { 3452 "source": "agint-agentic-graph-2025", 3453 "target": "metagpt-multi-agent-framework-2023" 3454 }, 3455 { 3456 "source": "agint-agentic-graph-2025", 3457 "target": "swe-bench-2023" 3458 }, 3459 { 3460 "source": "agint-agentic-graph-2025", 3461 "target": "chatdev-communicative-agents-2023" 3462 }, 3463 { 3464 "source": "agint-agentic-graph-2025", 3465 "target": "llm-agents-se-survey-2024" 3466 }, 3467 { 3468 "source": "agint-agentic-graph-2025", 3469 "target": "tree-thoughts-deliberate-2023" 3470 }, 3471 { 3472 "source": "agint-agentic-graph-2025", 3473 "target": "diversity-empowers-intelligence-2024" 3474 }, 3475 { 3476 "source": "ai-alignment-contemporary-2025", 3477 "target": "constitutional-ai-2022" 3478 }, 3479 { 3480 "source": "ai-alignment-strategies-2025", 3481 "target": "sleeper-agents-2024" 3482 }, 3483 { 3484 "source": "ai-alignment-strategies-2025", 3485 "target": "alignment-faking-2024" 3486 }, 3487 { 3488 "source": "ai-alignment-strategies-2025", 3489 "target": "multiagent-risks-from-2025" 3490 }, 3491 { 3492 "source": "ai-as-cognitive-2025", 3493 "target": "metr-rct-2025" 3494 }, 3495 { 3496 "source": "ai-as-cognitive-2025", 3497 "target": "does-prompt-formatting-2024" 3498 }, 3499 { 3500 "source": "ai-assistance-legal-2023", 3501 "target": "experimental-evidence-productivity-2023" 3502 }, 3503 { 3504 "source": "ai-assistance-legal-2023", 3505 "target": "generative-ai-at-2023" 3506 }, 3507 { 3508 "source": "ai-assistance-legal-2023", 3509 "target": "copilot-productivity-controlled-2023" 3510 }, 3511 { 3512 "source": "ai-assistance-legal-2023", 3513 "target": "chain-of-thought-prompting-2022" 3514 }, 3515 { 3516 "source": "ai-code-maintainability-registered-report-2024", 3517 "target": "copilot-productivity-controlled-2023" 3518 }, 3519 { 3520 "source": "ai-code-not-reproducible-2025", 3521 "target": "codex-humaneval-2021" 3522 }, 3523 { 3524 "source": "ai-code-not-reproducible-2025", 3525 "target": "swe-bench-2023" 3526 }, 3527 { 3528 "source": "ai-code-not-reproducible-2025", 3529 "target": "livecodebench-2024" 3530 }, 3531 { 3532 "source": "ai-code-not-reproducible-2025", 3533 "target": "neurips-reproducibility-2021" 3534 }, 3535 { 3536 "source": "ai-code-review-2025", 3537 "target": "codex-humaneval-2021" 3538 }, 3539 { 3540 "source": "ai-code-survival-open-source-2026", 3541 "target": "codex-humaneval-2021" 3542 }, 3543 { 3544 "source": "ai-code-survival-open-source-2026", 3545 "target": "swe-bench-2023" 3546 }, 3547 { 3548 "source": "ai-code-survival-open-source-2026", 3549 "target": "where-do-ai-2026" 3550 }, 3551 { 3552 "source": "ai-code-survival-open-source-2026", 3553 "target": "rise-ai-teammates-2025" 3554 }, 3555 { 3556 "source": "ai-code-survival-open-source-2026", 3557 "target": "copilot-code-quality-empirical-2023" 3558 }, 3559 { 3560 "source": "ai-code-survival-open-source-2026", 3561 "target": "beyond-synthetic-benchmarks-2025" 3562 }, 3563 { 3564 "source": "ai-code-wild-2025", 3565 "target": "copilot-productivity-controlled-2023" 3566 }, 3567 { 3568 "source": "ai-code-wild-2025", 3569 "target": "copilot-security-weaknesses-2023" 3570 }, 3571 { 3572 "source": "ai-code-wild-2025", 3573 "target": "codex-humaneval-2021" 3574 }, 3575 { 3576 "source": "ai-code-wild-2025", 3577 "target": "codexity-secure-aiassisted-2024" 3578 }, 3579 { 3580 "source": "ai-code-wild-2025", 3581 "target": "security-degradation-iterative-2025" 3582 }, 3583 { 3584 "source": "ai-ides-vs-agents-impact-2026", 3585 "target": "metr-rct-2025" 3586 }, 3587 { 3588 "source": "ai-ides-vs-agents-impact-2026", 3589 "target": "cursor-speed-quality-tradeoff-2025" 3590 }, 3591 { 3592 "source": "ai-ides-vs-agents-impact-2026", 3593 "target": "code-me-me-2025" 3594 }, 3595 { 3596 "source": "ai-ides-vs-agents-impact-2026", 3597 "target": "rise-ai-teammates-2025" 3598 }, 3599 { 3600 "source": "ai-ides-vs-agents-impact-2026", 3601 "target": "aiassisted-programming-decreases-2025" 3602 }, 3603 { 3604 "source": "ai-ides-vs-agents-impact-2026", 3605 "target": "how-much-does-2024" 3606 }, 3607 { 3608 "source": "ai-ides-vs-agents-impact-2026", 3609 "target": "intuition-to-evidence-productivity-2025" 3610 }, 3611 { 3612 "source": "ai-ides-vs-agents-impact-2026", 3613 "target": "agentic-ai-software-2025" 3614 }, 3615 { 3616 "source": "ai-inference-falling-costs-2025", 3617 "target": "intelligence-per-watt-2025" 3618 }, 3619 { 3620 "source": "ai-productivity-index-2025", 3621 "target": "paperbench-evaluating-ais-2025" 3622 }, 3623 { 3624 "source": "ai-prs-code-quality-reuse-2026", 3625 "target": "rise-ai-teammates-2025" 3626 }, 3627 { 3628 "source": "ai-prs-code-quality-reuse-2026", 3629 "target": "metr-rct-2025" 3630 }, 3631 { 3632 "source": "ai-prs-code-quality-reuse-2026", 3633 "target": "swe-bench-2023" 3634 }, 3635 { 3636 "source": "ai-safety-subproblems-2023", 3637 "target": "codex-humaneval-2021" 3638 }, 3639 { 3640 "source": "ai-safety-subproblems-2023", 3641 "target": "constitutional-ai-2022" 3642 }, 3643 { 3644 "source": "ai-scientist-fully-2024", 3645 "target": "swe-bench-2023" 3646 }, 3647 { 3648 "source": "ai-scientist-fully-2024", 3649 "target": "reflexion-language-agents-2023" 3650 }, 3651 { 3652 "source": "ai-scientist-fully-2024", 3653 "target": "codex-humaneval-2021" 3654 }, 3655 { 3656 "source": "ai-scientist-fully-2024", 3657 "target": "judging-llmasajudge-mtbench-2023" 3658 }, 3659 { 3660 "source": "ai-scientistv2-workshoplevel-2025", 3661 "target": "ai-scientist-fully-2024" 3662 }, 3663 { 3664 "source": "ai-scientistv2-workshoplevel-2025", 3665 "target": "mlebench-evaluating-machine-2024" 3666 }, 3667 { 3668 "source": "ai-scientistv2-workshoplevel-2025", 3669 "target": "rebench-evaluating-frontier-2024" 3670 }, 3671 { 3672 "source": "ai-scientistv2-workshoplevel-2025", 3673 "target": "reflexion-language-agents-2023" 3674 }, 3675 { 3676 "source": "ai-software-engineering-2025", 3677 "target": "navigating-complexity-generative-2023" 3678 }, 3679 { 3680 "source": "ai-testing-should-2025", 3681 "target": "alignment-faking-2024" 3682 }, 3683 { 3684 "source": "ai-testing-should-2025", 3685 "target": "sleeper-agents-2024" 3686 }, 3687 { 3688 "source": "ai-testing-should-2025", 3689 "target": "frontier-models-in-context-scheming-2024" 3690 }, 3691 { 3692 "source": "aiassisted-code-editors-2025", 3693 "target": "codex-humaneval-2021" 3694 }, 3695 { 3696 "source": "aiassisted-code-editors-2025", 3697 "target": "codebert-pretrained-model-2020" 3698 }, 3699 { 3700 "source": "aiassisted-fixes-code-2025", 3701 "target": "automated-code-review-practice-2024" 3702 }, 3703 { 3704 "source": "aiassisted-fixes-code-2025", 3705 "target": "llama-3-herd-2024" 3706 }, 3707 { 3708 "source": "aiassisted-fixes-code-2025", 3709 "target": "swe-bench-2023" 3710 }, 3711 { 3712 "source": "aiassisted-fixes-code-2025", 3713 "target": "copilot-productivity-controlled-2023" 3714 }, 3715 { 3716 "source": "aiassisted-fixes-code-2025", 3717 "target": "how-much-does-2024" 3718 }, 3719 { 3720 "source": "aiassisted-fixes-code-2025", 3721 "target": "livecodebench-2024" 3722 }, 3723 { 3724 "source": "aiassisted-fixes-code-2025", 3725 "target": "codex-humaneval-2021" 3726 }, 3727 { 3728 "source": "aiassisted-programming-decreases-2025", 3729 "target": "copilot-productivity-controlled-2023" 3730 }, 3731 { 3732 "source": "aiassisted-programming-decreases-2025", 3733 "target": "impact-large-language-2024" 3734 }, 3735 { 3736 "source": "aiassisted-programming-decreases-2025", 3737 "target": "cursor-speed-quality-tradeoff-2025" 3738 }, 3739 { 3740 "source": "aiassisted-programming-decreases-2025", 3741 "target": "generative-ai-at-2023" 3742 }, 3743 { 3744 "source": "aiassisted-programming-decreases-2025", 3745 "target": "vibe-coding-practice-2025" 3746 }, 3747 { 3748 "source": "aidriven-scholarly-peer-2025", 3749 "target": "chain-of-thought-prompting-2022" 3750 }, 3751 { 3752 "source": "aidriven-scholarly-peer-2025", 3753 "target": "deepreview-improving-llmbased-2025" 3754 }, 3755 { 3756 "source": "aidriven-software-engineering-2024", 3757 "target": "copilot-productivity-controlled-2023" 3758 }, 3759 { 3760 "source": "aidriven-software-engineering-2024", 3761 "target": "alphacode-competition-level-2022" 3762 }, 3763 { 3764 "source": "aidriven-software-engineering-2024", 3765 "target": "gpt4-technical-report-2023" 3766 }, 3767 { 3768 "source": "aime-ai-system-2024", 3769 "target": "codex-humaneval-2021" 3770 }, 3771 { 3772 "source": "aime-ai-system-2024", 3773 "target": "reflexion-language-agents-2023" 3774 }, 3775 { 3776 "source": "aime-ai-system-2024", 3777 "target": "judging-llmasajudge-mtbench-2023" 3778 }, 3779 { 3780 "source": "aime-ai-system-2024", 3781 "target": "gpt4-technical-report-2023" 3782 }, 3783 { 3784 "source": "ainative-software-engineering-2024", 3785 "target": "cursor-speed-quality-tradeoff-2025" 3786 }, 3787 { 3788 "source": "ainative-software-engineering-2024", 3789 "target": "copilot-productivity-controlled-2023" 3790 }, 3791 { 3792 "source": "ainative-software-engineering-2024", 3793 "target": "llms-se-systematic-review-2023" 3794 }, 3795 { 3796 "source": "ainative-software-engineering-2024", 3797 "target": "openhands-ai-sw-agent-2024" 3798 }, 3799 { 3800 "source": "ainative-software-engineering-2024", 3801 "target": "compilernext-searchbased-compiler-2025" 3802 }, 3803 { 3804 "source": "ainstein-assessing-feasibility-2025", 3805 "target": "reflexion-language-agents-2023" 3806 }, 3807 { 3808 "source": "ainstein-assessing-feasibility-2025", 3809 "target": "emergent-abilities-mirage-2023" 3810 }, 3811 { 3812 "source": "aipowered-code-review-2023", 3813 "target": "codebert-pretrained-model-2020" 3814 }, 3815 { 3816 "source": "aipowered-code-review-2024", 3817 "target": "codex-humaneval-2021" 3818 }, 3819 { 3820 "source": "ais-environmental-cost-2025", 3821 "target": "emergent-abilities-mirage-2023" 3822 }, 3823 { 3824 "source": "aixamine-simplified-llm-2025", 3825 "target": "secodeplt-unified-platform-2024" 3826 }, 3827 { 3828 "source": "aixamine-simplified-llm-2025", 3829 "target": "deepseek-r1-2025" 3830 }, 3831 { 3832 "source": "aligned-query-expansion-2025", 3833 "target": "llama-3-herd-2024" 3834 }, 3835 { 3836 "source": "aligned-query-expansion-2025", 3837 "target": "survey-hallucination-large-2023-2" 3838 }, 3839 { 3840 "source": "aligned-query-expansion-2025", 3841 "target": "bayesian-reward-models-2024" 3842 }, 3843 { 3844 "source": "aligning-objective-llmbased-2024", 3845 "target": "less-training-more-2022" 3846 }, 3847 { 3848 "source": "aligning-objective-llmbased-2024", 3849 "target": "automated-program-repair-2023" 3850 }, 3851 { 3852 "source": "aligning-objective-llmbased-2024", 3853 "target": "rapgen-retrievalaugmented-patch-2023" 3854 }, 3855 { 3856 "source": "aligning-objective-llmbased-2024", 3857 "target": "copiloting-copilots-fusing-2023" 3858 }, 3859 { 3860 "source": "aligning-objective-llmbased-2024", 3861 "target": "impact-code-language-2023" 3862 }, 3863 { 3864 "source": "aligning-objective-llmbased-2024", 3865 "target": "survey-learningbased-automated-2023" 3866 }, 3867 { 3868 "source": "alignment-faking-2024", 3869 "target": "sleeper-agents-2024" 3870 }, 3871 { 3872 "source": "alignment-faking-2024", 3873 "target": "frontier-models-in-context-scheming-2024" 3874 }, 3875 { 3876 "source": "alignment-safety-llm-survey-2025", 3877 "target": "constitutional-ai-2022" 3878 }, 3879 { 3880 "source": "alignment-safety-llm-survey-2025", 3881 "target": "deepseek-r1-2025" 3882 }, 3883 { 3884 "source": "alignment-safety-llm-survey-2025", 3885 "target": "sleeper-agents-2024" 3886 }, 3887 { 3888 "source": "alignment-safety-llm-survey-2025", 3889 "target": "alignment-faking-2024" 3890 }, 3891 { 3892 "source": "alleviating-fear-losing-2025", 3893 "target": "beavertails-improved-safety-2023" 3894 }, 3895 { 3896 "source": "alphacode-competition-level-2022", 3897 "target": "codex-humaneval-2021" 3898 }, 3899 { 3900 "source": "alphacode-competition-level-2022", 3901 "target": "codebert-pretrained-model-2020" 3902 }, 3903 { 3904 "source": "ambigswe-interactive-agents-2025", 3905 "target": "swe-bench-2023" 3906 }, 3907 { 3908 "source": "ambigswe-interactive-agents-2025", 3909 "target": "openhands-ai-sw-agent-2024" 3910 }, 3911 { 3912 "source": "ambigswe-interactive-agents-2025", 3913 "target": "copilot-productivity-controlled-2023" 3914 }, 3915 { 3916 "source": "ambigswe-interactive-agents-2025", 3917 "target": "generative-ai-at-2023" 3918 }, 3919 { 3920 "source": "ambigswe-interactive-agents-2025", 3921 "target": "clarifygpt-empowering-llmbased-2023" 3922 }, 3923 { 3924 "source": "ambigswe-interactive-agents-2025", 3925 "target": "test-driven-interactive-code-gen-2024" 3926 }, 3927 { 3928 "source": "ambigswe-interactive-agents-2025", 3929 "target": "ai-scientist-fully-2024" 3930 }, 3931 { 3932 "source": "among-us-measuring-2026", 3933 "target": "improving-factuality-reasoning-2023" 3934 }, 3935 { 3936 "source": "among-us-measuring-2026", 3937 "target": "adaptive-attacks-break-2025" 3938 }, 3939 { 3940 "source": "among-us-measuring-2026", 3941 "target": "moco-onestop-shop-2026" 3942 }, 3943 { 3944 "source": "among-us-sandbox-2025", 3945 "target": "alignment-faking-2024" 3946 }, 3947 { 3948 "source": "among-us-sandbox-2025", 3949 "target": "sleeper-agents-2024" 3950 }, 3951 { 3952 "source": "among-us-sandbox-2025", 3953 "target": "chatbot-arena-open-2024" 3954 }, 3955 { 3956 "source": "analysis-evaluation-synthetic-2025", 3957 "target": "scaling-laws-2020" 3958 }, 3959 { 3960 "source": "analysis-research-status-2025", 3961 "target": "utboost-rigorous-evaluation-2025" 3962 }, 3963 { 3964 "source": "analysis-studentllm-interaction-2025", 3965 "target": "empirical-study-usage-2024" 3966 }, 3967 { 3968 "source": "analysis-studentllm-interaction-2025", 3969 "target": "llm-agents-se-survey-2024" 3970 }, 3971 { 3972 "source": "anatomy-capability-emergence-2026", 3973 "target": "emergent-abilities-large-2022" 3974 }, 3975 { 3976 "source": "anatomy-capability-emergence-2026", 3977 "target": "emergent-abilities-mirage-2023" 3978 }, 3979 { 3980 "source": "ancoder-anchored-code-2026", 3981 "target": "codex-humaneval-2021" 3982 }, 3983 { 3984 "source": "ancoder-anchored-code-2026", 3985 "target": "qwen25coder-technical-report-2024" 3986 }, 3987 { 3988 "source": "ancoder-anchored-code-2026", 3989 "target": "constrained-decoding-diffusion-2025" 3990 }, 3991 { 3992 "source": "animagents-coordinating-multistage-2025", 3993 "target": "metagpt-multi-agent-framework-2023" 3994 }, 3995 { 3996 "source": "animagents-coordinating-multistage-2025", 3997 "target": "swe-agent-2024" 3998 }, 3999 { 4000 "source": "animagents-coordinating-multistage-2025", 4001 "target": "mapcoder-multiagent-code-2024" 4002 }, 4003 { 4004 "source": "animagents-coordinating-multistage-2025", 4005 "target": "survey-autonomous-llm-agents-2023" 4006 }, 4007 { 4008 "source": "animagents-coordinating-multistage-2025", 4009 "target": "rise-potential-large-2023" 4010 }, 4011 { 4012 "source": "animagents-coordinating-multistage-2025", 4013 "target": "multi-agent-collaboration-survey-2025" 4014 }, 4015 { 4016 "source": "animagents-coordinating-multistage-2025", 4017 "target": "generative-ai-at-2023" 4018 }, 4019 { 4020 "source": "annotation-alignment-comparing-2024", 4021 "target": "constitutional-ai-2022" 4022 }, 4023 { 4024 "source": "annotation-alignment-comparing-2024", 4025 "target": "chatbot-arena-open-2024" 4026 }, 4027 { 4028 "source": "antiregulatory-ai-how-2025", 4029 "target": "constitutional-ai-2022" 4030 }, 4031 { 4032 "source": "antiregulatory-ai-how-2025", 4033 "target": "sleeper-agents-2024" 4034 }, 4035 { 4036 "source": "appatch-automated-adaptive-2024", 4037 "target": "chain-of-thought-prompting-2022" 4038 }, 4039 { 4040 "source": "appatch-automated-adaptive-2024", 4041 "target": "how-effective-neural-2023" 4042 }, 4043 { 4044 "source": "applying-rlaif-code-2024", 4045 "target": "gorilla-large-language-2023" 4046 }, 4047 { 4048 "source": "applying-rlaif-code-2024", 4049 "target": "constitutional-ai-2022" 4050 }, 4051 { 4052 "source": "applying-rlaif-code-2024", 4053 "target": "toolformer-language-models-2023" 4054 }, 4055 { 4056 "source": "applying-rlaif-code-2024", 4057 "target": "toolllm-facilitating-large-2023" 4058 }, 4059 { 4060 "source": "appworld-controllable-world-2024", 4061 "target": "react-synergizing-reasoning-2022" 4062 }, 4063 { 4064 "source": "appworld-controllable-world-2024", 4065 "target": "swe-bench-2023" 4066 }, 4067 { 4068 "source": "appworld-controllable-world-2024", 4069 "target": "codex-humaneval-2021" 4070 }, 4071 { 4072 "source": "appworld-controllable-world-2024", 4073 "target": "toolllm-facilitating-large-2023" 4074 }, 4075 { 4076 "source": "appworld-controllable-world-2024", 4077 "target": "executable-code-actions-2024" 4078 }, 4079 { 4080 "source": "appworld-controllable-world-2024", 4081 "target": "reflexion-language-agents-2023" 4082 }, 4083 { 4084 "source": "appworld-controllable-world-2024", 4085 "target": "webarena-autonomous-agents-2023" 4086 }, 4087 { 4088 "source": "appworld-controllable-world-2024", 4089 "target": "gorilla-large-language-2023" 4090 }, 4091 { 4092 "source": "appworld-controllable-world-2024", 4093 "target": "osworld-benchmarking-multimodal-2024" 4094 }, 4095 { 4096 "source": "apr-llm-survey-2025", 4097 "target": "agentless-2024" 4098 }, 4099 { 4100 "source": "apr-llm-survey-2025", 4101 "target": "swe-agent-2024" 4102 }, 4103 { 4104 "source": "apr-llm-survey-2025", 4105 "target": "swe-bench-2023" 4106 }, 4107 { 4108 "source": "apr-llm-survey-2025", 4109 "target": "repairagent-llm-bug-repair-2024" 4110 }, 4111 { 4112 "source": "apr-llm-survey-2025", 4113 "target": "openhands-ai-sw-agent-2024" 4114 }, 4115 { 4116 "source": "apr-llm-survey-2025", 4117 "target": "repairllama-efficient-representations-2023" 4118 }, 4119 { 4120 "source": "apr-llm-survey-2025", 4121 "target": "systematic-literature-review-2024" 4122 }, 4123 { 4124 "source": "april-api-synthesis-2025", 4125 "target": "fuzz4all-universal-fuzzing-2023" 4126 }, 4127 { 4128 "source": "aptserve-adaptive-request-2025", 4129 "target": "codex-humaneval-2021" 4130 }, 4131 { 4132 "source": "aquallm-evaluating-accuracy-2025", 4133 "target": "not-what-youve-2023" 4134 }, 4135 { 4136 "source": "aquallm-evaluating-accuracy-2025", 4137 "target": "sleeper-agents-2024" 4138 }, 4139 { 4140 "source": "arcmemo-abstract-reasoning-2025", 4141 "target": "reflexion-language-agents-2023" 4142 }, 4143 { 4144 "source": "arcmemo-abstract-reasoning-2025", 4145 "target": "voyager-open-ended-2023" 4146 }, 4147 { 4148 "source": "arcmemo-abstract-reasoning-2025", 4149 "target": "memgpt-llms-as-2023" 4150 }, 4151 { 4152 "source": "arcmemo-abstract-reasoning-2025", 4153 "target": "generative-agents-interactive-2023" 4154 }, 4155 { 4156 "source": "arcs-agentic-retrievalaugmented-2025", 4157 "target": "codex-humaneval-2021" 4158 }, 4159 { 4160 "source": "arcs-agentic-retrievalaugmented-2025", 4161 "target": "alphacode-competition-level-2022" 4162 }, 4163 { 4164 "source": "arcs-agentic-retrievalaugmented-2025", 4165 "target": "empirical-study-retrievalaugmented-2025" 4166 }, 4167 { 4168 "source": "arcs-agentic-retrievalaugmented-2025", 4169 "target": "enhancing-code-translation-2024" 4170 }, 4171 { 4172 "source": "arcs-agentic-retrievalaugmented-2025", 4173 "target": "deepseek-coder-v2-2024" 4174 }, 4175 { 4176 "source": "arena-hard-auto-2024", 4177 "target": "judging-llmasajudge-mtbench-2023" 4178 }, 4179 { 4180 "source": "arena-hard-auto-2024", 4181 "target": "chatbot-arena-open-2024" 4182 }, 4183 { 4184 "source": "arena-hard-auto-2024", 4185 "target": "swe-bench-2023" 4186 }, 4187 { 4188 "source": "arena-hard-auto-2024", 4189 "target": "bigcodebench-2024" 4190 }, 4191 { 4192 "source": "arena-hard-auto-2024", 4193 "target": "livecodebench-2024" 4194 }, 4195 { 4196 "source": "arena-hard-auto-2024", 4197 "target": "agentbench-evaluating-llms-2023" 4198 }, 4199 { 4200 "source": "arena-hard-auto-2024", 4201 "target": "codex-humaneval-2021" 4202 }, 4203 { 4204 "source": "arena-hard-auto-2024", 4205 "target": "nlp-evaluation-trouble-2023" 4206 }, 4207 { 4208 "source": "ares-automated-evaluation-2023", 4209 "target": "judging-llmasajudge-mtbench-2023" 4210 }, 4211 { 4212 "source": "ares-automated-evaluation-2023", 4213 "target": "augmented-language-models-2023" 4214 }, 4215 { 4216 "source": "argus-defending-against-2025", 4217 "target": "secalign-defending-against-2024" 4218 }, 4219 { 4220 "source": "argus-defending-against-2025", 4221 "target": "agenttypo-adaptive-typographic-2025" 4222 }, 4223 { 4224 "source": "argus-defending-against-2025", 4225 "target": "defense-against-prompt-2024" 4226 }, 4227 { 4228 "source": "argus-defending-against-2025", 4229 "target": "can-indirect-prompt-2025" 4230 }, 4231 { 4232 "source": "argus-defending-against-2025", 4233 "target": "defending-against-indirect-2024" 4234 }, 4235 { 4236 "source": "argus-defending-against-2025", 4237 "target": "formalizing-benchmarking-prompt-2023" 4238 }, 4239 { 4240 "source": "argus-defending-against-2025", 4241 "target": "manipulating-multimodal-agents-2025" 4242 }, 4243 { 4244 "source": "arks-active-retrieval-2024", 4245 "target": "swe-bench-2023" 4246 }, 4247 { 4248 "source": "arks-active-retrieval-2024", 4249 "target": "reflexion-language-agents-2023" 4250 }, 4251 { 4252 "source": "arks-active-retrieval-2024", 4253 "target": "codex-humaneval-2021" 4254 }, 4255 { 4256 "source": "arks-active-retrieval-2024", 4257 "target": "starcoder-2023" 4258 }, 4259 { 4260 "source": "arks-active-retrieval-2024", 4261 "target": "your-code-generated-2023" 4262 }, 4263 { 4264 "source": "art-adaptive-response-2025", 4265 "target": "selfconsistency-improves-chain-2022" 4266 }, 4267 { 4268 "source": "art-adaptive-response-2025", 4269 "target": "improving-factuality-reasoning-2023" 4270 }, 4271 { 4272 "source": "art-adaptive-response-2025", 4273 "target": "tree-thoughts-deliberate-2023" 4274 }, 4275 { 4276 "source": "art-adaptive-response-2025", 4277 "target": "constitutional-ai-2022" 4278 }, 4279 { 4280 "source": "art-adaptive-response-2025", 4281 "target": "scaffolded-model-capability-2023" 4282 }, 4283 { 4284 "source": "art-adaptive-response-2025", 4285 "target": "judging-llmasajudge-mtbench-2023" 4286 }, 4287 { 4288 "source": "art-adaptive-response-2025", 4289 "target": "chain-of-thought-prompting-2022" 4290 }, 4291 { 4292 "source": "art-repair-optimizing-2025", 4293 "target": "automated-program-repair-2022" 4294 }, 4295 { 4296 "source": "art-repair-optimizing-2025", 4297 "target": "repairagent-llm-bug-repair-2024" 4298 }, 4299 { 4300 "source": "art-repair-optimizing-2025", 4301 "target": "repairllama-efficient-representations-2023" 4302 }, 4303 { 4304 "source": "art-repair-optimizing-2025", 4305 "target": "automated-program-repair-2023" 4306 }, 4307 { 4308 "source": "art-repair-optimizing-2025", 4309 "target": "cigar-costefficient-program-2024" 4310 }, 4311 { 4312 "source": "art-scaling-test-time-compute-2025", 4313 "target": "compute-optimal-inference-2024" 4314 }, 4315 { 4316 "source": "art-scaling-test-time-compute-2025", 4317 "target": "selfconsistency-improves-chain-2022" 4318 }, 4319 { 4320 "source": "art-scaling-test-time-compute-2025", 4321 "target": "chain-of-thought-prompting-2022" 4322 }, 4323 { 4324 "source": "art-scaling-test-time-compute-2025", 4325 "target": "dont-overthink-it-2025" 4326 }, 4327 { 4328 "source": "art-scaling-test-time-compute-2025", 4329 "target": "reflexion-language-agents-2023" 4330 }, 4331 { 4332 "source": "art-scaling-test-time-compute-2025", 4333 "target": "tree-thoughts-deliberate-2023" 4334 }, 4335 { 4336 "source": "art-scaling-test-time-compute-2025", 4337 "target": "gpqa-graduatelevel-googleproof-2023" 4338 }, 4339 { 4340 "source": "art-scaling-test-time-compute-2025", 4341 "target": "dapo-opensource-llm-2025" 4342 }, 4343 { 4344 "source": "artifactsbench-bridging-visualinteractive-2025", 4345 "target": "codex-humaneval-2021" 4346 }, 4347 { 4348 "source": "artifactsbench-bridging-visualinteractive-2025", 4349 "target": "swe-bench-2023" 4350 }, 4351 { 4352 "source": "artifactsbench-bridging-visualinteractive-2025", 4353 "target": "judging-llmasajudge-mtbench-2023" 4354 }, 4355 { 4356 "source": "artifactsbench-bridging-visualinteractive-2025", 4357 "target": "deepseek-r1-2025" 4358 }, 4359 { 4360 "source": "artifactsbench-bridging-visualinteractive-2025", 4361 "target": "webbench-llm-code-2025" 4362 }, 4363 { 4364 "source": "artifactsbench-bridging-visualinteractive-2025", 4365 "target": "webarena-autonomous-agents-2023" 4366 }, 4367 { 4368 "source": "artifactsbench-bridging-visualinteractive-2025", 4369 "target": "codecriticbench-holistic-code-2025" 4370 }, 4371 { 4372 "source": "artificial-brain-neuroscience-2026", 4373 "target": "uc-berkeley-mast-2025" 4374 }, 4375 { 4376 "source": "artificial-brain-neuroscience-2026", 4377 "target": "scaffolded-model-capability-2023" 4378 }, 4379 { 4380 "source": "artificial-human-intelligence-2025", 4381 "target": "generative-ai-at-2023" 4382 }, 4383 { 4384 "source": "artificial-human-intelligence-2025", 4385 "target": "canaries-coal-mine-2025" 4386 }, 4387 { 4388 "source": "artificial-human-intelligence-2025", 4389 "target": "experimental-evidence-productivity-2023" 4390 }, 4391 { 4392 "source": "artificial-human-intelligence-2025", 4393 "target": "emergent-abilities-large-2022" 4394 }, 4395 { 4396 "source": "artificial-human-intelligence-2025", 4397 "target": "emergent-abilities-mirage-2023" 4398 }, 4399 { 4400 "source": "artificial-intelligence-assistance-2026", 4401 "target": "attention-is-all-you-need-2017" 4402 }, 4403 { 4404 "source": "artificial-just-artful-2025", 4405 "target": "sleeper-agents-2024" 4406 }, 4407 { 4408 "source": "artificial-just-artful-2025", 4409 "target": "bigcodebench-2024" 4410 }, 4411 { 4412 "source": "artificial-just-artful-2025", 4413 "target": "rethinking-verification-llm-2025" 4414 }, 4415 { 4416 "source": "artificial-just-artful-2025", 4417 "target": "hallucination-by-code-2025" 4418 }, 4419 { 4420 "source": "artificial-just-artful-2025", 4421 "target": "survey-llm-code-generation-2025" 4422 }, 4423 { 4424 "source": "artificial-organisations-2026", 4425 "target": "constitutional-ai-2022" 4426 }, 4427 { 4428 "source": "artificial-organisations-2026", 4429 "target": "improving-factuality-reasoning-2023" 4430 }, 4431 { 4432 "source": "artificial-organisations-2026", 4433 "target": "sleeper-agents-2024" 4434 }, 4435 { 4436 "source": "artificial-organisations-2026", 4437 "target": "autogen-multi-agent-2023" 4438 }, 4439 { 4440 "source": "artificial-organisations-2026", 4441 "target": "metagpt-multi-agent-framework-2023" 4442 }, 4443 { 4444 "source": "artificial-organisations-2026", 4445 "target": "camel-communicative-agents-2023" 4446 }, 4447 { 4448 "source": "ask-me-anything-2022", 4449 "target": "selfconsistency-improves-chain-2022" 4450 }, 4451 { 4452 "source": "ask-me-anything-2022", 4453 "target": "chain-of-thought-prompting-2022" 4454 }, 4455 { 4456 "source": "ask-me-anything-2022", 4457 "target": "emergent-abilities-large-2022" 4458 }, 4459 { 4460 "source": "ask-me-anything-2022", 4461 "target": "scaling-laws-2020" 4462 }, 4463 { 4464 "source": "askeda-design-assistant-2024", 4465 "target": "gpt4-technical-report-2023" 4466 }, 4467 { 4468 "source": "askeda-design-assistant-2024", 4469 "target": "granite-code-models-2024" 4470 }, 4471 { 4472 "source": "assessing-answerability-queries-2024", 4473 "target": "codex-humaneval-2021" 4474 }, 4475 { 4476 "source": "assessing-answerability-queries-2024", 4477 "target": "selfcheckgpt-zeroresource-blackbox-2023" 4478 }, 4479 { 4480 "source": "assessing-correctness-llmbased-2025", 4481 "target": "livecodebench-2024" 4482 }, 4483 { 4484 "source": "assessing-correctness-llmbased-2025", 4485 "target": "deepseek-r1-2025" 4486 }, 4487 { 4488 "source": "assessing-correctness-llmbased-2025", 4489 "target": "your-code-generated-2023" 4490 }, 4491 { 4492 "source": "assessing-domainlevel-susceptibility-2026", 4493 "target": "sleeper-agents-2024" 4494 }, 4495 { 4496 "source": "assessing-domainlevel-susceptibility-2026", 4497 "target": "poisoning-attacks-llms-2025" 4498 }, 4499 { 4500 "source": "assessing-domainlevel-susceptibility-2026", 4501 "target": "thinking-llms-lie-2025" 4502 }, 4503 { 4504 "source": "assessing-domainlevel-susceptibility-2026", 4505 "target": "exposing-privacy-gaps-2024" 4506 }, 4507 { 4508 "source": "assessing-impact-code-2025", 4509 "target": "codex-humaneval-2021" 4510 }, 4511 { 4512 "source": "assessing-impact-code-2025", 4513 "target": "data-contamination-benchmarks-2023" 4514 }, 4515 { 4516 "source": "assessing-impact-code-2025", 4517 "target": "your-code-generated-2023" 4518 }, 4519 { 4520 "source": "assessing-impact-code-2025", 4521 "target": "livecodebench-2024" 4522 }, 4523 { 4524 "source": "assessing-latent-automated-2024", 4525 "target": "impact-code-language-2023" 4526 }, 4527 { 4528 "source": "assessing-latent-automated-2024", 4529 "target": "codex-humaneval-2021" 4530 }, 4531 { 4532 "source": "assessing-latent-automated-2024", 4533 "target": "automated-program-repair-2022" 4534 }, 4535 { 4536 "source": "assessing-latent-automated-2024", 4537 "target": "automated-program-repair-2023" 4538 }, 4539 { 4540 "source": "assessing-latent-automated-2024", 4541 "target": "less-training-more-2022" 4542 }, 4543 { 4544 "source": "assessing-latent-automated-2024", 4545 "target": "reflexion-language-agents-2023" 4546 }, 4547 { 4548 "source": "assessing-latent-automated-2024", 4549 "target": "gpt4-technical-report-2023" 4550 }, 4551 { 4552 "source": "assessing-latent-automated-2024", 4553 "target": "starcoder-2023" 4554 }, 4555 { 4556 "source": "assessing-latent-automated-2024", 4557 "target": "survey-learningbased-automated-2023" 4558 }, 4559 { 4560 "source": "assessing-verifying-task-2024", 4561 "target": "autogen-multi-agent-2023" 4562 }, 4563 { 4564 "source": "assessing-verifying-task-2024", 4565 "target": "agentbench-evaluating-llms-2023" 4566 }, 4567 { 4568 "source": "assessing-verifying-task-2024", 4569 "target": "react-synergizing-reasoning-2022" 4570 }, 4571 { 4572 "source": "assessing-verifying-task-2024", 4573 "target": "chateval-better-llmbased-2023" 4574 }, 4575 { 4576 "source": "assessing-verifying-task-2024", 4577 "target": "metagpt-multi-agent-framework-2023" 4578 }, 4579 { 4580 "source": "assessing-verifying-task-2024", 4581 "target": "camel-communicative-agents-2023" 4582 }, 4583 { 4584 "source": "astrovisbench-code-benchmark-2025", 4585 "target": "swe-bench-2023" 4586 }, 4587 { 4588 "source": "astrovisbench-code-benchmark-2025", 4589 "target": "bigcodebench-2024" 4590 }, 4591 { 4592 "source": "astrovisbench-code-benchmark-2025", 4593 "target": "codex-humaneval-2021" 4594 }, 4595 { 4596 "source": "astrovisbench-code-benchmark-2025", 4597 "target": "ai-scientist-fully-2024" 4598 }, 4599 { 4600 "source": "astrovisbench-code-benchmark-2025", 4601 "target": "paperbench-evaluating-ais-2025" 4602 }, 4603 { 4604 "source": "astrovisbench-code-benchmark-2025", 4605 "target": "mlebench-evaluating-machine-2024" 4606 }, 4607 { 4608 "source": "astrovisbench-code-benchmark-2025", 4609 "target": "matplotagent-method-evaluation-2024" 4610 }, 4611 { 4612 "source": "astrovisbench-code-benchmark-2025", 4613 "target": "your-code-generated-2023" 4614 }, 4615 { 4616 "source": "asymptotic-study-incontext-2025", 4617 "target": "attention-is-all-you-need-2017" 4618 }, 4619 { 4620 "source": "asymptotic-study-incontext-2025", 4621 "target": "emergent-abilities-large-2022" 4622 }, 4623 { 4624 "source": "asymptotic-study-incontext-2025", 4625 "target": "emergent-abilities-mirage-2023" 4626 }, 4627 { 4628 "source": "atlas-artifact-generation-2025", 4629 "target": "guiding-llms-right-2024" 4630 }, 4631 { 4632 "source": "atlas-artifact-generation-2025", 4633 "target": "reflexion-language-agents-2023" 4634 }, 4635 { 4636 "source": "atlas-artifact-generation-2025", 4637 "target": "llms-se-systematic-review-2023" 4638 }, 4639 { 4640 "source": "atlas-artifact-generation-2025", 4641 "target": "no-need-lift-2023" 4642 }, 4643 { 4644 "source": "atom-thoughts-markov-2025", 4645 "target": "tree-thoughts-deliberate-2023" 4646 }, 4647 { 4648 "source": "atom-thoughts-markov-2025", 4649 "target": "chain-of-thought-prompting-2022" 4650 }, 4651 { 4652 "source": "atom-thoughts-markov-2025", 4653 "target": "compute-optimal-inference-2024" 4654 }, 4655 { 4656 "source": "atom-thoughts-markov-2025", 4657 "target": "selfconsistency-improves-chain-2022" 4658 }, 4659 { 4660 "source": "atom-thoughts-markov-2025", 4661 "target": "livecodebench-2024" 4662 }, 4663 { 4664 "source": "atom-thoughts-markov-2025", 4665 "target": "deepseek-r1-2025" 4666 }, 4667 { 4668 "source": "atom-thoughts-markov-2025", 4669 "target": "more-llm-calls-2024" 4670 }, 4671 { 4672 "source": "attacking-llms-ai-2025", 4673 "target": "sleeper-agents-2024" 4674 }, 4675 { 4676 "source": "attacking-llms-ai-2025", 4677 "target": "not-what-youve-2023" 4678 }, 4679 { 4680 "source": "attacks-by-content-2025", 4681 "target": "not-what-youve-2023" 4682 }, 4683 { 4684 "source": "attacks-by-content-2025", 4685 "target": "secalign-defending-against-2024" 4686 }, 4687 { 4688 "source": "attacks-by-content-2025", 4689 "target": "react-synergizing-reasoning-2022" 4690 }, 4691 { 4692 "source": "attention-all-you-2025", 4693 "target": "formalizing-benchmarking-prompt-2023" 4694 }, 4695 { 4696 "source": "attention-all-you-2025", 4697 "target": "secalign-defending-against-2024" 4698 }, 4699 { 4700 "source": "attention-all-you-2025", 4701 "target": "attention-tracker-detecting-2024" 4702 }, 4703 { 4704 "source": "attention-all-you-2025", 4705 "target": "neural-exec-learning-2024" 4706 }, 4707 { 4708 "source": "attention-all-you-2025", 4709 "target": "not-what-youve-2023" 4710 }, 4711 { 4712 "source": "attention-all-you-2025", 4713 "target": "injecguard-benchmarking-mitigating-2024" 4714 }, 4715 { 4716 "source": "attention-pruning-automated-2025", 4717 "target": "gpt4-technical-report-2023" 4718 }, 4719 { 4720 "source": "attention-tracker-detecting-2024", 4721 "target": "formalizing-benchmarking-prompt-2023" 4722 }, 4723 { 4724 "source": "attention-tracker-detecting-2024", 4725 "target": "not-what-youve-2023" 4726 }, 4727 { 4728 "source": "attention-tracker-detecting-2024", 4729 "target": "prompt-injection-attacks-2024" 4730 }, 4731 { 4732 "source": "attention-tracker-detecting-2024", 4733 "target": "jatmo-prompt-injection-2023" 4734 }, 4735 { 4736 "source": "attention-tracker-detecting-2024", 4737 "target": "automatic-universal-prompt-2024" 4738 }, 4739 { 4740 "source": "attention-tracker-detecting-2024", 4741 "target": "neural-exec-learning-2024" 4742 }, 4743 { 4744 "source": "audit-trails-accountability-2026", 4745 "target": "emergent-abilities-mirage-2023" 4746 }, 4747 { 4748 "source": "auditing-fairness-under-2026", 4749 "target": "emergent-abilities-mirage-2023" 4750 }, 4751 { 4752 "source": "augmented-language-models-2023", 4753 "target": "chain-of-thought-prompting-2022" 4754 }, 4755 { 4756 "source": "augmented-language-models-2023", 4757 "target": "toolformer-language-models-2023" 4758 }, 4759 { 4760 "source": "augmented-language-models-2023", 4761 "target": "react-synergizing-reasoning-2022" 4762 }, 4763 { 4764 "source": "augmented-language-models-2023", 4765 "target": "codex-humaneval-2021" 4766 }, 4767 { 4768 "source": "autocodebench-large-language-2025", 4769 "target": "codex-humaneval-2021" 4770 }, 4771 { 4772 "source": "autocodebench-large-language-2025", 4773 "target": "swe-bench-2023" 4774 }, 4775 { 4776 "source": "autocodebench-large-language-2025", 4777 "target": "livecodebench-2024" 4778 }, 4779 { 4780 "source": "autocodebench-large-language-2025", 4781 "target": "bigcodebench-2024" 4782 }, 4783 { 4784 "source": "autocodebench-large-language-2025", 4785 "target": "deepseek-coder-v2-2024" 4786 }, 4787 { 4788 "source": "autocodebench-large-language-2025", 4789 "target": "qwen25coder-technical-report-2024" 4790 }, 4791 { 4792 "source": "autocypher-improving-llms-2024", 4793 "target": "gpt4-technical-report-2023" 4794 }, 4795 { 4796 "source": "autocypher-improving-llms-2024", 4797 "target": "wizardcoder-empowering-code-2023" 4798 }, 4799 { 4800 "source": "autocypher-improving-llms-2024", 4801 "target": "llama-3-herd-2024" 4802 }, 4803 { 4804 "source": "autocypher-improving-llms-2024", 4805 "target": "qwen25coder-technical-report-2024" 4806 }, 4807 { 4808 "source": "autoflow-automated-workflow-2024", 4809 "target": "autogen-multi-agent-2023" 4810 }, 4811 { 4812 "source": "autoflow-automated-workflow-2024", 4813 "target": "react-synergizing-reasoning-2022" 4814 }, 4815 { 4816 "source": "autoflow-automated-workflow-2024", 4817 "target": "dspy-compiling-declarative-2023" 4818 }, 4819 { 4820 "source": "autoflow-automated-workflow-2024", 4821 "target": "chain-of-thought-prompting-2022" 4822 }, 4823 { 4824 "source": "autoflow-automated-workflow-2024", 4825 "target": "camel-communicative-agents-2023" 4826 }, 4827 { 4828 "source": "autoflow-automated-workflow-2024", 4829 "target": "chatdev-communicative-agents-2023" 4830 }, 4831 { 4832 "source": "autoflow-automated-workflow-2024", 4833 "target": "survey-autonomous-llm-agents-2023" 4834 }, 4835 { 4836 "source": "autoflow-automated-workflow-2024", 4837 "target": "tree-thoughts-deliberate-2023" 4838 }, 4839 { 4840 "source": "autogen-enabling-nextgen-2023", 4841 "target": "metagpt-multi-agent-framework-2023" 4842 }, 4843 { 4844 "source": "autogen-enabling-nextgen-2023", 4845 "target": "improving-factuality-reasoning-2023" 4846 }, 4847 { 4848 "source": "autogen-enabling-nextgen-2023", 4849 "target": "react-synergizing-reasoning-2022" 4850 }, 4851 { 4852 "source": "autogen-enabling-nextgen-2023", 4853 "target": "voyager-open-ended-2023" 4854 }, 4855 { 4856 "source": "autogen-enabling-nextgen-2023", 4857 "target": "generative-agents-interactive-2023" 4858 }, 4859 { 4860 "source": "autogen-enabling-nextgen-2023", 4861 "target": "rise-potential-large-2023" 4862 }, 4863 { 4864 "source": "autogen-enabling-nextgen-2023", 4865 "target": "survey-autonomous-llm-agents-2023" 4866 }, 4867 { 4868 "source": "autogen-multi-agent-2023", 4869 "target": "improving-factuality-reasoning-2023" 4870 }, 4871 { 4872 "source": "autogen-multi-agent-2023", 4873 "target": "metagpt-multi-agent-framework-2023" 4874 }, 4875 { 4876 "source": "autogen-multi-agent-2023", 4877 "target": "voyager-open-ended-2023" 4878 }, 4879 { 4880 "source": "autogen-multi-agent-2023", 4881 "target": "generative-agents-interactive-2023" 4882 }, 4883 { 4884 "source": "autogen-multi-agent-2023", 4885 "target": "react-synergizing-reasoning-2022" 4886 }, 4887 { 4888 "source": "autogen-multi-agent-2023", 4889 "target": "rise-potential-large-2023" 4890 }, 4891 { 4892 "source": "autogen-multi-agent-2023", 4893 "target": "survey-autonomous-llm-agents-2023" 4894 }, 4895 { 4896 "source": "autokaggle-multiagent-framework-2024", 4897 "target": "mlebench-evaluating-machine-2024" 4898 }, 4899 { 4900 "source": "autokaggle-multiagent-framework-2024", 4901 "target": "reflexion-language-agents-2023" 4902 }, 4903 { 4904 "source": "autokaggle-multiagent-framework-2024", 4905 "target": "react-synergizing-reasoning-2022" 4906 }, 4907 { 4908 "source": "autokaggle-multiagent-framework-2024", 4909 "target": "camel-communicative-agents-2023" 4910 }, 4911 { 4912 "source": "autokaggle-multiagent-framework-2024", 4913 "target": "rise-potential-large-2023" 4914 }, 4915 { 4916 "source": "automated-bug-detection-2025", 4917 "target": "survey-learningbased-automated-2023" 4918 }, 4919 { 4920 "source": "automated-bug-detection-2025", 4921 "target": "codebert-pretrained-model-2020" 4922 }, 4923 { 4924 "source": "automated-cc-program-2024", 4925 "target": "automated-program-repair-2022" 4926 }, 4927 { 4928 "source": "automated-code-generation-2025", 4929 "target": "llmbased-retrievalaugmented-control-2024" 4930 }, 4931 { 4932 "source": "automated-code-generation-2025", 4933 "target": "reacc-retrievalaugmented-code-2022" 4934 }, 4935 { 4936 "source": "automated-code-review-practice-2024", 4937 "target": "aiassisted-assessment-coding-2024" 4938 }, 4939 { 4940 "source": "automated-code-review-practice-2024", 4941 "target": "aipowered-code-review-2024" 4942 }, 4943 { 4944 "source": "automated-code-review-practice-2024", 4945 "target": "tales-from-trenches-2024" 4946 }, 4947 { 4948 "source": "automated-code-review-practice-2024", 4949 "target": "gpt4-technical-report-2023" 4950 }, 4951 { 4952 "source": "automated-discovery-test-2025", 4953 "target": "fuzz4all-universal-fuzzing-2023" 4954 }, 4955 { 4956 "source": "automated-discovery-test-2025", 4957 "target": "automated-program-repair-2023" 4958 }, 4959 { 4960 "source": "automated-extraction-mechanical-2026", 4961 "target": "chain-of-thought-prompting-2022" 4962 }, 4963 { 4964 "source": "automated-extraction-mechanical-2026", 4965 "target": "rise-potential-large-2023" 4966 }, 4967 { 4968 "source": "automated-extraction-mechanical-2026", 4969 "target": "efficient-guided-generation-2023" 4970 }, 4971 { 4972 "source": "automated-formalization-conceptual-2025", 4973 "target": "deepseek-v3-2024" 4974 }, 4975 { 4976 "source": "automated-knowledge-component-2025", 4977 "target": "codebert-pretrained-model-2020" 4978 }, 4979 { 4980 "source": "automated-knowledge-component-2025", 4981 "target": "llama-3-herd-2024" 4982 }, 4983 { 4984 "source": "automated-program-repair-2022", 4985 "target": "codex-humaneval-2021" 4986 }, 4987 { 4988 "source": "automated-program-repair-2022", 4989 "target": "less-training-more-2022" 4990 }, 4991 { 4992 "source": "automated-program-repair-2022", 4993 "target": "alphacode-competition-level-2022" 4994 }, 4995 { 4996 "source": "automated-program-repair-2022", 4997 "target": "scaling-laws-2020" 4998 }, 4999 { 5000 "source": "automated-program-repair-2023", 5001 "target": "codex-humaneval-2021" 5002 }, 5003 { 5004 "source": "automated-program-repair-2023", 5005 "target": "automated-program-repair-2022" 5006 }, 5007 { 5008 "source": "automated-program-repair-2023", 5009 "target": "less-training-more-2022" 5010 }, 5011 { 5012 "source": "automated-program-repair-2023-2", 5013 "target": "codex-humaneval-2021" 5014 }, 5015 { 5016 "source": "automated-program-repair-2024", 5017 "target": "automated-program-repair-2022" 5018 }, 5019 { 5020 "source": "automated-program-repair-2024", 5021 "target": "less-training-more-2022" 5022 }, 5023 { 5024 "source": "automated-program-repair-2024", 5025 "target": "codex-humaneval-2021" 5026 }, 5027 { 5028 "source": "automated-program-repair-2024", 5029 "target": "runbugrun-executable-dataset-2023" 5030 }, 5031 { 5032 "source": "automated-program-repair-2024", 5033 "target": "gpt4-technical-report-2023" 5034 }, 5035 { 5036 "source": "automated-program-repair-2024", 5037 "target": "multiple-scalable-polyglot-2023" 5038 }, 5039 { 5040 "source": "automated-program-repair-2024", 5041 "target": "impact-code-language-2023" 5042 }, 5043 { 5044 "source": "automated-program-repair-2025", 5045 "target": "automated-program-repair-2022" 5046 }, 5047 { 5048 "source": "automated-program-repair-2025-2", 5049 "target": "automated-program-repair-2023-3" 5050 }, 5051 { 5052 "source": "automated-program-repair-2025-2", 5053 "target": "automated-program-repair-2024-3" 5054 }, 5055 { 5056 "source": "automated-repair-c-2025", 5057 "target": "codex-humaneval-2021" 5058 }, 5059 { 5060 "source": "automated-repair-c-2025", 5061 "target": "automated-program-repair-2023" 5062 }, 5063 { 5064 "source": "automated-repair-c-2025", 5065 "target": "contrastrepair-enhancing-conversationbased-2024" 5066 }, 5067 { 5068 "source": "automated-repair-c-2025", 5069 "target": "agentless-2024" 5070 }, 5071 { 5072 "source": "automated-repair-c-2025", 5073 "target": "swe-bench-2023" 5074 }, 5075 { 5076 "source": "automated-repair-c-2025", 5077 "target": "cigar-costefficient-program-2024" 5078 }, 5079 { 5080 "source": "automated-repair-c-2025", 5081 "target": "aligning-objective-llmbased-2024" 5082 }, 5083 { 5084 "source": "automated-repair-c-2025", 5085 "target": "copiloting-copilots-fusing-2023" 5086 }, 5087 { 5088 "source": "automated-smart-contract-2025", 5089 "target": "codex-humaneval-2021" 5090 }, 5091 { 5092 "source": "automated-smart-contract-2025", 5093 "target": "deepseek-coder-2024" 5094 }, 5095 { 5096 "source": "automated-smart-contract-2025", 5097 "target": "code-llama-2023" 5098 }, 5099 { 5100 "source": "automated-smart-contract-2025", 5101 "target": "repocoder-repositorylevel-code-2023" 5102 }, 5103 { 5104 "source": "automated-smart-contract-2025", 5105 "target": "soleval-benchmarking-large-2025" 5106 }, 5107 { 5108 "source": "automated-smart-contract-2025", 5109 "target": "starcoder-2023" 5110 }, 5111 { 5112 "source": "automated-structural-testing-2026", 5113 "target": "uc-berkeley-mast-2025" 5114 }, 5115 { 5116 "source": "automated-structural-testing-2026", 5117 "target": "testdriven-development-llmbased-2024" 5118 }, 5119 { 5120 "source": "automated-structural-testing-2026", 5121 "target": "judging-llmasajudge-mtbench-2023" 5122 }, 5123 { 5124 "source": "automated-test-case-2024", 5125 "target": "llms-se-systematic-review-2023" 5126 }, 5127 { 5128 "source": "automated-test-case-2024", 5129 "target": "codex-humaneval-2021" 5130 }, 5131 { 5132 "source": "automated-test-case-2024", 5133 "target": "impact-code-language-2023" 5134 }, 5135 { 5136 "source": "automated-test-case-2024", 5137 "target": "survey-learningbased-automated-2023" 5138 }, 5139 { 5140 "source": "automated-test-case-2024", 5141 "target": "less-training-more-2022" 5142 }, 5143 { 5144 "source": "automated-test-generation-2024", 5145 "target": "autogen-multi-agent-2023" 5146 }, 5147 { 5148 "source": "automated-test-generation-2024", 5149 "target": "agentbench-evaluating-llms-2023" 5150 }, 5151 { 5152 "source": "automated-test-generation-2024", 5153 "target": "toolllm-facilitating-large-2023" 5154 }, 5155 { 5156 "source": "automated-test-generation-2024", 5157 "target": "gorilla-large-language-2023" 5158 }, 5159 { 5160 "source": "automated-test-generation-2024", 5161 "target": "your-code-generated-2023" 5162 }, 5163 { 5164 "source": "automated-test-generation-2024", 5165 "target": "beyond-functional-correctness-2024" 5166 }, 5167 { 5168 "source": "automated-test-generation-2024", 5169 "target": "gaia-benchmark-general-2023" 5170 }, 5171 { 5172 "source": "automated-unit-test-2024", 5173 "target": "no-more-manual-2023" 5174 }, 5175 { 5176 "source": "automated-unit-test-2024", 5177 "target": "adaptive-test-generation-2023" 5178 }, 5179 { 5180 "source": "automated-unit-test-2024", 5181 "target": "codamosa-escaping-coverage-2023" 5182 }, 5183 { 5184 "source": "automated-unit-test-2024", 5185 "target": "codex-humaneval-2021" 5186 }, 5187 { 5188 "source": "automated-unit-test-2024", 5189 "target": "code-llama-2023" 5190 }, 5191 { 5192 "source": "automatic-generation-benchmarks-2024", 5193 "target": "codex-humaneval-2021" 5194 }, 5195 { 5196 "source": "automatic-generation-benchmarks-2024", 5197 "target": "crosscodeeval-diverse-multilingual-2023" 5198 }, 5199 { 5200 "source": "automatic-generation-benchmarks-2024", 5201 "target": "swe-bench-2023" 5202 }, 5203 { 5204 "source": "automatic-universal-prompt-2024", 5205 "target": "not-what-youve-2023" 5206 }, 5207 { 5208 "source": "automatic-universal-prompt-2024", 5209 "target": "formalizing-benchmarking-prompt-2023" 5210 }, 5211 { 5212 "source": "automatic-universal-prompt-2024", 5213 "target": "prompt-injection-llm-apps-2023" 5214 }, 5215 { 5216 "source": "automatic-universal-prompt-2024", 5217 "target": "jatmo-prompt-injection-2023" 5218 }, 5219 { 5220 "source": "automatically-benchmarking-code-agents-2025", 5221 "target": "swe-bench-2023" 5222 }, 5223 { 5224 "source": "automatically-benchmarking-code-agents-2025", 5225 "target": "mlebench-evaluating-machine-2024" 5226 }, 5227 { 5228 "source": "automatically-benchmarking-code-agents-2025", 5229 "target": "paperbench-evaluating-ais-2025" 5230 }, 5231 { 5232 "source": "automatically-benchmarking-code-agents-2025", 5233 "target": "agentasajudge-evaluate-agents-2024" 5234 }, 5235 { 5236 "source": "automatically-benchmarking-code-agents-2025", 5237 "target": "corecodebench-decoupling-code-2025" 5238 }, 5239 { 5240 "source": "automatically-benchmarking-code-agents-2025", 5241 "target": "agentbased-evaluation-framework-2025" 5242 }, 5243 { 5244 "source": "automatically-benchmarking-code-agents-2025", 5245 "target": "projecteval-benchmark-programming-2025" 5246 }, 5247 { 5248 "source": "automatically-benchmarking-code-agents-2025", 5249 "target": "bigcodebench-2024" 5250 }, 5251 { 5252 "source": "automatically-generating-web-2025", 5253 "target": "demystifying-llmbased-software-2025" 5254 }, 5255 { 5256 "source": "automatically-generating-web-2025", 5257 "target": "testdriven-development-llmbased-2024" 5258 }, 5259 { 5260 "source": "automatically-generating-web-2025", 5261 "target": "codex-humaneval-2021" 5262 }, 5263 { 5264 "source": "automatically-generating-web-2025", 5265 "target": "survey-code-gen-llm-agents-2025" 5266 }, 5267 { 5268 "source": "automatically-generating-web-2025", 5269 "target": "repairagent-llm-bug-repair-2024" 5270 }, 5271 { 5272 "source": "automatically-generating-web-2025", 5273 "target": "metr-rct-2025" 5274 }, 5275 { 5276 "source": "automatically-surfacing-opportunities-2025", 5277 "target": "uc-berkeley-mast-2025" 5278 }, 5279 { 5280 "source": "automatically-surfacing-opportunities-2025", 5281 "target": "llama-3-herd-2024" 5282 }, 5283 { 5284 "source": "automating-deception-scalable-2025", 5285 "target": "judging-llmasajudge-mtbench-2023" 5286 }, 5287 { 5288 "source": "automating-rest-api-2024", 5289 "target": "automated-unit-test-2024" 5290 }, 5291 { 5292 "source": "automating-structural-engineering-2025", 5293 "target": "autogen-multi-agent-2023" 5294 }, 5295 { 5296 "source": "automating-structural-engineering-2025", 5297 "target": "uc-berkeley-mast-2025" 5298 }, 5299 { 5300 "source": "automating-structural-engineering-2025", 5301 "target": "magenticone-generalist-multiagent-2024" 5302 }, 5303 { 5304 "source": "automating-structural-engineering-2025", 5305 "target": "chatdev-communicative-agents-2023" 5306 }, 5307 { 5308 "source": "automating-structural-engineering-2025", 5309 "target": "metagpt-multi-agent-framework-2023" 5310 }, 5311 { 5312 "source": "automating-structural-engineering-2025", 5313 "target": "hyperagent-generalist-software-2024" 5314 }, 5315 { 5316 "source": "automating-structural-engineering-2025", 5317 "target": "react-synergizing-reasoning-2022" 5318 }, 5319 { 5320 "source": "automating-structural-engineering-2025", 5321 "target": "tree-thoughts-deliberate-2023" 5322 }, 5323 { 5324 "source": "automating-structural-engineering-2025", 5325 "target": "compounding-reliability-2025" 5326 }, 5327 { 5328 "source": "automation-ai-intergenerational-2025", 5329 "target": "generative-ai-at-2023" 5330 }, 5331 { 5332 "source": "automation-ai-intergenerational-2025", 5333 "target": "experimental-evidence-productivity-2023" 5334 }, 5335 { 5336 "source": "automation-ai-intergenerational-2025", 5337 "target": "copilot-productivity-controlled-2023" 5338 }, 5339 { 5340 "source": "autonomous-normative-multiagent-2025", 5341 "target": "metagpt-multi-agent-framework-2023" 5342 }, 5343 { 5344 "source": "autonomous-normative-multiagent-2025", 5345 "target": "chatdev-communicative-agents-2023" 5346 }, 5347 { 5348 "source": "autonomous-normative-multiagent-2025", 5349 "target": "large-language-model-2024" 5350 }, 5351 { 5352 "source": "autonomous-normative-multiagent-2025", 5353 "target": "survey-llm-code-generation-2025" 5354 }, 5355 { 5356 "source": "autonomous-normative-multiagent-2025", 5357 "target": "survey-hallucination-large-2023-2" 5358 }, 5359 { 5360 "source": "autonomous-supplier-evaluation-2025", 5361 "target": "attention-is-all-you-need-2017" 5362 }, 5363 { 5364 "source": "autop2c-llmbased-agent-2025", 5365 "target": "metagpt-multi-agent-framework-2023" 5366 }, 5367 { 5368 "source": "autop2c-llmbased-agent-2025", 5369 "target": "autogen-multi-agent-2023" 5370 }, 5371 { 5372 "source": "autop2c-llmbased-agent-2025", 5373 "target": "swe-agent-2024" 5374 }, 5375 { 5376 "source": "autop2c-llmbased-agent-2025", 5377 "target": "your-code-generated-2023" 5378 }, 5379 { 5380 "source": "autop2c-llmbased-agent-2025", 5381 "target": "paperbench-evaluating-ais-2025" 5382 }, 5383 { 5384 "source": "autop2c-llmbased-agent-2025", 5385 "target": "judging-llmasajudge-mtbench-2023" 5386 }, 5387 { 5388 "source": "autop2c-llmbased-agent-2025", 5389 "target": "deepseek-r1-2025" 5390 }, 5391 { 5392 "source": "autostreampipe-llm-assisted-2025", 5393 "target": "chain-of-thought-prompting-2022" 5394 }, 5395 { 5396 "source": "autostreampipe-llm-assisted-2025", 5397 "target": "tree-thoughts-deliberate-2023" 5398 }, 5399 { 5400 "source": "autostreampipe-llm-assisted-2025", 5401 "target": "gpt4-technical-report-2023" 5402 }, 5403 { 5404 "source": "autostreampipe-llm-assisted-2025", 5405 "target": "code-llama-2023" 5406 }, 5407 { 5408 "source": "autostreampipe-llm-assisted-2025", 5409 "target": "alphacode-competition-level-2022" 5410 }, 5411 { 5412 "source": "autostreampipe-llm-assisted-2025", 5413 "target": "survey-code-gen-llm-agents-2025" 5414 }, 5415 { 5416 "source": "autostreampipe-llm-assisted-2025", 5417 "target": "starcoder-2023" 5418 }, 5419 { 5420 "source": "autostreampipe-llm-assisted-2025", 5421 "target": "autoflow-automated-workflow-2024" 5422 }, 5423 { 5424 "source": "autotom-scaling-modelbased-2025", 5425 "target": "deepseek-r1-2025" 5426 }, 5427 { 5428 "source": "autotom-scaling-modelbased-2025", 5429 "target": "gpt4-technical-report-2023" 5430 }, 5431 { 5432 "source": "autotom-scaling-modelbased-2025", 5433 "target": "llama-3-herd-2024" 5434 }, 5435 { 5436 "source": "autovcoder-systematic-framework-2024", 5437 "target": "codex-humaneval-2021" 5438 }, 5439 { 5440 "source": "autovcoder-systematic-framework-2024", 5441 "target": "gpt4-technical-report-2023" 5442 }, 5443 { 5444 "source": "autovcoder-systematic-framework-2024", 5445 "target": "verilogeval-evaluating-large-2023" 5446 }, 5447 { 5448 "source": "autovcoder-systematic-framework-2024", 5449 "target": "benchmarking-large-language-2022" 5450 }, 5451 { 5452 "source": "autovcoder-systematic-framework-2024", 5453 "target": "code-llama-2023" 5454 }, 5455 { 5456 "source": "autovcoder-systematic-framework-2024", 5457 "target": "deepseek-coder-2024" 5458 }, 5459 { 5460 "source": "autoverus-automated-proof-2024", 5461 "target": "codex-humaneval-2021" 5462 }, 5463 { 5464 "source": "autoverus-automated-proof-2024", 5465 "target": "your-code-generated-2023" 5466 }, 5467 { 5468 "source": "autoverus-automated-proof-2024", 5469 "target": "automated-program-repair-2023" 5470 }, 5471 { 5472 "source": "awcp-workspace-delegation-2026", 5473 "target": "survey-autonomous-llm-agents-2023" 5474 }, 5475 { 5476 "source": "awcp-workspace-delegation-2026", 5477 "target": "uc-berkeley-mast-2025" 5478 }, 5479 { 5480 "source": "awcp-workspace-delegation-2026", 5481 "target": "autogen-multi-agent-2023" 5482 }, 5483 { 5484 "source": "awcp-workspace-delegation-2026", 5485 "target": "swe-bench-2023" 5486 }, 5487 { 5488 "source": "awcp-workspace-delegation-2026", 5489 "target": "swe-agent-2024" 5490 }, 5491 { 5492 "source": "awcp-workspace-delegation-2026", 5493 "target": "openhands-ai-sw-agent-2024" 5494 }, 5495 { 5496 "source": "awcp-workspace-delegation-2026", 5497 "target": "metagpt-multi-agent-framework-2023" 5498 }, 5499 { 5500 "source": "awcp-workspace-delegation-2026", 5501 "target": "engineering-multiagent-llms-2025" 5502 }, 5503 { 5504 "source": "backdoor-attribution-elucidating-2025", 5505 "target": "sleeper-agents-2024" 5506 }, 5507 { 5508 "source": "backdoor-attribution-elucidating-2025", 5509 "target": "trustworthy-llm-agents-survey-2025" 5510 }, 5511 { 5512 "source": "backdoor-samples-detection-2025", 5513 "target": "sleeper-agents-2024" 5514 }, 5515 { 5516 "source": "backdoored-retrievers-prompt-2024", 5517 "target": "not-what-youve-2023" 5518 }, 5519 { 5520 "source": "backdoored-retrievers-prompt-2024", 5521 "target": "prompt-injection-llm-apps-2023" 5522 }, 5523 { 5524 "source": "backdoored-retrievers-prompt-2024", 5525 "target": "survey-large-language-2023" 5526 }, 5527 { 5528 "source": "backdooring-bias-large-2026", 5529 "target": "sleeper-agents-2024" 5530 }, 5531 { 5532 "source": "backdooring-bias-large-2026", 5533 "target": "poisoning-attacks-llms-2025" 5534 }, 5535 { 5536 "source": "backdooring-bias-large-2026", 5537 "target": "judging-llmasajudge-mtbench-2023" 5538 }, 5539 { 5540 "source": "backdoorpowered-prompt-injection-2025", 5541 "target": "sleeper-agents-2024" 5542 }, 5543 { 5544 "source": "backdoorpowered-prompt-injection-2025", 5545 "target": "secalign-defending-against-2024" 5546 }, 5547 { 5548 "source": "backdoorpowered-prompt-injection-2025", 5549 "target": "formalizing-benchmarking-prompt-2023" 5550 }, 5551 { 5552 "source": "backportbench-multilingual-benchmark-2025", 5553 "target": "swe-bench-2023" 5554 }, 5555 { 5556 "source": "backportbench-multilingual-benchmark-2025", 5557 "target": "swe-agent-2024" 5558 }, 5559 { 5560 "source": "backportbench-multilingual-benchmark-2025", 5561 "target": "agentless-2024" 5562 }, 5563 { 5564 "source": "backportbench-multilingual-benchmark-2025", 5565 "target": "codex-humaneval-2021" 5566 }, 5567 { 5568 "source": "backportbench-multilingual-benchmark-2025", 5569 "target": "openhands-ai-sw-agent-2024" 5570 }, 5571 { 5572 "source": "backportbench-multilingual-benchmark-2025", 5573 "target": "llm-agents-se-survey-2024" 5574 }, 5575 { 5576 "source": "backportbench-multilingual-benchmark-2025", 5577 "target": "survey-code-gen-llm-agents-2025" 5578 }, 5579 { 5580 "source": "bamas-structuring-budgetaware-2025", 5581 "target": "autogen-multi-agent-2023" 5582 }, 5583 { 5584 "source": "bamas-structuring-budgetaware-2025", 5585 "target": "metagpt-multi-agent-framework-2023" 5586 }, 5587 { 5588 "source": "bamas-structuring-budgetaware-2025", 5589 "target": "chatdev-communicative-agents-2023" 5590 }, 5591 { 5592 "source": "bamas-structuring-budgetaware-2025", 5593 "target": "reflexion-language-agents-2023" 5594 }, 5595 { 5596 "source": "bamas-structuring-budgetaware-2025", 5597 "target": "react-synergizing-reasoning-2022" 5598 }, 5599 { 5600 "source": "bamas-structuring-budgetaware-2025", 5601 "target": "tree-thoughts-deliberate-2023" 5602 }, 5603 { 5604 "source": "bamas-structuring-budgetaware-2025", 5605 "target": "llm-agents-se-survey-2024" 5606 }, 5607 { 5608 "source": "bamboo-comprehensive-benchmark-2023", 5609 "target": "codex-humaneval-2021" 5610 }, 5611 { 5612 "source": "bamboo-comprehensive-benchmark-2023", 5613 "target": "lost-middle-how-2023" 5614 }, 5615 { 5616 "source": "bamboo-comprehensive-benchmark-2023", 5617 "target": "judging-llmasajudge-mtbench-2023" 5618 }, 5619 { 5620 "source": "banglaforge-llm-collaboration-2025", 5621 "target": "codex-humaneval-2021" 5622 }, 5623 { 5624 "source": "banglaforge-llm-collaboration-2025", 5625 "target": "tigercoder-novel-suite-2025" 5626 }, 5627 { 5628 "source": "bashexplainer-retrievalaugmented-bash-2022", 5629 "target": "codebert-pretrained-model-2020" 5630 }, 5631 { 5632 "source": "battleagentbench-benchmark-evaluating-2024", 5633 "target": "agentbench-evaluating-llms-2023" 5634 }, 5635 { 5636 "source": "battleagentbench-benchmark-evaluating-2024", 5637 "target": "autogen-multi-agent-2023" 5638 }, 5639 { 5640 "source": "battleagentbench-benchmark-evaluating-2024", 5641 "target": "large-language-model-2024" 5642 }, 5643 { 5644 "source": "battleagentbench-benchmark-evaluating-2024", 5645 "target": "building-cooperative-embodied-2023" 5646 }, 5647 { 5648 "source": "battleagentbench-benchmark-evaluating-2024", 5649 "target": "gpt4-technical-report-2023" 5650 }, 5651 { 5652 "source": "beavertails-improved-safety-2023", 5653 "target": "constitutional-ai-2022" 5654 }, 5655 { 5656 "source": "behavior-alignment-new-2024", 5657 "target": "emergent-abilities-large-2022" 5658 }, 5659 { 5660 "source": "bench-benchmark-toolagentuser-2024", 5661 "target": "swe-bench-2023" 5662 }, 5663 { 5664 "source": "bench-benchmark-toolagentuser-2024", 5665 "target": "agentbench-evaluating-llms-2023" 5666 }, 5667 { 5668 "source": "bench-benchmark-toolagentuser-2024", 5669 "target": "webarena-autonomous-agents-2023" 5670 }, 5671 { 5672 "source": "bench-benchmark-toolagentuser-2024", 5673 "target": "react-synergizing-reasoning-2022" 5674 }, 5675 { 5676 "source": "bench-benchmark-toolagentuser-2024", 5677 "target": "codex-humaneval-2021" 5678 }, 5679 { 5680 "source": "bench-benchmark-toolagentuser-2024", 5681 "target": "autogen-multi-agent-2023" 5682 }, 5683 { 5684 "source": "bench-benchmark-toolagentuser-2024", 5685 "target": "generative-agents-interactive-2023" 5686 }, 5687 { 5688 "source": "bench-benchmark-toolagentuser-2024", 5689 "target": "toolformer-language-models-2023" 5690 }, 5691 { 5692 "source": "benchmark-contamination-survey-2024", 5693 "target": "codex-humaneval-2021" 5694 }, 5695 { 5696 "source": "benchmark-contamination-survey-2024", 5697 "target": "quantifying-contamination-evaluating-2024" 5698 }, 5699 { 5700 "source": "benchmark-contamination-survey-2024", 5701 "target": "livecodebench-2024" 5702 }, 5703 { 5704 "source": "benchmark-contamination-survey-2024", 5705 "target": "top-leaderboard-ranking-2024" 5706 }, 5707 { 5708 "source": "benchmark-contamination-survey-2024", 5709 "target": "chatbot-arena-open-2024" 5710 }, 5711 { 5712 "source": "benchmark-contamination-survey-2024", 5713 "target": "rethinking-benchmark-contamination-2023" 5714 }, 5715 { 5716 "source": "benchmark-contamination-survey-2024", 5717 "target": "concerned-data-contamination-2024" 5718 }, 5719 { 5720 "source": "benchmark-contamination-survey-2024", 5721 "target": "judging-llmasajudge-mtbench-2023" 5722 }, 5723 { 5724 "source": "benchmark-expertlevel-academic-2025", 5725 "target": "mmlu-measuring-massive-2020" 5726 }, 5727 { 5728 "source": "benchmark-expertlevel-academic-2025", 5729 "target": "gpqa-graduatelevel-googleproof-2023" 5730 }, 5731 { 5732 "source": "benchmark-expertlevel-academic-2025", 5733 "target": "swe-bench-2023" 5734 }, 5735 { 5736 "source": "benchmark-expertlevel-academic-2025", 5737 "target": "frontiermath-benchmark-evaluating-2024" 5738 }, 5739 { 5740 "source": "benchmark-expertlevel-academic-2025", 5741 "target": "codex-humaneval-2021" 5742 }, 5743 { 5744 "source": "benchmark-expertlevel-academic-2025", 5745 "target": "mlebench-evaluating-machine-2024" 5746 }, 5747 { 5748 "source": "benchmark-expertlevel-academic-2025", 5749 "target": "rebench-evaluating-frontier-2024" 5750 }, 5751 { 5752 "source": "benchmark-test-time-scaling-agents-2026", 5753 "target": "swe-bench-2023" 5754 }, 5755 { 5756 "source": "benchmark-test-time-scaling-agents-2026", 5757 "target": "swe-bench-plus-2024" 5758 }, 5759 { 5760 "source": "benchmark-test-time-scaling-agents-2026", 5761 "target": "swe-agent-2024" 5762 }, 5763 { 5764 "source": "benchmark-test-time-scaling-agents-2026", 5765 "target": "compute-optimal-inference-2024" 5766 }, 5767 { 5768 "source": "benchmark-test-time-scaling-agents-2026", 5769 "target": "deepseek-r1-2025" 5770 }, 5771 { 5772 "source": "benchmark-test-time-scaling-agents-2026", 5773 "target": "gaia-benchmark-general-2023" 5774 }, 5775 { 5776 "source": "benchmark-test-time-scaling-agents-2026", 5777 "target": "agentbench-evaluating-llms-2023" 5778 }, 5779 { 5780 "source": "benchmarking-ai-models-2025", 5781 "target": "codex-humaneval-2021" 5782 }, 5783 { 5784 "source": "benchmarking-ai-models-2025", 5785 "target": "your-code-generated-2023" 5786 }, 5787 { 5788 "source": "benchmarking-ai-models-2025", 5789 "target": "livecodebench-2024" 5790 }, 5791 { 5792 "source": "benchmarking-ai-models-2025", 5793 "target": "swe-bench-2023" 5794 }, 5795 { 5796 "source": "benchmarking-ai-models-2025", 5797 "target": "agentbench-evaluating-llms-2023" 5798 }, 5799 { 5800 "source": "benchmarking-ai-models-2025", 5801 "target": "top-leaderboard-ranking-2024" 5802 }, 5803 { 5804 "source": "benchmarking-ai-models-2025", 5805 "target": "codeelo-benchmarking-competitionlevel-2025" 5806 }, 5807 { 5808 "source": "benchmarking-ai-models-2025-2", 5809 "target": "codex-humaneval-2021" 5810 }, 5811 { 5812 "source": "benchmarking-ai-models-2025-2", 5813 "target": "your-code-generated-2023" 5814 }, 5815 { 5816 "source": "benchmarking-ai-models-2025-2", 5817 "target": "livecodebench-2024" 5818 }, 5819 { 5820 "source": "benchmarking-ai-models-2025-2", 5821 "target": "swe-bench-2023" 5822 }, 5823 { 5824 "source": "benchmarking-ai-models-2025-2", 5825 "target": "agentbench-evaluating-llms-2023" 5826 }, 5827 { 5828 "source": "benchmarking-ai-models-2025-2", 5829 "target": "bigcodebench-2024" 5830 }, 5831 { 5832 "source": "benchmarking-ai-models-2025-2", 5833 "target": "top-leaderboard-ranking-2024" 5834 }, 5835 { 5836 "source": "benchmarking-ai-models-2025-2", 5837 "target": "alphacode-competition-level-2022" 5838 }, 5839 { 5840 "source": "benchmarking-epistemology-construct-2025", 5841 "target": "emergent-abilities-mirage-2023" 5842 }, 5843 { 5844 "source": "benchmarking-hallucination-large-2024", 5845 "target": "chain-of-thought-prompting-2022" 5846 }, 5847 { 5848 "source": "benchmarking-hallucination-large-2024", 5849 "target": "gpt4-technical-report-2023" 5850 }, 5851 { 5852 "source": "benchmarking-large-language-2022", 5853 "target": "codex-humaneval-2021" 5854 }, 5855 { 5856 "source": "benchmarking-large-language-2024", 5857 "target": "codex-humaneval-2021" 5858 }, 5859 { 5860 "source": "benchmarking-large-language-2024", 5861 "target": "frontiermath-benchmark-evaluating-2024" 5862 }, 5863 { 5864 "source": "benchmarking-large-language-2024", 5865 "target": "arcs-agentic-retrievalaugmented-2025" 5866 }, 5867 { 5868 "source": "benchmarking-large-language-2024", 5869 "target": "enhancing-code-translation-2024" 5870 }, 5871 { 5872 "source": "benchmarking-llms-unit-2025", 5873 "target": "testgeneval-real-world-2024" 5874 }, 5875 { 5876 "source": "benchmarking-llms-unit-2025", 5877 "target": "swe-bench-2023" 5878 }, 5879 { 5880 "source": "benchmarking-llms-unit-2025", 5881 "target": "swtbench-testing-validating-2024" 5882 }, 5883 { 5884 "source": "benchmarking-llms-unit-2025", 5885 "target": "lessleakbench-first-investigation-2025" 5886 }, 5887 { 5888 "source": "benchmarking-llms-unit-2025", 5889 "target": "codamosa-escaping-coverage-2023" 5890 }, 5891 { 5892 "source": "benchmarking-llms-unit-2025", 5893 "target": "bigcodebench-2024" 5894 }, 5895 { 5896 "source": "benchmarking-llms-unit-2025", 5897 "target": "llms-se-systematic-review-2023" 5898 }, 5899 { 5900 "source": "benchmarking-llms-unit-2025", 5901 "target": "reflexion-language-agents-2023" 5902 }, 5903 { 5904 "source": "benchmarking-llms-unit-2025", 5905 "target": "largescale-independent-comprehensive-2024" 5906 }, 5907 { 5908 "source": "benchmarks-automated-commonsense-2023", 5909 "target": "beyond-imitation-game-2022" 5910 }, 5911 { 5912 "source": "bert-pretraining-deep-2018", 5913 "target": "attention-is-all-you-need-2017" 5914 }, 5915 { 5916 "source": "best-practices-ai-2023", 5917 "target": "gpt4-technical-report-2023" 5918 }, 5919 { 5920 "source": "beyond-automation-job-redesign-2025", 5921 "target": "experimental-evidence-productivity-2023" 5922 }, 5923 { 5924 "source": "beyond-automation-job-redesign-2025", 5925 "target": "generative-ai-at-2023" 5926 }, 5927 { 5928 "source": "beyond-automation-job-redesign-2025", 5929 "target": "canaries-coal-mine-2025" 5930 }, 5931 { 5932 "source": "beyond-benchmark-innovative-2025", 5933 "target": "formalizing-benchmarking-prompt-2023" 5934 }, 5935 { 5936 "source": "beyond-benchmark-innovative-2025", 5937 "target": "automatic-universal-prompt-2024" 5938 }, 5939 { 5940 "source": "beyond-benchmark-innovative-2025", 5941 "target": "ai-scientist-fully-2024" 5942 }, 5943 { 5944 "source": "beyond-chinchillaoptimal-accounting-2023", 5945 "target": "chinchilla-compute-optimal-2022" 5946 }, 5947 { 5948 "source": "beyond-chinchillaoptimal-accounting-2023", 5949 "target": "scaling-laws-2020" 5950 }, 5951 { 5952 "source": "beyond-chinchillaoptimal-accounting-2023", 5953 "target": "llama-open-efficient-2023" 5954 }, 5955 { 5956 "source": "beyond-chunks-graphs-2025", 5957 "target": "react-synergizing-reasoning-2022" 5958 }, 5959 { 5960 "source": "beyond-commit-developer-perspectives-2026", 5961 "target": "copilot-productivity-controlled-2023" 5962 }, 5963 { 5964 "source": "beyond-commit-developer-perspectives-2026", 5965 "target": "code-me-me-2025" 5966 }, 5967 { 5968 "source": "beyond-correctness-benchmarking-2024", 5969 "target": "codex-humaneval-2021" 5970 }, 5971 { 5972 "source": "beyond-correctness-benchmarking-2024", 5973 "target": "your-code-generated-2023" 5974 }, 5975 { 5976 "source": "beyond-correctness-benchmarking-2024", 5977 "target": "livecodebench-2024" 5978 }, 5979 { 5980 "source": "beyond-correctness-benchmarking-2024", 5981 "target": "quantifying-contamination-evaluating-2024" 5982 }, 5983 { 5984 "source": "beyond-correctness-benchmarking-2024", 5985 "target": "classeval-manuallycrafted-benchmark-2023" 5986 }, 5987 { 5988 "source": "beyond-correctness-benchmarking-2024", 5989 "target": "deepseek-coder-2024" 5990 }, 5991 { 5992 "source": "beyond-correctness-benchmarking-2024", 5993 "target": "reflexion-language-agents-2023" 5994 }, 5995 { 5996 "source": "beyond-correctness-benchmarking-2024", 5997 "target": "code-llama-2023" 5998 }, 5999 { 6000 "source": "beyond-correctness-benchmarking-2024", 6001 "target": "wizardcoder-empowering-code-2023" 6002 }, 6003 { 6004 "source": "beyond-correctness-rewarding-2025", 6005 "target": "deepseek-r1-2025" 6006 }, 6007 { 6008 "source": "beyond-functional-correctness-2024", 6009 "target": "codex-humaneval-2021" 6010 }, 6011 { 6012 "source": "beyond-functional-correctness-2024", 6013 "target": "codemirage-hallucinations-code-2024" 6014 }, 6015 { 6016 "source": "beyond-functional-correctness-2024", 6017 "target": "collubench-benchmark-predicting-2024" 6018 }, 6019 { 6020 "source": "beyond-functional-correctness-2024", 6021 "target": "llm-hallucinations-code-practical-2024" 6022 }, 6023 { 6024 "source": "beyond-functional-correctness-2024", 6025 "target": "what-wrong-your-2024" 6026 }, 6027 { 6028 "source": "beyond-functional-correctness-2024", 6029 "target": "bugs-large-language-2024" 6030 }, 6031 { 6032 "source": "beyond-functional-correctness-2024", 6033 "target": "your-code-generated-2023" 6034 }, 6035 { 6036 "source": "beyond-functional-correctness-2024", 6037 "target": "deepseek-r1-2025" 6038 }, 6039 { 6040 "source": "beyond-functional-correctness-2024", 6041 "target": "survey-hallucination-large-2023-2" 6042 }, 6043 { 6044 "source": "beyond-hype-comprehensive-2024", 6045 "target": "how-far-we-2023" 6046 }, 6047 { 6048 "source": "beyond-hype-comprehensive-2024", 6049 "target": "how-beginning-programmers-2024" 6050 }, 6051 { 6052 "source": "beyond-imitation-game-2022", 6053 "target": "codex-humaneval-2021" 6054 }, 6055 { 6056 "source": "beyond-imitation-game-2022", 6057 "target": "palm-scaling-language-2022" 6058 }, 6059 { 6060 "source": "beyond-imitation-game-2022", 6061 "target": "scaling-laws-2020" 6062 }, 6063 { 6064 "source": "beyond-imitation-game-2022", 6065 "target": "mmlu-measuring-massive-2020" 6066 }, 6067 { 6068 "source": "beyond-imitation-game-2022", 6069 "target": "chinchilla-compute-optimal-2022" 6070 }, 6071 { 6072 "source": "beyond-imitation-game-2022", 6073 "target": "lamda-language-models-2022" 6074 }, 6075 { 6076 "source": "beyond-mimicry-preference-2025", 6077 "target": "emergent-abilities-mirage-2023" 6078 }, 6079 { 6080 "source": "beyond-mimicry-preference-2025", 6081 "target": "alignment-faking-2024" 6082 }, 6083 { 6084 "source": "beyond-promptinduced-lies-2025", 6085 "target": "alignment-faking-2024" 6086 }, 6087 { 6088 "source": "beyond-promptinduced-lies-2025", 6089 "target": "sleeper-agents-2024" 6090 }, 6091 { 6092 "source": "beyond-quantity-trajectory-2026", 6093 "target": "gorilla-large-language-2023" 6094 }, 6095 { 6096 "source": "beyond-quantity-trajectory-2026", 6097 "target": "toolllm-facilitating-large-2023" 6098 }, 6099 { 6100 "source": "beyond-quantity-trajectory-2026", 6101 "target": "swerebench-automated-pipeline-2025" 6102 }, 6103 { 6104 "source": "beyond-quantity-trajectory-2026", 6105 "target": "livecodebench-2024" 6106 }, 6107 { 6108 "source": "beyond-quantity-trajectory-2026", 6109 "target": "react-synergizing-reasoning-2022" 6110 }, 6111 { 6112 "source": "beyond-quantity-trajectory-2026", 6113 "target": "toolformer-language-models-2023" 6114 }, 6115 { 6116 "source": "beyond-quantity-trajectory-2026", 6117 "target": "survey-code-gen-llm-agents-2025" 6118 }, 6119 { 6120 "source": "beyond-singleagent-safety-2025", 6121 "target": "uc-berkeley-mast-2025" 6122 }, 6123 { 6124 "source": "beyond-singleagent-safety-2025", 6125 "target": "multiagent-risks-from-2025" 6126 }, 6127 { 6128 "source": "beyond-singleagent-safety-2025", 6129 "target": "autogen-multi-agent-2023" 6130 }, 6131 { 6132 "source": "beyond-singleagent-safety-2025", 6133 "target": "swe-agent-2024" 6134 }, 6135 { 6136 "source": "beyond-singleagent-safety-2025", 6137 "target": "voyager-open-ended-2023" 6138 }, 6139 { 6140 "source": "beyond-static-datasets-2023", 6141 "target": "palm-scaling-language-2022" 6142 }, 6143 { 6144 "source": "beyond-static-datasets-2023", 6145 "target": "gpt4-technical-report-2023" 6146 }, 6147 { 6148 "source": "beyond-static-datasets-2023", 6149 "target": "judging-llmasajudge-mtbench-2023" 6150 }, 6151 { 6152 "source": "beyond-static-pattern-2025", 6153 "target": "automated-program-repair-2023" 6154 }, 6155 { 6156 "source": "beyond-static-pattern-2025", 6157 "target": "your-code-generated-2023" 6158 }, 6159 { 6160 "source": "beyond-synthetic-benchmarks-2025", 6161 "target": "classeval-manuallycrafted-benchmark-2023" 6162 }, 6163 { 6164 "source": "beyond-synthetic-benchmarks-2025", 6165 "target": "codex-humaneval-2021" 6166 }, 6167 { 6168 "source": "beyond-synthetic-benchmarks-2025", 6169 "target": "your-code-generated-2023" 6170 }, 6171 { 6172 "source": "beyond-synthetic-benchmarks-2025", 6173 "target": "graphcodeagent-dual-graphguided-2025" 6174 }, 6175 { 6176 "source": "beyond-synthetic-benchmarks-2025", 6177 "target": "retrievalaugmented-code-generation-2025" 6178 }, 6179 { 6180 "source": "beyond-synthetic-benchmarks-2025", 6181 "target": "bugs-large-language-2024" 6182 }, 6183 { 6184 "source": "beyond-synthetic-benchmarks-2025", 6185 "target": "coderagbench-can-retrieval-2024" 6186 }, 6187 { 6188 "source": "beyond-synthetic-benchmarks-2025", 6189 "target": "survey-llm-code-generation-2025" 6190 }, 6191 { 6192 "source": "beyond-synthetic-benchmarks-2025", 6193 "target": "repairagent-llm-bug-repair-2024" 6194 }, 6195 { 6196 "source": "beyond-textual-context-2025", 6197 "target": "gpt4-technical-report-2023" 6198 }, 6199 { 6200 "source": "beyond-token-probes-2025", 6201 "target": "selfcheckgpt-zeroresource-blackbox-2023" 6202 }, 6203 { 6204 "source": "bias-assessment-mitigation-2023", 6205 "target": "codex-humaneval-2021" 6206 }, 6207 { 6208 "source": "bias-assessment-mitigation-2023", 6209 "target": "swe-bench-2023" 6210 }, 6211 { 6212 "source": "bias-assessment-mitigation-2023", 6213 "target": "your-code-generated-2023" 6214 }, 6215 { 6216 "source": "bias-assessment-mitigation-2023", 6217 "target": "codamosa-escaping-coverage-2023" 6218 }, 6219 { 6220 "source": "bias-unveiled-investigating-2024", 6221 "target": "codex-humaneval-2021" 6222 }, 6223 { 6224 "source": "bias-unveiled-investigating-2024", 6225 "target": "starcoder-2023" 6226 }, 6227 { 6228 "source": "bias-unveiled-investigating-2024", 6229 "target": "code-llama-2023" 6230 }, 6231 { 6232 "source": "biasalert-plugandplay-tool-2024", 6233 "target": "beavertails-improved-safety-2023" 6234 }, 6235 { 6236 "source": "biasalert-plugandplay-tool-2024", 6237 "target": "gpt4-technical-report-2023" 6238 }, 6239 { 6240 "source": "bigcodebench-2024", 6241 "target": "codex-humaneval-2021" 6242 }, 6243 { 6244 "source": "bigcodebench-2024", 6245 "target": "swe-bench-2023" 6246 }, 6247 { 6248 "source": "bigcodebench-2024", 6249 "target": "livecodebench-2024" 6250 }, 6251 { 6252 "source": "bigcodebench-2024", 6253 "target": "your-code-generated-2023" 6254 }, 6255 { 6256 "source": "bigcodebench-2024", 6257 "target": "code-llama-2023" 6258 }, 6259 { 6260 "source": "bigcodebench-2024", 6261 "target": "starcoder2-2024" 6262 }, 6263 { 6264 "source": "bigcodebench-2024", 6265 "target": "swe-agent-2024" 6266 }, 6267 { 6268 "source": "bigrpo-bidirectional-optimization-2025", 6269 "target": "sleeper-agents-2024" 6270 }, 6271 { 6272 "source": "bigrpo-bidirectional-optimization-2025", 6273 "target": "deepseek-r1-2025" 6274 }, 6275 { 6276 "source": "bigrpo-bidirectional-optimization-2025", 6277 "target": "survey-large-language-2023" 6278 }, 6279 { 6280 "source": "bioplanner-automatic-evaluation-2023", 6281 "target": "voyager-open-ended-2023" 6282 }, 6283 { 6284 "source": "bioplanner-automatic-evaluation-2023", 6285 "target": "sparks-agi-early-2023" 6286 }, 6287 { 6288 "source": "bioplanner-automatic-evaluation-2023", 6289 "target": "toolformer-language-models-2023" 6290 }, 6291 { 6292 "source": "bioplanner-automatic-evaluation-2023", 6293 "target": "tree-thoughts-deliberate-2023" 6294 }, 6295 { 6296 "source": "bioplanner-automatic-evaluation-2023", 6297 "target": "gpt4-technical-report-2023" 6298 }, 6299 { 6300 "source": "bioragent-retrievalaugmented-generation-2024", 6301 "target": "gemini-15-technical-report-2024" 6302 }, 6303 { 6304 "source": "biotrouble-multiagent-workflow-2026", 6305 "target": "survey-autonomous-llm-agents-2023" 6306 }, 6307 { 6308 "source": "biotrouble-multiagent-workflow-2026", 6309 "target": "judging-llmasajudge-mtbench-2023" 6310 }, 6311 { 6312 "source": "biotrouble-multiagent-workflow-2026", 6313 "target": "rise-potential-large-2023" 6314 }, 6315 { 6316 "source": "bitsaicr-automated-code-2025", 6317 "target": "aiassisted-assessment-coding-2024" 6318 }, 6319 { 6320 "source": "bitsaicr-automated-code-2025", 6321 "target": "aipowered-code-review-2024" 6322 }, 6323 { 6324 "source": "bitsaicr-automated-code-2025", 6325 "target": "llm-critics-help-2024" 6326 }, 6327 { 6328 "source": "bitsaicr-automated-code-2025", 6329 "target": "judging-llmasajudge-mtbench-2023" 6330 }, 6331 { 6332 "source": "blockdialect-blockwise-finegrained-2025", 6333 "target": "smoothquant-accurate-efficient-2022" 6334 }, 6335 { 6336 "source": "boosting-llm-reasoning-2025", 6337 "target": "deepseek-r1-2025" 6338 }, 6339 { 6340 "source": "boosting-llm-reasoning-2025", 6341 "target": "reflexion-language-agents-2023" 6342 }, 6343 { 6344 "source": "boosting-llm-reasoning-2025", 6345 "target": "autogen-multi-agent-2023" 6346 }, 6347 { 6348 "source": "boosting-redundancybased-automated-2023", 6349 "target": "less-training-more-2022" 6350 }, 6351 { 6352 "source": "boosting-redundancybased-automated-2023", 6353 "target": "copiloting-copilots-fusing-2023" 6354 }, 6355 { 6356 "source": "boosting-redundancybased-automated-2023", 6357 "target": "gamma-revisiting-templatebased-2023" 6358 }, 6359 { 6360 "source": "boosting-redundancybased-automated-2023", 6361 "target": "impact-code-language-2023" 6362 }, 6363 { 6364 "source": "boosting-redundancybased-automated-2023", 6365 "target": "automated-program-repair-2022" 6366 }, 6367 { 6368 "source": "boosting-redundancybased-automated-2023", 6369 "target": "systematic-literature-review-2024" 6370 }, 6371 { 6372 "source": "boosting-redundancybased-automated-2023", 6373 "target": "hybrid-automated-program-2024" 6374 }, 6375 { 6376 "source": "bottomup-domainspecific-superintelligence-2025", 6377 "target": "deepseek-r1-2025" 6378 }, 6379 { 6380 "source": "bottomup-domainspecific-superintelligence-2025", 6381 "target": "compute-optimal-inference-2024" 6382 }, 6383 { 6384 "source": "bottomup-domainspecific-superintelligence-2025", 6385 "target": "emergent-abilities-large-2022" 6386 }, 6387 { 6388 "source": "bottomup-domainspecific-superintelligence-2025", 6389 "target": "emergent-abilities-mirage-2023" 6390 }, 6391 { 6392 "source": "bottomup-domainspecific-superintelligence-2025", 6393 "target": "chain-of-thought-prompting-2022" 6394 }, 6395 { 6396 "source": "bpo-staying-close-2024", 6397 "target": "gemma-open-models-2024" 6398 }, 6399 { 6400 "source": "break-sequential-dependency-2024", 6401 "target": "accelerating-large-language-2023" 6402 }, 6403 { 6404 "source": "break-sequential-dependency-2024", 6405 "target": "fast-inference-from-2022" 6406 }, 6407 { 6408 "source": "break-sequential-dependency-2024", 6409 "target": "codex-humaneval-2021" 6410 }, 6411 { 6412 "source": "break-sequential-dependency-2024", 6413 "target": "judging-llmasajudge-mtbench-2023" 6414 }, 6415 { 6416 "source": "breaking-prompt-wall-2025", 6417 "target": "jailbreaking-safety-aligned-llms-2024" 6418 }, 6419 { 6420 "source": "breaking-prompt-wall-2025", 6421 "target": "not-what-youve-2023" 6422 }, 6423 { 6424 "source": "bridging-human-interpretation-2026", 6425 "target": "uc-berkeley-mast-2025" 6426 }, 6427 { 6428 "source": "bridging-human-interpretation-2026", 6429 "target": "autogen-multi-agent-2023" 6430 }, 6431 { 6432 "source": "bridging-human-interpretation-2026", 6433 "target": "metagpt-multi-agent-framework-2023" 6434 }, 6435 { 6436 "source": "bridging-human-interpretation-2026", 6437 "target": "chatdev-communicative-agents-2023" 6438 }, 6439 { 6440 "source": "bridging-human-interpretation-2026", 6441 "target": "measuring-agents-production-2025" 6442 }, 6443 { 6444 "source": "bridging-human-interpretation-2026", 6445 "target": "gpt4-technical-report-2023" 6446 }, 6447 { 6448 "source": "bridging-llmgenerated-code-2025", 6449 "target": "codex-humaneval-2021" 6450 }, 6451 { 6452 "source": "bridging-llmgenerated-code-2025", 6453 "target": "codejudge-evaluating-code-2024" 6454 }, 6455 { 6456 "source": "broken-neural-scaling-2022", 6457 "target": "scaling-laws-2020" 6458 }, 6459 { 6460 "source": "broken-neural-scaling-2022", 6461 "target": "emergent-abilities-large-2022" 6462 }, 6463 { 6464 "source": "broken-neural-scaling-2022", 6465 "target": "beyond-imitation-game-2022" 6466 }, 6467 { 6468 "source": "broken-neural-scaling-2022", 6469 "target": "gpt4-technical-report-2023" 6470 }, 6471 { 6472 "source": "broken-neural-scaling-2022", 6473 "target": "grokking-generalization-beyond-2022" 6474 }, 6475 { 6476 "source": "broken-neural-scaling-2022", 6477 "target": "chain-of-thought-prompting-2022" 6478 }, 6479 { 6480 "source": "browserarena-web-agents-2025", 6481 "target": "chatbot-arena-open-2024" 6482 }, 6483 { 6484 "source": "browserarena-web-agents-2025", 6485 "target": "webarena-autonomous-agents-2023" 6486 }, 6487 { 6488 "source": "browserarena-web-agents-2025", 6489 "target": "visualwebarena-evaluating-multimodal-2024" 6490 }, 6491 { 6492 "source": "browserarena-web-agents-2025", 6493 "target": "openhands-ai-sw-agent-2024" 6494 }, 6495 { 6496 "source": "browserarena-web-agents-2025", 6497 "target": "copilot-arena-platform-2025" 6498 }, 6499 { 6500 "source": "browserarena-web-agents-2025", 6501 "target": "osworld-benchmarking-multimodal-2024" 6502 }, 6503 { 6504 "source": "browsesafe-understanding-preventing-2025", 6505 "target": "agent-security-bench-2024" 6506 }, 6507 { 6508 "source": "browsesafe-understanding-preventing-2025", 6509 "target": "wasp-benchmarking-web-2025" 6510 }, 6511 { 6512 "source": "browsesafe-understanding-preventing-2025", 6513 "target": "formalizing-benchmarking-prompt-2023" 6514 }, 6515 { 6516 "source": "browsesafe-understanding-preventing-2025", 6517 "target": "adaptive-attacks-bypass-defenses-2025" 6518 }, 6519 { 6520 "source": "budgetaware-agentic-routing-2026", 6521 "target": "appworld-controllable-world-2024" 6522 }, 6523 { 6524 "source": "bugdar-aiaugmented-secure-2025", 6525 "target": "codex-humaneval-2021" 6526 }, 6527 { 6528 "source": "bugdar-aiaugmented-secure-2025", 6529 "target": "gpt4-technical-report-2023" 6530 }, 6531 { 6532 "source": "bugs-large-language-2024", 6533 "target": "codex-humaneval-2021" 6534 }, 6535 { 6536 "source": "bugs-large-language-2024", 6537 "target": "your-code-generated-2023" 6538 }, 6539 { 6540 "source": "bugs-large-language-2024", 6541 "target": "lost-translation-study-2024" 6542 }, 6543 { 6544 "source": "bugs-large-language-2024", 6545 "target": "adaptive-test-generation-2023" 6546 }, 6547 { 6548 "source": "bugs-large-language-2024", 6549 "target": "codamosa-escaping-coverage-2023" 6550 }, 6551 { 6552 "source": "build-your-personalized-2025", 6553 "target": "ai-scientist-fully-2024" 6554 }, 6555 { 6556 "source": "build-your-personalized-2025", 6557 "target": "ai-scientistv2-workshoplevel-2025" 6558 }, 6559 { 6560 "source": "build-your-personalized-2025", 6561 "target": "uc-berkeley-mast-2025" 6562 }, 6563 { 6564 "source": "build-your-personalized-2025", 6565 "target": "sleeper-agents-2024" 6566 }, 6567 { 6568 "source": "build-your-personalized-2025", 6569 "target": "react-synergizing-reasoning-2022" 6570 }, 6571 { 6572 "source": "build-your-personalized-2025", 6573 "target": "metagpt-multi-agent-framework-2023" 6574 }, 6575 { 6576 "source": "build-your-personalized-2025", 6577 "target": "adas-automated-design-2024" 6578 }, 6579 { 6580 "source": "build-your-personalized-2025", 6581 "target": "lhdeception-simulating-understanding-2025" 6582 }, 6583 { 6584 "source": "build-your-personalized-2025", 6585 "target": "large-language-model-2024" 6586 }, 6587 { 6588 "source": "building-coding-assistant-2024", 6589 "target": "codex-humaneval-2021" 6590 }, 6591 { 6592 "source": "building-coding-assistant-2024", 6593 "target": "reacc-retrievalaugmented-code-2022" 6594 }, 6595 { 6596 "source": "building-coding-assistant-2024", 6597 "target": "codebert-pretrained-model-2020" 6598 }, 6599 { 6600 "source": "building-cooperative-embodied-2023", 6601 "target": "gpt4-technical-report-2023" 6602 }, 6603 { 6604 "source": "building-cooperative-embodied-2023", 6605 "target": "generative-agents-interactive-2023" 6606 }, 6607 { 6608 "source": "building-cooperative-embodied-2023", 6609 "target": "voyager-open-ended-2023" 6610 }, 6611 { 6612 "source": "building-cooperative-embodied-2023", 6613 "target": "improving-factuality-reasoning-2023" 6614 }, 6615 { 6616 "source": "building-cooperative-embodied-2023", 6617 "target": "camel-communicative-agents-2023" 6618 }, 6619 { 6620 "source": "building-cooperative-embodied-2023", 6621 "target": "sparks-agi-early-2023" 6622 }, 6623 { 6624 "source": "building-cooperative-embodied-2023", 6625 "target": "survey-autonomous-llm-agents-2023" 6626 }, 6627 { 6628 "source": "building-cooperative-embodied-2023", 6629 "target": "rise-potential-large-2023" 6630 }, 6631 { 6632 "source": "building-understandable-messaging-2024", 6633 "target": "react-synergizing-reasoning-2022" 6634 }, 6635 { 6636 "source": "building-understandable-messaging-2024", 6637 "target": "rise-potential-large-2023" 6638 }, 6639 { 6640 "source": "building-understandable-messaging-2024", 6641 "target": "chain-of-thought-prompting-2022" 6642 }, 6643 { 6644 "source": "bypassing-llm-guardrails-2025", 6645 "target": "automatic-universal-prompt-2024" 6646 }, 6647 { 6648 "source": "bypassing-llm-guardrails-2025", 6649 "target": "llama-3-herd-2024" 6650 }, 6651 { 6652 "source": "bypassing-llm-guardrails-2025", 6653 "target": "judging-llmasajudge-mtbench-2023" 6654 }, 6655 { 6656 "source": "bytesized32refactored-extensible-interactive-2025", 6657 "target": "codex-humaneval-2021" 6658 }, 6659 { 6660 "source": "bytesized32refactored-extensible-interactive-2025", 6661 "target": "swe-bench-2023" 6662 }, 6663 { 6664 "source": "bytesized32refactored-extensible-interactive-2025", 6665 "target": "livecodebench-2024" 6666 }, 6667 { 6668 "source": "bytesized32refactored-extensible-interactive-2025", 6669 "target": "bigcodebench-2024" 6670 }, 6671 { 6672 "source": "bytesized32refactored-extensible-interactive-2025", 6673 "target": "survey-code-gen-llm-agents-2025" 6674 }, 6675 { 6676 "source": "bytesized32refactored-extensible-interactive-2025", 6677 "target": "survey-llm-code-generation-2025" 6678 }, 6679 { 6680 "source": "c3po-optimized-large-2025", 6681 "target": "deepseek-r1-2025" 6682 }, 6683 { 6684 "source": "c3po-optimized-large-2025", 6685 "target": "chain-of-thought-prompting-2022" 6686 }, 6687 { 6688 "source": "c3po-optimized-large-2025", 6689 "target": "selfconsistency-improves-chain-2022" 6690 }, 6691 { 6692 "source": "cacheprune-neuralbased-attribution-2025", 6693 "target": "not-what-youve-2023" 6694 }, 6695 { 6696 "source": "cacheprune-neuralbased-attribution-2025", 6697 "target": "secalign-defending-against-2024" 6698 }, 6699 { 6700 "source": "cacheprune-neuralbased-attribution-2025", 6701 "target": "defending-against-indirect-2024" 6702 }, 6703 { 6704 "source": "cacheprune-neuralbased-attribution-2025", 6705 "target": "fath-authenticationbased-testtime-2024" 6706 }, 6707 { 6708 "source": "cacheprune-neuralbased-attribution-2025", 6709 "target": "task-shield-enforcing-2024" 6710 }, 6711 { 6712 "source": "cacheprune-neuralbased-attribution-2025", 6713 "target": "jatmo-prompt-injection-2023" 6714 }, 6715 { 6716 "source": "cacheprune-neuralbased-attribution-2025", 6717 "target": "automatic-universal-prompt-2024" 6718 }, 6719 { 6720 "source": "calibrating-llm-judges-2025", 6721 "target": "judging-llmasajudge-mtbench-2023" 6722 }, 6723 { 6724 "source": "calibrating-llm-judges-2025", 6725 "target": "selfconsistency-improves-chain-2022" 6726 }, 6727 { 6728 "source": "calibration-large-language-2026", 6729 "target": "selfconsistency-improves-chain-2022" 6730 }, 6731 { 6732 "source": "calibration-large-language-2026", 6733 "target": "compute-optimal-inference-2024" 6734 }, 6735 { 6736 "source": "calibration-large-language-2026", 6737 "target": "codex-humaneval-2021" 6738 }, 6739 { 6740 "source": "camel-communicative-agents-2023", 6741 "target": "codex-humaneval-2021" 6742 }, 6743 { 6744 "source": "camel-communicative-agents-2023", 6745 "target": "your-code-generated-2023" 6746 }, 6747 { 6748 "source": "camel-communicative-agents-2023", 6749 "target": "llama-open-efficient-2023" 6750 }, 6751 { 6752 "source": "camel-communicative-agents-2023", 6753 "target": "self-instruct-aligning-language-2022" 6754 }, 6755 { 6756 "source": "camel-communicative-agents-2023", 6757 "target": "toolformer-language-models-2023" 6758 }, 6759 { 6760 "source": "camel-communicative-agents-2023", 6761 "target": "react-synergizing-reasoning-2022" 6762 }, 6763 { 6764 "source": "camel-communicative-agents-2023", 6765 "target": "reflexion-language-agents-2023" 6766 }, 6767 { 6768 "source": "camel-communicative-agents-2023", 6769 "target": "constitutional-ai-2022" 6770 }, 6771 { 6772 "source": "camel-communicative-agents-2023", 6773 "target": "webarena-autonomous-agents-2023" 6774 }, 6775 { 6776 "source": "can-1b-llm-2025", 6777 "target": "compute-optimal-inference-2024" 6778 }, 6779 { 6780 "source": "can-1b-llm-2025", 6781 "target": "deepseek-r1-2025" 6782 }, 6783 { 6784 "source": "can-1b-llm-2025", 6785 "target": "inference-scaling-laws-2024" 6786 }, 6787 { 6788 "source": "can-1b-llm-2025", 6789 "target": "selfconsistency-improves-chain-2022" 6790 }, 6791 { 6792 "source": "can-chatgpt-support-2024", 6793 "target": "codex-humaneval-2021" 6794 }, 6795 { 6796 "source": "can-chatgpt-support-2024", 6797 "target": "your-code-generated-2023" 6798 }, 6799 { 6800 "source": "can-chatgpt-support-2024", 6801 "target": "can-llm-replace-2023" 6802 }, 6803 { 6804 "source": "can-indirect-prompt-2025", 6805 "target": "not-what-youve-2023" 6806 }, 6807 { 6808 "source": "can-indirect-prompt-2025", 6809 "target": "defending-against-indirect-2024" 6810 }, 6811 { 6812 "source": "can-indirect-prompt-2025", 6813 "target": "automatic-universal-prompt-2024" 6814 }, 6815 { 6816 "source": "can-indirect-prompt-2025", 6817 "target": "formalizing-benchmarking-prompt-2023" 6818 }, 6819 { 6820 "source": "can-indirect-prompt-2025", 6821 "target": "jatmo-prompt-injection-2023" 6822 }, 6823 { 6824 "source": "can-indirect-prompt-2025", 6825 "target": "prompt-injection-llm-apps-2023" 6826 }, 6827 { 6828 "source": "can-large-language-2025", 6829 "target": "sleeper-agents-2024" 6830 }, 6831 { 6832 "source": "can-llm-replace-2023", 6833 "target": "codex-humaneval-2021" 6834 }, 6835 { 6836 "source": "can-llm-replace-2023", 6837 "target": "swe-bench-2023" 6838 }, 6839 { 6840 "source": "can-llm-replace-2023", 6841 "target": "your-code-generated-2023" 6842 }, 6843 { 6844 "source": "can-llm-replace-2023", 6845 "target": "gorilla-large-language-2023" 6846 }, 6847 { 6848 "source": "can-llms-replace-2025", 6849 "target": "judging-llmasajudge-mtbench-2023" 6850 }, 6851 { 6852 "source": "can-llms-replace-2025", 6853 "target": "codex-humaneval-2021" 6854 }, 6855 { 6856 "source": "can-llms-replace-2025", 6857 "target": "deepseek-coder-v2-2024" 6858 }, 6859 { 6860 "source": "can-llms-replace-2025", 6861 "target": "llms-se-systematic-review-2023" 6862 }, 6863 { 6864 "source": "can-llms-replace-2025", 6865 "target": "compute-optimal-inference-2024" 6866 }, 6867 { 6868 "source": "can-reasoning-models-2025", 6869 "target": "alignment-faking-2024" 6870 }, 6871 { 6872 "source": "can-reasoning-models-2025", 6873 "target": "sleeper-agents-2024" 6874 }, 6875 { 6876 "source": "can-vibe-coding-2025", 6877 "target": "metr-rct-2025" 6878 }, 6879 { 6880 "source": "can-vibe-coding-2025", 6881 "target": "codex-humaneval-2021" 6882 }, 6883 { 6884 "source": "can-vibe-coding-2025", 6885 "target": "swe-bench-2023" 6886 }, 6887 { 6888 "source": "can-vibe-coding-2025", 6889 "target": "livecodebench-2024" 6890 }, 6891 { 6892 "source": "can-vibe-coding-2025", 6893 "target": "survey-llm-code-generation-2025" 6894 }, 6895 { 6896 "source": "can-vibe-coding-2025", 6897 "target": "alphacode-competition-level-2022" 6898 }, 6899 { 6900 "source": "can-vibe-coding-2025", 6901 "target": "bigcodebench-2024" 6902 }, 6903 { 6904 "source": "can-vibe-coding-2025", 6905 "target": "swe-bench-pro-2025" 6906 }, 6907 { 6908 "source": "canaries-coal-mine-2025", 6909 "target": "generative-ai-at-2023" 6910 }, 6911 { 6912 "source": "canaries-coal-mine-2025", 6913 "target": "copilot-productivity-controlled-2023" 6914 }, 6915 { 6916 "source": "capability-ceilings-autoregressive-2025", 6917 "target": "scaling-laws-2020" 6918 }, 6919 { 6920 "source": "capability-ceilings-autoregressive-2025", 6921 "target": "chinchilla-compute-optimal-2022" 6922 }, 6923 { 6924 "source": "capability-ceilings-autoregressive-2025", 6925 "target": "mmlu-measuring-massive-2020" 6926 }, 6927 { 6928 "source": "capability-ceilings-autoregressive-2025", 6929 "target": "emergent-abilities-large-2022" 6930 }, 6931 { 6932 "source": "capability-ceilings-autoregressive-2025", 6933 "target": "emergent-abilities-mirage-2023" 6934 }, 6935 { 6936 "source": "capabilityoriented-training-induced-2026", 6937 "target": "alignment-faking-2024" 6938 }, 6939 { 6940 "source": "capabilityoriented-training-induced-2026", 6941 "target": "sleeper-agents-2024" 6942 }, 6943 { 6944 "source": "capabilityoriented-training-induced-2026", 6945 "target": "deepseek-r1-2025" 6946 }, 6947 { 6948 "source": "capabilityoriented-training-induced-2026", 6949 "target": "livecodebench-2024" 6950 }, 6951 { 6952 "source": "capture-contextaware-prompt-2025", 6953 "target": "not-what-youve-2023" 6954 }, 6955 { 6956 "source": "capture-contextaware-prompt-2025", 6957 "target": "formalizing-benchmarking-prompt-2023" 6958 }, 6959 { 6960 "source": "capture-contextaware-prompt-2025", 6961 "target": "injecguard-benchmarking-mitigating-2024" 6962 }, 6963 { 6964 "source": "capture-contextaware-prompt-2025", 6965 "target": "prompt-injection-llm-apps-2023" 6966 }, 6967 { 6968 "source": "capture-contextaware-prompt-2025", 6969 "target": "jatmo-prompt-injection-2023" 6970 }, 6971 { 6972 "source": "carbon-footprint-evaluation-2025", 6973 "target": "codex-humaneval-2021" 6974 }, 6975 { 6976 "source": "carbon-footprint-evaluation-2025", 6977 "target": "learn-code-sustainably-2024" 6978 }, 6979 { 6980 "source": "carbon-footprint-evaluation-2025", 6981 "target": "your-code-generated-2023" 6982 }, 6983 { 6984 "source": "carbon-footprint-evaluation-2025", 6985 "target": "scaffolded-model-capability-2023" 6986 }, 6987 { 6988 "source": "caredio-cultural-alignment-2025", 6989 "target": "unintended-impacts-llm-2024" 6990 }, 6991 { 6992 "source": "case-4bit-precision-2022", 6993 "target": "smoothquant-accurate-efficient-2022" 6994 }, 6995 { 6996 "source": "case-4bit-precision-2022", 6997 "target": "scaling-laws-2020" 6998 }, 6999 { 7000 "source": "case-study-transformative-2025", 7001 "target": "codex-humaneval-2021" 7002 }, 7003 { 7004 "source": "case-study-transformative-2025", 7005 "target": "no-need-lift-2023" 7006 }, 7007 { 7008 "source": "case-study-transformative-2025", 7009 "target": "performance-study-llmgenerated-2024" 7010 }, 7011 { 7012 "source": "case-study-transformative-2025", 7013 "target": "your-code-generated-2023" 7014 }, 7015 { 7016 "source": "cast-enhancing-code-2025", 7017 "target": "swe-bench-2023" 7018 }, 7019 { 7020 "source": "cast-enhancing-code-2025", 7021 "target": "coderagbench-can-retrieval-2024" 7022 }, 7023 { 7024 "source": "cast-enhancing-code-2025", 7025 "target": "agentless-2024" 7026 }, 7027 { 7028 "source": "cast-enhancing-code-2025", 7029 "target": "codex-humaneval-2021" 7030 }, 7031 { 7032 "source": "cast-enhancing-code-2025", 7033 "target": "starcoder2-2024" 7034 }, 7035 { 7036 "source": "cast-enhancing-code-2025", 7037 "target": "code-llama-2023" 7038 }, 7039 { 7040 "source": "cast-enhancing-code-2025", 7041 "target": "codegrag-bridging-gap-2024" 7042 }, 7043 { 7044 "source": "cast-enhancing-code-2025", 7045 "target": "repocoder-repositorylevel-code-2023" 7046 }, 7047 { 7048 "source": "caster-breaking-costperformance-2026", 7049 "target": "scaffolded-model-capability-2023" 7050 }, 7051 { 7052 "source": "caster-breaking-costperformance-2026", 7053 "target": "metagpt-multi-agent-framework-2023" 7054 }, 7055 { 7056 "source": "caster-breaking-costperformance-2026", 7057 "target": "swe-agent-2024" 7058 }, 7059 { 7060 "source": "caster-breaking-costperformance-2026", 7061 "target": "chatdev-communicative-agents-2023" 7062 }, 7063 { 7064 "source": "catarena-evaluating-evolutionary-2025", 7065 "target": "swe-agent-2024" 7066 }, 7067 { 7068 "source": "catarena-evaluating-evolutionary-2025", 7069 "target": "codex-humaneval-2021" 7070 }, 7071 { 7072 "source": "catarena-evaluating-evolutionary-2025", 7073 "target": "reflexion-language-agents-2023" 7074 }, 7075 { 7076 "source": "catarena-evaluating-evolutionary-2025", 7077 "target": "redcode-risky-code-2024" 7078 }, 7079 { 7080 "source": "catarena-evaluating-evolutionary-2025", 7081 "target": "survey-code-gen-llm-agents-2025" 7082 }, 7083 { 7084 "source": "catch-me-if-2025", 7085 "target": "constitutional-ai-2022" 7086 }, 7087 { 7088 "source": "catch-me-if-2025", 7089 "target": "sleeper-agents-2024" 7090 }, 7091 { 7092 "source": "catch-me-if-2025", 7093 "target": "preventing-rogue-agents-2025" 7094 }, 7095 { 7096 "source": "catch-me-if-2025", 7097 "target": "llm-strategic-deception-under-pressure-2023" 7098 }, 7099 { 7100 "source": "catdb-datacatalogguided-llmbased-2025", 7101 "target": "autogen-multi-agent-2023" 7102 }, 7103 { 7104 "source": "causalarmor-efficient-indirect-2026", 7105 "target": "drift-dynamic-rulebased-2025" 7106 }, 7107 { 7108 "source": "causalarmor-efficient-indirect-2026", 7109 "target": "melon-provable-defense-2025" 7110 }, 7111 { 7112 "source": "causalarmor-efficient-indirect-2026", 7113 "target": "promptarmor-simple-yet-2025" 7114 }, 7115 { 7116 "source": "causalarmor-efficient-indirect-2026", 7117 "target": "adaptive-attacks-bypass-defenses-2025" 7118 }, 7119 { 7120 "source": "causalarmor-efficient-indirect-2026", 7121 "target": "indirect-prompt-injections-2025" 7122 }, 7123 { 7124 "source": "causalarmor-efficient-indirect-2026", 7125 "target": "agent-security-bench-2024" 7126 }, 7127 { 7128 "source": "cbfllm-safe-control-2024", 7129 "target": "constitutional-ai-2022" 7130 }, 7131 { 7132 "source": "cbfllm-safe-control-2024", 7133 "target": "llama-3-herd-2024" 7134 }, 7135 { 7136 "source": "cctest-testing-repairing-2022", 7137 "target": "codex-humaneval-2021" 7138 }, 7139 { 7140 "source": "cctest-testing-repairing-2022", 7141 "target": "alphacode-competition-level-2022" 7142 }, 7143 { 7144 "source": "cctest-testing-repairing-2022", 7145 "target": "codebert-pretrained-model-2020" 7146 }, 7147 { 7148 "source": "chain-of-thought-prompting-2022", 7149 "target": "codex-humaneval-2021" 7150 }, 7151 { 7152 "source": "chain-of-thought-prompting-2022", 7153 "target": "selfconsistency-improves-chain-2022" 7154 }, 7155 { 7156 "source": "chain-of-thought-prompting-2022", 7157 "target": "emergent-abilities-large-2022" 7158 }, 7159 { 7160 "source": "chain-of-thought-prompting-2022", 7161 "target": "lamda-language-models-2022" 7162 }, 7163 { 7164 "source": "chainofthought-prompting-obscures-2025", 7165 "target": "selfcheckgpt-zeroresource-blackbox-2023" 7166 }, 7167 { 7168 "source": "chainofthought-prompting-obscures-2025", 7169 "target": "incontext-sharpness-as-2024" 7170 }, 7171 { 7172 "source": "chainofthought-prompting-obscures-2025", 7173 "target": "deepseek-r1-2025" 7174 }, 7175 { 7176 "source": "chainofthought-prompting-obscures-2025", 7177 "target": "unsupervised-realtime-hallucination-2024" 7178 }, 7179 { 7180 "source": "chainpoll-high-efficacy-2023", 7181 "target": "selfcheckgpt-zeroresource-blackbox-2023" 7182 }, 7183 { 7184 "source": "chainpoll-high-efficacy-2023", 7185 "target": "gpt4-technical-report-2023" 7186 }, 7187 { 7188 "source": "challenge-optimization-context-2025", 7189 "target": "codefill-multitoken-code-2022" 7190 }, 7191 { 7192 "source": "challenge-optimization-context-2025", 7193 "target": "repocoder-repositorylevel-code-2023" 7194 }, 7195 { 7196 "source": "challenge-optimization-context-2025", 7197 "target": "code-llama-2023" 7198 }, 7199 { 7200 "source": "challenge-optimization-context-2025", 7201 "target": "repoformer-selective-retrieval-2024" 7202 }, 7203 { 7204 "source": "challenge-optimization-context-2025", 7205 "target": "rlcoder-reinforcement-learning-2024" 7206 }, 7207 { 7208 "source": "challenge-optimization-context-2025", 7209 "target": "qwen25coder-technical-report-2024" 7210 }, 7211 { 7212 "source": "challenges-humanagent-communication-2024", 7213 "target": "autogen-multi-agent-2023" 7214 }, 7215 { 7216 "source": "challenges-humanagent-communication-2024", 7217 "target": "magenticone-generalist-multiagent-2024" 7218 }, 7219 { 7220 "source": "challenges-humanagent-communication-2024", 7221 "target": "openhands-ai-sw-agent-2024" 7222 }, 7223 { 7224 "source": "challenges-paths-ai-2025", 7225 "target": "swe-bench-2023" 7226 }, 7227 { 7228 "source": "challenges-paths-ai-2025", 7229 "target": "swe-agent-2024" 7230 }, 7231 { 7232 "source": "challenges-paths-ai-2025", 7233 "target": "deepseek-r1-2025" 7234 }, 7235 { 7236 "source": "challenges-paths-ai-2025", 7237 "target": "copilot-arena-platform-2025" 7238 }, 7239 { 7240 "source": "challenges-paths-ai-2025", 7241 "target": "livecodebench-2024" 7242 }, 7243 { 7244 "source": "challenges-paths-ai-2025", 7245 "target": "codex-humaneval-2021" 7246 }, 7247 { 7248 "source": "challenges-paths-ai-2025", 7249 "target": "agentless-2024" 7250 }, 7251 { 7252 "source": "challenges-paths-ai-2025", 7253 "target": "openhands-ai-sw-agent-2024" 7254 }, 7255 { 7256 "source": "changes-coding-behavior-2026", 7257 "target": "metr-rct-2025" 7258 }, 7259 { 7260 "source": "changes-coding-behavior-2026", 7261 "target": "empirical-study-usage-2024" 7262 }, 7263 { 7264 "source": "characterizing-llm-inference-2025", 7265 "target": "scaffolded-model-capability-2023" 7266 }, 7267 { 7268 "source": "characterizing-llm-inference-2025", 7269 "target": "sloaware-gpu-dvfs-2024" 7270 }, 7271 { 7272 "source": "chasing-progress-not-2024", 7273 "target": "chain-of-thought-prompting-2022" 7274 }, 7275 { 7276 "source": "chat-bankmanfried-exploration-2024", 7277 "target": "llm-strategic-deception-under-pressure-2023" 7278 }, 7279 { 7280 "source": "chatassert-llmbased-test-2025", 7281 "target": "codamosa-escaping-coverage-2023" 7282 }, 7283 { 7284 "source": "chatassert-llmbased-test-2025", 7285 "target": "automated-program-repair-2023" 7286 }, 7287 { 7288 "source": "chatassert-llmbased-test-2025", 7289 "target": "codex-humaneval-2021" 7290 }, 7291 { 7292 "source": "chatassert-llmbased-test-2025", 7293 "target": "chain-of-thought-prompting-2022" 7294 }, 7295 { 7296 "source": "chatassert-llmbased-test-2025", 7297 "target": "less-training-more-2022" 7298 }, 7299 { 7300 "source": "chatbot-arena-open-2024", 7301 "target": "judging-llmasajudge-mtbench-2023" 7302 }, 7303 { 7304 "source": "chatbot-arena-open-2024", 7305 "target": "codex-humaneval-2021" 7306 }, 7307 { 7308 "source": "chatbot-arena-open-2024", 7309 "target": "rethinking-benchmark-contamination-2023" 7310 }, 7311 { 7312 "source": "chatbot-arena-open-2024", 7313 "target": "gpt4-technical-report-2023" 7314 }, 7315 { 7316 "source": "chatdev-communicative-agents-2023", 7317 "target": "metagpt-multi-agent-framework-2023" 7318 }, 7319 { 7320 "source": "chatdev-communicative-agents-2023", 7321 "target": "generative-agents-interactive-2023" 7322 }, 7323 { 7324 "source": "chatdev-communicative-agents-2023", 7325 "target": "voyager-open-ended-2023" 7326 }, 7327 { 7328 "source": "chatdev-communicative-agents-2023", 7329 "target": "codex-humaneval-2021" 7330 }, 7331 { 7332 "source": "chatdev-communicative-agents-2023", 7333 "target": "toolllm-facilitating-large-2023" 7334 }, 7335 { 7336 "source": "chatdev-communicative-agents-2023", 7337 "target": "chateval-better-llmbased-2023" 7338 }, 7339 { 7340 "source": "chatdev-communicative-agents-2023", 7341 "target": "webarena-autonomous-agents-2023" 7342 }, 7343 { 7344 "source": "chateval-better-llmbased-2023", 7345 "target": "judging-llmasajudge-mtbench-2023" 7346 }, 7347 { 7348 "source": "chateval-better-llmbased-2023", 7349 "target": "improving-factuality-reasoning-2023" 7350 }, 7351 { 7352 "source": "chateval-better-llmbased-2023", 7353 "target": "camel-communicative-agents-2023" 7354 }, 7355 { 7356 "source": "chateval-better-llmbased-2023", 7357 "target": "chatdev-communicative-agents-2023" 7358 }, 7359 { 7360 "source": "chateval-better-llmbased-2023", 7361 "target": "generative-agents-interactive-2023" 7362 }, 7363 { 7364 "source": "chatgpt-agent-system-2025", 7365 "target": "building-early-warning-2024" 7366 }, 7367 { 7368 "source": "chatgpt-agent-system-2025", 7369 "target": "paperbench-evaluating-ais-2025" 7370 }, 7371 { 7372 "source": "chatgpt-not-all-2023", 7373 "target": "codex-humaneval-2021" 7374 }, 7375 { 7376 "source": "chatgpt-not-all-2023", 7377 "target": "alphacode-competition-level-2022" 7378 }, 7379 { 7380 "source": "chatgpt-not-all-2023", 7381 "target": "lamda-language-models-2022" 7382 }, 7383 { 7384 "source": "chatinject-abusing-chat-2025", 7385 "target": "adaptive-attacks-break-2025" 7386 }, 7387 { 7388 "source": "chatinject-abusing-chat-2025", 7389 "target": "not-what-youve-2023" 7390 }, 7391 { 7392 "source": "chatinject-abusing-chat-2025", 7393 "target": "secalign-defending-against-2024" 7394 }, 7395 { 7396 "source": "chatinject-abusing-chat-2025", 7397 "target": "agent-security-bench-2024" 7398 }, 7399 { 7400 "source": "chatinject-abusing-chat-2025", 7401 "target": "react-synergizing-reasoning-2022" 7402 }, 7403 { 7404 "source": "chatinject-abusing-chat-2025", 7405 "target": "defending-against-indirect-2024" 7406 }, 7407 { 7408 "source": "chatofthought-collaborative-multiagent-2025", 7409 "target": "gpt4-technical-report-2023" 7410 }, 7411 { 7412 "source": "chatofthought-collaborative-multiagent-2025", 7413 "target": "generative-agents-interactive-2023" 7414 }, 7415 { 7416 "source": "chatofthought-collaborative-multiagent-2025", 7417 "target": "autogen-multi-agent-2023" 7418 }, 7419 { 7420 "source": "chatofthought-collaborative-multiagent-2025", 7421 "target": "llama-open-efficient-2023" 7422 }, 7423 { 7424 "source": "chatunitest-framework-llmbased-2023", 7425 "target": "codex-humaneval-2021" 7426 }, 7427 { 7428 "source": "chatunitest-framework-llmbased-2023", 7429 "target": "adaptive-test-generation-2023" 7430 }, 7431 { 7432 "source": "chatunitest-framework-llmbased-2023", 7433 "target": "no-more-manual-2023" 7434 }, 7435 { 7436 "source": "chatunitest-framework-llmbased-2023", 7437 "target": "code-llama-2023" 7438 }, 7439 { 7440 "source": "chatunitest-framework-llmbased-2023", 7441 "target": "coverup-effective-high-2024" 7442 }, 7443 { 7444 "source": "chatunitest-framework-llmbased-2023", 7445 "target": "codeaware-prompting-study-2024" 7446 }, 7447 { 7448 "source": "chatunitest-framework-llmbased-2023", 7449 "target": "lost-middle-how-2023" 7450 }, 7451 { 7452 "source": "check-your-facts-2023", 7453 "target": "toolformer-language-models-2023" 7454 }, 7455 { 7456 "source": "checkpointgcg-auditing-attacking-2025", 7457 "target": "secalign-defending-against-2024" 7458 }, 7459 { 7460 "source": "checkpointgcg-auditing-attacking-2025", 7461 "target": "not-what-youve-2023" 7462 }, 7463 { 7464 "source": "chinchilla-compute-optimal-2022", 7465 "target": "scaling-laws-2020" 7466 }, 7467 { 7468 "source": "chinchilla-compute-optimal-2022", 7469 "target": "mmlu-measuring-massive-2020" 7470 }, 7471 { 7472 "source": "chinchilla-compute-optimal-2022", 7473 "target": "unified-scaling-laws-2022" 7474 }, 7475 { 7476 "source": "chipbench-nextstep-benchmark-2026", 7477 "target": "verilogeval-evaluating-large-2023" 7478 }, 7479 { 7480 "source": "chipbench-nextstep-benchmark-2026", 7481 "target": "deepseek-r1-2025" 7482 }, 7483 { 7484 "source": "chipbench-nextstep-benchmark-2026", 7485 "target": "survey-code-gen-llm-agents-2025" 7486 }, 7487 { 7488 "source": "chopchop-programmable-framework-2025", 7489 "target": "monitorguided-decoding-code-2023" 7490 }, 7491 { 7492 "source": "chopchop-programmable-framework-2025", 7493 "target": "syncode-llm-generation-2024" 7494 }, 7495 { 7496 "source": "chopchop-programmable-framework-2025", 7497 "target": "efficient-guided-generation-2023" 7498 }, 7499 { 7500 "source": "chorus-zeroshot-hierarchical-2025", 7501 "target": "llama-3-herd-2024" 7502 }, 7503 { 7504 "source": "chorus-zeroshot-hierarchical-2025", 7505 "target": "phi4-technical-report-2024" 7506 }, 7507 { 7508 "source": "chorus-zeroshot-hierarchical-2025", 7509 "target": "deepseek-r1-2025" 7510 }, 7511 { 7512 "source": "chorus-zeroshot-hierarchical-2025", 7513 "target": "qwen25coder-technical-report-2024" 7514 }, 7515 { 7516 "source": "chorus-zeroshot-hierarchical-2025", 7517 "target": "gpt4-technical-report-2023" 7518 }, 7519 { 7520 "source": "chorus-zeroshot-hierarchical-2025", 7521 "target": "chain-of-thought-prompting-2022" 7522 }, 7523 { 7524 "source": "ciata-risk-assessment-2025", 7525 "target": "not-what-youve-2023" 7526 }, 7527 { 7528 "source": "ciata-risk-assessment-2025", 7529 "target": "prompt-injection-llm-apps-2023" 7530 }, 7531 { 7532 "source": "ciata-risk-assessment-2025", 7533 "target": "automatic-universal-prompt-2024" 7534 }, 7535 { 7536 "source": "ciata-risk-assessment-2025", 7537 "target": "sleeper-agents-2024" 7538 }, 7539 { 7540 "source": "ciata-risk-assessment-2025", 7541 "target": "constitutional-ai-2022" 7542 }, 7543 { 7544 "source": "cigar-costefficient-program-2024", 7545 "target": "automated-program-repair-2023" 7546 }, 7547 { 7548 "source": "cigar-costefficient-program-2024", 7549 "target": "impact-code-language-2023" 7550 }, 7551 { 7552 "source": "cigar-costefficient-program-2024", 7553 "target": "scaffolded-model-capability-2023" 7554 }, 7555 { 7556 "source": "cigar-costefficient-program-2024", 7557 "target": "codex-humaneval-2021" 7558 }, 7559 { 7560 "source": "cigar-costefficient-program-2024", 7561 "target": "automated-program-repair-2022" 7562 }, 7563 { 7564 "source": "cigar-costefficient-program-2024", 7565 "target": "critical-review-large-2023" 7566 }, 7567 { 7568 "source": "citationenhanced-generation-llmbased-2024", 7569 "target": "selfcheckgpt-zeroresource-blackbox-2023" 7570 }, 7571 { 7572 "source": "citationenhanced-generation-llmbased-2024", 7573 "target": "sirens-song-ai-2023" 7574 }, 7575 { 7576 "source": "citationenhanced-generation-llmbased-2024", 7577 "target": "judging-llmasajudge-mtbench-2023" 7578 }, 7579 { 7580 "source": "citationgrounded-code-comprehension-2025", 7581 "target": "codex-humaneval-2021" 7582 }, 7583 { 7584 "source": "citationgrounded-code-comprehension-2025", 7585 "target": "codebert-pretrained-model-2020" 7586 }, 7587 { 7588 "source": "citationgrounded-code-comprehension-2025", 7589 "target": "code-llama-2023" 7590 }, 7591 { 7592 "source": "citywalk-enhancing-llmbased-2025", 7593 "target": "chatunitest-framework-llmbased-2023" 7594 }, 7595 { 7596 "source": "citywalk-enhancing-llmbased-2025", 7597 "target": "hits-highcoverage-llmbased-2024" 7598 }, 7599 { 7600 "source": "citywalk-enhancing-llmbased-2025", 7601 "target": "codeaware-prompting-study-2024" 7602 }, 7603 { 7604 "source": "citywalk-enhancing-llmbased-2025", 7605 "target": "llm-unit-test-generation-empirical-2024" 7606 }, 7607 { 7608 "source": "citywalk-enhancing-llmbased-2025", 7609 "target": "automated-unit-test-2024" 7610 }, 7611 { 7612 "source": "citywalk-enhancing-llmbased-2025", 7613 "target": "deepseek-v3-2024" 7614 }, 7615 { 7616 "source": "citywalk-enhancing-llmbased-2025", 7617 "target": "llm-hallucinations-code-practical-2024" 7618 }, 7619 { 7620 "source": "ckgfuzzer-llmbased-fuzz-2025", 7621 "target": "llms-se-systematic-review-2023" 7622 }, 7623 { 7624 "source": "ckgfuzzer-llmbased-fuzz-2025", 7625 "target": "llm-fuzzing-challenges-2024" 7626 }, 7627 { 7628 "source": "clarifygpt-empowering-llmbased-2023", 7629 "target": "codex-humaneval-2021" 7630 }, 7631 { 7632 "source": "clarifygpt-empowering-llmbased-2023", 7633 "target": "chain-of-thought-prompting-2022" 7634 }, 7635 { 7636 "source": "clarifygpt-empowering-llmbased-2023", 7637 "target": "your-code-generated-2023" 7638 }, 7639 { 7640 "source": "clarifygpt-empowering-llmbased-2023", 7641 "target": "codamosa-escaping-coverage-2023" 7642 }, 7643 { 7644 "source": "clarifygpt-empowering-llmbased-2023", 7645 "target": "gpt4-technical-report-2023" 7646 }, 7647 { 7648 "source": "clarifygpt-empowering-llmbased-2023", 7649 "target": "interactive-code-generation-2022" 7650 }, 7651 { 7652 "source": "clarifygpt-empowering-llmbased-2023", 7653 "target": "alphacode-competition-level-2022" 7654 }, 7655 { 7656 "source": "clarifygpt-framework-enhancing-2024", 7657 "target": "codex-humaneval-2021" 7658 }, 7659 { 7660 "source": "clarifygpt-framework-enhancing-2024", 7661 "target": "your-code-generated-2023" 7662 }, 7663 { 7664 "source": "clarifygpt-framework-enhancing-2024", 7665 "target": "codamosa-escaping-coverage-2023" 7666 }, 7667 { 7668 "source": "clarifygpt-framework-enhancing-2024", 7669 "target": "interactive-code-generation-2022" 7670 }, 7671 { 7672 "source": "clarifygpt-framework-enhancing-2024", 7673 "target": "chain-of-thought-prompting-2022" 7674 }, 7675 { 7676 "source": "classeval-manuallycrafted-benchmark-2023", 7677 "target": "codex-humaneval-2021" 7678 }, 7679 { 7680 "source": "classeval-manuallycrafted-benchmark-2023", 7681 "target": "your-code-generated-2023" 7682 }, 7683 { 7684 "source": "classeval-manuallycrafted-benchmark-2023", 7685 "target": "wizardcoder-empowering-code-2023" 7686 }, 7687 { 7688 "source": "classeval-manuallycrafted-benchmark-2023", 7689 "target": "starcoder-2023" 7690 }, 7691 { 7692 "source": "classeval-manuallycrafted-benchmark-2023", 7693 "target": "lost-middle-how-2023" 7694 }, 7695 { 7696 "source": "classeval-manuallycrafted-benchmark-2023", 7697 "target": "gpt4-technical-report-2023" 7698 }, 7699 { 7700 "source": "classeval-manuallycrafted-benchmark-2023", 7701 "target": "alphacode-competition-level-2022" 7702 }, 7703 { 7704 "source": "classifying-addressing-diversity-2025", 7705 "target": "ares-automated-evaluation-2023" 7706 }, 7707 { 7708 "source": "classifying-addressing-diversity-2025", 7709 "target": "judging-llmasajudge-mtbench-2023" 7710 }, 7711 { 7712 "source": "classifying-addressing-diversity-2025", 7713 "target": "survey-hallucination-large-2023-2" 7714 }, 7715 { 7716 "source": "classifying-addressing-diversity-2025", 7717 "target": "selfconsistency-improves-chain-2022" 7718 }, 7719 { 7720 "source": "classit-conversational-lecturealigned-2025", 7721 "target": "emergent-abilities-large-2022" 7722 }, 7723 { 7724 "source": "classit-conversational-lecturealigned-2025", 7725 "target": "emergent-abilities-mirage-2023" 7726 }, 7727 { 7728 "source": "classit-conversational-lecturealigned-2025", 7729 "target": "llama-3-herd-2024" 7730 }, 7731 { 7732 "source": "claude-sonnet-45-2025", 7733 "target": "alignment-faking-2024" 7734 }, 7735 { 7736 "source": "cloud-platforms-developing-2024", 7737 "target": "gpt4-technical-report-2023" 7738 }, 7739 { 7740 "source": "cloud-platforms-developing-2024", 7741 "target": "llama-3-herd-2024" 7742 }, 7743 { 7744 "source": "cloudevalyaml-practical-benchmark-2023", 7745 "target": "codex-humaneval-2021" 7746 }, 7747 { 7748 "source": "cloudevalyaml-practical-benchmark-2023", 7749 "target": "code-llama-2023" 7750 }, 7751 { 7752 "source": "cloudevalyaml-practical-benchmark-2023", 7753 "target": "wizardcoder-empowering-code-2023" 7754 }, 7755 { 7756 "source": "cloudfix-automated-policy-2025", 7757 "target": "llms-se-systematic-review-2023" 7758 }, 7759 { 7760 "source": "cloudfix-automated-policy-2025", 7761 "target": "systematic-literature-review-2024" 7762 }, 7763 { 7764 "source": "cloudfix-automated-policy-2025", 7765 "target": "hybrid-automated-program-2024" 7766 }, 7767 { 7768 "source": "cloudfix-automated-policy-2025", 7769 "target": "deep-dive-into-2024-2" 7770 }, 7771 { 7772 "source": "cloudfix-automated-policy-2025", 7773 "target": "automated-program-repair-2022" 7774 }, 7775 { 7776 "source": "cloudfix-automated-policy-2025", 7777 "target": "exploring-generalizable-automated-2025" 7778 }, 7779 { 7780 "source": "cloudfix-automated-policy-2025", 7781 "target": "survey-hallucination-large-2023-2" 7782 }, 7783 { 7784 "source": "cmoe-converting-mixtureofexperts-2025", 7785 "target": "deepseek-v3-2024" 7786 }, 7787 { 7788 "source": "cocomic-code-completion-2022", 7789 "target": "codex-humaneval-2021" 7790 }, 7791 { 7792 "source": "cocomic-code-completion-2022", 7793 "target": "repocoder-repositorylevel-code-2023" 7794 }, 7795 { 7796 "source": "cocomic-code-completion-2022", 7797 "target": "alphacode-competition-level-2022" 7798 }, 7799 { 7800 "source": "codamosa-escaping-coverage-2023", 7801 "target": "codex-humaneval-2021" 7802 }, 7803 { 7804 "source": "codamosa-escaping-coverage-2023", 7805 "target": "productivity-assessment-neural-2022" 7806 }, 7807 { 7808 "source": "codamosa-escaping-coverage-2023", 7809 "target": "alphacode-competition-level-2022" 7810 }, 7811 { 7812 "source": "code-aesthetics-agentic-2025", 7813 "target": "drawing-pandas-benchmark-2024" 7814 }, 7815 { 7816 "source": "code-aesthetics-agentic-2025", 7817 "target": "judging-llmasajudge-mtbench-2023" 7818 }, 7819 { 7820 "source": "code-hallucination-2024", 7821 "target": "codex-humaneval-2021" 7822 }, 7823 { 7824 "source": "code-hallucination-2024", 7825 "target": "beyond-functional-correctness-2024" 7826 }, 7827 { 7828 "source": "code-hallucination-2024", 7829 "target": "swe-bench-2023" 7830 }, 7831 { 7832 "source": "code-hallucination-2024", 7833 "target": "your-code-generated-2023" 7834 }, 7835 { 7836 "source": "code-hallucination-2024", 7837 "target": "llms-se-systematic-review-2023" 7838 }, 7839 { 7840 "source": "code-hallucination-2024", 7841 "target": "hallucination-inevitable-innate-2024" 7842 }, 7843 { 7844 "source": "code-hallucinations-slr-2025", 7845 "target": "hallucination-by-code-2025" 7846 }, 7847 { 7848 "source": "code-hallucinations-slr-2025", 7849 "target": "beyond-functional-correctness-2024" 7850 }, 7851 { 7852 "source": "code-hallucinations-slr-2025", 7853 "target": "llm-hallucinations-code-practical-2024" 7854 }, 7855 { 7856 "source": "code-hallucinations-slr-2025", 7857 "target": "codemirage-hallucinations-code-2024" 7858 }, 7859 { 7860 "source": "code-hallucinations-slr-2025", 7861 "target": "survey-hallucination-large-2023-2" 7862 }, 7863 { 7864 "source": "code-hallucinations-slr-2025", 7865 "target": "dehallucinator-mitigating-llm-2024" 7866 }, 7867 { 7868 "source": "code-hallucinations-slr-2025", 7869 "target": "mitigating-api-hallucination-2025" 7870 }, 7871 { 7872 "source": "code-hallucinations-slr-2025", 7873 "target": "gorilla-large-language-2023" 7874 }, 7875 { 7876 "source": "code-hallucinations-slr-2025", 7877 "target": "bugs-large-language-2024" 7878 }, 7879 { 7880 "source": "code-hallucinations-slr-2025", 7881 "target": "identifying-mitigating-api-2025" 7882 }, 7883 { 7884 "source": "code-less-align-2024", 7885 "target": "magicoder-source-code-2023" 7886 }, 7887 { 7888 "source": "code-less-align-2024", 7889 "target": "wizardcoder-empowering-code-2023" 7890 }, 7891 { 7892 "source": "code-less-align-2024", 7893 "target": "codex-humaneval-2021" 7894 }, 7895 { 7896 "source": "code-less-align-2024", 7897 "target": "your-code-generated-2023" 7898 }, 7899 { 7900 "source": "code-less-align-2024", 7901 "target": "code-llama-2023" 7902 }, 7903 { 7904 "source": "code-less-align-2024", 7905 "target": "gpt4-technical-report-2023" 7906 }, 7907 { 7908 "source": "code-llama-2023", 7909 "target": "codex-humaneval-2021" 7910 }, 7911 { 7912 "source": "code-llama-2023", 7913 "target": "starcoder-2023" 7914 }, 7915 { 7916 "source": "code-llama-2023", 7917 "target": "alphacode-competition-level-2022" 7918 }, 7919 { 7920 "source": "code-llama-2023", 7921 "target": "gpt4-technical-report-2023" 7922 }, 7923 { 7924 "source": "code-llama-2023", 7925 "target": "multiple-scalable-polyglot-2023" 7926 }, 7927 { 7928 "source": "code-me-me-2025", 7929 "target": "openhands-ai-sw-agent-2024" 7930 }, 7931 { 7932 "source": "code-me-me-2025", 7933 "target": "swe-bench-2023" 7934 }, 7935 { 7936 "source": "code-me-me-2025", 7937 "target": "copilot-productivity-controlled-2023" 7938 }, 7939 { 7940 "source": "code-me-me-2025", 7941 "target": "gaia-benchmark-general-2023" 7942 }, 7943 { 7944 "source": "code-me-me-2025", 7945 "target": "metr-rct-2025" 7946 }, 7947 { 7948 "source": "code-me-me-2025", 7949 "target": "rise-ai-teammates-2025" 7950 }, 7951 { 7952 "source": "code-me-me-2025", 7953 "target": "ambigswe-interactive-agents-2025" 7954 }, 7955 { 7956 "source": "code-review-automation-2025", 7957 "target": "codebert-pretrained-model-2020" 7958 }, 7959 { 7960 "source": "code-review-survey-pre-post-llm-2026", 7961 "target": "codereviewqa-code-review-2025" 7962 }, 7963 { 7964 "source": "code-review-survey-pre-post-llm-2026", 7965 "target": "bitsaicr-automated-code-2025" 7966 }, 7967 { 7968 "source": "code-review-survey-pre-post-llm-2026", 7969 "target": "aiassisted-fixes-code-2025" 7970 }, 7971 { 7972 "source": "code-review-survey-pre-post-llm-2026", 7973 "target": "llm-code-review-benchmarking-2025" 7974 }, 7975 { 7976 "source": "codearena-collective-evaluation-2025", 7977 "target": "codex-humaneval-2021" 7978 }, 7979 { 7980 "source": "codearena-collective-evaluation-2025", 7981 "target": "livecodebench-2024" 7982 }, 7983 { 7984 "source": "codearena-collective-evaluation-2025", 7985 "target": "bigcodebench-2024" 7986 }, 7987 { 7988 "source": "codearena-collective-evaluation-2025", 7989 "target": "mercury-efficiency-benchmark-2024" 7990 }, 7991 { 7992 "source": "codearena-collective-evaluation-2025", 7993 "target": "deepseek-coder-v2-2024" 7994 }, 7995 { 7996 "source": "codearena-collective-evaluation-2025", 7997 "target": "your-code-generated-2023" 7998 }, 7999 { 8000 "source": "codearena-collective-evaluation-2025", 8001 "target": "starcoder2-2024" 8002 }, 8003 { 8004 "source": "codearena-collective-evaluation-2025", 8005 "target": "code-llama-2023" 8006 }, 8007 { 8008 "source": "codeaware-prompting-study-2024", 8009 "target": "codamosa-escaping-coverage-2023" 8010 }, 8011 { 8012 "source": "codeaware-prompting-study-2024", 8013 "target": "codex-humaneval-2021" 8014 }, 8015 { 8016 "source": "codeaware-prompting-study-2024", 8017 "target": "no-more-manual-2023" 8018 }, 8019 { 8020 "source": "codeaware-prompting-study-2024", 8021 "target": "adaptive-test-generation-2023" 8022 }, 8023 { 8024 "source": "codeaware-prompting-study-2024", 8025 "target": "chatunitest-framework-llmbased-2023" 8026 }, 8027 { 8028 "source": "codeaware-prompting-study-2024", 8029 "target": "chain-of-thought-prompting-2022" 8030 }, 8031 { 8032 "source": "codeaware-prompting-study-2024", 8033 "target": "tree-thoughts-deliberate-2023" 8034 }, 8035 { 8036 "source": "codebenchgen-creating-scalable-2024", 8037 "target": "codex-humaneval-2021" 8038 }, 8039 { 8040 "source": "codebenchgen-creating-scalable-2024", 8041 "target": "swe-bench-2023" 8042 }, 8043 { 8044 "source": "codebenchgen-creating-scalable-2024", 8045 "target": "your-code-generated-2023" 8046 }, 8047 { 8048 "source": "codebenchgen-creating-scalable-2024", 8049 "target": "repocoder-repositorylevel-code-2023" 8050 }, 8051 { 8052 "source": "codebenchgen-creating-scalable-2024", 8053 "target": "code-llama-2023" 8054 }, 8055 { 8056 "source": "codebert-pretrained-model-2020", 8057 "target": "bert-pretraining-deep-2018" 8058 }, 8059 { 8060 "source": "codecontests-highquality-test-2025", 8061 "target": "codex-humaneval-2021" 8062 }, 8063 { 8064 "source": "codecontests-highquality-test-2025", 8065 "target": "competitive-programming-reasoning-models-2025" 8066 }, 8067 { 8068 "source": "codecontests-highquality-test-2025", 8069 "target": "deepseek-r1-2025" 8070 }, 8071 { 8072 "source": "codecontests-highquality-test-2025", 8073 "target": "alphacode-competition-level-2022" 8074 }, 8075 { 8076 "source": "codecontests-highquality-test-2025", 8077 "target": "your-code-generated-2023" 8078 }, 8079 { 8080 "source": "codecontests-highquality-test-2025", 8081 "target": "livecodebench-2024" 8082 }, 8083 { 8084 "source": "codecontests-highquality-test-2025", 8085 "target": "dapo-opensource-llm-2025" 8086 }, 8087 { 8088 "source": "codecontests-highquality-test-2025", 8089 "target": "qwen25-technical-report-2024" 8090 }, 8091 { 8092 "source": "codecor-llmbased-selfreflective-2025", 8093 "target": "mapcoder-multiagent-code-2024" 8094 }, 8095 { 8096 "source": "codecor-llmbased-selfreflective-2025", 8097 "target": "metagpt-multi-agent-framework-2023" 8098 }, 8099 { 8100 "source": "codecor-llmbased-selfreflective-2025", 8101 "target": "chatdev-communicative-agents-2023" 8102 }, 8103 { 8104 "source": "codecor-llmbased-selfreflective-2025", 8105 "target": "reflexion-language-agents-2023" 8106 }, 8107 { 8108 "source": "codecor-llmbased-selfreflective-2025", 8109 "target": "codex-humaneval-2021" 8110 }, 8111 { 8112 "source": "codecor-llmbased-selfreflective-2025", 8113 "target": "code-llama-2023" 8114 }, 8115 { 8116 "source": "codecriticbench-holistic-code-2025", 8117 "target": "codex-humaneval-2021" 8118 }, 8119 { 8120 "source": "codecriticbench-holistic-code-2025", 8121 "target": "livecodebench-2024" 8122 }, 8123 { 8124 "source": "codecriticbench-holistic-code-2025", 8125 "target": "llm-critics-help-2024" 8126 }, 8127 { 8128 "source": "codecriticbench-holistic-code-2025", 8129 "target": "qwen25coder-technical-report-2024" 8130 }, 8131 { 8132 "source": "codecriticbench-holistic-code-2025", 8133 "target": "deepseek-coder-2024" 8134 }, 8135 { 8136 "source": "codediting-reasoningbased-metric-2025", 8137 "target": "codex-humaneval-2021" 8138 }, 8139 { 8140 "source": "codediting-reasoningbased-metric-2025", 8141 "target": "deepseek-r1-2025" 8142 }, 8143 { 8144 "source": "codediting-reasoningbased-metric-2025", 8145 "target": "your-code-generated-2023" 8146 }, 8147 { 8148 "source": "codediting-reasoningbased-metric-2025", 8149 "target": "bigcodebench-2024" 8150 }, 8151 { 8152 "source": "codediting-reasoningbased-metric-2025", 8153 "target": "codejudge-evaluating-code-2024" 8154 }, 8155 { 8156 "source": "codediting-reasoningbased-metric-2025", 8157 "target": "judging-llmasajudge-mtbench-2023" 8158 }, 8159 { 8160 "source": "codediting-reasoningbased-metric-2025", 8161 "target": "chain-of-thought-prompting-2022" 8162 }, 8163 { 8164 "source": "codediting-reasoningbased-metric-2025", 8165 "target": "llms-se-systematic-review-2023" 8166 }, 8167 { 8168 "source": "codediting-reasoningbased-metric-2025", 8169 "target": "from-code-courtroom-2025" 8170 }, 8171 { 8172 "source": "codeelo-benchmarking-competitionlevel-2025", 8173 "target": "livecodebench-2024" 8174 }, 8175 { 8176 "source": "codeelo-benchmarking-competitionlevel-2025", 8177 "target": "alphacode-competition-level-2022" 8178 }, 8179 { 8180 "source": "codeelo-benchmarking-competitionlevel-2025", 8181 "target": "codex-humaneval-2021" 8182 }, 8183 { 8184 "source": "codeelo-benchmarking-competitionlevel-2025", 8185 "target": "bigcodebench-2024" 8186 }, 8187 { 8188 "source": "codeelo-benchmarking-competitionlevel-2025", 8189 "target": "qwen25coder-technical-report-2024" 8190 }, 8191 { 8192 "source": "codeelo-benchmarking-competitionlevel-2025", 8193 "target": "deepseek-coder-2024" 8194 }, 8195 { 8196 "source": "codeelo-benchmarking-competitionlevel-2025", 8197 "target": "deepseek-coder-v2-2024" 8198 }, 8199 { 8200 "source": "codeelo-benchmarking-competitionlevel-2025", 8201 "target": "llama-3-herd-2024" 8202 }, 8203 { 8204 "source": "codegrag-bridging-gap-2024", 8205 "target": "gpt4-technical-report-2023" 8206 }, 8207 { 8208 "source": "codegrag-bridging-gap-2024", 8209 "target": "code-llama-2023" 8210 }, 8211 { 8212 "source": "codegrag-bridging-gap-2024", 8213 "target": "reacc-retrievalaugmented-code-2022" 8214 }, 8215 { 8216 "source": "codegrag-bridging-gap-2024", 8217 "target": "evor-evolving-retrieval-2024" 8218 }, 8219 { 8220 "source": "codegrag-bridging-gap-2024", 8221 "target": "codebert-pretrained-model-2020" 8222 }, 8223 { 8224 "source": "codeinsight-curated-dataset-2024", 8225 "target": "codex-humaneval-2021" 8226 }, 8227 { 8228 "source": "codeinsight-curated-dataset-2024", 8229 "target": "code-llama-2023" 8230 }, 8231 { 8232 "source": "codeinsight-curated-dataset-2024", 8233 "target": "starcoder-2023" 8234 }, 8235 { 8236 "source": "codejudge-evaluating-code-2024", 8237 "target": "codex-humaneval-2021" 8238 }, 8239 { 8240 "source": "codejudge-evaluating-code-2024", 8241 "target": "your-code-generated-2023" 8242 }, 8243 { 8244 "source": "codejudge-evaluating-code-2024", 8245 "target": "judging-llmasajudge-mtbench-2023" 8246 }, 8247 { 8248 "source": "codejudge-evaluating-code-2024", 8249 "target": "bigcodebench-2024" 8250 }, 8251 { 8252 "source": "codejudge-evaluating-code-2024", 8253 "target": "chateval-better-llmbased-2023" 8254 }, 8255 { 8256 "source": "codejudge-evaluating-code-2024", 8257 "target": "chain-of-thought-prompting-2022" 8258 }, 8259 { 8260 "source": "codejudgebench-benchmarking-llmasajudge-2025", 8261 "target": "judging-llmasajudge-mtbench-2023" 8262 }, 8263 { 8264 "source": "codejudgebench-benchmarking-llmasajudge-2025", 8265 "target": "livecodebench-2024" 8266 }, 8267 { 8268 "source": "codejudgebench-benchmarking-llmasajudge-2025", 8269 "target": "from-code-courtroom-2025" 8270 }, 8271 { 8272 "source": "codemark-imperceptible-watermarking-2023", 8273 "target": "starcoder-2023" 8274 }, 8275 { 8276 "source": "codemirage-hallucinations-code-2024", 8277 "target": "codex-humaneval-2021" 8278 }, 8279 { 8280 "source": "codemirage-hallucinations-code-2024", 8281 "target": "code-llama-2023" 8282 }, 8283 { 8284 "source": "codemirage-hallucinations-code-2024", 8285 "target": "codebert-pretrained-model-2020" 8286 }, 8287 { 8288 "source": "codemirage-hallucinations-code-2024", 8289 "target": "your-code-generated-2023" 8290 }, 8291 { 8292 "source": "codemirage-hallucinations-code-2024", 8293 "target": "large-language-models-2023-3" 8294 }, 8295 { 8296 "source": "codemirage-hallucinations-code-2024", 8297 "target": "sirens-song-ai-2023" 8298 }, 8299 { 8300 "source": "codemmlu-multitask-benchmark-2024", 8301 "target": "codex-humaneval-2021" 8302 }, 8303 { 8304 "source": "codemmlu-multitask-benchmark-2024", 8305 "target": "mmlu-measuring-massive-2020" 8306 }, 8307 { 8308 "source": "codemmlu-multitask-benchmark-2024", 8309 "target": "cruxeval-benchmark-code-2024" 8310 }, 8311 { 8312 "source": "codemmlu-multitask-benchmark-2024", 8313 "target": "livecodebench-2024" 8314 }, 8315 { 8316 "source": "codemmlu-multitask-benchmark-2024", 8317 "target": "leakage-code-generation-2024" 8318 }, 8319 { 8320 "source": "codemmlu-multitask-benchmark-2024", 8321 "target": "your-code-generated-2023" 8322 }, 8323 { 8324 "source": "codemmlu-multitask-benchmark-2024", 8325 "target": "chain-of-thought-prompting-2022" 8326 }, 8327 { 8328 "source": "codemmlu-multitask-benchmark-2024", 8329 "target": "starcoder2-2024" 8330 }, 8331 { 8332 "source": "codemmlu-multitask-benchmark-2024-2", 8333 "target": "codex-humaneval-2021" 8334 }, 8335 { 8336 "source": "codemmlu-multitask-benchmark-2024-2", 8337 "target": "mmlu-measuring-massive-2020" 8338 }, 8339 { 8340 "source": "codemmlu-multitask-benchmark-2024-2", 8341 "target": "cruxeval-benchmark-code-2024" 8342 }, 8343 { 8344 "source": "codemmlu-multitask-benchmark-2024-2", 8345 "target": "livecodebench-2024" 8346 }, 8347 { 8348 "source": "codemmlu-multitask-benchmark-2024-2", 8349 "target": "bigcodebench-2024" 8350 }, 8351 { 8352 "source": "codemmlu-multitask-benchmark-2024-2", 8353 "target": "leakage-code-generation-2024" 8354 }, 8355 { 8356 "source": "codemmlu-multitask-benchmark-2024-2", 8357 "target": "your-code-generated-2023" 8358 }, 8359 { 8360 "source": "codemmlu-multitask-benchmark-2024-2", 8361 "target": "chain-of-thought-prompting-2022" 8362 }, 8363 { 8364 "source": "codemorph-mitigating-data-2025", 8365 "target": "quantifying-contamination-evaluating-2024" 8366 }, 8367 { 8368 "source": "codemorph-mitigating-data-2025", 8369 "target": "concerned-data-contamination-2024" 8370 }, 8371 { 8372 "source": "codemorph-mitigating-data-2025", 8373 "target": "rethinking-benchmark-contamination-2023" 8374 }, 8375 { 8376 "source": "codemorph-mitigating-data-2025", 8377 "target": "livecodebench-2024" 8378 }, 8379 { 8380 "source": "codemorph-mitigating-data-2025", 8381 "target": "top-leaderboard-ranking-2024" 8382 }, 8383 { 8384 "source": "codemorph-mitigating-data-2025", 8385 "target": "codex-humaneval-2021" 8386 }, 8387 { 8388 "source": "codemorph-mitigating-data-2025", 8389 "target": "your-code-generated-2023" 8390 }, 8391 { 8392 "source": "codemorph-mitigating-data-2025", 8393 "target": "benchmark-contamination-survey-2024" 8394 }, 8395 { 8396 "source": "codemorph-mitigating-data-2025", 8397 "target": "bigcodebench-2024" 8398 }, 8399 { 8400 "source": "codemorph-mitigating-data-2025", 8401 "target": "llms-se-systematic-review-2023" 8402 }, 8403 { 8404 "source": "codepde-inference-framework-2025", 8405 "target": "mlebench-evaluating-machine-2024" 8406 }, 8407 { 8408 "source": "codepromptzip-codespecific-prompt-2025", 8409 "target": "code-llama-2023" 8410 }, 8411 { 8412 "source": "coderagbench-can-retrieval-2024", 8413 "target": "codex-humaneval-2021" 8414 }, 8415 { 8416 "source": "coderagbench-can-retrieval-2024", 8417 "target": "swe-bench-2023" 8418 }, 8419 { 8420 "source": "coderagbench-can-retrieval-2024", 8421 "target": "agentless-2024" 8422 }, 8423 { 8424 "source": "coderagbench-can-retrieval-2024", 8425 "target": "livecodebench-2024" 8426 }, 8427 { 8428 "source": "coderagbench-can-retrieval-2024", 8429 "target": "swe-agent-2024" 8430 }, 8431 { 8432 "source": "coderagbench-can-retrieval-2024", 8433 "target": "deepseek-coder-2024" 8434 }, 8435 { 8436 "source": "coderagbench-can-retrieval-2024", 8437 "target": "starcoder-2023" 8438 }, 8439 { 8440 "source": "coderagbench-can-retrieval-2024", 8441 "target": "repocoder-repositorylevel-code-2023" 8442 }, 8443 { 8444 "source": "coderagbench-can-retrieval-2024", 8445 "target": "your-code-generated-2023" 8446 }, 8447 { 8448 "source": "coderagbench-can-retrieval-2024", 8449 "target": "arks-active-retrieval-2024" 8450 }, 8451 { 8452 "source": "codereviewqa-code-review-2025", 8453 "target": "swe-bench-2023" 8454 }, 8455 { 8456 "source": "codereviewqa-code-review-2025", 8457 "target": "codex-humaneval-2021" 8458 }, 8459 { 8460 "source": "codereviewqa-code-review-2025", 8461 "target": "bigcodebench-2024" 8462 }, 8463 { 8464 "source": "coderl-improving-code-2025", 8465 "target": "livecodebench-2024" 8466 }, 8467 { 8468 "source": "coderl-improving-code-2025", 8469 "target": "codex-humaneval-2021" 8470 }, 8471 { 8472 "source": "coderl-improving-code-2025", 8473 "target": "qwen25coder-technical-report-2024" 8474 }, 8475 { 8476 "source": "codescope-executionbased-multilingual-2023", 8477 "target": "codex-humaneval-2021" 8478 }, 8479 { 8480 "source": "codescope-executionbased-multilingual-2023", 8481 "target": "wizardcoder-empowering-code-2023" 8482 }, 8483 { 8484 "source": "codescope-executionbased-multilingual-2023", 8485 "target": "code-llama-2023" 8486 }, 8487 { 8488 "source": "codescope-executionbased-multilingual-2023", 8489 "target": "starcoder-2023" 8490 }, 8491 { 8492 "source": "codescope-executionbased-multilingual-2023", 8493 "target": "classeval-manuallycrafted-benchmark-2023" 8494 }, 8495 { 8496 "source": "codescope-executionbased-multilingual-2023", 8497 "target": "no-more-manual-2023" 8498 }, 8499 { 8500 "source": "codescore-evaluating-code-2023", 8501 "target": "codex-humaneval-2021" 8502 }, 8503 { 8504 "source": "codescore-evaluating-code-2023", 8505 "target": "starcoder-2023" 8506 }, 8507 { 8508 "source": "codescore-evaluating-code-2023", 8509 "target": "code-llama-2023" 8510 }, 8511 { 8512 "source": "codesift-llmbased-referenceless-2024", 8513 "target": "codex-humaneval-2021" 8514 }, 8515 { 8516 "source": "codesift-llmbased-referenceless-2024", 8517 "target": "judging-llmasajudge-mtbench-2023" 8518 }, 8519 { 8520 "source": "codesift-llmbased-referenceless-2024", 8521 "target": "your-code-generated-2023" 8522 }, 8523 { 8524 "source": "codesift-llmbased-referenceless-2024", 8525 "target": "codamosa-escaping-coverage-2023" 8526 }, 8527 { 8528 "source": "codesift-llmbased-referenceless-2024", 8529 "target": "starcoder-2023" 8530 }, 8531 { 8532 "source": "codesift-llmbased-referenceless-2024", 8533 "target": "code-llama-2023" 8534 }, 8535 { 8536 "source": "codetm4-detecting-machinegenerated-2025", 8537 "target": "codex-humaneval-2021" 8538 }, 8539 { 8540 "source": "codetm4-detecting-machinegenerated-2025", 8541 "target": "codebert-pretrained-model-2020" 8542 }, 8543 { 8544 "source": "codetm4-detecting-machinegenerated-2025", 8545 "target": "livecodebench-2024" 8546 }, 8547 { 8548 "source": "codex-humaneval-2021", 8549 "target": "scaling-laws-2020" 8550 }, 8551 { 8552 "source": "codex-humaneval-2021", 8553 "target": "codebert-pretrained-model-2020" 8554 }, 8555 { 8556 "source": "codexity-secure-aiassisted-2024", 8557 "target": "copilot-security-weaknesses-2023" 8558 }, 8559 { 8560 "source": "codexity-secure-aiassisted-2024", 8561 "target": "starcoder-2023" 8562 }, 8563 { 8564 "source": "codexity-secure-aiassisted-2024", 8565 "target": "scaffolded-model-capability-2023" 8566 }, 8567 { 8568 "source": "codified-context-infrastructure-2026", 8569 "target": "uc-berkeley-mast-2025" 8570 }, 8571 { 8572 "source": "codified-context-infrastructure-2026", 8573 "target": "context-engineering-ai-2025" 8574 }, 8575 { 8576 "source": "codified-context-infrastructure-2026", 8577 "target": "agentic-ai-software-2025-2" 8578 }, 8579 { 8580 "source": "codified-context-infrastructure-2026", 8581 "target": "agentic-adoption-github-2026" 8582 }, 8583 { 8584 "source": "coding-agents-generating-2026", 8585 "target": "metr-rct-2025" 8586 }, 8587 { 8588 "source": "coding-agents-generating-2026", 8589 "target": "understanding-software-engineering-2025" 8590 }, 8591 { 8592 "source": "coding-agents-generating-2026", 8593 "target": "rise-ai-teammates-2025" 8594 }, 8595 { 8596 "source": "coding-agents-generating-2026", 8597 "target": "agentic-adoption-github-2026" 8598 }, 8599 { 8600 "source": "coding-agents-generating-2026", 8601 "target": "promises-perils-timely-2026" 8602 }, 8603 { 8604 "source": "coding-agents-generating-2026", 8605 "target": "automated-unit-test-2024" 8606 }, 8607 { 8608 "source": "coding-agents-generating-2026", 8609 "target": "test-smells-llmgenerated-2024" 8610 }, 8611 { 8612 "source": "coevolving-llm-coder-2025", 8613 "target": "deepseek-r1-2025" 8614 }, 8615 { 8616 "source": "coevolving-llm-coder-2025", 8617 "target": "livecodebench-2024" 8618 }, 8619 { 8620 "source": "coevolving-llm-coder-2025", 8621 "target": "your-code-generated-2023" 8622 }, 8623 { 8624 "source": "coevolving-llm-coder-2025", 8625 "target": "no-more-manual-2023" 8626 }, 8627 { 8628 "source": "coevolving-llm-coder-2025", 8629 "target": "alphacode-competition-level-2022" 8630 }, 8631 { 8632 "source": "coffe-code-efficiency-2025", 8633 "target": "codex-humaneval-2021" 8634 }, 8635 { 8636 "source": "coffe-code-efficiency-2025", 8637 "target": "swe-bench-2023" 8638 }, 8639 { 8640 "source": "coffe-code-efficiency-2025", 8641 "target": "evaluating-language-models-2024" 8642 }, 8643 { 8644 "source": "coffe-code-efficiency-2025", 8645 "target": "your-code-generated-2023" 8646 }, 8647 { 8648 "source": "coffe-code-efficiency-2025", 8649 "target": "swe-agent-2024" 8650 }, 8651 { 8652 "source": "coffe-code-efficiency-2025", 8653 "target": "code-llama-2023" 8654 }, 8655 { 8656 "source": "coffe-code-efficiency-2025", 8657 "target": "deepseek-coder-2024" 8658 }, 8659 { 8660 "source": "cognitive-control-architecture-2025", 8661 "target": "agentdojo-dynamic-environment-2024" 8662 }, 8663 { 8664 "source": "cognitive-control-architecture-2025", 8665 "target": "melon-provable-defense-2025" 8666 }, 8667 { 8668 "source": "cognitive-control-architecture-2025", 8669 "target": "not-what-youve-2023" 8670 }, 8671 { 8672 "source": "cognitive-control-architecture-2025", 8673 "target": "ipiguard-novel-tool-2025" 8674 }, 8675 { 8676 "source": "cognitive-control-architecture-2025", 8677 "target": "task-shield-enforcing-2024" 8678 }, 8679 { 8680 "source": "cognitive-control-architecture-2025", 8681 "target": "defending-against-indirect-2024" 8682 }, 8683 { 8684 "source": "cognitive-control-architecture-2025", 8685 "target": "formalizing-benchmarking-prompt-2023" 8686 }, 8687 { 8688 "source": "cognitive-control-architecture-2025", 8689 "target": "react-synergizing-reasoning-2022" 8690 }, 8691 { 8692 "source": "cognitive-models-ai-2026", 8693 "target": "tree-thoughts-deliberate-2023" 8694 }, 8695 { 8696 "source": "cognitive-models-ai-2026", 8697 "target": "react-synergizing-reasoning-2022" 8698 }, 8699 { 8700 "source": "cognitive-models-ai-2026", 8701 "target": "swe-agent-2024" 8702 }, 8703 { 8704 "source": "cognitive-models-ai-2026", 8705 "target": "reflexion-language-agents-2023" 8706 }, 8707 { 8708 "source": "cognitive-models-ai-2026", 8709 "target": "openhands-ai-sw-agent-2024" 8710 }, 8711 { 8712 "source": "cognitive-models-ai-2026", 8713 "target": "ai-scientist-fully-2024" 8714 }, 8715 { 8716 "source": "cognitive-overload-attackprompt-2024", 8717 "target": "prompt-injection-llm-apps-2023" 8718 }, 8719 { 8720 "source": "cognitive-overload-attackprompt-2024", 8721 "target": "not-what-youve-2023" 8722 }, 8723 { 8724 "source": "coladder-supporting-programmers-2023", 8725 "target": "codex-humaneval-2021" 8726 }, 8727 { 8728 "source": "coladder-supporting-programmers-2023", 8729 "target": "productivity-assessment-neural-2022" 8730 }, 8731 { 8732 "source": "collab-controlled-decoding-2025", 8733 "target": "transfer-q-star-2024" 8734 }, 8735 { 8736 "source": "collaborating-genai-incentives-2025", 8737 "target": "metr-rct-2025" 8738 }, 8739 { 8740 "source": "collaborating-genai-incentives-2025", 8741 "target": "copilot-productivity-controlled-2023" 8742 }, 8743 { 8744 "source": "collaboration-all-you-2025", 8745 "target": "lost-translation-study-2024" 8746 }, 8747 { 8748 "source": "collaboration-all-you-2025", 8749 "target": "cotran-llmbased-code-2023" 8750 }, 8751 { 8752 "source": "collaborative-agents-automated-2025", 8753 "target": "repairagent-llm-bug-repair-2024" 8754 }, 8755 { 8756 "source": "collaborative-agents-automated-2025", 8757 "target": "openhands-ai-sw-agent-2024" 8758 }, 8759 { 8760 "source": "collaborative-agents-automated-2025", 8761 "target": "demystifying-llmbased-software-2025" 8762 }, 8763 { 8764 "source": "collaborative-agents-automated-2025", 8765 "target": "apr-llm-survey-2025" 8766 }, 8767 { 8768 "source": "collaborative-agents-automated-2025", 8769 "target": "automated-program-repair-2023" 8770 }, 8771 { 8772 "source": "collaborative-agents-automated-2025", 8773 "target": "reflexion-language-agents-2023" 8774 }, 8775 { 8776 "source": "collaborative-agents-automated-2025", 8777 "target": "systematic-literature-review-2024" 8778 }, 8779 { 8780 "source": "collaborative-agents-automated-2025", 8781 "target": "deepseek-coder-2024" 8782 }, 8783 { 8784 "source": "collubench-benchmark-predicting-2024", 8785 "target": "swe-bench-2023" 8786 }, 8787 { 8788 "source": "collubench-benchmark-predicting-2024", 8789 "target": "swe-agent-2024" 8790 }, 8791 { 8792 "source": "collubench-benchmark-predicting-2024", 8793 "target": "codex-humaneval-2021" 8794 }, 8795 { 8796 "source": "collubench-benchmark-predicting-2024", 8797 "target": "your-code-generated-2023" 8798 }, 8799 { 8800 "source": "collubench-benchmark-predicting-2024", 8801 "target": "impact-code-language-2023" 8802 }, 8803 { 8804 "source": "collubench-benchmark-predicting-2024", 8805 "target": "deep-dive-into-2024-2" 8806 }, 8807 { 8808 "source": "collubench-benchmark-predicting-2024", 8809 "target": "beyond-functional-correctness-2024" 8810 }, 8811 { 8812 "source": "collubench-benchmark-predicting-2024", 8813 "target": "deepseek-coder-2024" 8814 }, 8815 { 8816 "source": "collubench-benchmark-predicting-2024", 8817 "target": "starcoder2-2024" 8818 }, 8819 { 8820 "source": "collubench-benchmark-predicting-2024", 8821 "target": "can-llm-replace-2023" 8822 }, 8823 { 8824 "source": "colm-collaborative-large-2025", 8825 "target": "improving-factuality-reasoning-2023" 8826 }, 8827 { 8828 "source": "colm-collaborative-large-2025", 8829 "target": "think-deep-think-2025" 8830 }, 8831 { 8832 "source": "colm-collaborative-large-2025", 8833 "target": "judging-llmasajudge-mtbench-2023" 8834 }, 8835 { 8836 "source": "colt-lightweight-multillm-2026", 8837 "target": "improving-factuality-reasoning-2023" 8838 }, 8839 { 8840 "source": "comback-versatile-dataset-2024", 8841 "target": "code-llama-2023" 8842 }, 8843 { 8844 "source": "comback-versatile-dataset-2024", 8845 "target": "codebert-pretrained-model-2020" 8846 }, 8847 { 8848 "source": "combined-approach-program-2024", 8849 "target": "unified-multitask-learning-2022" 8850 }, 8851 { 8852 "source": "combining-costconstrained-runtime-2025", 8853 "target": "bigcodebench-2024" 8854 }, 8855 { 8856 "source": "combining-large-language-2025", 8857 "target": "repairagent-llm-bug-repair-2024" 8858 }, 8859 { 8860 "source": "combining-large-language-2025", 8861 "target": "judging-llmasajudge-mtbench-2023" 8862 }, 8863 { 8864 "source": "combining-large-language-2025", 8865 "target": "no-more-manual-2023" 8866 }, 8867 { 8868 "source": "combining-large-language-2025", 8869 "target": "adaptive-test-generation-2023" 8870 }, 8871 { 8872 "source": "comparative-analysis-pretrained-2025", 8873 "target": "codex-humaneval-2021" 8874 }, 8875 { 8876 "source": "comparative-analysis-pretrained-2025", 8877 "target": "impact-code-language-2023" 8878 }, 8879 { 8880 "source": "comparative-analysis-pretrained-2025", 8881 "target": "how-effective-neural-2023" 8882 }, 8883 { 8884 "source": "comparative-analysis-pretrained-2025", 8885 "target": "less-training-more-2022" 8886 }, 8887 { 8888 "source": "comparative-analysis-pretrained-2025", 8889 "target": "code-llama-2023" 8890 }, 8891 { 8892 "source": "comparative-analysis-pretrained-2025", 8893 "target": "starcoder-2023" 8894 }, 8895 { 8896 "source": "comparative-analysis-pretrained-2025", 8897 "target": "automated-program-repair-2022" 8898 }, 8899 { 8900 "source": "comparative-review-ai-2024", 8901 "target": "alphacode-competition-level-2022" 8902 }, 8903 { 8904 "source": "comparative-study-ai-2025", 8905 "target": "artificial-intelligence-system-2024" 8906 }, 8907 { 8908 "source": "comparative-study-dsl-2024", 8909 "target": "codex-humaneval-2021" 8910 }, 8911 { 8912 "source": "comparative-study-dsl-2024", 8913 "target": "starcoder-2023" 8914 }, 8915 { 8916 "source": "comparative-study-dsl-2024", 8917 "target": "gorilla-large-language-2023" 8918 }, 8919 { 8920 "source": "comparative-study-dsl-2024", 8921 "target": "toolformer-language-models-2023" 8922 }, 8923 { 8924 "source": "comparative-study-large-2025", 8925 "target": "survey-automated-program-2023" 8926 }, 8927 { 8928 "source": "comparative-study-large-2025", 8929 "target": "chain-of-thought-prompting-2022" 8930 }, 8931 { 8932 "source": "comparative-study-large-2025", 8933 "target": "survey-learningbased-automated-2023" 8934 }, 8935 { 8936 "source": "comparative-study-large-2025", 8937 "target": "empirical-evaluation-large-2025" 8938 }, 8939 { 8940 "source": "comparative-study-large-2025", 8941 "target": "deepseek-v3-2024" 8942 }, 8943 { 8944 "source": "compass-contrastive-learning-2026", 8945 "target": "codebert-pretrained-model-2020" 8946 }, 8947 { 8948 "source": "compass-contrastive-learning-2026", 8949 "target": "survey-automated-program-2023" 8950 }, 8951 { 8952 "source": "compass-contrastive-learning-2026", 8953 "target": "invalidator-automated-patch-2023" 8954 }, 8955 { 8956 "source": "competitive-programming-reasoning-models-2025", 8957 "target": "alphacode-competition-level-2022" 8958 }, 8959 { 8960 "source": "competitive-programming-reasoning-models-2025", 8961 "target": "codex-humaneval-2021" 8962 }, 8963 { 8964 "source": "competitive-programming-reasoning-models-2025", 8965 "target": "deepseek-r1-2025" 8966 }, 8967 { 8968 "source": "competitive-programming-reasoning-models-2025", 8969 "target": "swe-bench-2023" 8970 }, 8971 { 8972 "source": "compiler-feedback-loops-2025", 8973 "target": "llms-se-systematic-review-2023" 8974 }, 8975 { 8976 "source": "compiler-feedback-loops-2025", 8977 "target": "automated-unit-test-2024" 8978 }, 8979 { 8980 "source": "compiler-feedback-loops-2025", 8981 "target": "how-much-does-2024" 8982 }, 8983 { 8984 "source": "compiler-feedback-loops-2025", 8985 "target": "swe-bench-2023" 8986 }, 8987 { 8988 "source": "compiler-feedback-loops-2025", 8989 "target": "codex-humaneval-2021" 8990 }, 8991 { 8992 "source": "compilernext-searchbased-compiler-2025", 8993 "target": "swe-bench-2023" 8994 }, 8995 { 8996 "source": "compilernext-searchbased-compiler-2025", 8997 "target": "dspy-compiling-declarative-2023" 8998 }, 8999 { 9000 "source": "compilernext-searchbased-compiler-2025", 9001 "target": "autogen-multi-agent-2023" 9002 }, 9003 { 9004 "source": "compilernext-searchbased-compiler-2025", 9005 "target": "adas-automated-design-2024" 9006 }, 9007 { 9008 "source": "compilernext-searchbased-compiler-2025", 9009 "target": "ainative-software-engineering-2024" 9010 }, 9011 { 9012 "source": "completion-by-comprehension-2025", 9013 "target": "deepseek-coder-2024" 9014 }, 9015 { 9016 "source": "completion-by-comprehension-2025", 9017 "target": "code-llama-2023" 9018 }, 9019 { 9020 "source": "completion-by-comprehension-2025", 9021 "target": "crosscodeeval-diverse-multilingual-2023" 9022 }, 9023 { 9024 "source": "completion-by-comprehension-2025", 9025 "target": "repocoder-repositorylevel-code-2023" 9026 }, 9027 { 9028 "source": "completion-by-comprehension-2025", 9029 "target": "rlcoder-reinforcement-learning-2024" 9030 }, 9031 { 9032 "source": "completion-by-comprehension-2025", 9033 "target": "graphcoder-enhancing-repositorylevel-2024" 9034 }, 9035 { 9036 "source": "completion-by-comprehension-2025", 9037 "target": "swe-bench-2023" 9038 }, 9039 { 9040 "source": "completion-by-comprehension-2025", 9041 "target": "productivity-assessment-neural-2022" 9042 }, 9043 { 9044 "source": "complexcodeeval-benchmark-evaluating-2024", 9045 "target": "codex-humaneval-2021" 9046 }, 9047 { 9048 "source": "complexcodeeval-benchmark-evaluating-2024", 9049 "target": "deepseek-coder-2024" 9050 }, 9051 { 9052 "source": "complexcodeeval-benchmark-evaluating-2024", 9053 "target": "code-llama-2023" 9054 }, 9055 { 9056 "source": "complexcodeeval-benchmark-evaluating-2024", 9057 "target": "starcoder2-2024" 9058 }, 9059 { 9060 "source": "complexcodeeval-benchmark-evaluating-2024", 9061 "target": "livecodebench-2024" 9062 }, 9063 { 9064 "source": "complexcodeeval-benchmark-evaluating-2024", 9065 "target": "crosscodeeval-diverse-multilingual-2023" 9066 }, 9067 { 9068 "source": "complexcodeeval-benchmark-evaluating-2024", 9069 "target": "classeval-manuallycrafted-benchmark-2023" 9070 }, 9071 { 9072 "source": "complexcodeeval-benchmark-evaluating-2024", 9073 "target": "evocodebench-evolving-code-2024" 9074 }, 9075 { 9076 "source": "complexcodeeval-benchmark-evaluating-2024", 9077 "target": "alphacode-competition-level-2022" 9078 }, 9079 { 9080 "source": "compounding-reliability-2025", 9081 "target": "measuring-ai-ability-2025" 9082 }, 9083 { 9084 "source": "compounding-reliability-2025", 9085 "target": "compute-optimal-inference-2024" 9086 }, 9087 { 9088 "source": "compounding-reliability-2025", 9089 "target": "react-synergizing-reasoning-2022" 9090 }, 9091 { 9092 "source": "compounding-reliability-2025", 9093 "target": "bench-benchmark-toolagentuser-2024" 9094 }, 9095 { 9096 "source": "compounding-reliability-2025", 9097 "target": "scaling-laws-2020" 9098 }, 9099 { 9100 "source": "compounding-reliability-2025", 9101 "target": "chinchilla-compute-optimal-2022" 9102 }, 9103 { 9104 "source": "comprehensive-analysis-machine-2025", 9105 "target": "prompt-injection-llm-apps-2023" 9106 }, 9107 { 9108 "source": "comprehensive-analysis-machine-2025", 9109 "target": "not-what-youve-2023" 9110 }, 9111 { 9112 "source": "comprehensive-analysis-machine-2025", 9113 "target": "optimizationbased-prompt-injection-2024" 9114 }, 9115 { 9116 "source": "comprehensive-analysis-machine-2025", 9117 "target": "guardian-multitiered-defense-2024" 9118 }, 9119 { 9120 "source": "comprehensive-llm-secure-code-2025", 9121 "target": "bigcodebench-2024" 9122 }, 9123 { 9124 "source": "comprehensive-llm-secure-code-2025", 9125 "target": "secodeplt-unified-platform-2024" 9126 }, 9127 { 9128 "source": "comprehensive-llm-secure-code-2025", 9129 "target": "codex-humaneval-2021" 9130 }, 9131 { 9132 "source": "comprehensive-llm-secure-code-2025", 9133 "target": "cweval-outcomedriven-evaluation-2025" 9134 }, 9135 { 9136 "source": "comprehensive-study-posttraining-2023", 9137 "target": "smoothquant-accurate-efficient-2022" 9138 }, 9139 { 9140 "source": "comprehensive-study-posttraining-2023", 9141 "target": "case-4bit-precision-2022" 9142 }, 9143 { 9144 "source": "comprehensive-survey-aidriven-2024", 9145 "target": "codex-humaneval-2021" 9146 }, 9147 { 9148 "source": "comprehensive-survey-aidriven-2024", 9149 "target": "deepseek-coder-2024" 9150 }, 9151 { 9152 "source": "comprehensive-survey-aidriven-2024", 9153 "target": "magicoder-source-code-2023" 9154 }, 9155 { 9156 "source": "comprehensive-survey-aidriven-2024", 9157 "target": "starcoder2-2024" 9158 }, 9159 { 9160 "source": "comprehensive-survey-llm-2024", 9161 "target": "constitutional-ai-2022" 9162 }, 9163 { 9164 "source": "comprehensive-survey-llm-2024", 9165 "target": "dpo-superior-ppo-2024" 9166 }, 9167 { 9168 "source": "comprehensive-survey-trustworthiness-2025", 9169 "target": "deepseek-r1-2025" 9170 }, 9171 { 9172 "source": "comprehensive-survey-trustworthiness-2025", 9173 "target": "chain-of-thought-prompting-2022" 9174 }, 9175 { 9176 "source": "comprehensive-survey-trustworthiness-2025", 9177 "target": "alignment-faking-2024" 9178 }, 9179 { 9180 "source": "comprehensive-taxonomy-hallucinations-2025", 9181 "target": "codemirage-hallucinations-code-2024" 9182 }, 9183 { 9184 "source": "comprehensive-taxonomy-hallucinations-2025", 9185 "target": "hallulens-llm-hallucination-2025" 9186 }, 9187 { 9188 "source": "comprehensive-taxonomy-hallucinations-2025", 9189 "target": "toolformer-language-models-2023" 9190 }, 9191 { 9192 "source": "comprehensive-taxonomy-hallucinations-2025", 9193 "target": "hallucination-inevitable-innate-2024" 9194 }, 9195 { 9196 "source": "comprehensive-taxonomy-hallucinations-2025", 9197 "target": "sparks-agi-early-2023" 9198 }, 9199 { 9200 "source": "comprehensive-verilog-design-2025", 9201 "target": "verilogeval-evaluating-large-2023" 9202 }, 9203 { 9204 "source": "comprehensive-verilog-design-2025", 9205 "target": "swe-bench-2023" 9206 }, 9207 { 9208 "source": "comprehensive-verilog-design-2025", 9209 "target": "copilot-evaluation-harness-2024" 9210 }, 9211 { 9212 "source": "compute-optimal-inference-2024", 9213 "target": "chinchilla-compute-optimal-2022" 9214 }, 9215 { 9216 "source": "compute-optimal-inference-2024", 9217 "target": "beyond-chinchillaoptimal-accounting-2023" 9218 }, 9219 { 9220 "source": "compute-optimal-inference-2024", 9221 "target": "tree-thoughts-deliberate-2023" 9222 }, 9223 { 9224 "source": "compute-optimal-inference-2024", 9225 "target": "reflexion-language-agents-2023" 9226 }, 9227 { 9228 "source": "compute-optimal-inference-2024", 9229 "target": "chain-of-thought-prompting-2022" 9230 }, 9231 { 9232 "source": "concept-influence-leveraging-2026", 9233 "target": "sleeper-agents-2024" 9234 }, 9235 { 9236 "source": "conceptguard-neurosymbolic-safety-2025", 9237 "target": "sleeper-agents-2024" 9238 }, 9239 { 9240 "source": "conceptguard-neurosymbolic-safety-2025", 9241 "target": "judging-llmasajudge-mtbench-2023" 9242 }, 9243 { 9244 "source": "conceptguard-neurosymbolic-safety-2025", 9245 "target": "beavertails-improved-safety-2023" 9246 }, 9247 { 9248 "source": "concerned-data-contamination-2024", 9249 "target": "codex-humaneval-2021" 9250 }, 9251 { 9252 "source": "concerned-data-contamination-2024", 9253 "target": "nlp-evaluation-trouble-2023" 9254 }, 9255 { 9256 "source": "concerned-data-contamination-2024", 9257 "target": "classeval-manuallycrafted-benchmark-2023" 9258 }, 9259 { 9260 "source": "concerned-data-contamination-2024", 9261 "target": "how-effective-neural-2023" 9262 }, 9263 { 9264 "source": "concrete-roadmap-safety-2025", 9265 "target": "alignment-faking-2024" 9266 }, 9267 { 9268 "source": "concrete-roadmap-safety-2025", 9269 "target": "sleeper-agents-2024" 9270 }, 9271 { 9272 "source": "condor-enhance-llm-2025", 9273 "target": "self-instruct-aligning-language-2022" 9274 }, 9275 { 9276 "source": "condor-enhance-llm-2025", 9277 "target": "reflexion-language-agents-2023" 9278 }, 9279 { 9280 "source": "condor-enhance-llm-2025", 9281 "target": "codex-humaneval-2021" 9282 }, 9283 { 9284 "source": "confidencedriven-multiscale-model-2026", 9285 "target": "scaffolded-model-capability-2023" 9286 }, 9287 { 9288 "source": "confidencedriven-multiscale-model-2026", 9289 "target": "scaling-laws-2020" 9290 }, 9291 { 9292 "source": "confidenceguided-stepwise-model-2025", 9293 "target": "beyond-chinchillaoptimal-accounting-2023" 9294 }, 9295 { 9296 "source": "confidenceguided-stepwise-model-2025", 9297 "target": "deepseek-r1-2025" 9298 }, 9299 { 9300 "source": "configuring-agentic-coding-tools-2026", 9301 "target": "cursor-speed-quality-tradeoff-2025" 9302 }, 9303 { 9304 "source": "configuring-agentic-coding-tools-2026", 9305 "target": "context-engineering-ai-2025" 9306 }, 9307 { 9308 "source": "configuring-agentic-coding-tools-2026", 9309 "target": "swe-agent-2024" 9310 }, 9311 { 9312 "source": "conformal-constrained-policy-2025", 9313 "target": "chain-of-thought-prompting-2022" 9314 }, 9315 { 9316 "source": "conformal-constrained-policy-2025", 9317 "target": "react-synergizing-reasoning-2022" 9318 }, 9319 { 9320 "source": "consistency-key-detecting-2025", 9321 "target": "selfcheckgpt-zeroresource-blackbox-2023" 9322 }, 9323 { 9324 "source": "consistency-key-detecting-2025", 9325 "target": "survey-hallucination-large-2023-2" 9326 }, 9327 { 9328 "source": "consistency-key-detecting-2025", 9329 "target": "judging-llmasajudge-mtbench-2023" 9330 }, 9331 { 9332 "source": "consistency-key-detecting-2025", 9333 "target": "hallucination-inevitable-innate-2024" 9334 }, 9335 { 9336 "source": "constitutional-ai-2022", 9337 "target": "beyond-imitation-game-2022" 9338 }, 9339 { 9340 "source": "constrained-decoding-diffusion-2025", 9341 "target": "codex-humaneval-2021" 9342 }, 9343 { 9344 "source": "constrained-decoding-diffusion-2025", 9345 "target": "guiding-llms-right-2024" 9346 }, 9347 { 9348 "source": "constrained-decoding-diffusion-2025", 9349 "target": "syncode-llm-generation-2024" 9350 }, 9351 { 9352 "source": "constrained-decoding-diffusion-2025", 9353 "target": "livecodebench-2024" 9354 }, 9355 { 9356 "source": "constrained-decoding-diffusion-2025", 9357 "target": "constrained-decoding-fillinthemiddle-2024" 9358 }, 9359 { 9360 "source": "constrained-decoding-diffusion-2025", 9361 "target": "survey-llm-code-generation-2025" 9362 }, 9363 { 9364 "source": "constrained-decoding-fillinthemiddle-2024", 9365 "target": "starcoder-2023" 9366 }, 9367 { 9368 "source": "constrained-decoding-fillinthemiddle-2024", 9369 "target": "codex-humaneval-2021" 9370 }, 9371 { 9372 "source": "constrained-decoding-fillinthemiddle-2024", 9373 "target": "code-llama-2023" 9374 }, 9375 { 9376 "source": "constrained-decoding-fillinthemiddle-2024", 9377 "target": "efficient-guided-generation-2023" 9378 }, 9379 { 9380 "source": "constrained-decoding-fillinthemiddle-2024", 9381 "target": "syntaxaware-onthefly-code-2022" 9382 }, 9383 { 9384 "source": "constrained-decoding-fillinthemiddle-2024", 9385 "target": "deepseek-coder-v2-2024" 9386 }, 9387 { 9388 "source": "context-engineering-ai-2025", 9389 "target": "llms-se-systematic-review-2023" 9390 }, 9391 { 9392 "source": "context-engineering-ai-2025", 9393 "target": "swe-agent-2024" 9394 }, 9395 { 9396 "source": "context-engineering-ai-2025", 9397 "target": "rise-potential-large-2023" 9398 }, 9399 { 9400 "source": "contextaugmented-code-generation-2024", 9401 "target": "codex-humaneval-2021" 9402 }, 9403 { 9404 "source": "contextaugmented-code-generation-2024", 9405 "target": "coderagbench-can-retrieval-2024" 9406 }, 9407 { 9408 "source": "contextaugmented-code-generation-2024", 9409 "target": "code-llama-2023" 9410 }, 9411 { 9412 "source": "contextaugmented-code-generation-2024", 9413 "target": "deepseek-coder-v2-2024" 9414 }, 9415 { 9416 "source": "contextaugmented-code-generation-2024", 9417 "target": "starcoder2-2024" 9418 }, 9419 { 9420 "source": "contextaugmented-code-generation-2024", 9421 "target": "repocoder-repositorylevel-code-2023" 9422 }, 9423 { 9424 "source": "contrastrepair-enhancing-conversationbased-2024", 9425 "target": "automated-program-repair-2023" 9426 }, 9427 { 9428 "source": "contrastrepair-enhancing-conversationbased-2024", 9429 "target": "less-training-more-2022" 9430 }, 9431 { 9432 "source": "contrastrepair-enhancing-conversationbased-2024", 9433 "target": "impact-code-language-2023" 9434 }, 9435 { 9436 "source": "contrastrepair-enhancing-conversationbased-2024", 9437 "target": "codex-humaneval-2021" 9438 }, 9439 { 9440 "source": "contrastrepair-enhancing-conversationbased-2024", 9441 "target": "your-code-generated-2023" 9442 }, 9443 { 9444 "source": "contrastrepair-enhancing-conversationbased-2024", 9445 "target": "survey-learningbased-automated-2023" 9446 }, 9447 { 9448 "source": "control-models-inide-2026", 9449 "target": "metr-rct-2025" 9450 }, 9451 { 9452 "source": "control-models-inide-2026", 9453 "target": "dont-complete-it-2022" 9454 }, 9455 { 9456 "source": "control-models-inide-2026", 9457 "target": "productivity-assessment-neural-2022" 9458 }, 9459 { 9460 "source": "controlled-selfevolution-algorithmic-2026", 9461 "target": "effilearner-enhancing-efficiency-2024" 9462 }, 9463 { 9464 "source": "controlled-selfevolution-algorithmic-2026", 9465 "target": "reflexion-language-agents-2023" 9466 }, 9467 { 9468 "source": "controlled-selfevolution-algorithmic-2026", 9469 "target": "codex-humaneval-2021" 9470 }, 9471 { 9472 "source": "controlled-selfevolution-algorithmic-2026", 9473 "target": "survey-code-gen-llm-agents-2025" 9474 }, 9475 { 9476 "source": "convergence-dynamics-agenttoagent-2025", 9477 "target": "prompt-infection-llmtollm-2024" 9478 }, 9479 { 9480 "source": "convergence-dynamics-agenttoagent-2025", 9481 "target": "improving-factuality-reasoning-2023" 9482 }, 9483 { 9484 "source": "convergence-dynamics-agenttoagent-2025", 9485 "target": "rise-potential-large-2023" 9486 }, 9487 { 9488 "source": "cooperbench-why-coding-2026", 9489 "target": "uc-berkeley-mast-2025" 9490 }, 9491 { 9492 "source": "cooperbench-why-coding-2026", 9493 "target": "openhands-ai-sw-agent-2024" 9494 }, 9495 { 9496 "source": "cooperbench-why-coding-2026", 9497 "target": "swe-bench-2023" 9498 }, 9499 { 9500 "source": "cooperbench-why-coding-2026", 9501 "target": "metagpt-multi-agent-framework-2023" 9502 }, 9503 { 9504 "source": "cooperbench-why-coding-2026", 9505 "target": "chatdev-communicative-agents-2023" 9506 }, 9507 { 9508 "source": "cooperbench-why-coding-2026", 9509 "target": "magenticone-generalist-multiagent-2024" 9510 }, 9511 { 9512 "source": "cooperbench-why-coding-2026", 9513 "target": "camel-communicative-agents-2023" 9514 }, 9515 { 9516 "source": "copilot-arena-platform-2025", 9517 "target": "judging-llmasajudge-mtbench-2023" 9518 }, 9519 { 9520 "source": "copilot-arena-platform-2025", 9521 "target": "chatbot-arena-open-2024" 9522 }, 9523 { 9524 "source": "copilot-arena-platform-2025", 9525 "target": "livecodebench-2024" 9526 }, 9527 { 9528 "source": "copilot-arena-platform-2025", 9529 "target": "bigcodebench-2024" 9530 }, 9531 { 9532 "source": "copilot-arena-platform-2025", 9533 "target": "copilot-productivity-controlled-2023" 9534 }, 9535 { 9536 "source": "copilot-arena-platform-2025", 9537 "target": "codex-humaneval-2021" 9538 }, 9539 { 9540 "source": "copilot-arena-platform-2025", 9541 "target": "swe-bench-2023" 9542 }, 9543 { 9544 "source": "copilot-arena-platform-2025", 9545 "target": "livebench-challenging-contaminationlimited-2024" 9546 }, 9547 { 9548 "source": "copilot-code-quality-empirical-2023", 9549 "target": "codex-humaneval-2021" 9550 }, 9551 { 9552 "source": "copilot-efficiency-real-world-2024", 9553 "target": "productivity-assessment-neural-2022" 9554 }, 9555 { 9556 "source": "copilot-efficiency-real-world-2024", 9557 "target": "chatdev-communicative-agents-2023" 9558 }, 9559 { 9560 "source": "copilot-evaluation-harness-2024", 9561 "target": "codex-humaneval-2021" 9562 }, 9563 { 9564 "source": "copilot-evaluation-harness-2024", 9565 "target": "code-llama-2023" 9566 }, 9567 { 9568 "source": "copilot-evaluation-harness-2024", 9569 "target": "gpt4-technical-report-2023" 9570 }, 9571 { 9572 "source": "copilot-evaluation-harness-2024", 9573 "target": "llms-se-systematic-review-2023" 9574 }, 9575 { 9576 "source": "copilot-evaluation-harness-2024", 9577 "target": "lost-middle-how-2023" 9578 }, 9579 { 9580 "source": "copilot-evaluation-harness-2024", 9581 "target": "chain-of-thought-prompting-2022" 9582 }, 9583 { 9584 "source": "copilot-longitudinal-case-study-2025", 9585 "target": "copilot-productivity-controlled-2023" 9586 }, 9587 { 9588 "source": "copilot-longitudinal-case-study-2025", 9589 "target": "generative-ai-at-2023" 9590 }, 9591 { 9592 "source": "copilot-productivity-controlled-2023", 9593 "target": "codex-humaneval-2021" 9594 }, 9595 { 9596 "source": "copilot-productivity-controlled-2023", 9597 "target": "productivity-assessment-neural-2022" 9598 }, 9599 { 9600 "source": "copilot-security-weaknesses-2023", 9601 "target": "codex-humaneval-2021" 9602 }, 9603 { 9604 "source": "copilot-security-weaknesses-2023", 9605 "target": "ai-code-generators-2024" 9606 }, 9607 { 9608 "source": "copilot-zoominfo-productivity-2025", 9609 "target": "codex-humaneval-2021" 9610 }, 9611 { 9612 "source": "copilot-zoominfo-productivity-2025", 9613 "target": "github-copilot-test-2024" 9614 }, 9615 { 9616 "source": "copilot-zoominfo-productivity-2025", 9617 "target": "copilot-code-quality-empirical-2023" 9618 }, 9619 { 9620 "source": "copilot-zoominfo-productivity-2025", 9621 "target": "impact-large-language-2024" 9622 }, 9623 { 9624 "source": "copiloting-copilots-fusing-2023", 9625 "target": "codex-humaneval-2021" 9626 }, 9627 { 9628 "source": "copiloting-copilots-fusing-2023", 9629 "target": "your-code-generated-2023" 9630 }, 9631 { 9632 "source": "copiloting-copilots-fusing-2023", 9633 "target": "alphacode-competition-level-2022" 9634 }, 9635 { 9636 "source": "coprompter-usercentric-evaluation-2024", 9637 "target": "who-validates-validators-2024" 9638 }, 9639 { 9640 "source": "core-bench-computational-2024", 9641 "target": "codex-humaneval-2021" 9642 }, 9643 { 9644 "source": "core-bench-computational-2024", 9645 "target": "swe-bench-2023" 9646 }, 9647 { 9648 "source": "core-bench-computational-2024", 9649 "target": "ai-scientist-fully-2024" 9650 }, 9651 { 9652 "source": "core-bench-computational-2024", 9653 "target": "swe-agent-2024" 9654 }, 9655 { 9656 "source": "core-bench-computational-2024", 9657 "target": "lessons-from-trenches-2024" 9658 }, 9659 { 9660 "source": "core-comprehensive-ontological-2026", 9661 "target": "emergent-abilities-mirage-2023" 9662 }, 9663 { 9664 "source": "corecodebench-decoupling-code-2025", 9665 "target": "codex-humaneval-2021" 9666 }, 9667 { 9668 "source": "corecodebench-decoupling-code-2025", 9669 "target": "swe-bench-2023" 9670 }, 9671 { 9672 "source": "corecodebench-decoupling-code-2025", 9673 "target": "bigcodebench-2024" 9674 }, 9675 { 9676 "source": "corecodebench-decoupling-code-2025", 9677 "target": "livecodebench-2024" 9678 }, 9679 { 9680 "source": "corecodebench-decoupling-code-2025", 9681 "target": "your-code-generated-2023" 9682 }, 9683 { 9684 "source": "corecodebench-decoupling-code-2025", 9685 "target": "qwen25coder-technical-report-2024" 9686 }, 9687 { 9688 "source": "correctnessguaranteed-code-generation-2025", 9689 "target": "monitorguided-decoding-code-2023" 9690 }, 9691 { 9692 "source": "correctnessguaranteed-code-generation-2025", 9693 "target": "efficient-guided-generation-2023" 9694 }, 9695 { 9696 "source": "cosight-enhancing-llmbased-2025", 9697 "target": "react-synergizing-reasoning-2022" 9698 }, 9699 { 9700 "source": "cosight-enhancing-llmbased-2025", 9701 "target": "gaia-benchmark-general-2023" 9702 }, 9703 { 9704 "source": "cosight-enhancing-llmbased-2025", 9705 "target": "benchmark-expertlevel-academic-2025" 9706 }, 9707 { 9708 "source": "cosight-enhancing-llmbased-2025", 9709 "target": "reflexion-language-agents-2023" 9710 }, 9711 { 9712 "source": "cosight-enhancing-llmbased-2025", 9713 "target": "improving-factuality-reasoning-2023" 9714 }, 9715 { 9716 "source": "cosight-enhancing-llmbased-2025", 9717 "target": "selfconsistency-improves-chain-2022" 9718 }, 9719 { 9720 "source": "cosight-enhancing-llmbased-2025", 9721 "target": "uc-berkeley-mast-2025" 9722 }, 9723 { 9724 "source": "cost-accuracy-longterm-2026", 9725 "target": "llm-long-term-memory-eval-2024" 9726 }, 9727 { 9728 "source": "cost-accuracy-longterm-2026", 9729 "target": "large-language-model-2024" 9730 }, 9731 { 9732 "source": "cost-accuracy-longterm-2026", 9733 "target": "scaffolded-model-capability-2023" 9734 }, 9735 { 9736 "source": "cost-accuracy-longterm-2026", 9737 "target": "uc-berkeley-mast-2025" 9738 }, 9739 { 9740 "source": "cost-dynamic-reasoning-2025", 9741 "target": "react-synergizing-reasoning-2022" 9742 }, 9743 { 9744 "source": "cost-dynamic-reasoning-2025", 9745 "target": "reflexion-language-agents-2023" 9746 }, 9747 { 9748 "source": "cost-dynamic-reasoning-2025", 9749 "target": "codex-humaneval-2021" 9750 }, 9751 { 9752 "source": "cost-dynamic-reasoning-2025", 9753 "target": "chain-of-thought-prompting-2022" 9754 }, 9755 { 9756 "source": "cost-dynamic-reasoning-2025", 9757 "target": "autogen-multi-agent-2023" 9758 }, 9759 { 9760 "source": "cotbased-synthesizer-enhancing-2025", 9761 "target": "chain-of-thought-prompting-2022" 9762 }, 9763 { 9764 "source": "cotbased-synthesizer-enhancing-2025", 9765 "target": "selfconsistency-improves-chain-2022" 9766 }, 9767 { 9768 "source": "cotbased-synthesizer-enhancing-2025", 9769 "target": "inference-scaling-laws-2024" 9770 }, 9771 { 9772 "source": "cotdeceptoradversarial-code-obfuscation-2025", 9773 "target": "copilot-security-weaknesses-2023" 9774 }, 9775 { 9776 "source": "cotdeceptoradversarial-code-obfuscation-2025", 9777 "target": "deepseek-r1-2025" 9778 }, 9779 { 9780 "source": "cotdeceptoradversarial-code-obfuscation-2025", 9781 "target": "react-synergizing-reasoning-2022" 9782 }, 9783 { 9784 "source": "cotrag-integrating-chain-2025", 9785 "target": "chain-of-thought-prompting-2022" 9786 }, 9787 { 9788 "source": "cotran-llmbased-code-2023", 9789 "target": "codebert-pretrained-model-2020" 9790 }, 9791 { 9792 "source": "cotran-llmbased-code-2023", 9793 "target": "rltf-reinforcement-learning-2023" 9794 }, 9795 { 9796 "source": "cotran-llmbased-code-2023", 9797 "target": "compilable-neural-code-2022" 9798 }, 9799 { 9800 "source": "courtguard-local-multiagent-2025", 9801 "target": "adaptive-attacks-break-2025" 9802 }, 9803 { 9804 "source": "courtguard-local-multiagent-2025", 9805 "target": "llmailinject-dataset-from-2025" 9806 }, 9807 { 9808 "source": "courtguard-local-multiagent-2025", 9809 "target": "injecguard-benchmarking-mitigating-2024" 9810 }, 9811 { 9812 "source": "courtguard-local-multiagent-2025", 9813 "target": "backdoored-retrievers-prompt-2024" 9814 }, 9815 { 9816 "source": "coverup-effective-high-2024", 9817 "target": "codamosa-escaping-coverage-2023" 9818 }, 9819 { 9820 "source": "coverup-effective-high-2024", 9821 "target": "codex-humaneval-2021" 9822 }, 9823 { 9824 "source": "coverup-effective-high-2024", 9825 "target": "chatunitest-framework-llmbased-2023" 9826 }, 9827 { 9828 "source": "coverup-effective-high-2024", 9829 "target": "fuzz4all-universal-fuzzing-2023" 9830 }, 9831 { 9832 "source": "coverup-effective-high-2024", 9833 "target": "lost-middle-how-2023" 9834 }, 9835 { 9836 "source": "coverup-effective-high-2024", 9837 "target": "automated-program-repair-2022" 9838 }, 9839 { 9840 "source": "cracking-sql-barriers-2025", 9841 "target": "lost-translation-study-2024" 9842 }, 9843 { 9844 "source": "cracking-sql-barriers-2025", 9845 "target": "starcoder-2023" 9846 }, 9847 { 9848 "source": "creativeval-evaluating-creativity-2024", 9849 "target": "codex-humaneval-2021" 9850 }, 9851 { 9852 "source": "creativeval-evaluating-creativity-2024", 9853 "target": "gpt4-technical-report-2023" 9854 }, 9855 { 9856 "source": "creativeval-evaluating-creativity-2024", 9857 "target": "verilogeval-evaluating-large-2023" 9858 }, 9859 { 9860 "source": "critical-evaluation-defenses-2025", 9861 "target": "secalign-defending-against-2024" 9862 }, 9863 { 9864 "source": "critical-evaluation-defenses-2025", 9865 "target": "attention-tracker-detecting-2024" 9866 }, 9867 { 9868 "source": "critical-evaluation-defenses-2025", 9869 "target": "formalizing-benchmarking-prompt-2023" 9870 }, 9871 { 9872 "source": "critical-evaluation-defenses-2025", 9873 "target": "automatic-universal-prompt-2024" 9874 }, 9875 { 9876 "source": "critical-evaluation-defenses-2025", 9877 "target": "datasentinel-gametheoretic-detection-2025" 9878 }, 9879 { 9880 "source": "critical-review-large-2023", 9881 "target": "automated-program-repair-2023" 9882 }, 9883 { 9884 "source": "critical-review-large-2023", 9885 "target": "automated-program-repair-2022" 9886 }, 9887 { 9888 "source": "critical-review-large-2023", 9889 "target": "less-training-more-2022" 9890 }, 9891 { 9892 "source": "critical-review-large-2023", 9893 "target": "no-more-manual-2023" 9894 }, 9895 { 9896 "source": "crosscodeeval-diverse-multilingual-2023", 9897 "target": "codex-humaneval-2021" 9898 }, 9899 { 9900 "source": "crosscodeeval-diverse-multilingual-2023", 9901 "target": "starcoder-2023" 9902 }, 9903 { 9904 "source": "crosscodeeval-diverse-multilingual-2023", 9905 "target": "repocoder-repositorylevel-code-2023" 9906 }, 9907 { 9908 "source": "crossllm-generalization-behavioral-2025", 9909 "target": "malice-agentland-down-2025" 9910 }, 9911 { 9912 "source": "crossllm-generalization-behavioral-2025", 9913 "target": "sleeper-agents-2024" 9914 }, 9915 { 9916 "source": "crossllm-generalization-behavioral-2025", 9917 "target": "agentbench-evaluating-llms-2023" 9918 }, 9919 { 9920 "source": "crossmodal-memory-compression-2026", 9921 "target": "improving-factuality-reasoning-2023" 9922 }, 9923 { 9924 "source": "crossmodal-memory-compression-2026", 9925 "target": "uc-berkeley-mast-2025" 9926 }, 9927 { 9928 "source": "crossmodal-memory-compression-2026", 9929 "target": "chateval-better-llmbased-2023" 9930 }, 9931 { 9932 "source": "crossmodal-memory-compression-2026", 9933 "target": "metagpt-multi-agent-framework-2023" 9934 }, 9935 { 9936 "source": "crossplatform-evaluation-large-2025", 9937 "target": "chain-of-thought-prompting-2022" 9938 }, 9939 { 9940 "source": "crossplatform-evaluation-large-2025", 9941 "target": "chinchilla-compute-optimal-2022" 9942 }, 9943 { 9944 "source": "crqbench-benchmark-code-2024", 9945 "target": "codex-humaneval-2021" 9946 }, 9947 { 9948 "source": "crqbench-benchmark-code-2024", 9949 "target": "swe-bench-2023" 9950 }, 9951 { 9952 "source": "crqbench-benchmark-code-2024", 9953 "target": "gpt4-technical-report-2023" 9954 }, 9955 { 9956 "source": "crqbench-benchmark-code-2024", 9957 "target": "can-llm-replace-2023" 9958 }, 9959 { 9960 "source": "crqbench-benchmark-code-2024", 9961 "target": "chain-of-thought-prompting-2022" 9962 }, 9963 { 9964 "source": "crqbench-benchmark-code-2024", 9965 "target": "selfconsistency-improves-chain-2022" 9966 }, 9967 { 9968 "source": "crscore-reinforcement-learning-2025", 9969 "target": "combining-large-language-2025" 9970 }, 9971 { 9972 "source": "crscore-reinforcement-learning-2025", 9973 "target": "can-llms-replace-2025" 9974 }, 9975 { 9976 "source": "crscore-reinforcement-learning-2025", 9977 "target": "codereviewqa-code-review-2025" 9978 }, 9979 { 9980 "source": "cruxeval-benchmark-code-2024", 9981 "target": "codex-humaneval-2021" 9982 }, 9983 { 9984 "source": "cruxeval-benchmark-code-2024", 9985 "target": "swe-bench-2023" 9986 }, 9987 { 9988 "source": "cruxeval-benchmark-code-2024", 9989 "target": "code-llama-2023" 9990 }, 9991 { 9992 "source": "cruxeval-benchmark-code-2024", 9993 "target": "your-code-generated-2023" 9994 }, 9995 { 9996 "source": "cruxeval-benchmark-code-2024", 9997 "target": "chain-of-thought-prompting-2022" 9998 }, 9999 { 10000 "source": "cruxeval-benchmark-code-2024", 10001 "target": "wizardcoder-empowering-code-2023" 10002 }, 10003 { 10004 "source": "cruxevalx-benchmark-multilingual-2024", 10005 "target": "codex-humaneval-2021" 10006 }, 10007 { 10008 "source": "cruxevalx-benchmark-multilingual-2024", 10009 "target": "cruxeval-benchmark-code-2024" 10010 }, 10011 { 10012 "source": "cruxevalx-benchmark-multilingual-2024", 10013 "target": "multiple-scalable-polyglot-2023" 10014 }, 10015 { 10016 "source": "cruxevalx-benchmark-multilingual-2024", 10017 "target": "swe-bench-2023" 10018 }, 10019 { 10020 "source": "cruxevalx-benchmark-multilingual-2024", 10021 "target": "concerned-data-contamination-2024" 10022 }, 10023 { 10024 "source": "cruxevalx-benchmark-multilingual-2024", 10025 "target": "deepseek-coder-2024" 10026 }, 10027 { 10028 "source": "cruxevalx-benchmark-multilingual-2024", 10029 "target": "your-code-generated-2023" 10030 }, 10031 { 10032 "source": "cruxevalx-benchmark-multilingual-2024", 10033 "target": "code-llama-2023" 10034 }, 10035 { 10036 "source": "cuckoo-attack-stealthy-2025", 10037 "target": "mcp-safety-audit-2025" 10038 }, 10039 { 10040 "source": "cuckoo-attack-stealthy-2025", 10041 "target": "redcode-risky-code-2024" 10042 }, 10043 { 10044 "source": "cuckoo-attack-stealthy-2025", 10045 "target": "prompt-injection-llm-apps-2023" 10046 }, 10047 { 10048 "source": "cudaforge-agent-framework-2025", 10049 "target": "survey-code-gen-llm-agents-2025" 10050 }, 10051 { 10052 "source": "cudaforge-agent-framework-2025", 10053 "target": "survey-llm-code-generation-2025" 10054 }, 10055 { 10056 "source": "current-challenges-software-2024", 10057 "target": "codex-humaneval-2021" 10058 }, 10059 { 10060 "source": "current-challenges-software-2024", 10061 "target": "chatunitest-framework-llmbased-2023" 10062 }, 10063 { 10064 "source": "current-challenges-software-2024", 10065 "target": "deepseek-coder-2024" 10066 }, 10067 { 10068 "source": "current-challenges-software-2024", 10069 "target": "fuzz4all-universal-fuzzing-2023" 10070 }, 10071 { 10072 "source": "current-challenges-software-2024", 10073 "target": "semcoder-training-code-2024" 10074 }, 10075 { 10076 "source": "curriculum-guided-massive-2025", 10077 "target": "emergent-abilities-mirage-2023" 10078 }, 10079 { 10080 "source": "curriculum-guided-massive-2025", 10081 "target": "optima-optimizing-effectiveness-2024" 10082 }, 10083 { 10084 "source": "curriculum-guided-massive-2025", 10085 "target": "plan-and-act-long-horizon-2025" 10086 }, 10087 { 10088 "source": "curriculum-guided-massive-2025", 10089 "target": "selfconsistency-improves-chain-2022" 10090 }, 10091 { 10092 "source": "cweval-outcomedriven-evaluation-2025", 10093 "target": "codex-humaneval-2021" 10094 }, 10095 { 10096 "source": "cweval-outcomedriven-evaluation-2025", 10097 "target": "your-code-generated-2023" 10098 }, 10099 { 10100 "source": "cweval-outcomedriven-evaluation-2025", 10101 "target": "livecodebench-2024" 10102 }, 10103 { 10104 "source": "dacode-agent-data-2024", 10105 "target": "codex-humaneval-2021" 10106 }, 10107 { 10108 "source": "dacode-agent-data-2024", 10109 "target": "swe-bench-2023" 10110 }, 10111 { 10112 "source": "dacode-agent-data-2024", 10113 "target": "autogen-multi-agent-2023" 10114 }, 10115 { 10116 "source": "dacode-agent-data-2024", 10117 "target": "executable-code-actions-2024" 10118 }, 10119 { 10120 "source": "dacode-agent-data-2024", 10121 "target": "reflexion-language-agents-2023" 10122 }, 10123 { 10124 "source": "dancing-critiques-enhancing-2025", 10125 "target": "selfconsistency-improves-chain-2022" 10126 }, 10127 { 10128 "source": "dancing-critiques-enhancing-2025", 10129 "target": "tree-thoughts-deliberate-2023" 10130 }, 10131 { 10132 "source": "dancing-critiques-enhancing-2025", 10133 "target": "reflexion-language-agents-2023" 10134 }, 10135 { 10136 "source": "dancing-critiques-enhancing-2025", 10137 "target": "deepseek-r1-2025" 10138 }, 10139 { 10140 "source": "dancing-critiques-enhancing-2025", 10141 "target": "chain-of-thought-prompting-2022" 10142 }, 10143 { 10144 "source": "dangers-poisoned-llms-2025", 10145 "target": "sleeper-agents-2024" 10146 }, 10147 { 10148 "source": "dangers-poisoned-llms-2025", 10149 "target": "poisoning-attacks-llms-2025" 10150 }, 10151 { 10152 "source": "dapo-opensource-llm-2025", 10153 "target": "deepseek-r1-2025" 10154 }, 10155 { 10156 "source": "dapo-opensource-llm-2025", 10157 "target": "reflexion-language-agents-2023" 10158 }, 10159 { 10160 "source": "leakage-reproducibility-crisis-2023", 10161 "target": "neurips-reproducibility-2021" 10162 }, 10163 { 10164 "source": "lost-middle-how-2023", 10165 "target": "toolformer-language-models-2023" 10166 }, 10167 { 10168 "source": "lost-middle-how-2023", 10169 "target": "llama-open-efficient-2023" 10170 }, 10171 { 10172 "source": "lost-mix-evaluating-2025", 10173 "target": "judging-llmasajudge-mtbench-2023" 10174 }, 10175 { 10176 "source": "lost-mix-evaluating-2025", 10177 "target": "llama-3-herd-2024" 10178 }, 10179 { 10180 "source": "lost-mix-evaluating-2025", 10181 "target": "mmlu-measuring-massive-2020" 10182 }, 10183 { 10184 "source": "lost-mix-evaluating-2025", 10185 "target": "linguistics-theory-meets-2024" 10186 }, 10187 { 10188 "source": "lpcd-unified-framework-2025", 10189 "target": "llama-3-herd-2024" 10190 }, 10191 { 10192 "source": "lutllm-efficient-large-2025", 10193 "target": "smoothquant-accurate-efficient-2022" 10194 }, 10195 { 10196 "source": "lynx-open-source-2024", 10197 "target": "ragtruth-hallucination-corpus-2023" 10198 }, 10199 { 10200 "source": "lynx-open-source-2024", 10201 "target": "ares-automated-evaluation-2023" 10202 }, 10203 { 10204 "source": "lynx-open-source-2024", 10205 "target": "judging-llmasajudge-mtbench-2023" 10206 }, 10207 { 10208 "source": "lynx-open-source-2024", 10209 "target": "selfcheckgpt-zeroresource-blackbox-2023" 10210 }, 10211 { 10212 "source": "lynx-open-source-2024", 10213 "target": "chain-of-thought-prompting-2022" 10214 }, 10215 { 10216 "source": "mactg-multiagent-collaborative-2024", 10217 "target": "metagpt-multi-agent-framework-2023" 10218 }, 10219 { 10220 "source": "mactg-multiagent-collaborative-2024", 10221 "target": "chatdev-communicative-agents-2023" 10222 }, 10223 { 10224 "source": "mactg-multiagent-collaborative-2024", 10225 "target": "tree-thoughts-deliberate-2023" 10226 }, 10227 { 10228 "source": "mactg-multiagent-collaborative-2024", 10229 "target": "react-synergizing-reasoning-2022" 10230 }, 10231 { 10232 "source": "mactg-multiagent-collaborative-2024", 10233 "target": "survey-autonomous-llm-agents-2023" 10234 }, 10235 { 10236 "source": "mactg-multiagent-collaborative-2024", 10237 "target": "survey-llm-code-generation-2025" 10238 }, 10239 { 10240 "source": "mactg-multiagent-collaborative-2024", 10241 "target": "deepseek-v3-2024" 10242 }, 10243 { 10244 "source": "madspear-conformitydriven-prompt-2025", 10245 "target": "formalizing-benchmarking-prompt-2023" 10246 }, 10247 { 10248 "source": "madspear-conformitydriven-prompt-2025", 10249 "target": "automatic-universal-prompt-2024" 10250 }, 10251 { 10252 "source": "madspear-conformitydriven-prompt-2025", 10253 "target": "improving-factuality-reasoning-2023" 10254 }, 10255 { 10256 "source": "madspear-conformitydriven-prompt-2025", 10257 "target": "which-agent-causes-2025" 10258 }, 10259 { 10260 "source": "madspear-conformitydriven-prompt-2025", 10261 "target": "prompt-infection-llmtollm-2024" 10262 }, 10263 { 10264 "source": "maestro-multiagent-evaluation-2026", 10265 "target": "agentbench-evaluating-llms-2023" 10266 }, 10267 { 10268 "source": "maestro-multiagent-evaluation-2026", 10269 "target": "uc-berkeley-mast-2025" 10270 }, 10271 { 10272 "source": "maestro-multiagent-evaluation-2026", 10273 "target": "magenticone-generalist-multiagent-2024" 10274 }, 10275 { 10276 "source": "maestro-multiagent-evaluation-2026", 10277 "target": "measuring-agents-production-2025" 10278 }, 10279 { 10280 "source": "maestro-multiagent-evaluation-2026", 10281 "target": "reflexion-language-agents-2023" 10282 }, 10283 { 10284 "source": "maestro-multiagent-evaluation-2026", 10285 "target": "tree-thoughts-deliberate-2023" 10286 }, 10287 { 10288 "source": "maestro-multiagent-evaluation-2026", 10289 "target": "lost-middle-how-2023" 10290 }, 10291 { 10292 "source": "maestro-multiagent-evaluation-2026", 10293 "target": "large-language-model-2024" 10294 }, 10295 { 10296 "source": "magentic-marketplace-opensource-2025", 10297 "target": "multiagent-risks-from-2025" 10298 }, 10299 { 10300 "source": "magentic-marketplace-opensource-2025", 10301 "target": "generative-agents-interactive-2023" 10302 }, 10303 { 10304 "source": "magentic-marketplace-opensource-2025", 10305 "target": "from-llm-reasoning-2025" 10306 }, 10307 { 10308 "source": "magenticone-generalist-multiagent-2024", 10309 "target": "autogen-enabling-nextgen-2023" 10310 }, 10311 { 10312 "source": "magenticone-generalist-multiagent-2024", 10313 "target": "gaia-benchmark-general-2023" 10314 }, 10315 { 10316 "source": "magenticone-generalist-multiagent-2024", 10317 "target": "webarena-autonomous-agents-2023" 10318 }, 10319 { 10320 "source": "magenticone-generalist-multiagent-2024", 10321 "target": "swe-bench-2023" 10322 }, 10323 { 10324 "source": "magenticone-generalist-multiagent-2024", 10325 "target": "swe-agent-2024" 10326 }, 10327 { 10328 "source": "magenticone-generalist-multiagent-2024", 10329 "target": "agentless-2024" 10330 }, 10331 { 10332 "source": "magenticone-generalist-multiagent-2024", 10333 "target": "ai-scientist-fully-2024" 10334 }, 10335 { 10336 "source": "neurips-reproducibility-2021", 10337 "target": "deep-rl-matters-2018" 10338 }, 10339 { 10340 "source": "neurips-reproducibility-2021", 10341 "target": "gans-created-equal-2018" 10342 }, 10343 { 10344 "source": "neurips-reproducibility-2021", 10345 "target": "bert-pretraining-deep-2018" 10346 }, 10347 { 10348 "source": "questionable-practices-ml-2024", 10349 "target": "lessons-from-trenches-2024" 10350 }, 10351 { 10352 "source": "questionable-practices-ml-2024", 10353 "target": "troubling-trends-ml-2018" 10354 }, 10355 { 10356 "source": "questionable-practices-ml-2024", 10357 "target": "chatbot-arena-open-2024" 10358 }, 10359 { 10360 "source": "questionable-practices-ml-2024", 10361 "target": "gpt4-technical-report-2023" 10362 }, 10363 { 10364 "source": "questionable-practices-ml-2024", 10365 "target": "codex-humaneval-2021" 10366 }, 10367 { 10368 "source": "reproducibility-ml-overview-2025", 10369 "target": "lessons-from-trenches-2024" 10370 }, 10371 { 10372 "source": "reproducibility-ml-overview-2025", 10373 "target": "leakage-reproducibility-crisis-2023" 10374 }, 10375 { 10376 "source": "reproducibility-ml-overview-2025", 10377 "target": "neurips-reproducibility-2021" 10378 }, 10379 { 10380 "source": "show-your-work-2019", 10381 "target": "troubling-trends-ml-2018" 10382 }, 10383 { 10384 "source": "show-your-work-2019", 10385 "target": "deep-rl-matters-2018" 10386 }, 10387 { 10388 "source": "show-your-work-2019", 10389 "target": "gans-created-equal-2018" 10390 }, 10391 { 10392 "source": "troubling-trends-ml-2018", 10393 "target": "deep-rl-matters-2018" 10394 }, 10395 { 10396 "source": "troubling-trends-ml-2018", 10397 "target": "gans-created-equal-2018" 10398 }, 10399 { 10400 "source": "trust-ai-benchmarks-2025", 10401 "target": "lessons-from-trenches-2024" 10402 }, 10403 { 10404 "source": "trust-ai-benchmarks-2025", 10405 "target": "alignment-faking-2024" 10406 }, 10407 { 10408 "source": "trust-ai-benchmarks-2025", 10409 "target": "not-what-youve-2023" 10410 }, 10411 { 10412 "source": "trust-ai-benchmarks-2025", 10413 "target": "chatbot-arena-open-2024" 10414 } 10415 ], 10416 "most_cited": [ 10417 { 10418 "slug": "codex-humaneval-2021", 10419 "incoming_citations": 159 10420 }, 10421 { 10422 "slug": "swe-bench-2023", 10423 "incoming_citations": 63 10424 }, 10425 { 10426 "slug": "your-code-generated-2023", 10427 "incoming_citations": 47 10428 }, 10429 { 10430 "slug": "chain-of-thought-prompting-2022", 10431 "incoming_citations": 46 10432 }, 10433 { 10434 "slug": "gpt4-technical-report-2023", 10435 "incoming_citations": 38 10436 }, 10437 { 10438 "slug": "react-synergizing-reasoning-2022", 10439 "incoming_citations": 38 10440 }, 10441 { 10442 "slug": "livecodebench-2024", 10443 "incoming_citations": 38 10444 }, 10445 { 10446 "slug": "judging-llmasajudge-mtbench-2023", 10447 "incoming_citations": 37 10448 }, 10449 { 10450 "slug": "code-llama-2023", 10451 "incoming_citations": 36 10452 }, 10453 { 10454 "slug": "reflexion-language-agents-2023", 10455 "incoming_citations": 36 10456 }, 10457 { 10458 "slug": "deepseek-r1-2025", 10459 "incoming_citations": 30 10460 }, 10461 { 10462 "slug": "metagpt-multi-agent-framework-2023", 10463 "incoming_citations": 29 10464 }, 10465 { 10466 "slug": "sleeper-agents-2024", 10467 "incoming_citations": 29 10468 }, 10469 { 10470 "slug": "not-what-youve-2023", 10471 "incoming_citations": 27 10472 }, 10473 { 10474 "slug": "swe-agent-2024", 10475 "incoming_citations": 27 10476 }, 10477 { 10478 "slug": "autogen-multi-agent-2023", 10479 "incoming_citations": 23 10480 }, 10481 { 10482 "slug": "alphacode-competition-level-2022", 10483 "incoming_citations": 21 10484 }, 10485 { 10486 "slug": "chatdev-communicative-agents-2023", 10487 "incoming_citations": 20 10488 }, 10489 { 10490 "slug": "bigcodebench-2024", 10491 "incoming_citations": 20 10492 }, 10493 { 10494 "slug": "selfconsistency-improves-chain-2022", 10495 "incoming_citations": 19 10496 }, 10497 { 10498 "slug": "starcoder-2023", 10499 "incoming_citations": 19 10500 }, 10501 { 10502 "slug": "tree-thoughts-deliberate-2023", 10503 "incoming_citations": 18 10504 }, 10505 { 10506 "slug": "uc-berkeley-mast-2025", 10507 "incoming_citations": 18 10508 }, 10509 { 10510 "slug": "copilot-productivity-controlled-2023", 10511 "incoming_citations": 17 10512 }, 10513 { 10514 "slug": "llms-se-systematic-review-2023", 10515 "incoming_citations": 17 10516 }, 10517 { 10518 "slug": "codebert-pretrained-model-2020", 10519 "incoming_citations": 17 10520 }, 10521 { 10522 "slug": "deepseek-coder-2024", 10523 "incoming_citations": 16 10524 }, 10525 { 10526 "slug": "emergent-abilities-mirage-2023", 10527 "incoming_citations": 16 10528 }, 10529 { 10530 "slug": "openhands-ai-sw-agent-2024", 10531 "incoming_citations": 16 10532 }, 10533 { 10534 "slug": "formalizing-benchmarking-prompt-2023", 10535 "incoming_citations": 16 10536 } 10537 ], 10538 "connected_components": { 10539 "count": 12, 10540 "largest_size": 752, 10541 "components": [ 10542 [ 10543 "3dshape2vecset-3d-shape-2023", 10544 "a2hcoder-llmdriven-coding-2025", 10545 "aart-aiassisted-redteaming-2023", 10546 "acar-adaptive-complexity-2026", 10547 "accelerating-automatic-program-2025", 10548 "accelerating-large-language-2023", 10549 "across-programming-language-2025", 10550 "adafuse-adaptive-ensemble-2026", 10551 "adaplanner-adaptive-planning-2023", 10552 "adaptevolve-improving-efficiency-2026", 10553 "adapting-knowledge-prompt-2025", 10554 "adaptive-attacks-break-2025", 10555 "adaptive-attacks-bypass-defenses-2025", 10556 "adaptive-test-generation-2023", 10557 "adaptrack-constrained-decoding-2025", 10558 "adas-automated-design-2024", 10559 "adoption-generative-artificial-2026", 10560 "advancements-generative-ai-2023", 10561 "advancing-code-generation-2025", 10562 "advancing-language-model-2025", 10563 "advancing-largemolecule-discovery-2025", 10564 "advancing-methodological-development-2025", 10565 "advancing-nursing-regulation-2025", 10566 "advancing-software-quality-2025", 10567 "adversarial-bug-reports-2025", 10568 "adversarial-threat-vectors-2025", 10569 "advevomarl-shaping-internalized-2025", 10570 "aegis-automated-coevolutionary-2025", 10571 "aegis20-diverse-ai-2025", 10572 "aegisagent-autonomous-defense-2025", 10573 "agent-contracts-formal-2026", 10574 "agent-developer-practices-2025", 10575 "agent-error-taxonomy-2025", 10576 "agent-security-bench-2024", 10577 "agentasajudge-evaluate-agents-2024", 10578 "agentask-multiagent-systems-2025", 10579 "agentbased-evaluation-framework-2025", 10580 "agentbench-evaluating-llms-2023", 10581 "agentdojo-dynamic-environment-2024", 10582 "agentfuzzer-generic-blackbox-2025", 10583 "agentic-adoption-github-2026", 10584 "agentic-ai-architectures-2026", 10585 "agentic-ai-assessment-framework-2025", 10586 "agentic-ai-modernization-2026", 10587 "agentic-ai-security-survey-2025", 10588 "agentic-ai-software-2025", 10589 "agentic-ai-software-2025-2", 10590 "agentic-bug-reproduction-2025", 10591 "agentic-programming-survey-2025", 10592 "agentic-refactoring-empirical-2025", 10593 "agentic-software-engineering-2025", 10594 "agentless-2024", 10595 "agentmesh-cooperative-multiagent-2025", 10596 "agents-of-chaos-2026", 10597 "agents4plc-automating-closedloop-2024", 10598 "agentsllm-augmentative-generation-2025", 10599 "agentsnet-coordination-collaborative-2025", 10600 "agentspawn-adaptive-multiagent-2026", 10601 "agenttypo-adaptive-typographic-2025", 10602 "agentvigil-generic-blackbox-2025", 10603 "agint-agentic-graph-2025", 10604 "ai-alignment-contemporary-2025", 10605 "ai-alignment-strategies-2025", 10606 "ai-as-cognitive-2025", 10607 "ai-assistance-legal-2023", 10608 "ai-code-generators-2024", 10609 "ai-code-maintainability-registered-report-2024", 10610 "ai-code-not-reproducible-2025", 10611 "ai-code-review-2025", 10612 "ai-code-survival-open-source-2026", 10613 "ai-code-wild-2025", 10614 "ai-ides-vs-agents-impact-2026", 10615 "ai-productivity-index-2025", 10616 "ai-prs-code-quality-reuse-2026", 10617 "ai-safety-subproblems-2023", 10618 "ai-scientist-fully-2024", 10619 "ai-scientistv2-workshoplevel-2025", 10620 "ai-software-engineering-2025", 10621 "ai-testing-should-2025", 10622 "aiassisted-assessment-coding-2024", 10623 "aiassisted-code-editors-2025", 10624 "aiassisted-fixes-code-2025", 10625 "aiassisted-programming-decreases-2025", 10626 "aidriven-scholarly-peer-2025", 10627 "aidriven-software-engineering-2024", 10628 "aime-ai-system-2024", 10629 "ainative-software-engineering-2024", 10630 "ainstein-assessing-feasibility-2025", 10631 "aipowered-code-review-2023", 10632 "aipowered-code-review-2024", 10633 "ais-environmental-cost-2025", 10634 "aixamine-simplified-llm-2025", 10635 "aligned-query-expansion-2025", 10636 "aligning-objective-llmbased-2024", 10637 "alignment-faking-2024", 10638 "alignment-safety-llm-survey-2025", 10639 "alleviating-fear-losing-2025", 10640 "alphacode-competition-level-2022", 10641 "ambigswe-interactive-agents-2025", 10642 "among-us-measuring-2026", 10643 "among-us-sandbox-2025", 10644 "analysis-evaluation-synthetic-2025", 10645 "analysis-studentllm-interaction-2025", 10646 "anatomy-capability-emergence-2026", 10647 "ancoder-anchored-code-2026", 10648 "animagents-coordinating-multistage-2025", 10649 "annotation-alignment-comparing-2024", 10650 "antiregulatory-ai-how-2025", 10651 "appatch-automated-adaptive-2024", 10652 "applying-rlaif-code-2024", 10653 "appworld-controllable-world-2024", 10654 "apr-llm-survey-2025", 10655 "april-api-synthesis-2025", 10656 "aptserve-adaptive-request-2025", 10657 "aquallm-evaluating-accuracy-2025", 10658 "arcmemo-abstract-reasoning-2025", 10659 "arcs-agentic-retrievalaugmented-2025", 10660 "arena-hard-auto-2024", 10661 "ares-automated-evaluation-2023", 10662 "argus-defending-against-2025", 10663 "arks-active-retrieval-2024", 10664 "art-adaptive-response-2025", 10665 "art-repair-optimizing-2025", 10666 "art-scaling-test-time-compute-2025", 10667 "artifactsbench-bridging-visualinteractive-2025", 10668 "artificial-brain-neuroscience-2026", 10669 "artificial-human-intelligence-2025", 10670 "artificial-intelligence-assistance-2026", 10671 "artificial-just-artful-2025", 10672 "artificial-organisations-2026", 10673 "ask-me-anything-2022", 10674 "askeda-design-assistant-2024", 10675 "assessing-answerability-queries-2024", 10676 "assessing-correctness-llmbased-2025", 10677 "assessing-domainlevel-susceptibility-2026", 10678 "assessing-impact-code-2025", 10679 "assessing-latent-automated-2024", 10680 "assessing-verifying-task-2024", 10681 "astrovisbench-code-benchmark-2025", 10682 "asymptotic-study-incontext-2025", 10683 "atlas-artifact-generation-2025", 10684 "atom-thoughts-markov-2025", 10685 "attacking-llms-ai-2025", 10686 "attacks-by-content-2025", 10687 "attention-all-you-2025", 10688 "attention-is-all-you-need-2017", 10689 "attention-pruning-automated-2025", 10690 "attention-tracker-detecting-2024", 10691 "audit-trails-accountability-2026", 10692 "auditing-fairness-under-2026", 10693 "augmented-language-models-2023", 10694 "autocodebench-large-language-2025", 10695 "autocypher-improving-llms-2024", 10696 "autoflow-automated-workflow-2024", 10697 "autogen-enabling-nextgen-2023", 10698 "autogen-multi-agent-2023", 10699 "autokaggle-multiagent-framework-2024", 10700 "automated-bug-detection-2025", 10701 "automated-cc-program-2024", 10702 "automated-code-generation-2025", 10703 "automated-code-review-practice-2024", 10704 "automated-discovery-test-2025", 10705 "automated-extraction-mechanical-2026", 10706 "automated-formalization-conceptual-2025", 10707 "automated-knowledge-component-2025", 10708 "automated-program-repair-2022", 10709 "automated-program-repair-2023", 10710 "automated-program-repair-2023-2", 10711 "automated-program-repair-2024", 10712 "automated-program-repair-2025", 10713 "automated-repair-c-2025", 10714 "automated-smart-contract-2025", 10715 "automated-structural-testing-2026", 10716 "automated-test-case-2024", 10717 "automated-test-generation-2024", 10718 "automated-unit-test-2024", 10719 "automatic-generation-benchmarks-2024", 10720 "automatic-universal-prompt-2024", 10721 "automatically-benchmarking-code-agents-2025", 10722 "automatically-generating-web-2025", 10723 "automatically-surfacing-opportunities-2025", 10724 "automating-deception-scalable-2025", 10725 "automating-rest-api-2024", 10726 "automating-structural-engineering-2025", 10727 "automation-ai-intergenerational-2025", 10728 "autonomous-normative-multiagent-2025", 10729 "autonomous-supplier-evaluation-2025", 10730 "autop2c-llmbased-agent-2025", 10731 "autostreampipe-llm-assisted-2025", 10732 "autotom-scaling-modelbased-2025", 10733 "autovcoder-systematic-framework-2024", 10734 "autoverus-automated-proof-2024", 10735 "awcp-workspace-delegation-2026", 10736 "backdoor-attribution-elucidating-2025", 10737 "backdoor-samples-detection-2025", 10738 "backdoored-retrievers-prompt-2024", 10739 "backdooring-bias-large-2026", 10740 "backdoorpowered-prompt-injection-2025", 10741 "backportbench-multilingual-benchmark-2025", 10742 "bamas-structuring-budgetaware-2025", 10743 "bamboo-comprehensive-benchmark-2023", 10744 "banglaforge-llm-collaboration-2025", 10745 "bashexplainer-retrievalaugmented-bash-2022", 10746 "battleagentbench-benchmark-evaluating-2024", 10747 "bayesian-reward-models-2024", 10748 "beavertails-improved-safety-2023", 10749 "behavior-alignment-new-2024", 10750 "bench-benchmark-toolagentuser-2024", 10751 "benchmark-contamination-survey-2024", 10752 "benchmark-expertlevel-academic-2025", 10753 "benchmark-test-time-scaling-agents-2026", 10754 "benchmarking-ai-models-2025", 10755 "benchmarking-ai-models-2025-2", 10756 "benchmarking-epistemology-construct-2025", 10757 "benchmarking-hallucination-large-2024", 10758 "benchmarking-large-language-2022", 10759 "benchmarking-large-language-2024", 10760 "benchmarking-llms-unit-2025", 10761 "benchmarks-automated-commonsense-2023", 10762 "bert-pretraining-deep-2018", 10763 "best-practices-ai-2023", 10764 "beyond-automation-job-redesign-2025", 10765 "beyond-benchmark-innovative-2025", 10766 "beyond-chinchillaoptimal-accounting-2023", 10767 "beyond-chunks-graphs-2025", 10768 "beyond-commit-developer-perspectives-2026", 10769 "beyond-correctness-benchmarking-2024", 10770 "beyond-correctness-rewarding-2025", 10771 "beyond-functional-correctness-2024", 10772 "beyond-imitation-game-2022", 10773 "beyond-mimicry-preference-2025", 10774 "beyond-promptinduced-lies-2025", 10775 "beyond-quantity-trajectory-2026", 10776 "beyond-singleagent-safety-2025", 10777 "beyond-static-datasets-2023", 10778 "beyond-static-pattern-2025", 10779 "beyond-synthetic-benchmarks-2025", 10780 "beyond-textual-context-2025", 10781 "beyond-token-probes-2025", 10782 "bias-assessment-mitigation-2023", 10783 "bias-unveiled-investigating-2024", 10784 "biasalert-plugandplay-tool-2024", 10785 "bigcodebench-2024", 10786 "bigrpo-bidirectional-optimization-2025", 10787 "bioplanner-automatic-evaluation-2023", 10788 "biotrouble-multiagent-workflow-2026", 10789 "bitsaicr-automated-code-2025", 10790 "blockdialect-blockwise-finegrained-2025", 10791 "boosting-llm-reasoning-2025", 10792 "boosting-redundancybased-automated-2023", 10793 "bottomup-domainspecific-superintelligence-2025", 10794 "bpo-staying-close-2024", 10795 "break-sequential-dependency-2024", 10796 "breaking-prompt-wall-2025", 10797 "bridging-human-interpretation-2026", 10798 "bridging-llmgenerated-code-2025", 10799 "broken-neural-scaling-2022", 10800 "browserarena-web-agents-2025", 10801 "browsesafe-understanding-preventing-2025", 10802 "budgetaware-agentic-routing-2026", 10803 "bugdar-aiaugmented-secure-2025", 10804 "bugs-large-language-2024", 10805 "build-your-personalized-2025", 10806 "building-coding-assistant-2024", 10807 "building-cooperative-embodied-2023", 10808 "building-early-warning-2024", 10809 "building-understandable-messaging-2024", 10810 "bypassing-llm-guardrails-2025", 10811 "bytesized32refactored-extensible-interactive-2025", 10812 "c3po-optimized-large-2025", 10813 "cacheprune-neuralbased-attribution-2025", 10814 "calibrating-llm-judges-2025", 10815 "calibration-large-language-2026", 10816 "camel-communicative-agents-2023", 10817 "can-1b-llm-2025", 10818 "can-chatgpt-support-2024", 10819 "can-indirect-prompt-2025", 10820 "can-large-language-2025", 10821 "can-llm-replace-2023", 10822 "can-llms-replace-2025", 10823 "can-reasoning-models-2025", 10824 "can-vibe-coding-2025", 10825 "canaries-coal-mine-2025", 10826 "capability-ceilings-autoregressive-2025", 10827 "capabilityoriented-training-induced-2026", 10828 "capture-contextaware-prompt-2025", 10829 "carbon-footprint-evaluation-2025", 10830 "case-4bit-precision-2022", 10831 "case-study-transformative-2025", 10832 "cast-enhancing-code-2025", 10833 "caster-breaking-costperformance-2026", 10834 "catarena-evaluating-evolutionary-2025", 10835 "catch-me-if-2025", 10836 "catdb-datacatalogguided-llmbased-2025", 10837 "causalarmor-efficient-indirect-2026", 10838 "cbfllm-safe-control-2024", 10839 "cctest-testing-repairing-2022", 10840 "chain-of-thought-prompting-2022", 10841 "chainofthought-prompting-obscures-2025", 10842 "chainpoll-high-efficacy-2023", 10843 "challenge-optimization-context-2025", 10844 "challenges-humanagent-communication-2024", 10845 "challenges-paths-ai-2025", 10846 "changes-coding-behavior-2026", 10847 "characterizing-llm-inference-2025", 10848 "chasing-progress-not-2024", 10849 "chat-bankmanfried-exploration-2024", 10850 "chatassert-llmbased-test-2025", 10851 "chatbot-arena-open-2024", 10852 "chatdev-communicative-agents-2023", 10853 "chateval-better-llmbased-2023", 10854 "chatgpt-agent-system-2025", 10855 "chatgpt-not-all-2023", 10856 "chatinject-abusing-chat-2025", 10857 "chatofthought-collaborative-multiagent-2025", 10858 "chatunitest-framework-llmbased-2023", 10859 "check-your-facts-2023", 10860 "checkpointgcg-auditing-attacking-2025", 10861 "chinchilla-compute-optimal-2022", 10862 "chipbench-nextstep-benchmark-2026", 10863 "chopchop-programmable-framework-2025", 10864 "chorus-zeroshot-hierarchical-2025", 10865 "ciata-risk-assessment-2025", 10866 "cigar-costefficient-program-2024", 10867 "citationenhanced-generation-llmbased-2024", 10868 "citationgrounded-code-comprehension-2025", 10869 "citywalk-enhancing-llmbased-2025", 10870 "ckgfuzzer-llmbased-fuzz-2025", 10871 "clarifygpt-empowering-llmbased-2023", 10872 "clarifygpt-framework-enhancing-2024", 10873 "classeval-manuallycrafted-benchmark-2023", 10874 "classifying-addressing-diversity-2025", 10875 "classit-conversational-lecturealigned-2025", 10876 "claude-sonnet-45-2025", 10877 "cloud-platforms-developing-2024", 10878 "cloudevalyaml-practical-benchmark-2023", 10879 "cloudfix-automated-policy-2025", 10880 "cmoe-converting-mixtureofexperts-2025", 10881 "cocomic-code-completion-2022", 10882 "codamosa-escaping-coverage-2023", 10883 "code-aesthetics-agentic-2025", 10884 "code-hallucination-2024", 10885 "code-hallucinations-slr-2025", 10886 "code-less-align-2024", 10887 "code-llama-2023", 10888 "code-me-me-2025", 10889 "code-review-automation-2025", 10890 "code-review-survey-pre-post-llm-2026", 10891 "codearena-collective-evaluation-2025", 10892 "codeaware-prompting-study-2024", 10893 "codebenchgen-creating-scalable-2024", 10894 "codebert-pretrained-model-2020", 10895 "codecontests-highquality-test-2025", 10896 "codecor-llmbased-selfreflective-2025", 10897 "codecriticbench-holistic-code-2025", 10898 "codediting-reasoningbased-metric-2025", 10899 "codeelo-benchmarking-competitionlevel-2025", 10900 "codefill-multitoken-code-2022", 10901 "codegrag-bridging-gap-2024", 10902 "codeinsight-curated-dataset-2024", 10903 "codejudge-evaluating-code-2024", 10904 "codejudgebench-benchmarking-llmasajudge-2025", 10905 "codemark-imperceptible-watermarking-2023", 10906 "codemirage-hallucinations-code-2024", 10907 "codemmlu-multitask-benchmark-2024", 10908 "codemmlu-multitask-benchmark-2024-2", 10909 "codemorph-mitigating-data-2025", 10910 "codepde-inference-framework-2025", 10911 "codepromptzip-codespecific-prompt-2025", 10912 "coderagbench-can-retrieval-2024", 10913 "codereviewqa-code-review-2025", 10914 "coderl-improving-code-2025", 10915 "codescope-executionbased-multilingual-2023", 10916 "codescore-evaluating-code-2023", 10917 "codesift-llmbased-referenceless-2024", 10918 "codetm4-detecting-machinegenerated-2025", 10919 "codex-humaneval-2021", 10920 "codexity-secure-aiassisted-2024", 10921 "codified-context-infrastructure-2026", 10922 "coding-agents-generating-2026", 10923 "coevolving-llm-coder-2025", 10924 "coffe-code-efficiency-2025", 10925 "cognitive-control-architecture-2025", 10926 "cognitive-models-ai-2026", 10927 "cognitive-overload-attackprompt-2024", 10928 "coladder-supporting-programmers-2023", 10929 "collaborating-genai-incentives-2025", 10930 "collaboration-all-you-2025", 10931 "collaborative-agents-automated-2025", 10932 "collubench-benchmark-predicting-2024", 10933 "colm-collaborative-large-2025", 10934 "colt-lightweight-multillm-2026", 10935 "comback-versatile-dataset-2024", 10936 "combining-costconstrained-runtime-2025", 10937 "combining-large-language-2025", 10938 "comparative-analysis-pretrained-2025", 10939 "comparative-review-ai-2024", 10940 "comparative-study-dsl-2024", 10941 "comparative-study-large-2025", 10942 "compass-contrastive-learning-2026", 10943 "competitive-programming-reasoning-models-2025", 10944 "compilable-neural-code-2022", 10945 "compiler-feedback-loops-2025", 10946 "compilernext-searchbased-compiler-2025", 10947 "completion-by-comprehension-2025", 10948 "complexcodeeval-benchmark-evaluating-2024", 10949 "compounding-reliability-2025", 10950 "comprehensive-analysis-machine-2025", 10951 "comprehensive-llm-secure-code-2025", 10952 "comprehensive-study-posttraining-2023", 10953 "comprehensive-survey-aidriven-2024", 10954 "comprehensive-survey-llm-2024", 10955 "comprehensive-survey-trustworthiness-2025", 10956 "comprehensive-taxonomy-hallucinations-2025", 10957 "comprehensive-verilog-design-2025", 10958 "compute-optimal-inference-2024", 10959 "concept-influence-leveraging-2026", 10960 "conceptguard-neurosymbolic-safety-2025", 10961 "concerned-data-contamination-2024", 10962 "concrete-roadmap-safety-2025", 10963 "condor-enhance-llm-2025", 10964 "confidencedriven-multiscale-model-2026", 10965 "confidenceguided-stepwise-model-2025", 10966 "configuring-agentic-coding-tools-2026", 10967 "conformal-constrained-policy-2025", 10968 "consistency-key-detecting-2025", 10969 "constitutional-ai-2022", 10970 "constrained-decoding-diffusion-2025", 10971 "constrained-decoding-fillinthemiddle-2024", 10972 "context-engineering-ai-2025", 10973 "contextaugmented-code-generation-2024", 10974 "contrastrepair-enhancing-conversationbased-2024", 10975 "control-models-inide-2026", 10976 "controlled-selfevolution-algorithmic-2026", 10977 "convergence-dynamics-agenttoagent-2025", 10978 "cooperbench-why-coding-2026", 10979 "copilot-arena-platform-2025", 10980 "copilot-code-quality-empirical-2023", 10981 "copilot-efficiency-real-world-2024", 10982 "copilot-evaluation-harness-2024", 10983 "copilot-longitudinal-case-study-2025", 10984 "copilot-productivity-controlled-2023", 10985 "copilot-security-weaknesses-2023", 10986 "copilot-zoominfo-productivity-2025", 10987 "copiloting-copilots-fusing-2023", 10988 "core-bench-computational-2024", 10989 "core-comprehensive-ontological-2026", 10990 "corecodebench-decoupling-code-2025", 10991 "correctnessguaranteed-code-generation-2025", 10992 "cosight-enhancing-llmbased-2025", 10993 "cost-accuracy-longterm-2026", 10994 "cost-dynamic-reasoning-2025", 10995 "cotbased-synthesizer-enhancing-2025", 10996 "cotdeceptoradversarial-code-obfuscation-2025", 10997 "cotrag-integrating-chain-2025", 10998 "cotran-llmbased-code-2023", 10999 "courtguard-local-multiagent-2025", 11000 "coverup-effective-high-2024", 11001 "cracking-sql-barriers-2025", 11002 "creativeval-evaluating-creativity-2024", 11003 "critical-evaluation-defenses-2025", 11004 "critical-review-large-2023", 11005 "crosscodeeval-diverse-multilingual-2023", 11006 "crossllm-generalization-behavioral-2025", 11007 "crossmodal-memory-compression-2026", 11008 "crossplatform-evaluation-large-2025", 11009 "crqbench-benchmark-code-2024", 11010 "crscore-reinforcement-learning-2025", 11011 "cruxeval-benchmark-code-2024", 11012 "cruxevalx-benchmark-multilingual-2024", 11013 "cuckoo-attack-stealthy-2025", 11014 "cudaforge-agent-framework-2025", 11015 "current-challenges-software-2024", 11016 "curriculum-guided-massive-2025", 11017 "cursor-speed-quality-tradeoff-2025", 11018 "cweval-outcomedriven-evaluation-2025", 11019 "dacode-agent-data-2024", 11020 "dancing-critiques-enhancing-2025", 11021 "dangers-poisoned-llms-2025", 11022 "dapo-opensource-llm-2025", 11023 "data-contamination-benchmarks-2023", 11024 "datasentinel-gametheoretic-detection-2025", 11025 "deep-dive-into-2024-2", 11026 "deep-rl-matters-2018", 11027 "deepreview-improving-llmbased-2025", 11028 "deepseek-coder-2024", 11029 "deepseek-coder-v2-2024", 11030 "deepseek-r1-2025", 11031 "deepseek-v3-2024", 11032 "defending-against-indirect-2024", 11033 "defense-against-prompt-2024", 11034 "dehallucinator-mitigating-llm-2024", 11035 "demystifying-llmbased-software-2025", 11036 "diffusionsdf-conditional-generative-2022", 11037 "diversity-empowers-intelligence-2024", 11038 "does-prompt-formatting-2024", 11039 "dont-complete-it-2022", 11040 "dont-overthink-it-2025", 11041 "dpo-superior-ppo-2024", 11042 "drawing-pandas-benchmark-2024", 11043 "drift-dynamic-rulebased-2025", 11044 "dspy-compiling-declarative-2023", 11045 "efficient-guided-generation-2023", 11046 "effilearner-enhancing-efficiency-2024", 11047 "emergent-abilities-large-2022", 11048 "emergent-abilities-mirage-2023", 11049 "empirical-evaluation-large-2025", 11050 "empirical-study-finetuning-2023", 11051 "empirical-study-retrievalaugmented-2025", 11052 "empirical-study-usage-2024", 11053 "engineering-multiagent-llms-2025", 11054 "enhancing-code-translation-2024", 11055 "evaluating-language-models-2024", 11056 "evocodebench-evolving-code-2024", 11057 "evomarl-coevolutionary-multiagent-2025", 11058 "evor-evolving-retrieval-2024", 11059 "executable-code-actions-2024", 11060 "experimental-evidence-productivity-2023", 11061 "exploring-generalizable-automated-2025", 11062 "exposing-privacy-gaps-2024", 11063 "fast-inference-from-2022", 11064 "fath-authenticationbased-testtime-2024", 11065 "formalizing-benchmarking-prompt-2023", 11066 "from-code-courtroom-2025", 11067 "from-llm-reasoning-2025", 11068 "frontier-models-in-context-scheming-2024", 11069 "frontiermath-benchmark-evaluating-2024", 11070 "fuzz4all-universal-fuzzing-2023", 11071 "gaia-benchmark-general-2023", 11072 "gamma-revisiting-templatebased-2023", 11073 "gans-created-equal-2018", 11074 "gemma-open-models-2024", 11075 "generative-agents-interactive-2023", 11076 "generative-ai-at-2023", 11077 "github-copilot-test-2024", 11078 "gorilla-large-language-2023", 11079 "gpqa-graduatelevel-googleproof-2023", 11080 "gpt4-technical-report-2023", 11081 "granite-code-models-2024", 11082 "graphcodeagent-dual-graphguided-2025", 11083 "graphcoder-enhancing-repositorylevel-2024", 11084 "grokking-generalization-beyond-2022", 11085 "guardian-multitiered-defense-2024", 11086 "guiding-llms-right-2024", 11087 "hallucination-by-code-2025", 11088 "hallucination-inevitable-innate-2024", 11089 "hallulens-llm-hallucination-2025", 11090 "hits-highcoverage-llmbased-2024", 11091 "how-effective-neural-2023", 11092 "how-much-does-2024", 11093 "how-should-i-2025", 11094 "hybrid-automated-program-2024", 11095 "hyperagent-generalist-software-2024", 11096 "identifying-mitigating-api-2025", 11097 "impact-code-language-2023", 11098 "impact-large-language-2024", 11099 "improving-factuality-reasoning-2023", 11100 "incontext-sharpness-as-2024", 11101 "indirect-prompt-injections-2025", 11102 "inference-scaling-laws-2024", 11103 "injecguard-benchmarking-mitigating-2024", 11104 "interactive-code-generation-2022", 11105 "intuition-to-evidence-productivity-2025", 11106 "invalidator-automated-patch-2023", 11107 "ipiguard-novel-tool-2025", 11108 "jailbreaking-safety-aligned-llms-2024", 11109 "jatmo-prompt-injection-2023", 11110 "judging-llmasajudge-mtbench-2023", 11111 "lamda-language-models-2022", 11112 "large-language-model-2024", 11113 "large-language-models-2023-3", 11114 "largescale-independent-comprehensive-2024", 11115 "leakage-code-generation-2024", 11116 "leakage-reproducibility-crisis-2023", 11117 "learn-code-sustainably-2024", 11118 "less-training-more-2022", 11119 "lessleakbench-first-investigation-2025", 11120 "lessons-from-trenches-2024", 11121 "lhdeception-simulating-understanding-2025", 11122 "linguistics-theory-meets-2024", 11123 "livebench-challenging-contaminationlimited-2024", 11124 "livecodebench-2024", 11125 "llama-3-herd-2024", 11126 "llama-open-efficient-2023", 11127 "llm-agents-se-survey-2024", 11128 "llm-code-review-benchmarking-2025", 11129 "llm-critics-help-2024", 11130 "llm-fuzzing-challenges-2024", 11131 "llm-hallucinations-code-practical-2024", 11132 "llm-long-term-memory-eval-2024", 11133 "llm-strategic-deception-under-pressure-2023", 11134 "llm-unit-test-generation-empirical-2024", 11135 "llmailinject-dataset-from-2025", 11136 "llmassisted-static-analysis-2024", 11137 "llmbased-retrievalaugmented-control-2024", 11138 "llmcoordination-evaluating-analyzing-2023", 11139 "llms-se-systematic-review-2023", 11140 "lost-middle-how-2023", 11141 "lost-mix-evaluating-2025", 11142 "lost-translation-study-2024", 11143 "lpcd-unified-framework-2025", 11144 "lutllm-efficient-large-2025", 11145 "lynx-open-source-2024", 11146 "mactg-multiagent-collaborative-2024", 11147 "madspear-conformitydriven-prompt-2025", 11148 "maestro-multiagent-evaluation-2026", 11149 "magentic-marketplace-opensource-2025", 11150 "magenticone-generalist-multiagent-2024", 11151 "magicoder-source-code-2023", 11152 "malice-agentland-down-2025", 11153 "manipulating-multimodal-agents-2025", 11154 "mapcoder-multiagent-code-2024", 11155 "masai-modular-architecture-2024", 11156 "matplotagent-method-evaluation-2024", 11157 "mcp-safety-audit-2025", 11158 "measuring-agents-production-2025", 11159 "measuring-ai-ability-2025", 11160 "melon-provable-defense-2025", 11161 "memgpt-llms-as-2023", 11162 "mercury-efficiency-benchmark-2024", 11163 "metagpt-multi-agent-framework-2023", 11164 "metr-rct-2025", 11165 "mind2web-generalist-agent-2023", 11166 "mitigating-api-hallucination-2025", 11167 "mlebench-evaluating-machine-2024", 11168 "mmlu-measuring-massive-2020", 11169 "moco-onestop-shop-2026", 11170 "monitorguided-decoding-code-2023", 11171 "more-llm-calls-2024", 11172 "multi-agent-collaboration-survey-2025", 11173 "multiagent-risks-from-2025", 11174 "multiple-scalable-polyglot-2023", 11175 "navigating-complexity-generative-2023", 11176 "neural-exec-learning-2024", 11177 "neurips-reproducibility-2021", 11178 "nlp-evaluation-trouble-2023", 11179 "no-more-manual-2023", 11180 "no-need-lift-2023", 11181 "not-what-youve-2023", 11182 "openhands-ai-sw-agent-2024", 11183 "optima-optimizing-effectiveness-2024", 11184 "optimizationbased-prompt-injection-2024", 11185 "osworld-benchmarking-multimodal-2024", 11186 "palm-scaling-language-2022", 11187 "paperbench-evaluating-ais-2025", 11188 "performance-study-llmgenerated-2024", 11189 "phi4-technical-report-2024", 11190 "plan-and-act-long-horizon-2025", 11191 "poisoning-attacks-llms-2025", 11192 "preventing-rogue-agents-2025", 11193 "productivity-assessment-neural-2022", 11194 "projecteval-benchmark-programming-2025", 11195 "promises-perils-timely-2026", 11196 "prompt-infection-llmtollm-2024", 11197 "prompt-injection-attacks-2024", 11198 "prompt-injection-llm-apps-2023", 11199 "promptarmor-simple-yet-2025", 11200 "quantifying-contamination-evaluating-2024", 11201 "questionable-practices-ml-2024", 11202 "qwen25-technical-report-2024", 11203 "qwen25coder-technical-report-2024", 11204 "ragtruth-hallucination-corpus-2023", 11205 "rapgen-retrievalaugmented-patch-2023", 11206 "reacc-retrievalaugmented-code-2022", 11207 "react-synergizing-reasoning-2022", 11208 "rebench-evaluating-frontier-2024", 11209 "redcode-risky-code-2024", 11210 "reflexion-language-agents-2023", 11211 "repairagent-llm-bug-repair-2024", 11212 "repairllama-efficient-representations-2023", 11213 "repoaudit-autonomous-llmagent-2025", 11214 "repocoder-repositorylevel-code-2023", 11215 "repoformer-selective-retrieval-2024", 11216 "reproducibility-ml-overview-2025", 11217 "rethinking-benchmark-contamination-2023", 11218 "rethinking-verification-llm-2025", 11219 "retrievalaugmented-code-generation-2025", 11220 "rise-ai-teammates-2025", 11221 "rise-potential-large-2023", 11222 "rlcoder-reinforcement-learning-2024", 11223 "rltf-reinforcement-learning-2023", 11224 "runbugrun-executable-dataset-2023", 11225 "scaffolded-model-capability-2023", 11226 "scaling-laws-2020", 11227 "secalign-defending-against-2024", 11228 "secodeplt-unified-platform-2024", 11229 "security-degradation-iterative-2025", 11230 "self-instruct-aligning-language-2022", 11231 "selfcheckgpt-zeroresource-blackbox-2023", 11232 "selfconsistency-improves-chain-2022", 11233 "semcoder-training-code-2024", 11234 "show-your-work-2019", 11235 "sirens-song-ai-2023", 11236 "sleeper-agents-2024", 11237 "sloaware-gpu-dvfs-2024", 11238 "smoothquant-accurate-efficient-2022", 11239 "software-engineering-by-2025", 11240 "soleval-benchmarking-large-2025", 11241 "sparks-agi-early-2023", 11242 "starcoder-2023", 11243 "starcoder2-2024", 11244 "survey-automated-program-2023", 11245 "survey-autonomous-llm-agents-2023", 11246 "survey-code-gen-llm-agents-2025", 11247 "survey-hallucination-large-2023-2", 11248 "survey-large-language-2023", 11249 "survey-learningbased-automated-2023", 11250 "survey-llm-code-generation-2025", 11251 "swe-agent-2024", 11252 "swe-bench-2023", 11253 "swe-bench-plus-2024", 11254 "swe-bench-pro-2025", 11255 "swelancer-can-frontier-2025", 11256 "swerebench-automated-pipeline-2025", 11257 "swtbench-testing-validating-2024", 11258 "syncode-llm-generation-2024", 11259 "syntaxaware-onthefly-code-2022", 11260 "systematic-literature-review-2024", 11261 "tales-from-trenches-2024", 11262 "task-shield-enforcing-2024", 11263 "test-driven-interactive-code-gen-2024", 11264 "test-smells-llmgenerated-2024", 11265 "testdriven-development-llmbased-2024", 11266 "testgeneval-real-world-2024", 11267 "think-deep-think-2025", 11268 "thinking-llms-lie-2025", 11269 "tigercoder-novel-suite-2025", 11270 "toolformer-language-models-2023", 11271 "toolllm-facilitating-large-2023", 11272 "top-leaderboard-ranking-2024", 11273 "tree-thoughts-deliberate-2023", 11274 "troubling-trends-ml-2018", 11275 "trust-ai-benchmarks-2025", 11276 "trustworthy-llm-agents-survey-2025", 11277 "uc-berkeley-mast-2025", 11278 "understanding-software-engineering-2025", 11279 "unified-scaling-laws-2022", 11280 "unsupervised-realtime-hallucination-2024", 11281 "verilogeval-evaluating-large-2023", 11282 "verimind-agentic-llm-2025", 11283 "vibe-coding-practice-2025", 11284 "visualwebarena-evaluating-multimodal-2024", 11285 "voyager-open-ended-2023", 11286 "wasp-benchmarking-web-2025", 11287 "webarena-autonomous-agents-2023", 11288 "webbench-llm-code-2025", 11289 "what-wrong-your-2024", 11290 "when-singleagent-skills-2026", 11291 "where-do-ai-2026", 11292 "which-agent-causes-2025", 11293 "wizardcoder-empowering-code-2023", 11294 "your-code-generated-2023" 11295 ], 11296 [ 11297 "automated-program-repair-2023-3", 11298 "automated-program-repair-2024-3", 11299 "automated-program-repair-2025-2" 11300 ], 11301 [ 11302 "beyond-hype-comprehensive-2024", 11303 "how-beginning-programmers-2024", 11304 "how-far-we-2023" 11305 ], 11306 [ 11307 "2025-ai-agent-2026", 11308 "remote-labor-index-2025" 11309 ], 11310 [ 11311 "artificial-intelligence-system-2024", 11312 "comparative-study-ai-2025" 11313 ], 11314 [ 11315 "coprompter-usercentric-evaluation-2024", 11316 "who-validates-validators-2024" 11317 ], 11318 [ 11319 "caredio-cultural-alignment-2025", 11320 "unintended-impacts-llm-2024" 11321 ], 11322 [ 11323 "collab-controlled-decoding-2025", 11324 "transfer-q-star-2024" 11325 ], 11326 [ 11327 "ai-inference-falling-costs-2025", 11328 "intelligence-per-watt-2025" 11329 ], 11330 [ 11331 "analysis-research-status-2025", 11332 "utboost-rigorous-evaluation-2025" 11333 ], 11334 [ 11335 "combined-approach-program-2024", 11336 "unified-multitask-learning-2022" 11337 ], 11338 [ 11339 "bioragent-retrievalaugmented-generation-2024", 11340 "gemini-15-technical-report-2024" 11341 ] 11342 ] 11343 } 11344 }