scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (26055B)
      1 {
      2   "paper": {
      3     "title": "Building A Coding Assistant via the Retrieval-Augmented Language Model",
      4     "authors": [
      5       "Xinze Li",
      6       "Hanbin Wang",
      7       "Zhenghao Liu",
      8       "Shi Yu",
      9       "Shuo Wang",
     10       "Yukun Yan",
     11       "Yukai Fu",
     12       "Yu Gu",
     13       "Ge Yu"
     14     ],
     15     "year": 2024,
     16     "venue": "ACM Trans. Inf. Syst.",
     17     "arxiv_id": "2410.16229",
     18     "doi": "10.1145/3695868"
     19   },
     20   "checklist": {
     21     "artifacts": {
     22       "code_released": {
     23         "applies": true,
     24         "answer": true,
     25         "justification": "The paper states 'All codes are available at https://github.com/NEUIR/CONAN' in the preamble section (page 1)."
     26       },
     27       "data_released": {
     28         "applies": true,
     29         "answer": true,
     30         "justification": "The paper uses publicly available datasets: CodeSearchNet, Concode, CgCSN, PY150, JavaCorpus, HumanEval, and MBPP. All are standard public benchmarks referenced with citations and available independently."
     31       },
     32       "environment_specified": {
     33         "applies": true,
     34         "answer": false,
     35         "justification": "Section 4.4 mentions 'PyTorch and Huggingface transformers' and 'OpenMatch' but provides no version numbers, requirements file, Dockerfile, or detailed environment specification."
     36       },
     37       "reproduction_instructions": {
     38         "applies": true,
     39         "answer": false,
     40         "justification": "No step-by-step reproduction instructions are provided in the paper. The GitHub link is given but the paper itself contains no README-style reproduction guide or specific commands to run."
     41       }
     42     },
     43     "statistical_methodology": {
     44       "confidence_intervals_or_error_bars": {
     45         "applies": true,
     46         "answer": false,
     47         "justification": "All results in Tables 5-12 are reported as point estimates only. No confidence intervals, error bars, or ± notation appear anywhere in the paper."
     48       },
     49       "significance_tests": {
     50         "applies": true,
     51         "answer": false,
     52         "justification": "The paper makes numerous comparative claims (e.g., 'CONAN achieves convincing performance' and 'significantly outperforms previous retrieval augmented code generation models') but reports no statistical significance tests such as p-values, t-tests, or bootstrap tests."
     53       },
     54       "effect_sizes_reported": {
     55         "applies": true,
     56         "answer": true,
     57         "justification": "The paper reports improvements with baseline context throughout, e.g., 'surpasses the state-of-the-art models REDCODER-EXT with an average of approximately 3.1% and 0.6% improvements on CsCSN and Concode datasets' (Sec 5.1), and 'approximately 7% improvements on CgCSN' with full tables showing baseline and proposed method scores."
     58       },
     59       "sample_size_justified": {
     60         "applies": true,
     61         "answer": false,
     62         "justification": "No justification is given for why these particular datasets and their sizes are sufficient. Dataset sizes are reported in Table 2 but there is no discussion of whether they are adequate for the claims being made."
     63       },
     64       "variance_reported": {
     65         "applies": true,
     66         "answer": false,
     67         "justification": "No standard deviations, variance measures, or multi-run results are reported. All results appear to be from single runs with no indication of run-to-run variability."
     68       }
     69     },
     70     "evaluation_design": {
     71       "baselines_included": {
     72         "applies": true,
     73         "answer": true,
     74         "justification": "Extensive baselines are included across multiple categories: retrieval models (BM25, CodeBERT, CodeT5, SCODE-R), PLMs (GPT-2, CodeGPT, PLBART, UniXcoder, CodeT5), PLM w. RAG (REDCODER, ReACC), and LLMs (DeepSeek-Coder, CodeQwen), documented in Section 4.3."
     75       },
     76       "baselines_contemporary": {
     77         "applies": true,
     78         "answer": true,
     79         "justification": "Baselines include contemporary models such as REDCODER (2021), ReACC (2022), UniXcoder (2022), CodeRetriever (2022), DeepSeek-Coder (2024), and CodeQwen1.5 (2024). These represent recent and competitive work in the area."
     80       },
     81       "ablation_study": {
     82         "applies": true,
     83         "answer": true,
     84         "justification": "Table 9 presents ablation studies removing RAG, FID, and Dual-View components individually. Table 11 further ablates the pretraining strategies (MEP, CDA) and compares masking strategies (span vs entity mask). Section 5.2 discusses results."
     85       },
     86       "multiple_metrics": {
     87         "applies": true,
     88         "answer": true,
     89         "justification": "Multiple metrics are used: EM, BLEU, CBLEU (CodeBLEU) for code generation; smoothed BLEU-4 for summarization; EM and ES (edit similarity) for code completion; MRR@100 for retrieval; Pass@k for HumanEval/MBPP. Documented in Section 4.2."
     90       },
     91       "human_evaluation": {
     92         "applies": true,
     93         "answer": false,
     94         "justification": "No human evaluation is conducted. All evaluations are automated metrics. For a system claiming to be a 'code assistant,' human evaluation of output quality or utility would be relevant but is absent."
     95       },
     96       "held_out_test_set": {
     97         "applies": true,
     98         "answer": true,
     99         "justification": "All datasets have explicit train/dev/test splits shown in Table 2 and Table 4. Results are reported on test sets. The paper also states it excludes target code/documentation from retrieval databases to prevent information leakage (Section 4.1)."
    100       },
    101       "per_category_breakdown": {
    102         "applies": true,
    103         "answer": true,
    104         "justification": "Results are broken down by task (code generation, summarization, completion), by dataset (Concode, CgCSN, CsCSN, PY150, JavaCorpus, HumanEval, MBPP), and by programming language (Python, Java, Ruby, JavaScript, Go, PHP) in Tables 5-10."
    105       },
    106       "failure_cases_discussed": {
    107         "applies": true,
    108         "answer": true,
    109         "justification": "Section 5.1 discusses that CONAN does not outperform ReACC on code completion and explains why (different backbone architectures). Section 5.3 and Figure 5 analyze when retrieved documents help vs. hurt (pred==gold vs pred!=gold groups)."
    110       },
    111       "negative_results_reported": {
    112         "applies": true,
    113         "answer": true,
    114         "justification": "The paper reports that CONAN underperforms ReACC on code completion (Table 7), that the dual-view method shows 'less effectiveness in the code summarization task' (Section 5.2), and that MEP alone 'shows almost the same performance as the baseline model' (Section 5.4)."
    115       }
    116     },
    117     "claims_and_evidence": {
    118       "abstract_claims_supported": {
    119         "applies": true,
    120         "answer": true,
    121         "justification": "The abstract claims CONAN 'achieves convincing performance on different code generation tasks and significantly outperforms previous retrieval augmented code generation models,' which is supported by Tables 5-8 showing improvements over REDCODER-EXT and other baselines across most tasks."
    122       },
    123       "causal_claims_justified": {
    124         "applies": true,
    125         "answer": true,
    126         "justification": "The ablation studies (Tables 9, 11) use controlled single-variable manipulation to support causal claims about component contributions (e.g., removing RAG, FID, or Dual-View individually). Language like 'the improvements mainly derive from' is supported by these ablations."
    127       },
    128       "generalization_bounded": {
    129         "applies": true,
    130         "answer": false,
    131         "justification": "The title claims to build 'A Coding Assistant' generally, and the abstract claims it 'aims to build a code assistant by mimicking the knowledge-seeking behaviors of humans during coding.' However, generation experiments are limited to Python and Java on specific benchmarks, and the six-language evaluation (Python, Java, Ruby, JavaScript, Go, PHP) covers retrieval only. There is no testing on other languages such as C++, TypeScript, or Rust, and no real-world deployment evaluation. The paper does not bound these generalization limits."
    132       },
    133       "alternative_explanations_discussed": {
    134         "applies": true,
    135         "answer": false,
    136         "justification": "The paper does not discuss alternative explanations for its results. For example, the improvements could stem from the larger effective context window rather than the dual-view representation, or from data leakage between CodeSearchNet-derived training and test sets. No such alternatives are considered."
    137       }
    138     },
    139     "setup_transparency": {
    140       "model_versions_specified": {
    141         "applies": true,
    142         "answer": true,
    143         "justification": "Specific model names with size are given: CodeT5-base as backbone, Deepseek-Coder-6.7b-Instruct, CodeQwen1.5-7B-Chat (Section 4.3-4.4). These are specific enough to identify the exact models."
    144       },
    145       "prompts_provided": {
    146         "applies": true,
    147         "answer": false,
    148         "justification": "When using LLMs (DeepSeek-Coder, CodeQwen), the paper provides no actual prompt text. It describes the process in equations (Eq. 14: concatenating generated knowledge with query) but does not show the actual prompt templates or system instructions used."
    149       },
    150       "hyperparameters_reported": {
    151         "applies": true,
    152         "answer": true,
    153         "justification": "Section 4.4 reports learning rates (1e-4, 2e-5, 1e-5, 5e-5), batch sizes (128, 64, 1), epochs (10, 12, 1), warmup settings (0.1 proportion, 1000 steps), optimizers (Adam, AdamW), and for LLM inference: temperature 0.2 and max generation length 512 tokens."
    154       },
    155       "scaffolding_described": {
    156         "applies": true,
    157         "answer": true,
    158         "justification": "The retrieval-augmented pipeline is described in detail in Section 3: CONAN-R retrieves top-N documents, CONAN-G uses FID architecture to encode them, and the dual-view representation concatenates code documentation with code snippets. Section 3.4 describes how CONAN assists LLMs. The architecture is illustrated in Figure 2."
    159       },
    160       "data_preprocessing_documented": {
    161         "applies": true,
    162         "answer": true,
    163         "justification": "Data preprocessing is documented: CodeSearchNet filtering for CgCSN/CsCSN (Section 4.1), AST parsing to remove unparseable examples, entity identification using BytesIO and tree_sitter (Section 4.1), retrieval database construction with deduplication and exclusion of target answers (Section 4.1)."
    164       }
    165     },
    166     "limitations_and_scope": {
    167       "limitations_section_present": {
    168         "applies": true,
    169         "answer": false,
    170         "justification": "There is no dedicated limitations or threats-to-validity section in the paper. The paper goes directly from evaluation results and case studies (Section 5) to Conclusion (Section 6) with no limitations discussion."
    171       },
    172       "threats_to_validity_specific": {
    173         "applies": true,
    174         "answer": false,
    175         "justification": "No threats to validity are discussed anywhere in the paper, neither specific nor generic."
    176       },
    177       "scope_boundaries_stated": {
    178         "applies": true,
    179         "answer": false,
    180         "justification": "No explicit scope boundaries are stated. The paper does not discuss what the results do NOT show, what settings were not tested, or what claims the authors are not making."
    181       }
    182     },
    183     "data_integrity": {
    184       "raw_data_available": {
    185         "applies": true,
    186         "answer": true,
    187         "justification": "The datasets used (CodeSearchNet, Concode, HumanEval, MBPP, PY150, JavaCorpus) are all publicly available and independently verifiable. The GitHub repository is provided for code. Raw experimental data (individual predictions) are not released, but the input data is fully public."
    188       },
    189       "data_collection_described": {
    190         "applies": true,
    191         "answer": true,
    192         "justification": "Section 4.1 describes the data sources, construction of retrieval databases, filtering procedures, and dataset statistics (Tables 1-4). Pretraining data construction from CodeSearchNet is described with examples (Figure 3)."
    193       },
    194       "recruitment_methods_described": {
    195         "applies": false,
    196         "answer": false,
    197         "justification": "No human participants are involved. All data comes from standard public benchmarks (CodeSearchNet, HumanEval, MBPP, etc.)."
    198       },
    199       "data_pipeline_documented": {
    200         "applies": true,
    201         "answer": true,
    202         "justification": "The data pipeline is documented: raw CodeSearchNet → filtering → code-documentation pair construction → entity identification → masking (Section 4.1, Figure 3). Retrieval database construction steps with size counts are provided in Table 3."
    203       }
    204     },
    205     "conflicts_of_interest": {
    206       "funding_disclosed": {
    207         "applies": true,
    208         "answer": true,
    209         "justification": "The Acknowledgments section lists funding: 'Natural Science Foundation of China under Grant (No. 92267201, No. 62206042 and No. U23B2019), the Joint Funds of Natural Science Foundation of Liaoning Province (No. 2023-MSBA-081), and the Fundamental Research Funds for the Central Universities (No. N2416012).'"
    210       },
    211       "affiliations_disclosed": {
    212         "applies": true,
    213         "answer": true,
    214         "justification": "Author affiliations are clearly listed: Northeastern University (China), Tsinghua University, and Chinese Academy of Sciences. The paper evaluates their own proposed system, not a commercial product, so there is no undisclosed corporate conflict."
    215       },
    216       "funder_independent_of_outcome": {
    217         "applies": true,
    218         "answer": true,
    219         "justification": "The funders are Chinese government science foundations (NSFC, Liaoning Province, Central Universities). These are independent research grants with no financial stake in the outcome of comparing code retrieval models."
    220       },
    221       "financial_interests_declared": {
    222         "applies": true,
    223         "answer": false,
    224         "justification": "No competing interests statement or financial interests declaration is present in the paper."
    225       }
    226     },
    227     "contamination": {
    228       "training_cutoff_stated": {
    229         "applies": true,
    230         "answer": false,
    231         "justification": "The paper uses DeepSeek-Coder and CodeQwen1.5 LLMs for evaluation on HumanEval and MBPP but does not state the training data cutoff dates for these models."
    232       },
    233       "train_test_overlap_discussed": {
    234         "applies": true,
    235         "answer": false,
    236         "justification": "The paper excludes target answers from retrieval databases to prevent information leakage from the RAG pipeline, but does not discuss whether the LLMs (DeepSeek-Coder, CodeQwen) may have seen HumanEval or MBPP test cases during pretraining."
    237       },
    238       "benchmark_contamination_addressed": {
    239         "applies": true,
    240         "answer": false,
    241         "justification": "HumanEval (published 2021) and MBPP (published 2021) are widely available online. DeepSeek-Coder and CodeQwen were trained after 2021 and may have seen these benchmarks. The paper does not address this contamination risk."
    242       }
    243     },
    244     "human_studies": {
    245       "pre_registered": {
    246         "applies": false,
    247         "answer": false,
    248         "justification": "No human participants are involved in this study. All evaluation is automated on benchmark datasets."
    249       },
    250       "irb_or_ethics_approval": {
    251         "applies": false,
    252         "answer": false,
    253         "justification": "No human participants are involved in this study."
    254       },
    255       "demographics_reported": {
    256         "applies": false,
    257         "answer": false,
    258         "justification": "No human participants are involved in this study."
    259       },
    260       "inclusion_exclusion_criteria": {
    261         "applies": false,
    262         "answer": false,
    263         "justification": "No human participants are involved in this study."
    264       },
    265       "randomization_described": {
    266         "applies": false,
    267         "answer": false,
    268         "justification": "No human participants are involved in this study."
    269       },
    270       "blinding_described": {
    271         "applies": false,
    272         "answer": false,
    273         "justification": "No human participants are involved in this study."
    274       },
    275       "attrition_reported": {
    276         "applies": false,
    277         "answer": false,
    278         "justification": "No human participants are involved in this study."
    279       }
    280     },
    281     "cost_and_practicality": {
    282       "inference_cost_reported": {
    283         "applies": true,
    284         "answer": false,
    285         "justification": "No inference cost, latency, or per-example cost is reported. The system involves retrieval + generation + LLM inference but the practical cost of this pipeline is not quantified."
    286       },
    287       "compute_budget_stated": {
    288         "applies": true,
    289         "answer": false,
    290         "justification": "No GPU hours, training time, or total computational budget is stated. Section 4.4 mentions a GPU memory limitation affecting batch size but does not quantify the overall compute."
    291       }
    292     }
    293   },
    294   "claims": [
    295     {
    296       "claim": "CONAN outperforms previous retrieval-augmented code generation models (REDCODER-EXT) with approximately 3.1% average BLEU improvement on CsCSN and 0.6% on Concode datasets.",
    297       "evidence": "Table 5 shows CONAN achieving BLEU scores of 23.5/26.5 on CsCSN Python/Java summarization (vs REDCODER-EXT 20.9/22.9) and 42.8 BLEU on Concode code generation (vs REDCODER-EXT 42.5). Section 5.1.",
    298       "supported": "strong"
    299     },
    300     {
    301       "claim": "CONAN achieves more significant improvements on CgCSN (approximately 7%) than on Concode (0.6%), demonstrating ability to handle longer code generation.",
    302       "evidence": "Table 6 shows CONAN achieving 32.9/37.7 BLEU on CgCSN Python/Java vs REDCODER-EXT 24.4/29.0, a substantial improvement. Average code length is 98 for CgCSN vs 27 for Concode (Table 2). Section 5.1.",
    303       "supported": "strong"
    304     },
    305     {
    306       "claim": "CONAN can serve as an effective assistant for LLMs, achieving approximately 10% improvement in EM when used with DeepSeek-Coder and CodeQwen.",
    307       "evidence": "Table 5 shows DSCoder-6.7b-Ins going from 0 EM to 24.2 EM with CONAN on Concode, and CQwen1.5-7B-Chat going from 0 to 24.2 EM. Section 5.1.",
    308       "supported": "strong"
    309     },
    310     {
    311       "claim": "The improvements of CONAN mainly derive from external retrieved knowledge, with FID architecture providing additional 3% improvement and dual-view representation providing 1.85% improvement on code generation.",
    312       "evidence": "Table 9 ablation study shows w/o RAG → w/o FID gives ~7.6% improvement, w/o FID → CONAN gives ~3% improvement, and w/o Dual-View → CONAN gives ~1.85% on code generation. Section 5.2.",
    313       "supported": "strong"
    314     },
    315     {
    316       "claim": "CONAN-R achieves state-of-the-art zero-shot code retrieval, outperforming CodeRetriever by about 2% on CodeSearch and 14% on Adv.",
    317       "evidence": "Table 10 shows CONAN-R zero-shot achieving 70.9 overall MRR on CodeSearch (vs CodeRetriever 69.1) and 46.1 MRR on Adv (vs CodeRetriever 34.7). The Adv improvement is actually ~11.4 absolute points. Section 5.4.",
    318       "supported": "moderate"
    319     },
    320     {
    321       "claim": "Code-Documentation Alignment (CDA) is the primary driver of retrieval improvements, while Masked Entity Prediction (MEP) alone shows little improvement.",
    322       "evidence": "Table 11 shows MEP alone has virtually no zero-shot improvement over vanilla CodeT5 (0.03 vs 0.03), while CDA alone jumps to 45.01. Adding MEP to CDA further improves from 45.01 to 46.08. Section 5.4.",
    323       "supported": "strong"
    324     }
    325   ],
    326   "methodology_tags": [
    327     "benchmark-eval"
    328   ],
    329   "key_findings": "CONAN, a retrieval-augmented code assistant combining a structure-aware retriever (CONAN-R) and a dual-view code representation generator (CONAN-G), outperforms prior RAG-based code generation models on code generation and summarization benchmarks. The system's improvements primarily derive from incorporating external retrieved code knowledge, with the FID architecture and the dual-view representation contributing smaller additional gains; on the retrieval side, code-documentation alignment pretraining is the most impactful component. When used as an assistant for LLMs like DeepSeek-Coder and CodeQwen, CONAN improves their performance by filtering and denoising retrieved code documents into shorter, higher-quality inputs. However, CONAN underperforms the retrieval-augmented decoder-only model ReACC on code completion tasks, suggesting the T5-based architecture has limitations for autoregressive completion.",
    330   "red_flags": [
    331     {
    332       "flag": "No statistical significance testing",
    333       "detail": "The paper makes numerous claims about one method outperforming another based solely on comparing point estimates without any significance tests. Given the absence of variance reporting, it is impossible to know whether the reported differences are reliable."
    334     },
    335     {
    336       "flag": "No variance or multi-run reporting",
    337       "detail": "All results appear to be from single experimental runs. Without multiple runs or seed variation analysis, the reported numbers could be artifacts of random initialization or data ordering."
    338     },
    339     {
    340       "flag": "No limitations section",
    341       "detail": "The paper has no dedicated limitations or threats-to-validity discussion. For a system paper proposing a 'code assistant,' this omission is significant since there is no discussion of when the approach fails, what languages/domains it does not cover, or what the practical deployment challenges are."
    342     },
    343     {
    344       "flag": "Benchmark contamination risk with LLMs",
    345       "detail": "HumanEval and MBPP are widely available online. The paper evaluates DeepSeek-Coder and CodeQwen on these benchmarks without discussing whether these models may have seen the test cases during pretraining, which would inflate the LLM baseline and potentially the CONAN-augmented scores."
    346     },
    347     {
    348       "flag": "Overly broad claims relative to evaluation scope",
    349       "detail": "The paper titles itself a 'Coding Assistant' but evaluates generation only on Python and Java with specific benchmark datasets; the six-language evaluation covers retrieval only. No real-world deployment evaluation and no user studies are conducted."
    350     }
    351   ],
    352   "cited_papers": [
    353     {
    354       "title": "Evaluating Large Language Models Trained on Code",
    355       "authors": ["Mark Chen", "Jerry Tworek"],
    356       "year": 2021,
    357       "relevance": "Introduces HumanEval benchmark used for evaluating code generation capabilities of LLMs."
    358     },
    359     {
    360       "title": "Retrieval Augmented Code Generation and Summarization",
    361       "authors": ["Md Rizwan Parvez", "Wasi Ahmad", "Saikat Chakraborty", "Baishakhi Ray", "Kai-Wei Chang"],
    362       "year": 2021,
    363       "relevance": "Proposes REDCODER framework for retrieval-augmented code generation, a key baseline for RAG-based code systems."
    364     },
    365     {
    366       "title": "ReACC: A Retrieval-Augmented Code Completion Framework",
    367       "authors": ["Shuai Lu", "Nan Duan", "Hojae Han"],
    368       "year": 2022,
    369       "relevance": "Presents a retrieval-augmented code completion framework combining sparse and dense retrieval with autoregressive generation."
    370     },
    371     {
    372       "title": "CodeT5: Identifier-aware Unified Pre-trained Encoder-Decoder Models for Code Understanding and Generation",
    373       "authors": ["Yue Wang", "Weishi Wang", "Shafiq Joty", "Steven C.H. Hoi"],
    374       "year": 2021,
    375       "relevance": "Foundational code-oriented pretrained language model used as the backbone for CONAN, central to code generation research."
    376     },
    377     {
    378       "title": "CodeBERT: A Pre-Trained Model for Programming and Natural Languages",
    379       "authors": ["Zhangyin Feng", "Daya Guo", "Duyu Tang"],
    380       "year": 2020,
    381       "relevance": "Seminal pretrained model for code, widely used as a baseline in code generation and retrieval research."
    382     },
    383     {
    384       "title": "DeepSeek-Coder: When the Large Language Model Meets Programming – The Rise of Code Intelligence",
    385       "authors": ["Daya Guo", "Qihao Zhu"],
    386       "year": 2024,
    387       "relevance": "Recent code LLM used as a baseline for evaluating CONAN's LLM-assistant capabilities."
    388     },
    389     {
    390       "title": "DocPrompting: Generating Code by Retrieving the Docs",
    391       "authors": ["Shuyan Zhou", "Uri Alon", "Frank F Xu", "Zhengbao Jiang", "Graham Neubig"],
    392       "year": 2022,
    393       "relevance": "Uses documentation retrieval for code generation with FID architecture, directly related to retrieval-augmented code generation approaches."
    394     },
    395     {
    396       "title": "RepoFusion: Training Code Models to Understand Your Repository",
    397       "authors": ["Disha Shrivastava", "Denis Kocetkov", "Harm de Vries", "Dzmitry Bahdanau", "Torsten Scholak"],
    398       "year": 2023,
    399       "relevance": "Repository-level code understanding using retrieval augmentation, relevant to contextual code generation research."
    400     },
    401     {
    402       "title": "CodeRetriever: A Large Scale Contrastive Pre-Training Method for Code Search",
    403       "authors": ["Xiaonan Li", "Yeyun Gong"],
    404       "year": 2022,
    405       "relevance": "State-of-the-art code retrieval model using contrastive pre-training, key baseline for evaluating code search effectiveness."
    406     },
    407     {
    408       "title": "Program Synthesis with Large Language Models",
    409       "authors": ["Jacob Austin", "Augustus Odena", "Maxwell Nye"],
    410       "year": 2021,
    411       "relevance": "Introduces MBPP benchmark for evaluating program synthesis capabilities of language models."
    412     },
    413     {
    414       "title": "Leveraging Passage Retrieval with Generative Models for Open Domain Question Answering",
    415       "authors": ["Gautier Izacard", "Edouard Grave"],
    416       "year": 2021,
    417       "relevance": "Proposes the Fusion-in-Decoder (FID) architecture used as the generation backbone in CONAN-G."
    418     },
    419     {
    420       "title": "GraphCodeBERT: Pre-training Code Representations with Data Flow",
    421       "authors": ["Daya Guo", "Shuo Ren", "Shuai Lu"],
    422       "year": 2021,
    423       "relevance": "Code representation model incorporating data flow graphs, used as a baseline and as the foundation for CodeRetriever."
    424     }
    425   ]
    426 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs