scan-v5.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v5.json (19344B)
      1 {
      2   "scan_version": 5,
      3   "paper_type": "survey",
      4   "paper": {
      5     "title": "A Literature Review on AI-Powered Smart Code Base Navigator",
      6     "authors": [
      7       "Sukanya G",
      8       "Radhika S K",
      9       "Rashmitha R",
     10       "Sanjana N",
     11       "Shanthala M N"
     12     ],
     13     "year": 2025,
     14     "venue": "International Journal of Scientific Research in Engineering and Management",
     15     "arxiv_id": null,
     16     "doi": "10.55041/ijsrem52774"
     17   },
     18   "checklist": {
     19     "claims_and_evidence": {
     20       "abstract_claims_supported": {
     21         "applies": true,
     22         "answer": false,
     23         "justification": "The abstract describes an 'AI-Powered Smart Code Base Navigator' system with features like semantic search and intelligent suggestions, but the paper is a literature review summarizing existing tools—not a novel system or evaluation of one. Abstract reads as a system paper, not a survey paper.",
     24         "source": "haiku"
     25       },
     26       "causal_claims_justified": {
     27         "applies": true,
     28         "answer": false,
     29         "justification": "The abstract claims the system 'boosts developer productivity' but no study design justifies this causal claim. The paper cites papers that make productivity claims, but doesn't evaluate or validate them itself.",
     30         "source": "haiku"
     31       },
     32       "generalization_bounded": {
     33         "applies": true,
     34         "answer": false,
     35         "justification": "Scope is vaguely framed as 'contemporary software development' and 'extensive Python codebases' with no explicit boundaries on years, domains, or applicability limits. The 2019–2025 time span is implied but never stated as review scope.",
     36         "source": "haiku"
     37       },
     38       "alternative_explanations_discussed": {
     39         "applies": true,
     40         "answer": false,
     41         "justification": "The review narrates existing approaches (RAG vs. AST-based, semantic vs. lexical retrieval) but does not discuss competing explanations or critically compare why different methods succeed or fail in different contexts.",
     42         "source": "haiku"
     43       },
     44       "proxy_outcome_distinction": {
     45         "applies": true,
     46         "answer": false,
     47         "justification": "Papers are summarized with metrics (e.g., BLEU scores, MRR, accuracy) without distinguishing whether these proxies (search rank, token match) actually correlate with developer productivity or code quality. No discussion of this gap.",
     48         "source": "haiku"
     49       }
     50     },
     51     "limitations_and_scope": {
     52       "limitations_section_present": {
     53         "applies": true,
     54         "answer": false,
     55         "justification": "No dedicated limitations section exists. A brief 'Conclusions' section mentions 'future opportunities' but never discusses limitations of the review itself, such as search bias, publication bias, or methodological constraints.",
     56         "source": "haiku"
     57       },
     58       "threats_to_validity_specific": {
     59         "applies": true,
     60         "answer": false,
     61         "justification": "No discussion of threats to validity. The review does not address selection bias, publication bias, quality variance among cited papers, or limitations of relying on author summaries rather than full-text analysis.",
     62         "source": "haiku"
     63       },
     64       "scope_boundaries_stated": {
     65         "applies": true,
     66         "answer": false,
     67         "justification": "Scope boundaries are not explicit. No statement of why 2019–2025 was chosen, why only these 20 papers, or what populations/domains/languages are in vs. out of scope.",
     68         "source": "haiku"
     69       }
     70     },
     71     "conflicts_of_interest": {
     72       "funding_disclosed": {
     73         "applies": true,
     74         "answer": false,
     75         "justification": "No funding statement provided. Likely unfunded undergraduate coursework, but this is not explicitly stated.",
     76         "source": "haiku"
     77       },
     78       "affiliations_disclosed": {
     79         "applies": true,
     80         "answer": true,
     81         "justification": "All authors are affiliated with JNNCE, Shivamogga, Karnataka, India. No apparent conflict with any evaluated tools or vendors.",
     82         "source": "haiku"
     83       },
     84       "funder_independent_of_outcome": {
     85         "applies": false,
     86         "answer": false,
     87         "justification": "No funder identified; treated as unfunded work.",
     88         "source": "haiku"
     89       },
     90       "financial_interests_declared": {
     91         "applies": true,
     92         "answer": false,
     93         "justification": "No competing interests or financial interests statement provided.",
     94         "source": "haiku"
     95       }
     96     },
     97     "scope_and_framing": {
     98       "key_terms_defined": {
     99         "applies": true,
    100         "answer": false,
    101         "justification": "Key terms are not formally defined: 'AI-Powered Smart Code Base Navigator', 'semantic code search', 'context-aware code completion', and 'retrieval-augmented generation' are used without precise definitions. The paper assumes readers already know this jargon.",
    102         "source": "haiku"
    103       },
    104       "intended_contribution_clear": {
    105         "applies": true,
    106         "answer": false,
    107         "justification": "The intended contribution is ambiguous. Abstract reads as a system paper proposing a novel tool, but the actual contribution—surveying existing methods, synthesizing findings, identifying gaps—is never explicitly stated. This creates confusion about what the paper claims to add.",
    108         "source": "haiku"
    109       },
    110       "engagement_with_prior_work": {
    111         "applies": true,
    112         "answer": false,
    113         "justification": "The 'Literature Survey' section lists 20 papers (A–T) with individual summaries and a pros/cons table, but does not engage with them—no critical synthesis, no discussion of how this work fits into the broader landscape, no identification of emerging themes or unresolved problems.",
    114         "source": "haiku"
    115       }
    116     }
    117   },
    118   "type_checklist": {
    119     "survey": {
    120       "search_and_selection": {
    121         "search_strategy_reproducible": {
    122           "applies": true,
    123           "answer": false,
    124           "justification": "No search strategy is documented. No mention of databases searched, search engines used, search terms, or how papers were identified. The review cannot be reproduced from the information given.",
    125           "source": "haiku"
    126         },
    127         "inclusion_exclusion_explicit": {
    128           "applies": true,
    129           "answer": false,
    130           "justification": "No explicit inclusion/exclusion criteria stated. Unclear why these 20 papers were selected and others excluded.",
    131           "source": "haiku"
    132         },
    133         "prisma_or_structured_protocol": {
    134           "applies": true,
    135           "answer": false,
    136           "justification": "No mention of PRISMA, Cochrane, or any structured review protocol. This is an unstructured narrative review with no adherence to review standards.",
    137           "source": "haiku"
    138         },
    139         "search_terms_provided": {
    140           "applies": true,
    141           "answer": false,
    142           "justification": "No search terms or queries provided. The paper gives no transparency into how the initial paper pool was identified.",
    143           "source": "haiku"
    144         },
    145         "databases_listed": {
    146           "applies": true,
    147           "answer": false,
    148           "justification": "No databases explicitly listed (e.g., IEEE Xplore, ACM Digital Library, arXiv, etc.). Sources are mentioned only as citations.",
    149           "source": "haiku"
    150         },
    151         "screening_process_documented": {
    152           "applies": true,
    153           "answer": false,
    154           "justification": "No documented screening process. No reporting of initial yield, abstract screening, full-text screening, inter-rater agreement, or selection flow diagram.",
    155           "source": "haiku"
    156         },
    157         "review_scope_justified": {
    158           "applies": true,
    159           "answer": false,
    160           "justification": "Review scope is not justified. Why 2019–2025? Why code search/generation? Why Python focus? Why only 20 papers? No rationale provided for scope choices.",
    161           "source": "haiku"
    162         }
    163       },
    164       "synthesis_quality": {
    165         "conflicting_findings_acknowledged": {
    166           "applies": true,
    167           "answer": false,
    168           "justification": "Conflicting findings across papers (e.g., when one paper argues transformer models are sufficient and another argues RAG is necessary) are not identified or synthesized. Each paper is summarized independently with no comparative analysis.",
    169           "source": "haiku"
    170         },
    171         "quality_assessment_of_sources": {
    172           "applies": true,
    173           "answer": false,
    174           "justification": "No methodological quality assessment of cited papers. The 'Remarks' column notes trade-offs (pros/cons) of each method but does not evaluate methodological rigor, sample size, generalizability, or risk of bias.",
    175           "source": "haiku"
    176         },
    177         "publication_bias_discussed": {
    178           "applies": true,
    179           "answer": false,
    180           "justification": "No discussion of publication bias. All cited papers appear to be published (no null results, no negative findings). No acknowledgment that successful systems are overrepresented in the literature.",
    181           "source": "haiku"
    182         },
    183         "quantitative_synthesis_present": {
    184           "applies": true,
    185           "answer": false,
    186           "justification": "No quantitative synthesis. No meta-analysis, vote counting, effect-size aggregation, or data pooling. Purely narrative summary of individual papers with no statistical integration.",
    187           "source": "haiku"
    188         },
    189         "recommendations_supported_by_evidence": {
    190           "applies": true,
    191           "answer": false,
    192           "justification": "Conclusions recommend 'incorporating domain-specific knowledge', 'expanding support across languages', and 'improving scalability' but these are not supported by systematic evidence synthesis from the reviewed papers—they read as author opinion.",
    193           "source": "haiku"
    194         }
    195       }
    196     }
    197   },
    198   "claims": [
    199     {
    200       "claim": "Semantic code search improves code retrieval accuracy over keyword-based methods",
    201       "evidence": "Multiple cited papers (SENCS [6], PSCS [17], JessCS [14], REINFOREST [9]) demonstrate neural semantic search outperforms lexical baselines on standard benchmarks",
    202       "supported": "moderate"
    203     },
    204     {
    205       "claim": "Retrieval-augmented generation (RAG) improves code completion accuracy significantly",
    206       "evidence": "Papers [5] (WeChat RAG study) and [11] show BLEU/execution improvements of ~50–130%, though [5] cautions results may not generalize beyond their codebase",
    207       "supported": "moderate"
    208     },
    209     {
    210       "claim": "Large language models understand code syntax better than semantics or runtime behavior",
    211       "evidence": "Paper [13] explicitly tests GPT-4, GPT-3.5, StarCoder, CodeLlama on syntax vs. semantic tasks and finds LLMs frequently hallucinate semantic behavior",
    212       "supported": "moderate"
    213     },
    214     {
    215       "claim": "Transformer-based code models (CodeBERT, GraphCodeBERT) set state-of-the-art on code search and generation tasks",
    216       "evidence": "Paper [18] introduced CodeBERT with SOTA results; Papers [6], [17] cite improvements over prior deep-learning baselines",
    217       "supported": "strong"
    218     },
    219     {
    220       "claim": "Incorporating code structure (AST paths) into neural models improves semantic code search",
    221       "evidence": "PSCS [17] and SENCS [6] both show ablation studies where structural features (AST, dependency graphs) significantly improve ranking metrics",
    222       "supported": "strong"
    223     },
    224     {
    225       "claim": "AI-based code analysis tools improve developer productivity",
    226       "evidence": "Abstract claims this; individual papers cite improvements in completion latency [12], search accuracy [6], [17], or educational feedback [7], but no paper measures end-to-end productivity change",
    227       "supported": "weak"
    228     }
    229   ],
    230   "methodology_tags": [
    231     "meta-analysis"
    232   ],
    233   "key_findings": "This narrative literature review surveys 20 papers on AI-driven code search and generation (2019–2025), covering approaches including transformer models (CodeBERT), retrieval-augmented generation (RAG), neural code search (SENCS, PSCS), and LLM-based code analysis. The papers collectively show semantic embeddings outperform lexical search on standard benchmarks, RAG improves code completion accuracy (50–130% gains reported), and structural information (AST paths, syntax trees) enhances semantic matching. However, the cited papers also point to a critical limitation that the review itself does not draw out: LLMs understand code syntax well but frequently fail at semantic and runtime reasoning, and most systems are evaluated on proxy metrics (search rank, BLEU scores) rather than end-to-end developer productivity. The paper does not synthesize conflicting findings or discuss publication bias toward successful systems.",
    234   "red_flags": [
    235     {
    236       "flag": "Misleading abstract",
    237       "detail": "Abstract describes an 'AI-Powered Smart Code Base Navigator' system with implemented features (semantic search, code completion, jump-to-definition), but the paper is a literature review with no novel tool or system described or evaluated. Reads as a system/tool proposal paper, not a survey."
    238     },
    239     {
    240       "flag": "No documented review methodology",
    241       "detail": "No search strategy, databases, search terms, inclusion/exclusion criteria, or screening process documented. Review is not reproducible. This violates PRISMA standards for systematic reviews."
    242     },
    243     {
    244       "flag": "Unstructured narrative review presented as comprehensive survey",
    245       "detail": "Purely narrative summaries of 20 papers with no critical synthesis, thematic analysis, or evidence aggregation. No meta-analysis, vote counting, or effect-size pooling despite claiming to review the field."
    246     },
    247     {
    248       "flag": "No quality assessment of reviewed papers",
    249       "detail": "No methodological quality rubric, risk-of-bias assessment, or evaluation of generalizability. The table lists pros/cons of each method, not methodological rigor of the papers themselves."
    250     },
    251     {
    252       "flag": "No discussion of limitations or scope boundaries",
    253       "detail": "No limitations section. Scope (why 2019–2025? why these 20 papers? what domains?) is never justified. No discussion of publication bias, selection bias, or threats to validity."
    254     },
    255     {
    256       "flag": "Vague intended contribution",
    257       "detail": "Contribution is unclear—is this surveying the field, proposing a framework, identifying gaps, or something else? No explicit statement of what the review adds beyond listing papers."
    258     },
    259     {
    260       "flag": "Key concepts undefined",
    261       "detail": "Terms like 'semantic search', 'context-aware completion', 'retrieval-augmented generation' are used throughout without formal definitions, assuming reader expertise."
    262     },
    263     {
    264       "flag": "No engagement with conflicting findings",
    265       "detail": "Papers that disagree (e.g., on whether model complexity matters [20] vs. sufficiency of simple retrieval [11]) are not acknowledged or synthesized. Each paper summarized in isolation."
    266     },
    267     {
    268       "flag": "Productivity claims unsupported",
    269       "detail": "Abstract and conclusions claim the system/methods 'boost developer productivity' but this is never measured or validated. Individual papers cite proxy metrics (search rank, BLEU, latency) but not end-to-end productivity gains."
    270     },
    271     {
    272       "flag": "Mostly undergraduate co-authors; no evidence of systematic-review training",
    273       "detail": "Lead author is an Assistant Professor; co-authors are listed as 'UG Students'. No evidence of formal training in systematic review methodology (PRISMA, Cochrane) or librarian consultation."
    274     }
    275   ],
    276   "cited_papers": [
    277     {
    278       "title": "CodeBERT: A Pre-Trained Model for Programming and Natural Languages",
    279       "relevance": "Foundational pre-trained model for code-NL tasks; benchmarked on code search and generation—central to survey's scope"
    280     },
    281     {
    282       "title": "Code Search Is All You Need: Improving Code Suggestions with Code Search",
    283       "relevance": "Core finding that retrieval-augmented code generation dramatically improves completion (50–130% BLEU gains); directly relevant to survey topic"
    284     },
    285     {
    286       "title": "Meta-RAG on Large Codebases Using Code Summarization",
    287       "relevance": "Multi-agent retrieval framework achieving SOTA bug-localization on SWE-bench; demonstrates scalability of RAG to industrial code"
    288     },
    289     {
    290       "title": "Deep Semantics-Enhanced Neural Code Search",
    291       "relevance": "Neural code search using dependency graphs and attention; shows structural information improves semantic matching"
    292     },
    293     {
    294       "title": "EVOR: Evolving Retrieval for Code Generation",
    295       "relevance": "Dynamic retrieval-augmented generation adapting to evolving APIs; demonstrates adaptive approaches to code generation"
    296     },
    297     {
    298       "title": "REINFOREST: Reinforcing Semantic Code Similarity for Cross-Lingual Code Search Models",
    299       "relevance": "Cross-language code search using runtime information; shows contrastive learning improves code similarity embeddings"
    300     },
    301     {
    302       "title": "LLMs: Understanding Code Syntax and Semantics for Code Analysis",
    303       "relevance": "Critical limitation: GPT-4, StarCoder, CodeLlama understand syntax but fail at semantic reasoning—key finding on LLM weaknesses"
    304     },
    305     {
    306       "title": "When Deep Learning Met Code Search",
    307       "relevance": "Systematic comparison showing simple supervised models outperform complex RNNs on code search—challenges model complexity assumption"
    308     }
    309   ],
    310   "engagement_factors": {
    311     "practical_relevance": {
    312       "score": 2,
    313       "justification": "Discusses tools and methods practitioners could use (CodeBERT, RAG, neural search), but is a narrative review with no actionable evaluation, comparison, or guidance on which approaches to choose for specific scenarios."
    314     },
    315     "surprise_contrarian": {
    316       "score": 0,
    317       "justification": "No surprising or contrarian findings. Straightforward summary of existing work without challenging conventional wisdom or identifying unexpected patterns in the literature."
    318     },
    319     "fear_safety": {
    320       "score": 0,
    321       "justification": "No discussion of security risks, generated code correctness, hallucinations, or AI safety concerns despite citing paper [13] which documents LLM semantic failures."
    322     },
    323     "drama_conflict": {
    324       "score": 0,
    325       "justification": "No dramatic findings, controversies, or conflicts acknowledged. Papers that disagree are not identified as conflicting; no debates highlighted."
    326     },
    327     "demo_ability": {
    328       "score": 1,
    329       "justification": "Review mentions tools and systems but provides no hands-on demonstrations, benchmarks someone could run, or concrete how-to guidance. Mostly descriptive without interactive elements."
    330     },
    331     "brand_recognition": {
    332       "score": 2,
    333       "justification": "Discusses well-known models (CodeBERT, GPT-4, CodeLlama, GraphCodeBERT) and systems from major organizations (WeChat, academic labs), but this is incidental to the review content."
    334     }
    335   },
    336   "hn_data": {
    337     "threads": [],
    338     "top_points": 0,
    339     "total_points": 0,
    340     "total_comments": 0
    341   }
    342 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs