scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (27060B)
      1 {
      2   "paper": {
      3     "title": "DeputyDev - AI Powered Developer Assistant: Breaking the Code Review Logjam through Contextual AI to Boost Developer Productivity",
      4     "authors": [
      5       "Vishal Khare",
      6       "Vijay Saini",
      7       "Deepak Sharma",
      8       "Anand Raj",
      9       "Ankit Rana",
     10       "Anshul Yadav"
     11     ],
     12     "year": 2025,
     13     "venue": "arXiv (preprint, under review)",
     14     "arxiv_id": "2508.09676",
     15     "doi": "10.48550/arXiv.2508.09676"
     16   },
     17   "scan_version": 3,
     18   "active_modules": [],
     19   "methodology_tags": ["rct", "case-study"],
     20   "key_findings": "DeputyDev, an AI code review assistant using multi-agent LLM architecture with AST-based context retrieval, was evaluated in a 30-day double-controlled A/B experiment at TATA 1mg with 721 PRs across 3 groups. The test group showed reductions of up to 28.8% in average review time, 42.2% per LOC, and 47.5% in median review time compared to controls. DeputyDev was most effective on smaller PRs (0-50 LOC: ~42% reduction) but showed mixed or negative results on medium and extra-large PRs.",
     21   "checklist": {
     22     "artifacts": {
     23       "code_released": {
     24         "applies": true,
     25         "answer": false,
     26         "justification": "No source code or experiment scripts are released. DeputyDev is a commercial SaaS product (deputydev.ai). No repository URL is provided for the tool or the experimental analysis code."
     27       },
     28       "data_released": {
     29         "applies": true,
     30         "answer": false,
     31         "justification": "The experiment data (PR review times, telemetry data) is proprietary to TATA 1mg and is not released. No dataset download or supplementary data is provided."
     32       },
     33       "environment_specified": {
     34         "applies": true,
     35         "answer": false,
     36         "justification": "No environment specifications, dependency lists, or setup instructions are provided. The paper does not mention any reproducibility artifacts."
     37       },
     38       "reproduction_instructions": {
     39         "applies": true,
     40         "answer": false,
     41         "justification": "No reproduction instructions are provided. The experiment relies on internal TATA 1mg infrastructure and proprietary telemetry, making reproduction by external researchers impossible without detailed instructions."
     42       }
     43     },
     44     "statistical_methodology": {
     45       "confidence_intervals_or_error_bars": {
     46         "applies": true,
     47         "answer": false,
     48         "justification": "Only point estimates are reported (averages, medians, percentage changes in Table 2 and Table 3). No confidence intervals, error bars, or uncertainty measures are provided for any result."
     49       },
     50       "significance_tests": {
     51         "applies": true,
     52         "answer": false,
     53         "justification": "The abstract claims 'statistically significant reduction' but no statistical significance test is reported anywhere in the paper — no p-values, t-tests, Mann-Whitney U, or any test statistic. The claim of statistical significance is entirely unsupported."
     54       },
     55       "effect_sizes_reported": {
     56         "applies": true,
     57         "answer": true,
     58         "justification": "Table 2 reports percentage changes with baseline context: e.g., average review time went from 239.57/278.14 hrs (controls) to 197.97 hrs (test), yielding -17.36%/-28.82%. Per-LOC and median reductions are similarly contextualized. Table 3 breaks this down by PR size category."
     59       },
     60       "sample_size_justified": {
     61         "applies": true,
     62         "answer": false,
     63         "justification": "No justification for the sample size is given. The experiment yielded ~240 PRs per group (721 total after filtering) but no power analysis or sample size rationale is discussed."
     64       },
     65       "variance_reported": {
     66         "applies": true,
     67         "answer": false,
     68         "justification": "No standard deviations, interquartile ranges, or variance measures are reported for any metric. Only averages and medians are provided in Table 2."
     69       }
     70     },
     71     "evaluation_design": {
     72       "baselines_included": {
     73         "applies": true,
     74         "answer": true,
     75         "justification": "Two control sets (ControlSet1 and ControlSet2) serve as baselines, with 33% allocation each alongside the test set. Results are compared against both controls in Tables 2 and 3."
     76       },
     77       "baselines_contemporary": {
     78         "applies": true,
     79         "answer": true,
     80         "justification": "The two control groups are contemporaneous — they are from the same 30-day period (July 27 - August 27, 2024) as the test group, ensuring temporal validity."
     81       },
     82       "ablation_study": {
     83         "applies": true,
     84         "answer": false,
     85         "justification": "DeputyDev has multiple components (6 agents, reflection, blending engine, AST-based context retrieval) but no ablation study tests which components contribute to the observed improvements. The paper does not isolate the effect of any individual component."
     86       },
     87       "multiple_metrics": {
     88         "applies": true,
     89         "answer": true,
     90         "justification": "Three metrics are reported: average review time per PR, average review time per LOC, and median review time (Table 2)."
     91       },
     92       "human_evaluation": {
     93         "applies": true,
     94         "answer": false,
     95         "justification": "No human evaluation of DeputyDev's review quality is performed. The experiment measures only review time metrics. There is no assessment of whether DeputyDev's comments were useful, accurate, or accepted by developers."
     96       },
     97       "held_out_test_set": {
     98         "applies": true,
     99         "answer": false,
    100         "justification": "Post-hoc outlier exclusion thresholds (top 25%, bottom 10% by LOC) and repository balance criteria were applied to the same data on which results are reported. No pre-specified analysis plan or separation between exploratory and confirmatory analysis is documented."
    101       },
    102       "per_category_breakdown": {
    103         "applies": true,
    104         "answer": true,
    105         "justification": "Table 3 provides a breakdown by PR size category (S: 0-50 LOC, M: 51-100, L: 101-200, XL: 201-500), showing DeputyDev's differential effectiveness across categories."
    106       },
    107       "failure_cases_discussed": {
    108         "applies": true,
    109         "answer": false,
    110         "justification": "The paper notes DeputyDev has 'mixed performance' in M and XL categories and mentions 'areas where DeputyDev's performance is suboptimal' (Section 2), but provides no qualitative examples of failed or unhelpful AI reviews, no error analysis of review quality, and no discussion of where the AI's feedback was wrong or misleading."
    111       },
    112       "negative_results_reported": {
    113         "applies": true,
    114         "answer": true,
    115         "justification": "Table 3 shows DeputyDev increased review time per LOC vs ControlSet1 in the M category (+34.01%) and XL category (+100.30%). The paper acknowledges 'mixed performance' in these categories."
    116       }
    117     },
    118     "claims_and_evidence": {
    119       "abstract_claims_supported": {
    120         "applies": true,
    121         "answer": false,
    122         "justification": "The abstract claims 'statistically significant reduction' but no statistical significance test is reported in the paper. The results show percentage differences (Table 2) but 'statistically significant' is asserted without any supporting statistical test or p-value."
    123       },
    124       "causal_claims_justified": {
    125         "applies": true,
    126         "answer": true,
    127         "justification": "The paper uses a double-controlled A/B experiment with random 33% allocation of PRs to three groups (Section 9). While the design has limitations (no blinding, PR-level rather than engineer-level randomization), the basic causal design — random assignment with concurrent controls — is adequate for the causal claims about review time reduction."
    128       },
    129       "generalization_bounded": {
    130         "applies": true,
    131         "answer": false,
    132         "justification": "The title claims to be 'Breaking the Code Review Logjam' in order 'to Boost Developer Productivity' without qualification. Results come from a single company (TATA 1mg) using one LLM (GPT-4o) in one ecosystem (Bitbucket), but claims are framed broadly. The abstract mentions 'external companies' using it but provides no data from those deployments."
    133       },
    134       "alternative_explanations_discussed": {
    135         "applies": true,
    136         "answer": false,
    137         "justification": "No alternative explanations are discussed. The paper does not consider: Hawthorne effect (engineers aware of the experiment), novelty effect, the possibility that immediate AI feedback simply reminds authors to self-fix rather than providing quality reviews, or that reduced review time might reflect reviewers rubber-stamping after seeing AI feedback."
    138       },
    139       "proxy_outcome_distinction": {
    140         "applies": true,
    141         "answer": false,
    142         "justification": "The paper measures review time reduction but frames this as 'boosting developer productivity' (title) and 'improving development workflow timelines and code quality' (abstract). Faster review time is a proxy for productivity, and no measurement of code quality is performed despite quality being claimed as an improvement. The gap between the proxy (time) and the claimed outcomes (productivity, quality) is never acknowledged."
    143       }
    144     },
    145     "setup_transparency": {
    146       "model_versions_specified": {
    147         "applies": true,
    148         "answer": false,
    149         "justification": "The paper mentions 'GPT-4o' (Section 8) and 'Claude 3.5 Sonnet' (Section 6.2.3) without snapshot dates or API version identifiers. Model behavior varies across versions; neither model is pinned to a specific version."
    150       },
    151       "prompts_provided": {
    152         "applies": true,
    153         "answer": false,
    154         "justification": "Prompts are described only in natural language (e.g., 'This agent is responsible to identify and recommend corrective code for any security issues,' Section 6.2.3). No actual prompt text is provided in the paper or appendix. The XML output schema in Appendix A is a response format, not the input prompt."
    155       },
    156       "hyperparameters_reported": {
    157         "applies": true,
    158         "answer": false,
    159         "justification": "No hyperparameters (temperature, top-p, max tokens, etc.) are reported for any LLM call. The paper uses both GPT-4o and Claude 3.5 Sonnet without stating any API parameters."
    160       },
    161       "scaffolding_described": {
    162         "applies": true,
    163         "answer": true,
    164         "justification": "The agentic scaffolding is described in substantial detail: 6 specialized agents (security, code communication, performance, maintainability, errors, business validation), reflection design pattern for iterative improvement, AST-based semantic chunking for context retrieval (Section 6.1), lexical+semantic search fusion (Section 6.1), blending engine with confidence scoring and comment overlap summarization (Section 6.4), and mathematical formalization (Section 6.5)."
    165       },
    166       "data_preprocessing_documented": {
    167         "applies": true,
    168         "answer": true,
    169         "justification": "Section 9 documents: outlier exclusion (top 25% and bottom 10% by LOC), repository balance filtering (≥10 PRs per set or equal count), experiment duration (30 days, July 27 - August 27, 2024), and 33% allocation per set."
    170       }
    171     },
    172     "limitations_and_scope": {
    173       "limitations_section_present": {
    174         "applies": true,
    175         "answer": false,
    176         "justification": "There is no limitations section, threats-to-validity section, or any dedicated discussion of the study's shortcomings. The paper mentions 'areas where DeputyDev's performance is suboptimal' only in passing in the introduction (Section 2)."
    177       },
    178       "threats_to_validity_specific": {
    179         "applies": true,
    180         "answer": false,
    181         "justification": "No threats to validity are discussed anywhere in the paper. Obvious threats like Hawthorne effect, single-company bias, lack of blinding, and PR-level vs engineer-level randomization are not mentioned."
    182       },
    183       "scope_boundaries_stated": {
    184         "applies": true,
    185         "answer": false,
    186         "justification": "No explicit scope boundaries are stated. The paper does not acknowledge that results are limited to one company, one LLM, one version control system, or that code quality was not measured despite being claimed as an improvement."
    187       }
    188     },
    189     "data_integrity": {
    190       "raw_data_available": {
    191         "applies": true,
    192         "answer": false,
    193         "justification": "Raw data (PR review times, telemetry) is not available. Only aggregated statistics are presented in Tables 2-3. No supplementary data files or download links are provided."
    194       },
    195       "data_collection_described": {
    196         "applies": true,
    197         "answer": true,
    198         "justification": "Section 9 describes data collection: Bitbucket telemetry at TATA 1mg, 30-day experiment period (July 27 - August 27, 2024), three-group allocation at 33% each, over 200 engineers involved. Review time metrics (pick-up time, review time, closure cycle) are defined in the abstract."
    199       },
    200       "recruitment_methods_described": {
    201         "applies": true,
    202         "answer": false,
    203         "justification": "The paper states 'over 200 engineers' participated but does not describe how they were selected — whether all TATA 1mg engineers were included, whether participation was voluntary, or whether specific teams were targeted. No recruitment or selection process is described."
    204       },
    205       "data_pipeline_documented": {
    206         "applies": true,
    207         "answer": false,
    208         "justification": "While filtering criteria are stated (outlier exclusion by LOC percentiles, repository balance), the paper does not document how many PRs were collected initially, how many were removed at each filtering stage, or the full pipeline from raw telemetry to the final 721 PRs analyzed."
    209       }
    210     },
    211     "conflicts_of_interest": {
    212       "funding_disclosed": {
    213         "applies": true,
    214         "answer": false,
    215         "justification": "No funding section or acknowledgments are present. The work was conducted at TATA 1mg, a commercial entity, but no funding disclosure is made."
    216       },
    217       "affiliations_disclosed": {
    218         "applies": true,
    219         "answer": true,
    220         "justification": "Author affiliations are clearly listed: all authors are from 'TATA 1mg Healthcare Solutions Private Limited' with institutional email addresses."
    221       },
    222       "funder_independent_of_outcome": {
    223         "applies": true,
    224         "answer": false,
    225         "justification": "TATA 1mg employees are evaluating DeputyDev, which TATA 1mg sells as a SaaS product to external companies. The employer has a direct financial interest in DeputyDev being shown to be effective."
    226       },
    227       "financial_interests_declared": {
    228         "applies": true,
    229         "answer": false,
    230         "justification": "No competing interests or financial interests statement is present. The authors work at the company that commercializes DeputyDev as SaaS, but this conflict is not explicitly declared."
    231       }
    232     },
    233     "contamination": {
    234       "training_cutoff_stated": {
    235         "applies": false,
    236         "answer": false,
    237         "justification": "This paper does not evaluate a pre-trained model's capability on a benchmark. It is an A/B experiment measuring real-world PR review times, not model performance on test data."
    238       },
    239       "train_test_overlap_discussed": {
    240         "applies": false,
    241         "answer": false,
    242         "justification": "Not applicable — the study is an A/B experiment measuring review times in production, not a benchmark evaluation of model knowledge."
    243       },
    244       "benchmark_contamination_addressed": {
    245         "applies": false,
    246         "answer": false,
    247         "justification": "Not applicable — no benchmark evaluation is performed. The study measures the effect of an AI tool on human review workflows."
    248       }
    249     },
    250     "human_studies": {
    251       "pre_registered": {
    252         "applies": true,
    253         "answer": false,
    254         "justification": "No mention of pre-registration. The experiment involves 200+ engineers but no pre-registration link (OSF, AsPredicted, etc.) is provided."
    255       },
    256       "irb_or_ethics_approval": {
    257         "applies": true,
    258         "answer": false,
    259         "justification": "No mention of IRB or ethics board approval for the experiment involving 200+ engineer participants."
    260       },
    261       "demographics_reported": {
    262         "applies": true,
    263         "answer": false,
    264         "justification": "The paper states 'over 200 engineers' but provides no demographic information: no experience levels, team distribution, programming languages used, or any participant characterization."
    265       },
    266       "inclusion_exclusion_criteria": {
    267         "applies": true,
    268         "answer": false,
    269         "justification": "Inclusion/exclusion criteria are described for repositories (balance requirement) and PRs (outlier exclusion by LOC) but not for engineer participants. No criteria for which engineers were included or excluded from the study."
    270       },
    271       "randomization_described": {
    272         "applies": true,
    273         "answer": false,
    274         "justification": "The paper states '33% allocation to each' set but does not describe the randomization mechanism — how PRs were assigned to groups (hash, random number generator, manual), whether randomization was stratified, or what unit was randomized (PR, repository, engineer)."
    275       },
    276       "blinding_described": {
    277         "applies": true,
    278         "answer": false,
    279         "justification": "No blinding is described. Engineers likely knew whether their PR received DeputyDev review (it posts comments on the PR), creating potential for behavioral changes in the test group."
    280       },
    281       "attrition_reported": {
    282         "applies": true,
    283         "answer": false,
    284         "justification": "No attrition information is provided. The paper does not report how many PRs or engineers were in the initial pool vs. the final analysis, beyond the final counts in Table 2 (244, 238, 239)."
    285       }
    286     },
    287     "cost_and_practicality": {
    288       "inference_cost_reported": {
    289         "applies": true,
    290         "answer": false,
    291         "justification": "No inference cost, API costs, or latency figures are reported. Section 5 mentions cost as a reason not to send entire codebases but does not quantify DeputyDev's actual per-review cost."
    292       },
    293       "compute_budget_stated": {
    294         "applies": true,
    295         "answer": false,
    296         "justification": "No total computational budget is stated — no API spend, no token counts, no hardware specifications for the experiment or the production system."
    297       }
    298     }
    299   },
    300   "claims": [
    301     {
    302       "claim": "DeputyDev reduces average PR review time by up to 28.82% compared to controls.",
    303       "evidence": "Table 2 shows TestSet average review time of 197.97 hrs vs ControlSet1 (239.57 hrs, -17.36%) and ControlSet2 (278.14 hrs, -28.82%), based on 721 PRs across three groups.",
    304       "supported": "moderate"
    305     },
    306     {
    307       "claim": "DeputyDev reduces average review time per LOC by up to 42.19%.",
    308       "evidence": "Table 2 shows TestSet average of 7.50 hrs/LOC vs ControlSet1 (12.97 hrs/LOC, -42.19%) and ControlSet2 (12.29 hrs/LOC, -38.98%).",
    309       "supported": "moderate"
    310     },
    311     {
    312       "claim": "DeputyDev reduces median review time by up to 47.52%.",
    313       "evidence": "Table 2 shows TestSet median of 0.41 hrs vs ControlSet1 (0.76 hrs, -46.36%) and ControlSet2 (0.78 hrs, -47.52%).",
    314       "supported": "moderate"
    315     },
    316     {
    317       "claim": "DeputyDev is most effective on smaller PRs.",
    318       "evidence": "Table 3 shows the largest review time reduction for small PRs (0-50 LOC: -43.87%/-41.40%) while medium and extra-large categories show mixed or negative results (review time per LOC vs CS1: +34.01% for M, +100.30% for XL).",
    319       "supported": "moderate"
    320     },
    321     {
    322       "claim": "The results are statistically significant.",
    323       "evidence": "Claimed in the abstract ('statistically significant reduction') but no statistical test, p-value, or test statistic is reported anywhere in the paper.",
    324       "supported": "unsupported"
    325     },
    326     {
    327       "claim": "There is weak correlation between lines of code and code review time.",
    328       "evidence": "Figure 5 shows correlation coefficients of 0.095, 0.004, and 0.052 for test, control 1, and control 2 sets respectively.",
    329       "supported": "strong"
    330     }
    331   ],
    332   "red_flags": [
    333     {
    334       "flag": "Company evaluating its own commercial product",
    335       "detail": "All authors are TATA 1mg employees evaluating DeputyDev, which TATA 1mg sells as SaaS to external companies. This is a textbook conflict of interest: the evaluators have a direct financial interest in positive results. No independent evaluation or external replication is provided."
    336     },
    337     {
    338       "flag": "Statistical significance claimed without any significance test",
    339       "detail": "The abstract asserts 'statistically significant reduction' but the paper contains no p-values, no test statistics, no significance tests of any kind. The claim is stated as fact with zero statistical support."
    340     },
    341     {
    342       "flag": "Post-hoc outlier exclusion may bias results",
    343       "detail": "PRs in the top 25% and bottom 10% by LOC were excluded before analysis (Section 9, point 4). These thresholds appear arbitrary and are not justified. Post-hoc exclusion of 35% of data could be tuned to favor the test group. No sensitivity analysis shows results hold under different thresholds."
    344     },
    345     {
    346       "flag": "No blinding in the A/B experiment",
    347       "detail": "Engineers in the test group received visible AI comments on their PRs. They knew they were getting AI assistance, which could change behavior (Hawthorne effect). Reviewers might also spend less time on PRs already reviewed by AI, regardless of AI quality."
    348     },
    349     {
    350       "flag": "Negative results partially obscured by aggregation",
    351       "detail": "Table 3 shows DeputyDev INCREASED review time per LOC in medium PRs (+34.01% vs CS1) and extra-large PRs (+100.30% vs CS1). These negative results are acknowledged as 'mixed performance' but the paper's framing and conclusion emphasize the positive aggregate results."
    352     },
    353     {
    354       "flag": "Experiment model differs from production model",
    355       "detail": "The A/B experiment used GPT-4o (Section 9, point 6: 'comprehensive workflow and while using OpenAI's GPT-4o as LLM'), but the production system uses Claude 3.5 Sonnet for code reviews (Section 6.2.3, Section 8). Results from one model cannot be assumed to transfer to another."
    356     },
    357     {
    358       "flag": "No measurement of review quality",
    359       "detail": "The paper claims to improve 'code quality' (Section 3, hypothesis 3) but measures only review time. Faster reviews could indicate rubber-stamping or lower-quality human reviews after seeing AI feedback. No quality metric (defect escape rate, comment acceptance rate, post-merge bug rate) is measured."
    360     }
    361   ],
    362   "cited_papers": [
    363     {
    364       "title": "Code review automation: Strengths and weaknesses of the state of the art",
    365       "authors": ["Rosalia Tufano", "Ozren Dabić", "Antonio Mastropaolo", "Matteo Ciniselli", "Gabriele Bavota"],
    366       "year": 2024,
    367       "relevance": "Directly surveys AI-based code review automation approaches, identifying strengths and limitations of current methods."
    368     },
    369     {
    370       "title": "Using pre-trained models to boost code review automation",
    371       "authors": ["Rosalia Tufano", "Simone Masiero", "Antonio Mastropaolo", "Luca Pascarella", "Denys Poshyvanyk", "Gabriele Bavota"],
    372       "year": 2022,
    373       "relevance": "Evaluates pre-trained language models for automating code review, directly relevant to LLM-based code analysis."
    374     },
    375     {
    376       "title": "CommentFinder: A simpler, faster, more accurate code review comments recommendation",
    377       "authors": ["Yang Hong", "Chakkrit Tantithamthavorn", "Patanamon Thongtanunam", "Aldeida Aleti"],
    378       "year": 2022,
    379       "relevance": "Proposes an automated system for recommending code review comments, a closely related task to DeputyDev's core functionality."
    380     },
    381     {
    382       "title": "ChatDev: Communicative agents for software development",
    383       "authors": ["Chen Qian", "Wei Liu", "Hongzhang Liu", "Nuo Chen"],
    384       "year": 2024,
    385       "relevance": "Multi-agent framework for software development that inspired DeputyDev's agentic architecture; directly relevant to agentic AI workflows."
    386     },
    387     {
    388       "title": "Self-Refine: Iterative refinement with self-feedback",
    389       "authors": ["Aman Madaan", "Niket Tandon", "Prakhar Gupta"],
    390       "year": 2023,
    391       "relevance": "Foundational work on LLM self-reflection and iterative improvement, the pattern used in DeputyDev's reflection design."
    392     },
    393     {
    394       "title": "Reflexion: Language agents with verbal reinforcement learning",
    395       "authors": ["Noah Shinn", "Federico Cassano", "Edward Berman"],
    396       "year": 2023,
    397       "relevance": "Proposes verbal self-reflection for LLM agents, directly relevant to the reflection design pattern used in agentic AI systems."
    398     },
    399     {
    400       "title": "Let me speak freely? A study on the impact of format restrictions on performance of large language models",
    401       "authors": ["Zhi Rui Tam", "Cheng-Kuang Wu", "Yi-Lin Tsai", "Chieh-Yen Lin", "Hung-yi Lee", "Yun-Nung Chen"],
    402       "year": 2024,
    403       "relevance": "Studies how enforcing structured output formats affects LLM reasoning quality, directly relevant to LLM deployment practices."
    404     },
    405     {
    406       "title": "AI-assisted assessment of coding practices in modern code review",
    407       "authors": ["Manushree Vijayvergiya", "Małgorzata Salawa", "Ivan Budiselić"],
    408       "year": 2024,
    409       "relevance": "Evaluates AI-assisted code review practices with human studies, directly comparable to DeputyDev's goals and approach."
    410     }
    411   ],
    412   "engagement_factors": {
    413     "practical_relevance": {
    414       "score": 3,
    415       "justification": "DeputyDev is a deployed tool available as SaaS, already in production use at TATA 1mg and external companies — immediately usable by practitioners."
    416     },
    417     "surprise_contrarian": {
    418       "score": 0,
    419       "justification": "Confirms the expected finding that AI code review tools can reduce review turnaround time; no surprising or counterintuitive results."
    420     },
    421     "fear_safety": {
    422       "score": 0,
    423       "justification": "No AI safety, security, or risk concerns are raised by the paper."
    424     },
    425     "drama_conflict": {
    426       "score": 0,
    427       "justification": "No controversy or conflict with existing claims or practices."
    428     },
    429     "demo_ability": {
    430       "score": 2,
    431       "justification": "Available as SaaS at deputydev.ai but not open source; users can sign up but cannot inspect or self-host the system."
    432     },
    433     "brand_recognition": {
    434       "score": 1,
    435       "justification": "TATA 1mg is a known Indian healthcare company but not a major AI research lab or globally recognized tech brand."
    436     }
    437   }
    438 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs