ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan.json (25383B)


      1 {
      2   "paper": {
      3     "title": "Adaptive Vision-Based Coverage Optimization in Mobile Wireless Sensor Networks: A Multi-Agent Deep Reinforcement Learning Approach",
      4     "authors": [
      5       "P. Soltani",
      6       "M. Eskandarpour",
      7       "S. Heidari",
      8       "F. Alizadeh",
      9       "H. Soleimani"
     10     ],
     11     "year": 2025,
     12     "venue": "Unknown (preprint or conference paper; no venue stated in paper text)"
     13   },
     14   "checklist": {
     15     "artifacts": {
     16       "code_released": {
     17         "applies": true,
     18         "answer": false,
     19         "justification": "No GitHub link, Zenodo archive, or any repository URL is provided in the paper. The implementation details are described (TensorFlow, Python, OpenAI Gym-style interfaces) but no code is released."
     20       },
     21       "data_released": {
     22         "applies": true,
     23         "answer": false,
     24         "justification": "The experiments use a custom simulation environment. No dataset is released. The CNN model was 'trained on a synthetic dataset of labeled sensor images' but this dataset is not made available."
     25       },
     26       "environment_specified": {
     27         "applies": true,
     28         "answer": false,
     29         "justification": "The paper mentions Python and TensorFlow and some hyperparameters but provides no requirements.txt, Dockerfile, or specific library versions that would allow environment recreation."
     30       },
     31       "reproduction_instructions": {
     32         "applies": true,
     33         "answer": false,
     34         "justification": "No README, reproduction scripts, or step-by-step instructions are provided. While hyperparameters are described in Section IV, there are no instructions to run and reproduce the experiments."
     35       }
     36     },
     37     "statistical_methodology": {
     38       "confidence_intervals_or_error_bars": {
     39         "applies": true,
     40         "answer": false,
     41         "justification": "Results in Table 2 are reported as point estimates only (e.g., 91.8% coverage, 68 energy units). No confidence intervals or error bars are reported, despite results being averaged over 5 runs."
     42       },
     43       "significance_tests": {
     44         "applies": true,
     45         "answer": false,
     46         "justification": "The paper claims the proposed method 'consistently outperforms' all baselines across all metrics, but no statistical significance tests (t-test, Mann-Whitney U, etc.) are performed. Comparisons are made by directly contrasting point estimates."
     47       },
     48       "effect_sizes_reported": {
     49         "applies": true,
     50         "answer": true,
     51         "justification": "The paper reports percentage improvements with baseline context: '26.5% improvement in coverage' (with raw numbers 91.8% vs baselines in Table 2), '32% reduction in energy consumption' (68 vs 85/94/100 units), '22% decrease in redundancy,' and '45% extension of network lifetime.' The schema explicitly states that percentage improvement with baseline context counts as YES."
     52       },
     53       "sample_size_justified": {
     54         "applies": true,
     55         "answer": false,
     56         "justification": "The choice of 100 sensor nodes, 200 episodes, and 5 random seeds is not justified with any power analysis or argument for why these quantities are sufficient to draw reliable conclusions."
     57       },
     58       "variance_reported": {
     59         "applies": true,
     60         "answer": false,
     61         "justification": "Results in Table 2 are averaged over 5 runs but reported as point estimates only. The 'Learning Stability (Variance)' metric in Table 2 is reward variance within training (a performance metric), not variance across experimental runs. The statement about standard deviation dropping below 5% of the mean after episode 100 describes within-run convergence, not cross-run variability. No standard deviation, IQR, or spread measure for the main results across runs is provided."
     62       }
     63     },
     64     "evaluation_design": {
     65       "baselines_included": {
     66         "applies": true,
     67         "answer": true,
     68         "justification": "The paper compares against multiple baselines: random placement, static grid deployment, centralized DRL, RSSI-based MARL [27], SLAM-DRL [29], and graph-based MARL [28], covering both naive and state-of-the-art alternatives."
     69       },
     70       "baselines_contemporary": {
     71         "applies": true,
     72         "answer": true,
     73         "justification": "The strongest baselines include work from 2022 (Zhao et al.), 2023 (Feng et al.), and 2024 (Khan et al.), which are recent and represent current approaches in the field."
     74       },
     75       "ablation_study": {
     76         "applies": true,
     77         "answer": false,
     78         "justification": "The paper performs a reward coefficient sensitivity analysis (varying alpha, beta, gamma across 27 configurations), but this is parameter tuning, not an ablation study. No system components are removed or isolated — the Dueling DQN architecture, Prioritized Experience Replay, target network stabilization, and vision-based localization are never individually ablated to measure their contribution."
     79       },
     80       "multiple_metrics": {
     81         "applies": true,
     82         "answer": true,
     83         "justification": "Multiple metrics are reported: final area coverage (%), energy consumption (units), redundancy rate (%), recovery time (seconds), network lifetime (episodes), and learning stability (variance)."
     84       },
     85       "human_evaluation": {
     86         "applies": false,
     87         "answer": false,
     88         "justification": "This is a sensor network coverage optimization paper evaluated purely with automated simulation metrics. Human evaluation is not relevant to the claims being made."
     89       },
     90       "held_out_test_set": {
     91         "applies": true,
     92         "answer": false,
     93         "justification": "The system is evaluated on the same simulation environment used for training. There is no separate held-out test set or held-out environment with different parameters used exclusively for final evaluation."
     94       },
     95       "per_category_breakdown": {
     96         "applies": true,
     97         "answer": false,
     98         "justification": "Results are reported as single aggregate numbers per method. No per-environment, per-scenario, or per-density breakdowns are provided beyond the scalability results for 200 and 300 sensors."
     99       },
    100       "failure_cases_discussed": {
    101         "applies": true,
    102         "answer": false,
    103         "justification": "The paper does not show or analyze failure cases or conditions under which the proposed method fails or underperforms. The limitations section is absent; all results show the proposed method as superior."
    104       },
    105       "negative_results_reported": {
    106         "applies": true,
    107         "answer": false,
    108         "justification": "Every experiment and configuration tested shows the proposed method as the best performer. No negative results (conditions where the approach fails or configurations that hurt performance) are reported, though the reward sensitivity analysis hints that reducing beta slightly reduces coverage."
    109       }
    110     },
    111     "claims_and_evidence": {
    112       "abstract_claims_supported": {
    113         "applies": true,
    114         "answer": false,
    115         "justification": "The abstract claims '26.5% improvement in coverage' and '45% extension of network lifetime' 'compared to traditional distance-based localization.' The distance-based localization baseline (non-vision MARL using RSSI) achieves 78.6% coverage and 120 episodes lifetime. The actual improvements over this baseline are 16.8% (coverage) and 33% (lifetime), not 26.5% and 45%. The 26.5% coverage figure only matches a different, weaker baseline (static grid: 91.8% vs. 72.5%), and the 45% lifetime figure does not match random placement either (160 vs. 80 episodes is a 100% improvement), so neither claimed percentage corresponds to the stated comparison target."
    116       },
    117       "causal_claims_justified": {
    118         "applies": true,
    119         "answer": false,
    120         "justification": "The paper makes causal claims such as 'incorporating real-time vision feedback into decentralized MARL significantly improves sensing efficiency' and 'vision-based feedback eliminates the energy cost associated with inter-node localization messaging.' These causal claims are not validated by controlled experiments isolating the vision component versus other design choices; the non-vision MARL baseline uses a different localization method entirely."
    121       },
    122       "generalization_bounded": {
    123         "applies": true,
    124         "answer": false,
    125         "justification": "The paper was validated only in simulation with 100 nodes in a simplified 2D 500x500m environment. The conclusion section and abstract make broad claims about applicability to 'real-world MWSN deployments in a wide range of fields—from environmental monitoring and smart cities to disaster response and autonomous exploration' without bounding these claims to the tested settings."
    126       },
    127       "alternative_explanations_discussed": {
    128         "applies": true,
    129         "answer": false,
    130         "justification": "No alternative explanations for the observed performance advantages are discussed. The paper does not consider whether the strong results could be due to simulation artifacts, favorable hyperparameter tuning, or weak baseline implementations. There is no threats-to-validity section."
    131       }
    132     },
    133     "setup_transparency": {
    134       "model_versions_specified": {
    135         "applies": true,
    136         "answer": false,
    137         "justification": "The paper specifies 'TensorFlow' without a version. It uses a 'Dueling Double Deep Q-Network' architecture but no specific version of any framework or library is provided."
    138       },
    139       "prompts_provided": {
    140         "applies": false,
    141         "answer": false,
    142         "justification": "This paper does not use language model prompting. The agents are reinforcement learning algorithms, not prompted LLMs."
    143       },
    144       "hyperparameters_reported": {
    145         "applies": true,
    146         "answer": true,
    147         "justification": "Section IV reports key hyperparameters: learning rate 1e-4, discount factor gamma=0.99, mini-batch size 64, epsilon from 1.0 to 0.1 over 200 episodes, replay buffer size 100,000, target network updated every 1000 steps, reward parameters alpha=1, beta=2, gamma=3."
    148       },
    149       "scaffolding_described": {
    150         "applies": false,
    151         "answer": false,
    152         "justification": "This paper uses multi-agent reinforcement learning, not agentic LLM scaffolding. The RL framework is described in detail, but there is no LLM-based scaffolding to describe."
    153       },
    154       "data_preprocessing_documented": {
    155         "applies": true,
    156         "answer": true,
    157         "justification": "The image processing pipeline is described in Section III: Gaussian blurring, adaptive thresholding, bounding box extraction, geometric calibration via fiducial markers, histogram equalization, and temporal filtering. The CNN architecture is described with specific layer configurations."
    158       }
    159     },
    160     "limitations_and_scope": {
    161       "limitations_section_present": {
    162         "applies": true,
    163         "answer": false,
    164         "justification": "There is no dedicated limitations or threats-to-validity section in the paper. The paper goes directly from performance evaluation to conclusion without a substantive discussion of limitations."
    165       },
    166       "threats_to_validity_specific": {
    167         "applies": true,
    168         "answer": false,
    169         "justification": "No specific threats to validity are discussed. The paper briefly mentions 'exact hardware-level power profiling was not performed' and future extensions to edge computing, but does not discuss threats to the validity of its conclusions."
    170       },
    171       "scope_boundaries_stated": {
    172         "applies": true,
    173         "answer": false,
    174         "justification": "The paper does not explicitly state what its results do NOT show. The conclusion makes broad applicability claims without bounding results to the 2D simulation environment, single network density, or specific sensor types tested."
    175       }
    176     },
    177     "data_integrity": {
    178       "raw_data_available": {
    179         "applies": true,
    180         "answer": false,
    181         "justification": "No raw simulation logs, episode-level data, or experimental data are made available. Only aggregated results tables and figures are presented in the paper."
    182       },
    183       "data_collection_described": {
    184         "applies": true,
    185         "answer": true,
    186         "justification": "The simulation setup is described in Section IV: 100 mobile nodes in 500x500m area, sensing radius 20m, communication range 50m, energy budget 100 units, 5 repeated runs with different random seeds, 200 episodes each."
    187       },
    188       "recruitment_methods_described": {
    189         "applies": false,
    190         "answer": false,
    191         "justification": "There are no human participants. This is a simulation study with synthetic sensor networks, so recruitment is not applicable."
    192       },
    193       "data_pipeline_documented": {
    194         "applies": true,
    195         "answer": true,
    196         "justification": "The data pipeline from image capture to reward computation is described: camera captures every 10 seconds, CNN detects LED positions (95.2% IoU accuracy), sensor locations used to compute coverage and derive reward signals. The flow is documented sufficiently to understand the processing steps."
    197       }
    198     },
    199     "conflicts_of_interest": {
    200       "funding_disclosed": {
    201         "applies": true,
    202         "answer": false,
    203         "justification": "No acknowledgments section and no mention of any funding source, grants, or financial support is found anywhere in the paper."
    204       },
    205       "affiliations_disclosed": {
    206         "applies": true,
    207         "answer": true,
    208         "justification": "All authors are listed as affiliated with the School of Electrical Engineering, Iran University of Science and Technology, Tehran, Iran, which is disclosed on the title page."
    209       },
    210       "funder_independent_of_outcome": {
    211         "applies": false,
    212         "answer": false,
    213         "justification": "No funding source is disclosed, so this question cannot be assessed. The paper appears to be unfunded academic research."
    214       },
    215       "financial_interests_declared": {
    216         "applies": true,
    217         "answer": false,
    218         "justification": "There is no competing interests statement or patent disclosure in the paper. The absence of any such declaration means this criterion is not satisfied."
    219       }
    220     },
    221     "contamination": {
    222       "training_cutoff_stated": {
    223         "applies": false,
    224         "answer": false,
    225         "justification": "This paper does not evaluate a pre-trained language model or any model with a training data cutoff on a benchmark. It trains a DRL agent from scratch in a simulation environment."
    226       },
    227       "train_test_overlap_discussed": {
    228         "applies": false,
    229         "answer": false,
    230         "justification": "The DRL agent is trained in simulation; there is no pre-trained model with potential training data contamination. The contamination concern does not apply."
    231       },
    232       "benchmark_contamination_addressed": {
    233         "applies": false,
    234         "answer": false,
    235         "justification": "No pre-trained model is evaluated on an existing benchmark. The evaluation environment is a custom simulation, so benchmark contamination does not apply."
    236       }
    237     },
    238     "human_studies": {
    239       "pre_registered": {
    240         "applies": false,
    241         "answer": false,
    242         "justification": "No human participants are involved. This is a simulation study."
    243       },
    244       "irb_or_ethics_approval": {
    245         "applies": false,
    246         "answer": false,
    247         "justification": "No human participants are involved. IRB approval is not applicable."
    248       },
    249       "demographics_reported": {
    250         "applies": false,
    251         "answer": false,
    252         "justification": "No human participants are involved. Demographics are not applicable."
    253       },
    254       "inclusion_exclusion_criteria": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "No human participants are involved. Inclusion/exclusion criteria are not applicable."
    258       },
    259       "randomization_described": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "No human participants are involved. Randomization of participants is not applicable."
    263       },
    264       "blinding_described": {
    265         "applies": false,
    266         "answer": false,
    267         "justification": "No human participants are involved. Blinding is not applicable."
    268       },
    269       "attrition_reported": {
    270         "applies": false,
    271         "answer": false,
    272         "justification": "No human participants are involved. Attrition reporting is not applicable."
    273       }
    274     },
    275     "cost_and_practicality": {
    276       "inference_cost_reported": {
    277         "applies": true,
    278         "answer": false,
    279         "justification": "While the paper discusses computational complexity in O-notation (Section III.E), no actual wall-clock inference time, GPU/CPU requirements, or cost per decision step for real deployment is reported."
    280       },
    281       "compute_budget_stated": {
    282         "applies": true,
    283         "answer": false,
    284         "justification": "No GPU hours, hardware specifications for training, or total computational budget is stated. The paper mentions using TensorFlow but provides no information about training hardware or time required."
    285       }
    286     }
    287   },
    288   "claims": [
    289     {
    290       "claim": "The proposed MADRL+Vision method achieves 91.8% final area coverage, compared to 85.4% for centralized DRL, 78.6% for non-vision MARL, 72.5% for static grid, and 65.2% for random placement.",
    291       "evidence": "Table 2 (COMPARISON BETWEEN DIFFERENT METHODS), results averaged over 5 independent runs. Section IV states 'Table 1 summarizes the results averaged over five independent runs.'",
    292       "supported": "moderate"
    293     },
    294     {
    295       "claim": "The proposed method achieves a 32% reduction in energy consumption compared to the next best approach.",
    296       "evidence": "Table 2 shows 68 energy units vs. 85 for centralized DRL (next best among referenced DRL methods). However, 68 vs. 85 units is only a 20% reduction; the 32% figure matches 68 vs. the 100-unit baseline, not the next best approach. The 32% figure is stated in Section IV and the abstract. The energy model is described as using three components: mobility, communication, and processing energy.",
    297       "supported": "moderate"
    298     },
    299     {
    300       "claim": "The proposed method achieves a 26.5% improvement in coverage compared to traditional distance-based localization.",
    301       "evidence": "Stated in abstract. Table 2 shows 91.8% vs. approximately 72.5% (static grid as proxy for traditional). The comparison baseline for the 26.5% figure is not explicitly identified in the results section.",
    302       "supported": "weak"
    303     },
    304     {
    305       "claim": "The system achieves 45% extension of network lifetime compared to baselines.",
    306       "evidence": "Abstract states '45%' lifetime extension. Table 2 shows 160 episodes for proposed vs. 145 for centralized DRL (best baseline) — a 10.3% improvement, or vs. 80 for random placement — a 100% improvement. The 45% figure is not directly traceable to a specific baseline comparison.",
    307       "supported": "weak"
    308     },
    309     {
    310       "claim": "The CNN-based LED localization achieves 95.2% average localization accuracy on a test set measured by IoU.",
    311       "evidence": "Section I states: 'The system achieves an average localization accuracy of 95.2% on a test set, measured by intersection-over-union (IoU) between predicted and actual LED locations.' The test set is described as synthetic images 'generated under varied lighting and angle conditions.'",
    312       "supported": "moderate"
    313     },
    314     {
    315       "claim": "The system is robust and scalable, maintaining high coverage (89.4% and 86.8%) with 200 and 300 sensors in the same 500x500m area.",
    316       "evidence": "Section III states: 'we ran additional simulations with 200 and 300 mobile sensors within the same 500 × 500 m² field. The system retained stable convergence and high final coverage—reaching 89.4% and 86.8%, respectively.' No comparison to baselines at these scales is provided.",
    317       "supported": "weak"
    318     }
    319   ],
    320   "methodology_tags": [
    321     "benchmark-eval",
    322     "case-study"
    323   ],
    324   "key_findings": "This paper proposes a Multi-Agent Deep Reinforcement Learning (MADRL) framework combined with vision-based (camera + CNN) localization for autonomous sensor deployment optimization in Mobile Wireless Sensor Networks. In simulation with 100 sensor nodes over 200 training episodes (5 runs), the proposed Dueling Double DQN approach achieves 91.8% area coverage — the highest among six compared methods — while also minimizing energy consumption (68 units), redundancy (8%), and recovery time (3s). The vision-based feedback eliminates the need for GPS or inter-node communication, enabling operation in GPS-denied environments. However, all results are simulation-only, no code is released, and no statistical significance tests are applied to the comparisons.",
    325   "red_flags": [
    326     {
    327       "flag": "Simulation-only validation with broad generalization claims",
    328       "detail": "All experiments are conducted in a simplified 2D simulation environment (Python/TensorFlow/OpenAI Gym). The paper claims applicability to 'real-world MWSN deployments' in 'environmental monitoring, smart cities, disaster response, and autonomous exploration' without any physical hardware experiments, despite the paper being fundamentally about a vision-based hardware system."
    329     },
    330     {
    331       "flag": "Claimed percentages not clearly traceable to specific baselines",
    332       "detail": "The abstract claims '26.5% improvement in coverage' and '45% extension of network lifetime,' but these figures do not clearly correspond to comparisons in Table 2. For example, 91.8% vs. 72.5% (static grid) would be a 26.6% improvement, but the next-best DRL baseline (centralized DRL at 85.4%) would give only 7.5% improvement. The 45% lifetime extension is similarly ambiguous."
    333     },
    334     {
    335       "flag": "No statistical significance testing",
    336       "detail": "All comparisons between methods in Table 2 are based on point estimates without confidence intervals or significance tests, despite results being averaged over only 5 runs. For metrics with small absolute differences (e.g., recovery time 3.0s vs. 4.2s), the lack of uncertainty quantification makes it impossible to assess whether differences are meaningful."
    337     },
    338     {
    339       "flag": "Potentially weak or self-implemented baselines",
    340       "detail": "Section IV states: 'We implement or simulate simplified versions of these baselines under comparable conditions.' 'Simplified versions' of baselines may not represent the full capability of cited methods (RSSI-MARL [27], SLAM-DRL [29], graph-MARL [28]), creating an unfair comparison that inflates the apparent advantage of the proposed method."
    341     },
    342     {
    343       "flag": "No limitations or threats-to-validity section",
    344       "detail": "The paper has no limitations section. Known limitations — simulation-only evaluation, single 2D environment topology, synthetic lighting conditions for CNN training, no real sensor hardware tested — are not discussed. The conclusion presents the system as broadly ready for real-world deployment."
    345     },
    346     {
    347       "flag": "Out-of-scope paper for agentic AI/LLM programming survey",
    348       "detail": "This paper is about mobile wireless sensor network coverage optimization using DRL, not about AI coding assistants, LLM capabilities, software engineering agents, or agentic programming systems. Its relevance to the survey's core topic is unclear."
    349     },
    350     {
    351       "flag": "No funding disclosure",
    352       "detail": "No funding source, grant numbers, or acknowledgments are present in the paper, which is unusual for academic research. This makes it impossible to assess potential conflicts of interest."
    353     }
    354   ],
    355   "cited_papers": [
    356     {
    357       "title": "A novel deep reinforcement learning approach for mobile sensor deployment",
    358       "authors": [
    359         "Liu, X.",
    360         "Wang, G."
    361       ],
    362       "year": 2021,
    363       "relevance": "Primary baseline method using centralized DRL for sensor deployment; directly compared against in this paper's evaluation."
    364     },
    365     {
    366       "title": "Collaborative multi-agent reinforcement learning for coverage optimization in wireless sensor networks",
    367       "authors": [
    368         "Zhao, Y.",
    369         "Chen, H."
    370       ],
    371       "year": 2022,
    372       "relevance": "RSSI-based MARL baseline for decentralized sensor coverage optimization; directly compared against in this paper."
    373     },
    374     {
    375       "title": "Graph-based MARL for scalable sensor deployment",
    376       "authors": [
    377         "Feng, J.",
    378         "Zhang, K."
    379       ],
    380       "year": 2023,
    381       "relevance": "Graph-based multi-agent RL approach for sensor deployment; directly compared against as a state-of-the-art baseline."
    382     },
    383     {
    384       "title": "SLAM-aided sensor coverage with deep reinforcement learning",
    385       "authors": [
    386         "Khan, A.",
    387         "Wu, Z."
    388       ],
    389       "year": 2024,
    390       "relevance": "SLAM-integrated DRL approach for sensor deployment in GPS-denied environments; directly compared as the most capable prior method."
    391     },
    392     {
    393       "title": "Enhancing Lifetime and Reliability in WSNs: Complementary of Dual-Battery Systems Energy Management Strategy",
    394       "authors": [
    395         "Eskandarpour, M.",
    396         "Soleimani, H."
    397       ],
    398       "year": 2025,
    399       "relevance": "Co-authored by two of this paper's authors (Eskandarpour and Soleimani), making it a self-citation; cited for WSN energy management context."
    400     }
    401   ]
    402 }

Impressum · Datenschutz