ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan.json (17600B)


      1 {
      2   "paper": {
      3     "title": "The Generative AI Paradox: GenAI and the Erosion of Trust, the Corrosion of Information Verification, and the Demise of Truth",
      4     "authors": ["Emilio Ferrara"],
      5     "year": 2026,
      6     "venue": "Future Internet",
      7     "arxiv_id": "2601.00306",
      8     "doi": "10.3390/fi18020073"
      9   },
     10   "scan_version": 2,
     11   "active_modules": [],
     12   "methodology_tags": ["theoretical"],
     13   "key_findings": "The paper formalizes 'synthetic reality' as a four-layer stack (content, identity, interaction, institutions) and argues GenAI's most consequential risk is not isolated deepfakes but the progressive erosion of shared epistemic ground. It identifies seven qualitative shifts GenAI introduces (cost collapse, throughput, customization, micro-segmentation, synthetic interaction, provenance gaps, trust erosion) and proposes a layered mitigation stack spanning provenance infrastructure, platform governance, institutional redesign, and public resilience. The paper articulates the 'Generative AI Paradox': as synthetic media becomes ubiquitous, societies may rationally discount all digital evidence, raising the cost of truth.",
     14   "claims": [
     15     {
     16       "claim": "GenAI enables 'synthetic realities' — coherent, interactive, personalized information environments where content, identity, and social interaction are jointly manufactured and mutually reinforcing.",
     17       "evidence": "Conceptual argument developed across Sections 1-2 with the four-layer stack model (Figure 2) and illustrative examples (Figure 1). No empirical validation.",
     18       "supported": "weak"
     19     },
     20     {
     21       "claim": "The most consequential risk is not isolated fake artifacts but progressive erosion of shared epistemic ground and institutional verification practices.",
     22       "evidence": "Argued conceptually throughout. Supported by a case bank (Table 1, Section 4) of five documented risk realizations (2023-2025) including the Hong Kong deepfake fraud and New Hampshire robocalls.",
     23       "supported": "moderate"
     24     },
     25     {
     26       "claim": "GenAI changes the game through seven qualitative mechanisms: cost collapse, scale/throughput, customization, hyper-targeting, automated social engineering, provenance gaps, and trust erosion.",
     27       "evidence": "Section 3 describes each mechanism conceptually. Individual mechanisms are illustrated with references to reporting and prior work, but no quantitative evidence is presented for the claimed shifts.",
     28       "supported": "weak"
     29     },
     30     {
     31       "claim": "Mitigation requires a stack of complementary interventions (provenance, platform governance, institutional redesign, public resilience, accountability) rather than any single solution.",
     32       "evidence": "Section 5 argues this conceptually. No evaluation of any mitigation strategy is presented.",
     33       "supported": "weak"
     34     }
     35   ],
     36   "checklist": {
     37     "artifacts": {
     38       "code_released": {
     39         "applies": false,
     40         "answer": false,
     41         "justification": "Purely theoretical/conceptual paper with no code, models, or computational artifacts to release."
     42       },
     43       "data_released": {
     44         "applies": false,
     45         "answer": false,
     46         "justification": "No dataset was created or used. The case bank in Table 1 is drawn from public reporting, not original data collection."
     47       },
     48       "environment_specified": {
     49         "applies": false,
     50         "answer": false,
     51         "justification": "No computational experiments were performed."
     52       },
     53       "reproduction_instructions": {
     54         "applies": false,
     55         "answer": false,
     56         "justification": "No experiments to reproduce. This is a conceptual framework paper."
     57       }
     58     },
     59     "statistical_methodology": {
     60       "confidence_intervals_or_error_bars": {
     61         "applies": false,
     62         "answer": false,
     63         "justification": "No quantitative experiments or statistical analyses are performed."
     64       },
     65       "significance_tests": {
     66         "applies": false,
     67         "answer": false,
     68         "justification": "No comparative quantitative claims requiring significance tests."
     69       },
     70       "effect_sizes_reported": {
     71         "applies": false,
     72         "answer": false,
     73         "justification": "No quantitative experiments."
     74       },
     75       "sample_size_justified": {
     76         "applies": false,
     77         "answer": false,
     78         "justification": "No empirical sample."
     79       },
     80       "variance_reported": {
     81         "applies": false,
     82         "answer": false,
     83         "justification": "No experimental runs."
     84       }
     85     },
     86     "evaluation_design": {
     87       "baselines_included": {
     88         "applies": false,
     89         "answer": false,
     90         "justification": "Theoretical paper with no system or method to evaluate against baselines."
     91       },
     92       "baselines_contemporary": {
     93         "applies": false,
     94         "answer": false,
     95         "justification": "No baselines — no evaluation performed."
     96       },
     97       "ablation_study": {
     98         "applies": false,
     99         "answer": false,
    100         "justification": "No system with components to ablate."
    101       },
    102       "multiple_metrics": {
    103         "applies": false,
    104         "answer": false,
    105         "justification": "No evaluation with metrics."
    106       },
    107       "human_evaluation": {
    108         "applies": false,
    109         "answer": false,
    110         "justification": "No system outputs to evaluate."
    111       },
    112       "held_out_test_set": {
    113         "applies": false,
    114         "answer": false,
    115         "justification": "No experiments with data splits."
    116       },
    117       "per_category_breakdown": {
    118         "applies": false,
    119         "answer": false,
    120         "justification": "No quantitative results to break down."
    121       },
    122       "failure_cases_discussed": {
    123         "applies": true,
    124         "answer": false,
    125         "justification": "The paper does not discuss where its proposed framework or taxonomy might fail or be inadequate."
    126       },
    127       "negative_results_reported": {
    128         "applies": false,
    129         "answer": false,
    130         "justification": "No experiments that could yield negative results."
    131       }
    132     },
    133     "claims_and_evidence": {
    134       "abstract_claims_supported": {
    135         "applies": true,
    136         "answer": true,
    137         "justification": "The abstract accurately describes what the paper delivers: a four-layer formalization, an expanded taxonomy, qualitative shift analysis, a case bank, a mitigation stack, and a research agenda. These are all present in the paper body."
    138       },
    139       "causal_claims_justified": {
    140         "applies": true,
    141         "answer": false,
    142         "justification": "The paper makes causal claims throughout (e.g., GenAI 'enables' synthetic realities, cost collapse 'expands the pool of capable adversaries', synthetic interaction 'operationalizes' content into lived experience). These causal mechanisms are argued conceptually but not empirically validated. No causal identification strategy is used."
    143       },
    144       "generalization_bounded": {
    145         "applies": true,
    146         "answer": false,
    147         "justification": "The paper makes sweeping claims about societal-level effects ('societies may rationally discount digital evidence altogether') without bounding these to specific contexts, populations, or conditions. The case bank covers only 5 cases from 2023-2025 but conclusions are drawn about systemic societal shifts."
    148       },
    149       "alternative_explanations_discussed": {
    150         "applies": true,
    151         "answer": false,
    152         "justification": "The paper does not consider alternative explanations for the trends it describes. For example, it does not discuss whether existing institutional resilience mechanisms might adapt, whether the 'paradox' might not materialize due to technological countermeasures, or whether historical analogues (photography, Photoshop) suggest different trajectories."
    153       },
    154       "proxy_outcome_distinction": {
    155         "applies": false,
    156         "answer": false,
    157         "justification": "No measurements or proxies — this is a theoretical paper."
    158       }
    159     },
    160     "setup_transparency": {
    161       "model_versions_specified": {
    162         "applies": false,
    163         "answer": false,
    164         "justification": "No models used in experiments."
    165       },
    166       "prompts_provided": {
    167         "applies": false,
    168         "answer": false,
    169         "justification": "No prompting used."
    170       },
    171       "hyperparameters_reported": {
    172         "applies": false,
    173         "answer": false,
    174         "justification": "No experiments with hyperparameters."
    175       },
    176       "scaffolding_described": {
    177         "applies": false,
    178         "answer": false,
    179         "justification": "No agentic scaffolding used."
    180       },
    181       "data_preprocessing_documented": {
    182         "applies": false,
    183         "answer": false,
    184         "justification": "No data collection or preprocessing performed."
    185       }
    186     },
    187     "limitations_and_scope": {
    188       "limitations_section_present": {
    189         "applies": true,
    190         "answer": false,
    191         "justification": "No dedicated limitations section. The paper acknowledges in Section 4 that 'public documentation is uneven across regions and sectors' for the case bank, but this is a brief caveat, not a substantive limitations discussion."
    192       },
    193       "threats_to_validity_specific": {
    194         "applies": true,
    195         "answer": false,
    196         "justification": "No threats-to-validity discussion. The paper does not address threats to its own framework's validity."
    197       },
    198       "scope_boundaries_stated": {
    199         "applies": true,
    200         "answer": true,
    201         "justification": "Section 1.1 ('Contributions and scope') explicitly states the four contributions the paper offers, implicitly bounding scope. Section 4 notes cases 'should be interpreted as illustrative lower bounds rather than a complete census of harms.'"
    202       }
    203     },
    204     "data_integrity": {
    205       "raw_data_available": {
    206         "applies": false,
    207         "answer": false,
    208         "justification": "No original data collected. Case bank draws from public reporting."
    209       },
    210       "data_collection_described": {
    211         "applies": true,
    212         "answer": false,
    213         "justification": "The case bank selection methodology is not described. The paper states it selected cases for 'mechanism diversity' but does not explain how cases were identified, what search strategy was used, or what criteria determined inclusion/exclusion beyond the stated goals."
    214       },
    215       "recruitment_methods_described": {
    216         "applies": false,
    217         "answer": false,
    218         "justification": "Not applicable: the paper involves no human participants and no standard benchmark; cases were selected from public reporting."
    219       },
    220       "data_pipeline_documented": {
    221         "applies": false,
    222         "answer": false,
    223         "justification": "No data pipeline. The paper is a conceptual argument with illustrative examples."
    224       }
    225     },
    226     "conflicts_of_interest": {
    227       "funding_disclosed": {
    228         "applies": true,
    229         "answer": true,
    230         "justification": "The paper explicitly states: 'This research received no external funding.'"
    231       },
    232       "affiliations_disclosed": {
    233         "applies": true,
    234         "answer": true,
    235         "justification": "Author affiliation with USC (Computer Science, Annenberg School for Communication, ISI) is clearly listed."
    236       },
    237       "funder_independent_of_outcome": {
    238         "applies": false,
    239         "answer": false,
    240         "justification": "No external funding — criterion not applicable."
    241       },
    242       "financial_interests_declared": {
    243         "applies": true,
    244         "answer": true,
    245         "justification": "The paper includes a conflicts of interest statement: 'The author declares no conflicts of interest.'"
    246       }
    247     },
    248     "contamination": {
    249       "training_cutoff_stated": {
    250         "applies": false,
    251         "answer": false,
    252         "justification": "No pre-trained model evaluated on any benchmark."
    253       },
    254       "train_test_overlap_discussed": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "No model evaluation performed."
    258       },
    259       "benchmark_contamination_addressed": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "No benchmark evaluation."
    263       }
    264     },
    265     "human_studies": {
    266       "pre_registered": {
    267         "applies": false,
    268         "answer": false,
    269         "justification": "No human participants."
    270       },
    271       "irb_or_ethics_approval": {
    272         "applies": false,
    273         "answer": false,
    274         "justification": "No human participants."
    275       },
    276       "demographics_reported": {
    277         "applies": false,
    278         "answer": false,
    279         "justification": "No human participants."
    280       },
    281       "inclusion_exclusion_criteria": {
    282         "applies": false,
    283         "answer": false,
    284         "justification": "No human participants."
    285       },
    286       "randomization_described": {
    287         "applies": false,
    288         "answer": false,
    289         "justification": "No human participants."
    290       },
    291       "blinding_described": {
    292         "applies": false,
    293         "answer": false,
    294         "justification": "No human participants."
    295       },
    296       "attrition_reported": {
    297         "applies": false,
    298         "answer": false,
    299         "justification": "No human participants."
    300       }
    301     },
    302     "cost_and_practicality": {
    303       "inference_cost_reported": {
    304         "applies": false,
    305         "answer": false,
    306         "justification": "Theoretical paper — no computational method with inference costs."
    307       },
    308       "compute_budget_stated": {
    309         "applies": false,
    310         "answer": false,
    311         "justification": "No computation performed."
    312       }
    313     }
    314   },
    315   "red_flags": [
    316     {
    317       "flag": "Claims significantly outrun evidence",
    318       "detail": "The paper makes sweeping causal and societal-level claims (e.g., 'the demise of truth', societies 'rationally discount digital evidence altogether') based on a conceptual framework and five illustrative cases. No empirical validation of the proposed mechanisms or their magnitudes is provided."
    319     },
    320     {
    321       "flag": "No limitations section",
    322       "detail": "A paper making broad societal claims lacks a dedicated limitations discussion. The framework's applicability boundaries, potential counterarguments, and historical precedents where similar fears did not materialize are not addressed."
    323     },
    324     {
    325       "flag": "Self-citation concentration",
    326       "detail": "Of 38 references, at least 7 (refs [8,9,15,16,17,18,27]) are authored or co-authored by Ferrara, the sole author. The taxonomy is described as expanding one 'recently proposed in [17]' — the author's own prior work — without independent validation."
    327     },
    328     {
    329       "flag": "Selective case bank without systematic methodology",
    330       "detail": "The case bank (Table 1) selects 5 case categories to illustrate the framework, but no systematic search or selection methodology is described. Cases that might contradict the thesis (e.g., successful institutional adaptation, effective detection) are not discussed."
    331     }
    332   ],
    333   "cited_papers": [
    334     {
    335       "title": "Factuality challenges in the era of large language models and opportunities for fact-checking",
    336       "authors": ["I. Augenstein", "T. Baldwin", "M. Cha", "T. Chakraborty", "G. L. Ciampaglia", "D. Corney", "R. DiResta", "E. Ferrara", "S. Hale", "A. Halevy"],
    337       "year": 2024,
    338       "relevance": "Surveys factuality challenges with LLMs including misinformation and fact-checking — relevant to AI safety and societal impact."
    339     },
    340     {
    341       "title": "Artificial intelligence risk management framework: Generative artificial intelligence profile",
    342       "authors": ["C. Autio", "R. Schwartz", "J. Dunietz", "S. Jain", "M. Stanley", "E. Tabassi", "P. Hall", "K. Roberts"],
    343       "year": 2024,
    344       "relevance": "NIST AI risk management framework for generative AI — relevant to AI safety governance and evaluation standards."
    345     },
    346     {
    347       "title": "Social bot detection in the age of ChatGPT: Challenges and opportunities",
    348       "authors": ["E. Ferrara"],
    349       "year": 2023,
    350       "relevance": "Discusses challenges of detecting AI-generated social media bots — relevant to AI safety and agentic AI risks."
    351     },
    352     {
    353       "title": "Sleeper agents: Training deceptive LLMs that persist through safety training",
    354       "authors": ["E. Hubinger", "C. Denison", "J. Mu", "M. Lambert", "M. Tong", "M. MacDiarmid", "T. Lanham", "D. M. Ziegler", "T. Maxwell", "N. Cheng"],
    355       "year": 2024,
    356       "arxiv_id": "2401.05566",
    357       "relevance": "Demonstrates that deceptive behaviors can be trained into LLMs and persist through safety training — directly relevant to AI safety and alignment."
    358     },
    359     {
    360       "title": "Data poisoning for in-context learning",
    361       "authors": ["P. He", "H. Xu", "Y. Xing", "H. Liu", "M. Yamada", "J. Tang"],
    362       "year": 2025,
    363       "relevance": "Demonstrates data poisoning attacks on in-context learning — relevant to AI safety and model supply-chain security."
    364     },
    365     {
    366       "title": "GenAI against humanity: Nefarious applications of generative artificial intelligence and large language models",
    367       "authors": ["E. Ferrara"],
    368       "year": 2024,
    369       "relevance": "Proposes the taxonomy of GenAI harms that this paper expands — foundational to understanding AI misuse risks."
    370     },
    371     {
    372       "title": "Fairness and bias in artificial intelligence: A brief survey of sources, impacts, and mitigation strategies",
    373       "authors": ["E. Ferrara"],
    374       "year": 2024,
    375       "relevance": "Surveys AI bias and fairness issues — relevant to understanding differential harms from AI systems."
    376     }
    377   ]
    378 }

Impressum · Datenschutz