scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (20588B)
      1 {
      2   "paper": {
      3     "title": "Precedent-Based Professional Role Ethics for AI Decision Analysis",
      4     "authors": ["Christopher Rauch"],
      5     "year": 2025,
      6     "venue": "AAAI/ACM Conference on AI, Ethics, and Society (AIES 2025)",
      7     "doi": "10.1609/aies.v8i3.36794"
      8   },
      9   "scan_version": 3,
     10   "active_modules": [],
     11   "methodology_tags": ["theoretical", "case-study"],
     12   "key_findings": "The paper introduces ProEthica, a system under development that combines LLMs with role-based ontologies and precedent retrieval for structured ethical reasoning in professional settings. Using NSPE engineering ethics cases as a demonstration domain, it proposes integrating vector similarity search, ontological mappings, FIRAC-based validation, and constraint enforcement. No quantitative results or system outputs are presented; the paper describes architecture and design intent only.",
     13   "checklist": {
     14     "artifacts": {
     15       "code_released": {
     16         "applies": true,
     17         "answer": false,
     18         "justification": "No source code, repository URL, or archive link is provided anywhere in the paper. The system is described as 'under development' with no released artifacts."
     19       },
     20       "data_released": {
     21         "applies": true,
     22         "answer": false,
     23         "justification": "No dataset is released. The paper references NSPE Board of Ethical Review cases as the demonstration domain but provides no data files, case corpus, or ontology files."
     24       },
     25       "environment_specified": {
     26         "applies": true,
     27         "answer": false,
     28         "justification": "No environment specifications, dependency lists, or technical infrastructure details are provided. The paper does not mention any specific software libraries, frameworks, or hardware."
     29       },
     30       "reproduction_instructions": {
     31         "applies": true,
     32         "answer": false,
     33         "justification": "No reproduction instructions are provided. The system is described at a high conceptual level without implementation details sufficient to reproduce it."
     34       }
     35     },
     36     "statistical_methodology": {
     37       "confidence_intervals_or_error_bars": {
     38         "applies": false,
     39         "answer": false,
     40         "justification": "No quantitative experiments or numerical results are reported. The paper is a theoretical system description with no data to attach confidence intervals to."
     41       },
     42       "significance_tests": {
     43         "applies": false,
     44         "answer": false,
     45         "justification": "No quantitative comparative claims are made with data. The paper presents no numerical results requiring significance testing."
     46       },
     47       "effect_sizes_reported": {
     48         "applies": false,
     49         "answer": false,
     50         "justification": "No quantitative results are reported. There are no effect sizes to measure."
     51       },
     52       "sample_size_justified": {
     53         "applies": false,
     54         "answer": false,
     55         "justification": "No experiments are reported. The paper is a theoretical system description."
     56       },
     57       "variance_reported": {
     58         "applies": false,
     59         "answer": false,
     60         "justification": "No experimental runs are reported, so there is no variance to report."
     61       }
     62     },
     63     "evaluation_design": {
     64       "baselines_included": {
     65         "applies": true,
     66         "answer": false,
     67         "justification": "The Preliminary Results section mentions 'Compared to baseline LLM outputs' but provides no actual baseline data, comparison tables, examples, or metrics. The comparison is asserted but not shown."
     68       },
     69       "baselines_contemporary": {
     70         "applies": true,
     71         "answer": false,
     72         "justification": "No actual baseline comparison is presented. The paper references prior systems (SIROCCO, MedEthEx) from 2003 or earlier but does not compare against them empirically."
     73       },
     74       "ablation_study": {
     75         "applies": true,
     76         "answer": false,
     77         "justification": "ProEthica has multiple components (ontology, vector retrieval, ontological mapping, FIRAC validation, constraint enforcement) but no ablation study is conducted to assess individual component contributions."
     78       },
     79       "multiple_metrics": {
     80         "applies": true,
     81         "answer": false,
     82         "justification": "No evaluation metrics of any kind are reported. The Ongoing Work section mentions planned metrics ('consistency, explainability, and alignment with domain codes') but none are measured."
     83       },
     84       "human_evaluation": {
     85         "applies": true,
     86         "answer": false,
     87         "justification": "No human evaluation is conducted. The Ongoing Work section mentions 'Planned pilot studies will investigate usability and practitioner trust' but these have not been performed."
     88       },
     89       "held_out_test_set": {
     90         "applies": true,
     91         "answer": false,
     92         "justification": "No formal test set is defined or used. The paper mentions NSPE cases for 'initial testing' but provides no details on how cases were split or selected."
     93       },
     94       "per_category_breakdown": {
     95         "applies": true,
     96         "answer": false,
     97         "justification": "No results of any kind are broken down by category, case type, or ethical principle."
     98       },
     99       "failure_cases_discussed": {
    100         "applies": true,
    101         "answer": false,
    102         "justification": "No failure cases are discussed. The paper presents only positive descriptions of system capabilities."
    103       },
    104       "negative_results_reported": {
    105         "applies": true,
    106         "answer": false,
    107         "justification": "No negative results are reported. Every statement about system performance is positive and unqualified."
    108       }
    109     },
    110     "claims_and_evidence": {
    111       "abstract_claims_supported": {
    112         "applies": true,
    113         "answer": false,
    114         "justification": "The abstract claims 'Preliminary evaluations using NSPE cases indicate that it can retrieve relevant precedents and produce structured analyses that align with engineering ethics.' No evaluation data, examples, or metrics support this claim in the paper body."
    115       },
    116       "causal_claims_justified": {
    117         "applies": true,
    118         "answer": false,
    119         "justification": "The paper makes causal-style claims such as 'The incorporation of professional codes appears to support consistent ethical responses' and 'Precedent-based case matching contributes to improved reasoning quality' without any controlled study, ablation, or quantitative evidence."
    120       },
    121       "generalization_bounded": {
    122         "applies": true,
    123         "answer": false,
    124         "justification": "The title says 'AI Decision Analysis' broadly. While the paper mentions engineering ethics as a demonstration domain, it also claims the approach generalizes to healthcare and law without evidence: 'Development is ongoing to extend the ProEthica ontology to include additional professional domains.'"
    125       },
    126       "alternative_explanations_discussed": {
    127         "applies": true,
    128         "answer": false,
    129         "justification": "No alternative explanations for claimed results are discussed. The paper does not consider whether observed improvements could be due to factors other than the proposed system design."
    130       },
    131       "proxy_outcome_distinction": {
    132         "applies": true,
    133         "answer": false,
    134         "justification": "The paper claims 'improved alignment with domain-specific standards' and 'improved reasoning quality' without defining these outcomes, specifying how they were measured, or distinguishing between the proxy measured and the outcome claimed."
    135       }
    136     },
    137     "setup_transparency": {
    138       "model_versions_specified": {
    139         "applies": true,
    140         "answer": false,
    141         "justification": "The paper refers to 'LLMs' and 'large language models' throughout but never specifies which model, version, or API was used."
    142       },
    143       "prompts_provided": {
    144         "applies": true,
    145         "answer": false,
    146         "justification": "The paper describes the FIRAC framework and constraint validation conceptually but provides no actual prompts, system instructions, or prompt templates used with the LLM."
    147       },
    148       "hyperparameters_reported": {
    149         "applies": true,
    150         "answer": false,
    151         "justification": "No hyperparameters (temperature, top-p, embedding dimensions, retrieval thresholds, etc.) are reported."
    152       },
    153       "scaffolding_described": {
    154         "applies": true,
    155         "answer": false,
    156         "justification": "The paper describes ProEthica's architecture at a high conceptual level (ontology + retrieval + FIRAC validation + constraint enforcement) but provides no implementation details: no workflow diagrams, no tool descriptions, no retry logic, no specifics of the ontological mapping or constraint validation process."
    157       },
    158       "data_preprocessing_documented": {
    159         "applies": true,
    160         "answer": false,
    161         "justification": "The paper does not describe how NSPE cases were processed, encoded, or prepared for the system, and it does not explain how the ontology was constructed from engineering ethics codes."
    162       }
    163     },
    164     "limitations_and_scope": {
    165       "limitations_section_present": {
    166         "applies": true,
    167         "answer": false,
    168         "justification": "There is no limitations section. The Ongoing Work section describes future plans but does not discuss current limitations."
    169       },
    170       "threats_to_validity_specific": {
    171         "applies": true,
    172         "answer": false,
    173         "justification": "No threats to validity are discussed anywhere in the paper."
    174       },
    175       "scope_boundaries_stated": {
    176         "applies": true,
    177         "answer": false,
    178         "justification": "No explicit scope boundaries are stated. The paper does not clarify what its preliminary results do NOT show or what settings are excluded."
    179       }
    180     },
    181     "data_integrity": {
    182       "raw_data_available": {
    183         "applies": true,
    184         "answer": false,
    185         "justification": "No raw data is available. The NSPE cases used for evaluation are not provided, nor are system outputs or evaluation records."
    186       },
    187       "data_collection_described": {
    188         "applies": true,
    189         "answer": false,
    190         "justification": "The paper mentions using 'NSPE cases' and 'Board of Ethical Review precedents' but does not describe how many cases were used, how they were selected, or what time period they cover."
    191       },
    192       "recruitment_methods_described": {
    193         "applies": false,
    194         "answer": false,
    195         "justification": "No human participants are involved. The data source (NSPE Board of Ethical Review cases) is a known institutional corpus, not a recruited sample."
    196       },
    197       "data_pipeline_documented": {
    198         "applies": true,
    199         "answer": false,
    200         "justification": "No data pipeline is documented. The path from NSPE case documents to system input to evaluation output is entirely undescribed."
    201       }
    202     },
    203     "conflicts_of_interest": {
    204       "funding_disclosed": {
    205         "applies": true,
    206         "answer": false,
    207         "justification": "No funding information is disclosed. There is no acknowledgments section or funding statement in the paper."
    208       },
    209       "affiliations_disclosed": {
    210         "applies": true,
    211         "answer": true,
    212         "justification": "The author's affiliation with Drexel University is clearly stated. The paper does not evaluate any commercial product the author has a stake in."
    213       },
    214       "funder_independent_of_outcome": {
    215         "applies": true,
    216         "answer": false,
    217         "justification": "No funding source is disclosed, so independence of the funder cannot be verified."
    218       },
    219       "financial_interests_declared": {
    220         "applies": true,
    221         "answer": false,
    222         "justification": "No competing interests or financial interests statement is present in the paper."
    223       }
    224     },
    225     "contamination": {
    226       "training_cutoff_stated": {
    227         "applies": false,
    228         "answer": false,
    229         "justification": "The paper does not evaluate a pre-trained model's capability on any benchmark. It describes a system architecture for ethical reasoning without testing model knowledge."
    230       },
    231       "train_test_overlap_discussed": {
    232         "applies": false,
    233         "answer": false,
    234         "justification": "No benchmark evaluation of model capabilities is conducted. The paper is a system design description."
    235       },
    236       "benchmark_contamination_addressed": {
    237         "applies": false,
    238         "answer": false,
    239         "justification": "No benchmark evaluation is performed. The system uses NSPE cases as domain knowledge, not as a model capability benchmark."
    240       }
    241     },
    242     "human_studies": {
    243       "pre_registered": {
    244         "applies": false,
    245         "answer": false,
    246         "justification": "No human participants are involved in this study."
    247       },
    248       "irb_or_ethics_approval": {
    249         "applies": false,
    250         "answer": false,
    251         "justification": "No human participants are involved in this study."
    252       },
    253       "demographics_reported": {
    254         "applies": false,
    255         "answer": false,
    256         "justification": "No human participants are involved in this study."
    257       },
    258       "inclusion_exclusion_criteria": {
    259         "applies": false,
    260         "answer": false,
    261         "justification": "No human participants are involved in this study."
    262       },
    263       "randomization_described": {
    264         "applies": false,
    265         "answer": false,
    266         "justification": "No human participants are involved in this study."
    267       },
    268       "blinding_described": {
    269         "applies": false,
    270         "answer": false,
    271         "justification": "No human participants are involved in this study."
    272       },
    273       "attrition_reported": {
    274         "applies": false,
    275         "answer": false,
    276         "justification": "No human participants are involved in this study."
    277       }
    278     },
    279     "cost_and_practicality": {
    280       "inference_cost_reported": {
    281         "applies": false,
    282         "answer": false,
    283         "justification": "This is a theoretical system description paper with no empirical evaluation. Cost reporting is not applicable."
    284       },
    285       "compute_budget_stated": {
    286         "applies": false,
    287         "answer": false,
    288         "justification": "This is a theoretical system description paper. No experiments are reported, so there is no compute budget to state."
    289       }
    290     }
    291   },
    292   "claims": [
    293     {
    294       "claim": "ProEthica can retrieve relevant precedents and produce structured analyses that align with engineering ethics.",
    295       "evidence": "Abstract and Preliminary Results section assert this based on 'initial testing' with NSPE cases. No data, examples, or metrics are provided to support the claim.",
    296       "supported": "unsupported"
    297     },
    298     {
    299       "claim": "ProEthica shows improved alignment with domain-specific standards compared to baseline LLM outputs.",
    300       "evidence": "Preliminary Results states 'Compared to baseline LLM outputs, evaluations indicate improved alignment with domain-specific standards.' No baseline data, comparison methodology, or metrics are provided.",
    301       "supported": "unsupported"
    302     },
    303     {
    304       "claim": "The incorporation of professional codes supports consistent ethical responses.",
    305       "evidence": "Preliminary Results asserts this, describing it as 'aligning with findings that emphasize the value of converting principles into rules and validation procedures (Prem 2023).' The support comes from citing other work, not from the author's own evaluation.",
    306       "supported": "unsupported"
    307     },
    308     {
    309       "claim": "Structured validation mechanisms enable traceable output review.",
    310       "evidence": "Preliminary Results asserts this claim with reference to Chhabra et al. 2024, but no examples of traceable output review from ProEthica itself are provided.",
    311       "supported": "unsupported"
    312     }
    313   ],
    314   "red_flags": [
    315     {
    316       "flag": "Claims significantly outrun evidence",
    317       "detail": "The abstract and Preliminary Results section make multiple claims about system performance ('improved alignment,' 'improved reasoning quality,' 'consistent ethical responses') but the paper contains zero quantitative results, zero examples of system output, zero evaluation metrics. Every performance claim is unsubstantiated."
    318     },
    319     {
    320       "flag": "Preliminary results cite others' work instead of own evaluation",
    321       "detail": "The Preliminary Results section supports its claims by citing Bai et al. 2022, Abel et al. 2016, Prem 2023, Ashley & McLaren 1995, and Chhabra et al. 2024 rather than presenting ProEthica's own evaluation data. This creates an illusion of evidence from borrowed authority."
    322     },
    323     {
    324       "flag": "System under development presented as evaluated",
    325       "detail": "The abstract says the system is 'under development' yet the Preliminary Results section discusses evaluation findings. No actual evaluation methodology, data, or results are disclosed, making verification impossible."
    326     },
    327     {
    328       "flag": "No limitations or threats to validity discussed",
    329       "detail": "For a system that proposes to support ethical decision-making in high-stakes professional domains (healthcare, law, engineering), the complete absence of any limitations discussion is a significant omission."
    330     }
    331   ],
    332   "cited_papers": [
    333     {
    334       "title": "Constitutional AI: Harmlessness from AI Feedback",
    335       "authors": ["Yuntao Bai", "Saurav Kadavath", "Sandipan Kundu"],
    336       "year": 2022,
    337       "relevance": "Foundational work on aligning AI outputs with ethical constraints through constitutional principles, directly relevant to AI safety and alignment methodology."
    338     },
    339     {
    340       "title": "Reinforcement Learning as a Framework for Ethical Decision Making",
    341       "authors": ["David Abel", "James MacGlashan", "Michael L. Littman"],
    342       "year": 2016,
    343       "relevance": "Proposes RL-based ethical decision-making framework, relevant to computational approaches for AI safety and value alignment."
    344     },
    345     {
    346       "title": "Evaluating Computational Models of Ethics for Autonomous Decision Making",
    347       "authors": ["Jatin Chhabra", "Kriti Sama", "Jayesh Deshmukh", "Srinivasa Srinivasa"],
    348       "year": 2024,
    349       "relevance": "SPECTRA testbed for evaluating ethical AI models across paradigms, relevant to methodology for assessing AI ethical reasoning capabilities."
    350     },
    351     {
    352       "title": "The Ethics of ChatGPT in Medicine and Healthcare: A Systematic Review on Large Language Models (LLMs)",
    353       "authors": ["Joschka Haltaufderheide", "Robert Ranisch"],
    354       "year": 2024,
    355       "relevance": "Systematic review documenting LLM limitations in professional healthcare settings, relevant to understanding risks of LLM deployment."
    356     },
    357     {
    358       "title": "From ethical AI frameworks to tools: a review of approaches",
    359       "authors": ["Erich Prem"],
    360       "year": 2023,
    361       "relevance": "Reviews approaches for translating ethical AI principles into practical tools, directly relevant to the operationalization challenge in AI ethics."
    362     },
    363     {
    364       "title": "Moral Imitation: Can an Algorithm Really Be Ethical?",
    365       "authors": ["Anupam Puri"],
    366       "year": 2020,
    367       "relevance": "Examines limitations of algorithmic ethical reasoning, relevant to the fundamental question of whether AI can enforce domain-specific ethical standards."
    368     }
    369   ],
    370   "engagement_factors": {
    371     "practical_relevance": {
    372       "score": 1,
    373       "justification": "Describes a potentially useful system for ethical reasoning in professional domains, but nothing is implemented or available for use."
    374     },
    375     "surprise_contrarian": {
    376       "score": 1,
    377       "justification": "The framing of professional role ethics for AI is somewhat novel, but the idea of combining ontologies with LLMs for ethical reasoning is not surprising."
    378     },
    379     "fear_safety": {
    380       "score": 1,
    381       "justification": "Touches on AI safety in high-stakes professional domains (healthcare, law, engineering) but presents no novel risk findings."
    382     },
    383     "drama_conflict": {
    384       "score": 0,
    385       "justification": "No controversy, no critique of existing approaches beyond general limitations, no provocative claims."
    386     },
    387     "demo_ability": {
    388       "score": 0,
    389       "justification": "No code, demo, or working system is available. The system is explicitly described as 'under development.'"
    390     },
    391     "brand_recognition": {
    392       "score": 0,
    393       "justification": "Solo researcher from Drexel University; not affiliated with a well-known lab or associated with a major AI product."
    394     }
    395   }
    396 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log | Files | Refs