scan-v5.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v5.json (25923B)
      1 {
      2   "scan_version": 5,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "LESSON: Multi-Label Adversarial False Data Injection Attack for Deep Learning Locational Detection",
      6     "authors": [
      7       "Jiwei Tian",
      8       "Chao Shen",
      9       "Buhong Wang",
     10       "Xiaofang Xia",
     11       "Meng Zhang",
     12       "Chenhao Lin",
     13       "Qian Li"
     14     ],
     15     "year": 2024,
     16     "venue": "IEEE Transactions on Dependable and Secure Computing",
     17     "arxiv_id": "2401.16001",
     18     "doi": "10.1109/TDSC.2024.3353302"
     19   },
     20   "checklist": {
     21     "claims_and_evidence": {
     22       "abstract_claims_supported": {
     23         "applies": true,
     24         "answer": true,
     25         "justification": "The abstract claims that LESSON achieves effectiveness against multi-label FDIA locational detectors; experiments on three IEEE test systems (14-, 30-, 118-bus) confirm high attack success rates across all four variants.",
     26         "source": "haiku"
     27       },
     28       "causal_claims_justified": {
     29         "applies": true,
     30         "answer": true,
     31         "justification": "Causal claims (e.g., increasing FDIA scale reduces success rate, larger grids are more vulnerable) are supported by controlled variation of a single factor across multiple conditions; the mathematical derivation also grounds the BDD-evasion guarantee.",
     32         "source": "haiku"
     33       },
     34       "generalization_bounded": {
     35         "applies": true,
     36         "answer": false,
     37         "justification": "The conclusion claims the attack 'poses a serious and imperative security breach and risk for practical large-scale power systems,' but experiments are limited to three simulated IEEE test systems using DC state estimation; the paper itself concedes no conclusive evidence for the grid-size relationship.",
     38         "source": "haiku"
     39       },
     40       "alternative_explanations_discussed": {
     41         "applies": true,
     42         "answer": false,
     43         "justification": "The paper does not consider whether the high success rates stem from weaknesses specific to the CNN architectures chosen (single family, no architectural variants tested), nor whether the simulated Matpower data makes the models particularly easy to attack.",
     44         "source": "haiku"
     45       },
     46       "proxy_outcome_distinction": {
     47         "applies": true,
     48         "answer": true,
     49         "justification": "Claims are about attack success rate (Psuc) and perturbation magnitude (ρc, ρa), which are exactly what is measured; the paper does not conflate these with broader real-world security impact beyond noting the threat implications.",
     50         "source": "haiku"
     51       }
     52     },
     53     "limitations_and_scope": {
     54       "limitations_section_present": {
     55         "applies": true,
     56         "answer": false,
     57         "justification": "Limitations are discussed in the conclusion paragraphs (white-box assumption, ideal attack cost, AC/DC gap, no defense methods) but there is no dedicated limitations or threats-to-validity section.",
     58         "source": "haiku"
     59       },
     60       "threats_to_validity_specific": {
     61         "applies": true,
     62         "answer": true,
     63         "justification": "Specific threats are identified: the white-box full-knowledge assumption 'may not always be accurate,' the attack cost model is described as 'ideal,' and DC-only modeling is explicitly flagged as a scope limitation requiring future AC investigation.",
     64         "source": "haiku"
     65       },
     66       "scope_boundaries_stated": {
     67         "applies": true,
     68         "answer": true,
     69         "justification": "The paper explicitly bounds scope to DC state estimation, white-box adversarial setting, CNN-based NAL models, and three IEEE standard test systems; future black-box extensions are noted as out of scope.",
     70         "source": "haiku"
     71       }
     72     },
     73     "conflicts_of_interest": {
     74       "funding_disclosed": {
     75         "applies": true,
     76         "answer": true,
     77         "justification": "Multiple funding sources are disclosed in the acknowledgment footnote, including the National Key R&D Program of China (2021YFB3100700), NSFC grants, Shaanxi Province programs, and China Postdoctoral Science Foundation.",
     78         "source": "haiku"
     79       },
     80       "affiliations_disclosed": {
     81         "applies": true,
     82         "answer": true,
     83         "justification": "All author affiliations are stated: Xi'an Jiaotong University, Air Force Engineering University (two authors), and Xidian University.",
     84         "source": "haiku"
     85       },
     86       "funder_independent_of_outcome": {
     87         "applies": true,
     88         "answer": true,
     89         "justification": "Funders are Chinese government research programs and universities with no financial stake in the specific attack framework outcome.",
     90         "source": "haiku"
     91       },
     92       "financial_interests_declared": {
     93         "applies": true,
     94         "answer": false,
     95         "justification": "No competing interests or financial interests statement appears anywhere in the paper.",
     96         "source": "haiku"
     97       }
     98     },
     99     "scope_and_framing": {
    100       "key_terms_defined": {
    101         "applies": true,
    102         "answer": true,
    103         "justification": "All key terms (FDIA, AFDIA, BDD, NAD, NAL, multi-label learning) are defined with mathematical notation; a full nomenclature table is provided at the start.",
    104         "source": "haiku"
    105       },
    106       "intended_contribution_clear": {
    107         "applies": true,
    108         "answer": true,
    109         "justification": "Three explicit contributions are enumerated: (1) the general LESSON framework with three key designs, (2) four typical attack variants based on two-dimensional objectives, and (3) extensive experimental analyses with influencing factors.",
    110         "source": "haiku"
    111       },
    112       "engagement_with_prior_work": {
    113         "applies": true,
    114         "answer": true,
    115         "justification": "The paper has structured related work covering DL-based FDIA detection/localization, single-label AFDIA, and multi-label adversarial examples; it explicitly positions LESSON as extending prior single-label AFDIA work (refs [19],[20]) to multi-label settings.",
    116         "source": "haiku"
    117       }
    118     }
    119   },
    120   "type_checklist": {
    121     "empirical": {
    122       "artifacts": {
    123         "code_released": {
    124           "applies": true,
    125           "answer": false,
    126           "justification": "No code repository is mentioned or linked; no promise of release is made.",
    127           "source": "haiku"
    128         },
    129         "data_released": {
    130           "applies": true,
    131           "answer": false,
    132           "justification": "The datasets are synthetically generated from Matpower simulations; while Matpower is public, the specific 30,000-sample datasets with the precise generation parameters applied are not released.",
    133           "source": "haiku"
    134         },
    135         "environment_specified": {
    136           "applies": true,
    137           "answer": false,
    138           "justification": "Only 'PyTorch' is mentioned without a version; no requirements file, Dockerfile, or environment specification is provided.",
    139           "source": "haiku"
    140         },
    141         "reproduction_instructions": {
    142           "applies": true,
    143           "answer": false,
    144           "justification": "The data generation procedure and CNN architectures are described in text and Table II, but no step-by-step reproduction script or workflow is provided.",
    145           "source": "haiku"
    146         }
    147       },
    148       "statistical_methodology": {
    149         "confidence_intervals_or_error_bars": {
    150           "applies": true,
    151           "answer": false,
    152           "justification": "All Psuc values are reported as single point estimates; no confidence intervals or error bars appear in any figure or table.",
    153           "source": "haiku"
    154         },
    155         "significance_tests": {
    156           "applies": true,
    157           "answer": false,
    158           "justification": "No statistical significance tests are applied to any comparative results, including cross-system or cross-scale comparisons.",
    159           "source": "haiku"
    160         },
    161         "effect_sizes_reported": {
    162           "applies": true,
    163           "answer": true,
    164           "justification": "Attack success rates (Psuc), state-variable perturbation magnitudes (ρc), and measurement perturbation magnitudes (ρa) are reported numerically, providing contextual effect size information.",
    165           "source": "haiku"
    166         },
    167         "sample_size_justified": {
    168           "applies": true,
    169           "answer": false,
    170           "justification": "500 samples per condition are used for attack evaluation with no justification for this choice; no power analysis is mentioned.",
    171           "source": "haiku"
    172         },
    173         "variance_reported": {
    174           "applies": true,
    175           "answer": false,
    176           "justification": "No standard deviation, variance, or run-to-run variability is reported for any experimental result.",
    177           "source": "haiku"
    178         }
    179       },
    180       "evaluation_design": {
    181         "baselines_included": {
    182           "applies": true,
    183           "answer": false,
    184           "justification": "No prior adversarial attack methods are included as baselines; the four LESSON variants are compared only against each other, not against any existing AFDIA approach.",
    185           "source": "haiku"
    186         },
    187         "baselines_contemporary": {
    188           "applies": true,
    189           "answer": false,
    190           "justification": "No baseline attack comparisons exist; the paper evaluates only its own variants without comparing to contemporary single-label AFDIA methods (e.g., from refs [19],[20]).",
    191           "source": "haiku"
    192         },
    193         "ablation_study": {
    194           "applies": true,
    195           "answer": true,
    196           "justification": "The four LESSON variants systematically vary two attack objective dimensions; additional sensitivity analyses on learning rate (6 values) and state perturbation range (two µ values) serve as component-level ablations.",
    197           "source": "haiku"
    198         },
    199         "multiple_metrics": {
    200           "applies": true,
    201           "answer": true,
    202           "justification": "Three metrics are reported: attack success rate (Psuc), state-variable perturbation norm (ρc), and measurement perturbation norm (ρa).",
    203           "source": "haiku"
    204         },
    205         "human_evaluation": {
    206           "applies": false,
    207           "answer": false,
    208           "justification": "This is a fully automated cyber-attack framework evaluated on simulated power systems; human evaluation is not applicable.",
    209           "source": "haiku"
    210         },
    211         "held_out_test_set": {
    212           "applies": true,
    213           "answer": true,
    214           "justification": "The 30,000 samples are split 2:1 into 20,000 training and 10,000 testing; attack evaluation uses only correctly predicted test samples.",
    215           "source": "haiku"
    216         },
    217         "per_category_breakdown": {
    218           "applies": true,
    219           "answer": true,
    220           "justification": "Results are broken down across power system scale (14-, 30-, 118-bus), attack variant (LESSON-1 to -4), FDIA scale (small/medium/large), and learning rate values.",
    221           "source": "haiku"
    222         },
    223         "failure_cases_discussed": {
    224           "applies": true,
    225           "answer": true,
    226           "justification": "Conditions under which success rates drop sharply are analyzed: large FDIA scale reduces LESSON-4 to ~24% on 14-bus; excessively large or small learning rates are shown to collapse success rates to near 0%.",
    227           "source": "haiku"
    228         },
    229         "negative_results_reported": {
    230           "applies": true,
    231           "answer": true,
    232           "justification": "Reduced perturbation range (µ=0.5 vs 1.0), large FDIA scale scenarios, and certain learning rate choices all yield substantially lower success rates; these are reported transparently.",
    233           "source": "haiku"
    234         }
    235       },
    236       "setup_transparency": {
    237         "model_versions_specified": {
    238           "applies": true,
    239           "answer": false,
    240           "justification": "PyTorch is named as the framework but no version is specified; CNN architectures are given in Table II but these are author-trained models, and the training hyperparameters are only partially given.",
    241           "source": "haiku"
    242         },
    243         "prompts_provided": {
    244           "applies": false,
    245           "answer": false,
    246           "justification": "No LLMs or prompt-based systems are used; this is a classical deep learning attack paper.",
    247           "source": "haiku"
    248         },
    249         "hyperparameters_reported": {
    250           "applies": true,
    251           "answer": true,
    252           "justification": "Adam optimizer is used; initial learning rates (six values tested), µ values (1 rad and 0.5 rad), max iterations (500), and threshold χ=0.5 are all reported.",
    253           "source": "haiku"
    254         },
    255         "scaffolding_described": {
    256           "applies": false,
    257           "answer": false,
    258           "justification": "No agentic scaffolding is involved; the paper proposes a classical optimization-based adversarial attack.",
    259           "source": "haiku"
    260         },
    261         "data_preprocessing_documented": {
    262           "applies": true,
    263           "answer": true,
    264           "justification": "Data generation from Matpower is described in detail: load distribution U(80%,120% baseload), noise model (zero-mean Gaussian at 2% of meter mean), FDIA vector generation (uniform number of targets, Gaussian magnitudes at three variance levels).",
    265           "source": "haiku"
    266         }
    267       },
    268       "data_integrity": {
    269         "raw_data_available": {
    270           "applies": true,
    271           "answer": false,
    272           "justification": "The generated datasets are not publicly released; only Matpower (the source tool) is publicly available.",
    273           "source": "haiku"
    274         },
    275         "data_collection_described": {
    276           "applies": true,
    277           "answer": true,
    278           "justification": "The synthetic data generation process is described precisely: 30,000 samples total, 15,000 normal and 15,000 attacked, at three FDIA scales (5,000 each), 2:1 train/test split.",
    279           "source": "haiku"
    280         },
    281         "recruitment_methods_described": {
    282           "applies": false,
    283           "answer": false,
    284           "justification": "No human participants; data is entirely synthetic from power system simulation.",
    285           "source": "haiku"
    286         },
    287         "data_pipeline_documented": {
    288           "applies": true,
    289           "answer": true,
    290           "justification": "The full pipeline from Matpower topology → load sampling → noise injection → FDIA vector construction → dataset split is documented in Section IV-A.",
    291           "source": "haiku"
    292         }
    293       },
    294       "contamination": {
    295         "training_cutoff_stated": {
    296           "applies": false,
    297           "answer": false,
    298           "justification": "No pre-trained language or foundation models are evaluated on benchmarks; the CNN models are trained by the authors on self-generated data, so training cutoff contamination is not applicable.",
    299           "source": "haiku"
    300         },
    301         "train_test_overlap_discussed": {
    302           "applies": false,
    303           "answer": false,
    304           "justification": "Not applicable; all data is synthetically generated and explicitly split before training.",
    305           "source": "haiku"
    306         },
    307         "benchmark_contamination_addressed": {
    308           "applies": false,
    309           "answer": false,
    310           "justification": "No pre-trained models are evaluated on standard benchmarks; contamination is not a relevant concern.",
    311           "source": "haiku"
    312         }
    313       },
    314       "human_studies": {
    315         "pre_registered": {
    316           "applies": false,
    317           "answer": false,
    318           "justification": "No human participants.",
    319           "source": "haiku"
    320         },
    321         "irb_or_ethics_approval": {
    322           "applies": false,
    323           "answer": false,
    324           "justification": "No human participants.",
    325           "source": "haiku"
    326         },
    327         "demographics_reported": {
    328           "applies": false,
    329           "answer": false,
    330           "justification": "No human participants.",
    331           "source": "haiku"
    332         },
    333         "inclusion_exclusion_criteria": {
    334           "applies": false,
    335           "answer": false,
    336           "justification": "No human participants.",
    337           "source": "haiku"
    338         },
    339         "randomization_described": {
    340           "applies": false,
    341           "answer": false,
    342           "justification": "No human participants.",
    343           "source": "haiku"
    344         },
    345         "blinding_described": {
    346           "applies": false,
    347           "answer": false,
    348           "justification": "No human participants.",
    349           "source": "haiku"
    350         },
    351         "attrition_reported": {
    352           "applies": false,
    353           "answer": false,
    354           "justification": "No human participants.",
    355           "source": "haiku"
    356         }
    357       },
    358       "cost_and_practicality": {
    359         "inference_cost_reported": {
    360           "applies": true,
    361           "answer": false,
    362           "justification": "No latency or computational cost for generating adversarial perturbations (running Adam for up to 500 iterations) is reported.",
    363           "source": "haiku"
    364         },
    365         "compute_budget_stated": {
    366           "applies": true,
    367           "answer": false,
    368           "justification": "No GPU, CPU, or wall-clock time budget is mentioned anywhere in the paper.",
    369           "source": "haiku"
    370         }
    371       }
    372     }
    373   },
    374   "claims": [
    375     {
    376       "claim": "LESSON achieves 100% attack success rate across all four variants on the 118-bus system with small-scale FDIA.",
    377       "evidence": "Experimental results in Fig. 2(a) show Psuc=100% for all LESSON-1 through LESSON-4 on the 118-bus system at small FDIA scale.",
    378       "supported": "strong"
    379     },
    380     {
    381       "claim": "Even the hardest attack variant (LESSON-4) achieves >60% success on the 14-bus and 30-bus systems for small-scale FDIA.",
    382       "evidence": "Figure 2(a) shows LESSON-4 exceeds 60% on both smaller systems for small-scale FDIA; stated explicitly in the experimental analysis.",
    383       "supported": "strong"
    384     },
    385     {
    386       "claim": "Increasing FDIA attack scale significantly reduces attack success rate.",
    387       "evidence": "LESSON-2 on 14-bus drops from 98.73% (small) to 74.42% (medium) to 24.37% (large), shown in Fig. 3(a).",
    388       "supported": "strong"
    389     },
    390     {
    391       "claim": "Larger power grid scale corresponds to higher LESSON attack success rates.",
    392       "evidence": "The 118-bus system consistently outperforms 14-bus and 30-bus systems; for large FDIA scale, 118-bus LESSON-1 achieves 80.52% vs substantially lower rates on smaller systems.",
    393       "supported": "moderate"
    394     },
    395     {
    396       "claim": "Requiring the original induced estimation error to remain unchanged (Objective One type 2) is a greater difficulty driver than requiring all meter labels to appear normal.",
    397       "evidence": "Comparative analysis of LESSON-1 vs LESSON-3 and LESSON-2 vs LESSON-4 shows greater success rate reduction from the estimation-error constraint than from the all-labels constraint.",
    398       "supported": "moderate"
    399     },
    400     {
    401       "claim": "Adversarial perturbations generated by LESSON are guaranteed to evade BDD detection by design.",
    402       "evidence": "Mathematical proof shows that perturbing state variables via Hζ leaves BDD residuals unchanged (constraint 12c removed), following the derivation in referenced prior work.",
    403       "supported": "strong"
    404     }
    405   ],
    406   "methodology_tags": [
    407     "benchmark-eval",
    408     "theoretical"
    409   ],
    410   "key_findings": "The LESSON framework successfully generates physically-constrained adversarial perturbations that bypass both Bad Data Detection (BDD) and CNN-based multi-label Neural Attack Location (NAL) in simulated smart grid environments, achieving up to 100% success rates on the 118-bus system. Attack difficulty is primarily determined by whether the original estimation error must be preserved (Objective One), with the requirement to hide all meter labels (Objective Two) being a secondary factor. Larger FDIA attack scales reduce success rates due to physical perturbation constraints, while larger power grid scale paradoxically increases attacker success. The white-box assumption is acknowledged as a limitation, with black-box extensions deferred to future work.",
    411   "red_flags": [
    412     {
    413       "flag": "No attack baselines",
    414       "detail": "The paper evaluates four of its own variants against each other but never compares LESSON against any prior adversarial attack method, making it impossible to assess how much the multi-label framing adds over existing AFDIA approaches."
    415     },
    416     {
    417       "flag": "White-box unrealism",
    418       "detail": "The threat model assumes full attacker knowledge of NAL architecture and parameters; the paper acknowledges this is not always realistic but presents no degradation analysis for partial or no model knowledge."
    419     },
    420     {
    421       "flag": "Single model family",
    422       "detail": "All NAL targets are CNN variants with the same basic architecture; robustness of the attack against alternative architectures (GNNs, transformers, ensemble methods) is untested despite those being common in the literature."
    423     },
    424     {
    425       "flag": "Simulated data only",
    426       "detail": "All experiments use Matpower-generated synthetic data with idealized load distributions; no real-world measurement traces are used, and the gap between simulation and real power systems is not quantified."
    427     },
    428     {
    429       "flag": "No confidence intervals or variance",
    430       "detail": "All success rates are single point estimates from a single experimental run; no variance across random seeds, dataset splits, or repeated trials is reported."
    431     },
    432     {
    433       "flag": "Unsupported grid-scale generalization",
    434       "detail": "The conclusion warns that the attack 'poses a serious and imperative security breach and risk for practical large-scale power systems,' but only three small IEEE test systems (14, 30, 118 buses) are tested; the paper itself concedes no conclusive evidence for the grid-size relationship."
    435     },
    436     {
    437       "flag": "No code or dataset release",
    438       "detail": "Neither the attack implementation nor the generated datasets are released, preventing independent verification or application to other systems."
    439     }
    440   ],
    441   "cited_papers": [
    442     {
    443       "title": "False data injection attacks against state estimation in electric power grids",
    444       "relevance": "Foundational FDIA paper establishing the mathematical conditions for stealthy attacks; the BDD-evasion guarantee in LESSON directly builds on this."
    445     },
    446     {
    447       "title": "Joint adversarial example and false data injection attacks for state estimation in power systems",
    448       "relevance": "Direct predecessor work proposing single-label AFDIA; LESSON extends this framework to multi-label settings."
    449     },
    450     {
    451       "title": "Exploring targeted and stealthy false data injection attacks via adversarial machine learning",
    452       "relevance": "Extended prior work introducing targeted single-label AFDIA; LESSON-3 and LESSON-4 are direct multi-label analogs."
    453     },
    454     {
    455       "title": "Locational detection of the false data injection attack in a smart grid: A multilabel classification approach",
    456       "relevance": "Establishes the multi-label CNN framework for FDIA localization that LESSON attacks."
    457     },
    458     {
    459       "title": "ConAML: Constrained adversarial machine learning for cyber-physical systems",
    460       "relevance": "Introduces physical constraint handling in adversarial attacks on power systems, a technique LESSON incorporates."
    461     },
    462     {
    463       "title": "Multi-label adversarial perturbations",
    464       "relevance": "First multi-label adversarial framework in image domain that LESSON adapts to power grid context."
    465     },
    466     {
    467       "title": "Domain knowledge alleviates adversarial attacks in multi-label classifiers",
    468       "relevance": "Discusses domain-knowledge-based defenses against multi-label adversarial examples, cited as future defense direction."
    469     },
    470     {
    471       "title": "A review on multi-label learning algorithms",
    472       "relevance": "Background on multi-label learning problem formulation used to set up the FDIA localization problem."
    473     },
    474     {
    475       "title": "Towards deep learning models resistant to adversarial attacks",
    476       "relevance": "PGD adversarial training defense and projected gradient methods referenced for the constrained optimization approach."
    477     }
    478   ],
    479   "engagement_factors": {
    480     "practical_relevance": {
    481       "score": 2,
    482       "justification": "Smart grid security is practically important, but the white-box assumption and simulation-only setting limit immediate practitioner applicability."
    483     },
    484     "surprise_contrarian": {
    485       "score": 1,
    486       "justification": "The finding that larger grid scale increases attacker success is somewhat counterintuitive, but the overall direction (deep learning is vulnerable to adversarial attacks) is expected."
    487     },
    488     "fear_safety": {
    489       "score": 2,
    490       "justification": "Simulated power grid cyber-attacks with up to 100% success rates against state-of-the-art detectors raise legitimate critical infrastructure concerns."
    491     },
    492     "drama_conflict": {
    493       "score": 1,
    494       "justification": "Framed as an adversarial arms race but no controversy or community conflict is involved."
    495     },
    496     "demo_ability": {
    497       "score": 1,
    498       "justification": "Requires Matpower and a specific experimental setup; no public code or demo is available."
    499     },
    500     "brand_recognition": {
    501       "score": 0,
    502       "justification": "Authors are from Xi'an Jiaotong University, Air Force Engineering University, and Xidian University; no famous lab or product association."
    503     }
    504   },
    505   "hn_data": {
    506     "threads": [
    507       {
    508         "hn_id": "39375235",
    509         "title": "Study Reveals Gender Bias in ChatGPT Translations",
    510         "points": 2,
    511         "comments": 0,
    512         "url": "https://news.ycombinator.com/item?id=39375235"
    513       }
    514     ],
    515     "top_points": 2,
    516     "total_points": 2,
    517     "total_comments": 0
    518   }
    519 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs