scan-v4.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v4.json (32239B)
      1 {
      2   "scan_version": 4,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "Exploring adversarial robustness of JPEG AI: methodology, comparison and new methods",
      6     "authors": [
      7       "Egor Kovalev",
      8       "Georgii Bychkov",
      9       "Khaled Abud",
     10       "Aleksandr Gushchin",
     11       "Anna Chistyakova",
     12       "Sergey Lavrushkin",
     13       "Dmitriy Vatolin",
     14       "Anastasia Antsiferova"
     15     ],
     16     "year": 2024,
     17     "venue": "arXiv.org",
     18     "arxiv_id": "2411.11795",
     19     "doi": "10.48550/arXiv.2411.11795"
     20   },
     21   "checklist": {
     22     "claims_and_evidence": {
     23       "abstract_claims_supported": {
     24         "applies": true,
     25         "answer": true,
     26         "justification": "The abstract claims: new methodology for NIC robustness (supported by Section 4), first large-scale evaluation of JPEG AI (supported by Section 5 with 10 codecs, 6 attacks, 10 loss functions), and comparison with other NICs (supported by Figs. 2–9). Code availability claim is technically unfulfilled due to blind review.",
     27         "source": "opus"
     28       },
     29       "causal_claims_justified": {
     30         "applies": true,
     31         "answer": false,
     32         "justification": "The paper makes causal claims such as 'adversarial noise causes significant changes in latent representation, yielding noticeable quality degradation' (Section 5.2 on CDC), and 'This model may be less robust by design.' These claims are based on observational comparison, not controlled architectural ablation.",
     33         "source": "opus"
     34       },
     35       "generalization_bounded": {
     36         "applies": true,
     37         "answer": true,
     38         "justification": "The title and claims are specifically about JPEG AI and named NIC models. Claims are bounded to the tested codecs, attacks, and datasets. The paper explicitly scopes to white-box attacks with stated justification (Section 4.2).",
     39         "source": "opus"
     40       },
     41       "alternative_explanations_discussed": {
     42         "applies": true,
     43         "answer": false,
     44         "justification": "The paper offers single explanations for observations (e.g., CDC's vulnerability attributed to diffusion architecture, BPP increase attributed to noise structure) without considering alternative explanations or confounding factors.",
     45         "source": "opus"
     46       },
     47       "proxy_outcome_distinction": {
     48         "applies": true,
     49         "answer": true,
     50         "justification": "The paper defines its metrics clearly (Equation 5, Section 4.5): Δscore measures quality drop caused by adversarial attack. The metrics directly measure what is claimed — the gap between measurement and framing is minimal. The paper does not over-generalize from ΔPSNR to broader 'robustness' without defining the connection.",
     51         "source": "opus"
     52       }
     53     },
     54     "limitations_and_scope": {
     55       "limitations_section_present": {
     56         "applies": true,
     57         "answer": false,
     58         "justification": "There is no dedicated limitations or threats-to-validity section. The conclusion mentions challenges ('assessing attack success in NICs remains challenging') but provides no substantive limitations discussion.",
     59         "source": "opus"
     60       },
     61       "threats_to_validity_specific": {
     62         "applies": true,
     63         "answer": false,
     64         "justification": "No specific threats to validity are discussed. The conclusion makes brief mentions of challenges in assessing attack success but does not identify specific methodological threats.",
     65         "source": "opus"
     66       },
     67       "scope_boundaries_stated": {
     68         "applies": true,
     69         "answer": false,
     70         "justification": "While Section 4.2 explains the focus on white-box attacks, the paper does not explicitly state what the results do NOT show. No systematic scope boundaries are provided — e.g., no discussion of how results may not extend to black-box settings, other image domains, or real-world deployment conditions.",
     71         "source": "opus"
     72       }
     73     },
     74     "conflicts_of_interest": {
     75       "funding_disclosed": {
     76         "applies": true,
     77         "answer": false,
     78         "justification": "No funding or acknowledgments section is present in the paper. Multiple institutional affiliations are listed (MSU Institute for AI, ISP RAS, Innopolis University) but no funding sources disclosed.",
     79         "source": "opus"
     80       },
     81       "affiliations_disclosed": {
     82         "applies": true,
     83         "answer": true,
     84         "justification": "Author affiliations are clearly listed: MSU Institute for Artificial Intelligence, ISP RAS Research Center for Trusted Artificial Intelligence, Lomonosov Moscow State University, and Innopolis University.",
     85         "source": "opus"
     86       },
     87       "funder_independent_of_outcome": {
     88         "applies": true,
     89         "answer": false,
     90         "justification": "No funding is disclosed, making it impossible to assess funder independence. The ISP RAS 'Research Center for Trusted Artificial Intelligence' affiliation raises questions about whether institutional interests favor particular robustness outcomes.",
     91         "source": "opus"
     92       },
     93       "financial_interests_declared": {
     94         "applies": true,
     95         "answer": false,
     96         "justification": "No competing interests or financial interests statement is present in the paper.",
     97         "source": "opus"
     98       }
     99     },
    100     "scope_and_framing": {
    101       "key_terms_defined": {
    102         "applies": true,
    103         "answer": true,
    104         "justification": "Key terms defined: neural image compression (Section 1), adversarial attack (Eq. 2-3, Section 3), robustness metrics (∆PSNR, ∆VMAF, etc.), JPEG AI components (analysis transform, quantization, entropy coding, synthesis transform).",
    105         "source": "haiku"
    106       },
    107       "intended_contribution_clear": {
    108         "applies": true,
    109         "answer": true,
    110         "justification": "Three contributions explicitly listed: (1) extend methodology with 4 full-reference metrics, (2) first large-scale evaluation of 10 NIC models × 6 attacks × 6 loss functions, (3) evaluate purification defenses.",
    111         "source": "haiku"
    112       },
    113       "engagement_with_prior_work": {
    114         "applies": true,
    115         "answer": true,
    116         "justification": "Dedicated Related Work (Section 2) covers neural image compression (2.1), JPEG AI specifics (2.2), and prior NIC robustness work (2.3). Paper explicitly extends Chen & Ma methodology [12] with additional metrics.",
    117         "source": "haiku"
    118       }
    119     }
    120   },
    121   "type_checklist": {
    122     "empirical": {
    123       "artifacts": {
    124         "code_released": {
    125           "applies": true,
    126           "answer": false,
    127           "justification": "The abstract states 'Our evaluation results and code are publicly available online (link is hidden for a blind review).' Since the link is hidden and no working URL is provided, this counts as NO.",
    128           "source": "opus"
    129         },
    130         "data_released": {
    131           "applies": true,
    132           "answer": true,
    133           "justification": "All four evaluation datasets are publicly available standard benchmarks: KODAK Photo CD, CITYSCAPES, NIPS 2017 Adversarial Learning Development Set, and BSDS (Section 4.4).",
    134           "source": "opus"
    135         },
    136         "environment_specified": {
    137           "applies": true,
    138           "answer": false,
    139           "justification": "Section 4.6 lists hardware (120× NVIDIA A100 80GB, Intel Xeon Ice Lake) and mentions Slurm, but provides no software dependencies, library versions, requirements.txt, or environment specification.",
    140           "source": "opus"
    141         },
    142         "reproduction_instructions": {
    143           "applies": true,
    144           "answer": false,
    145           "justification": "No step-by-step reproduction instructions are provided. The code link is hidden for blind review, and the paper does not include a 'Reproducing Results' section.",
    146           "source": "opus"
    147         }
    148       },
    149       "statistical_methodology": {
    150         "confidence_intervals_or_error_bars": {
    151           "applies": true,
    152           "answer": false,
    153           "justification": "Results are reported as averaged metrics across 4 runs with varied parameters, but no confidence intervals, error bars, or uncertainty measures are shown in any figure or table.",
    154           "source": "opus"
    155         },
    156         "significance_tests": {
    157           "applies": true,
    158           "answer": false,
    159           "justification": "The paper makes numerous comparative claims (e.g., 'JPEG AI showed relatively high robustness,' 'CDC showed the lowest robustness') based solely on comparing averaged metric values without any statistical significance tests.",
    160           "source": "opus"
    161         },
    162         "effect_sizes_reported": {
    163           "applies": true,
    164           "answer": true,
    165           "justification": "Delta metrics (ΔPSNR, ΔMSE, ΔMS-SSIM, ΔVMAF) provide magnitude of quality degradation. BSQ-rate (Section 5.7) reports 'over 50% less bitrate with the same quality.' Fig. 7 shows specific transferability values. These provide meaningful effect size context.",
    166           "source": "opus"
    167         },
    168         "sample_size_justified": {
    169           "applies": true,
    170           "answer": false,
    171           "justification": "No justification is given for why 4 datasets with specific sizes were chosen, why 4 parameter variations per attack were used, or any power analysis.",
    172           "source": "opus"
    173         },
    174         "variance_reported": {
    175           "applies": true,
    176           "answer": false,
    177           "justification": "Section 4.6 states 'We applied each adversarial attack to each encoder four times with varied attack parameters. We then averaged the metrics for all launches.' No standard deviation, IQR, or any spread measure is reported.",
    178           "source": "opus"
    179         }
    180       },
    181       "evaluation_design": {
    182         "baselines_included": {
    183           "applies": true,
    184           "answer": true,
    185           "justification": "Ten NIC models are compared (Table 2), including established baselines like Ballé 2018, mbt2018, and Cheng2020. Random noise is included as a baseline attack (Table 3).",
    186           "source": "opus"
    187         },
    188         "baselines_contemporary": {
    189           "applies": true,
    190           "answer": true,
    191           "justification": "Baselines include models from 2018–2024: CDC (2024), JPEG AI 6.1 (2024), Li-tcm (2023), qres-vae (2023), alongside older established codecs. The mix of historical and contemporary models is appropriate.",
    192           "source": "opus"
    193         },
    194         "ablation_study": {
    195           "applies": false,
    196           "answer": false,
    197           "justification": "This paper evaluates and compares existing codecs, attacks, and defenses — it does not propose a novel system with components to ablate.",
    198           "source": "opus"
    199         },
    200         "multiple_metrics": {
    201           "applies": true,
    202           "answer": true,
    203           "justification": "Seven evaluation metrics are used: ΔPSNR, ΔMSE, ΔMS-SSIM, ΔVMAF, BPP change, Color metric, and Texture metric (Sections 4.5, 5.4).",
    204           "source": "opus"
    205         },
    206         "human_evaluation": {
    207           "applies": true,
    208           "answer": false,
    209           "justification": "All evaluation is automated using full-reference image quality metrics and artifact detection methods. No human perceptual evaluation of image quality under attack is conducted.",
    210           "source": "opus"
    211         },
    212         "held_out_test_set": {
    213           "applies": true,
    214           "answer": false,
    215           "justification": "The paper evaluates pre-trained codecs on standard datasets but does not discuss whether any of these datasets were used in NIC model training, nor is there an explicit separation of tuning vs. evaluation data.",
    216           "source": "opus"
    217         },
    218         "per_category_breakdown": {
    219           "applies": true,
    220           "answer": true,
    221           "justification": "Results are broken down by codec (Fig. 3, 6), by attack type and loss function (Fig. 2), by bitrate (Fig. 4, 7), and by defense strategy (Fig. 8). Section 5.4 breaks down artifacts by codec.",
    222           "source": "opus"
    223         },
    224         "failure_cases_discussed": {
    225           "applies": true,
    226           "answer": true,
    227           "justification": "Section 5.1 reports that reconstruction losses actually improve decoded quality rather than degrading it — a failure case for those attack objectives. Section 5.2 notes specific attacks are ineffective against JPEG AI while devastating for other codecs.",
    228           "source": "opus"
    229         },
    230         "negative_results_reported": {
    231           "applies": true,
    232           "answer": true,
    233           "justification": "Section 5.1 reports that reconstruction losses produce negative ΔPSNR/ΔSSIM (decoded images improve after attack), which is a negative result for those attack strategies. 'More complex losses showed less efficiency' is also a negative finding.",
    234           "source": "opus"
    235         }
    236       },
    237       "setup_transparency": {
    238         "model_versions_specified": {
    239           "applies": true,
    240           "answer": true,
    241           "justification": "Specific codec versions are listed in Table 2: JPEG AI 4.1, 5.1, 6.1 (each with HOP/BOP), and specific model variants for other codecs (e.g., Cheng2020+attn, mbt2018-mean). Section 4.6 notes they used source code of JPEG AI without additional pretraining.",
    242           "source": "opus"
    243         },
    244         "prompts_provided": {
    245           "applies": false,
    246           "answer": false,
    247           "justification": "This paper evaluates neural image compression codecs under adversarial attacks. No language model prompting is involved.",
    248           "source": "opus"
    249         },
    250         "hyperparameters_reported": {
    251           "applies": true,
    252           "answer": false,
    253           "justification": "Section 4.6 states attacks were run 'four times with varied attack parameters (learning rate, number of iterations, and perturbation bound)' but the specific values are not reported. Random noise σ range is given (Table 3), but attack hyperparameters are missing.",
    254           "source": "opus"
    255         },
    256         "scaffolding_described": {
    257           "applies": false,
    258           "answer": false,
    259           "justification": "No agentic scaffolding is used. This is a neural image compression adversarial robustness study.",
    260           "source": "opus"
    261         },
    262         "data_preprocessing_documented": {
    263           "applies": true,
    264           "answer": false,
    265           "justification": "Section 4.4 lists datasets with basic descriptions (image count, resolution) but does not document any preprocessing, filtering, or transformation steps applied to the images before experimentation.",
    266           "source": "opus"
    267         }
    268       },
    269       "data_integrity": {
    270         "raw_data_available": {
    271           "applies": true,
    272           "answer": false,
    273           "justification": "Results and code are claimed to be available but the link is hidden for blind review. No raw experimental data (per-image metrics, per-run results) is provided in the paper or supplements.",
    274           "source": "opus"
    275         },
    276         "data_collection_described": {
    277           "applies": true,
    278           "answer": true,
    279           "justification": "Section 4.4 describes datasets used (KODAK, CITYSCAPES, NIPS 2017, BSDS) with image counts and resolutions. Section 4.6 describes the experimental procedure: attacks applied 4 times with varied parameters, results averaged.",
    280           "source": "opus"
    281         },
    282         "recruitment_methods_described": {
    283           "applies": false,
    284           "answer": false,
    285           "justification": "No human participants. All datasets are standard public benchmarks.",
    286           "source": "opus"
    287         },
    288         "data_pipeline_documented": {
    289           "applies": true,
    290           "answer": false,
    291           "justification": "The high-level pipeline is described (attack images → compress → measure quality delta) but intermediate steps are poorly documented. The specific attack parameter configurations, filtering of ~3000 images for artifact analysis (Section 5.4), and how results were aggregated across datasets and bitrates are not fully specified.",
    292           "source": "opus"
    293         }
    294       },
    295       "contamination": {
    296         "training_cutoff_stated": {
    297           "applies": true,
    298           "answer": false,
    299           "justification": "None of the 10 NIC models have their training data described or training cutoff dates stated. The paper does not discuss what data was used to train any of the codecs.",
    300           "source": "opus"
    301         },
    302         "train_test_overlap_discussed": {
    303           "applies": true,
    304           "answer": false,
    305           "justification": "No discussion of whether KODAK, CITYSCAPES, NIPS 2017, or BSDS images (or similar images) appeared in the training data of any NIC model. KODAK is commonly used in compression research and may well be in training sets.",
    306           "source": "opus"
    307         },
    308         "benchmark_contamination_addressed": {
    309           "applies": true,
    310           "answer": false,
    311           "justification": "KODAK (1991) and other datasets predate all NIC models and are widely used in the compression field. The paper does not discuss whether NIC models may have been trained or tuned on these same benchmarks.",
    312           "source": "opus"
    313         }
    314       },
    315       "human_studies": {
    316         "pre_registered": {
    317           "applies": false,
    318           "answer": false,
    319           "justification": "No human participants in this study.",
    320           "source": "opus"
    321         },
    322         "irb_or_ethics_approval": {
    323           "applies": false,
    324           "answer": false,
    325           "justification": "No human participants in this study.",
    326           "source": "opus"
    327         },
    328         "demographics_reported": {
    329           "applies": false,
    330           "answer": false,
    331           "justification": "No human participants in this study.",
    332           "source": "opus"
    333         },
    334         "inclusion_exclusion_criteria": {
    335           "applies": false,
    336           "answer": false,
    337           "justification": "No human participants in this study.",
    338           "source": "opus"
    339         },
    340         "randomization_described": {
    341           "applies": false,
    342           "answer": false,
    343           "justification": "No human participants in this study.",
    344           "source": "opus"
    345         },
    346         "blinding_described": {
    347           "applies": false,
    348           "answer": false,
    349           "justification": "No human participants in this study.",
    350           "source": "opus"
    351         },
    352         "attrition_reported": {
    353           "applies": false,
    354           "answer": false,
    355           "justification": "No human participants in this study.",
    356           "source": "opus"
    357         }
    358       },
    359       "cost_and_practicality": {
    360         "inference_cost_reported": {
    361           "applies": true,
    362           "answer": false,
    363           "justification": "Section 4.6 lists hardware (120× A100 80GB GPUs) but does not report per-image attack/compression time, total wall-clock time, or any cost metric for running the evaluation.",
    364           "source": "opus"
    365         },
    366         "compute_budget_stated": {
    367           "applies": true,
    368           "answer": false,
    369           "justification": "Hardware is listed (Section 4.6) but total GPU hours, experiment duration, or compute budget are not quantified. With 10 codecs × 6 attacks × 10 losses × 4 datasets × 4 runs, the total compute is likely substantial but unreported.",
    370           "source": "opus"
    371         }
    372       },
    373       "experimental_rigor": {
    374         "seed_sensitivity_reported": {
    375           "applies": true,
    376           "answer": false,
    377           "justification": "Results are averaged across 4 runs with varied parameters but no seed sensitivity analysis is reported and no variance across runs is shown.",
    378           "source": "opus"
    379         },
    380         "number_of_runs_stated": {
    381           "applies": true,
    382           "answer": true,
    383           "justification": "Section 4.6 explicitly states: 'We applied each adversarial attack to each encoder four times with varied attack parameters.'",
    384           "source": "opus"
    385         },
    386         "hyperparameter_search_budget": {
    387           "applies": true,
    388           "answer": false,
    389           "justification": "Attack parameters (learning rate, iterations, perturbation bound) were varied across 4 configurations but the specific values and selection rationale are not reported.",
    390           "source": "opus"
    391         },
    392         "best_config_selection_justified": {
    393           "applies": true,
    394           "answer": true,
    395           "justification": "The paper averages results across all 4 parameter configurations rather than selecting the best one, avoiding cherry-picking. Section 4.6: 'We then averaged the metrics for all launches.'",
    396           "source": "opus"
    397         },
    398         "multiple_comparison_correction": {
    399           "applies": false,
    400           "answer": false,
    401           "justification": "No statistical significance tests are performed, so multiple comparison correction is not applicable.",
    402           "source": "opus"
    403         },
    404         "self_comparison_bias_addressed": {
    405           "applies": true,
    406           "answer": false,
    407           "justification": "The authors do not acknowledge potential bias in their evaluation methodology or discuss whether their modified JPEG AI interface could affect results. Section 4.6 notes they made 'minor changes to the interface' but does not discuss how this might bias comparisons.",
    408           "source": "opus"
    409         },
    410         "compute_budget_vs_performance": {
    411           "applies": true,
    412           "answer": false,
    413           "justification": "Different codecs have vastly different computational requirements (noted in Section 2.2 — HOP vs BOP trade off efficiency and complexity) but performance is never reported as a function of compute budget.",
    414           "source": "opus"
    415         },
    416         "benchmark_construct_validity": {
    417           "applies": true,
    418           "answer": false,
    419           "justification": "The paper uses ΔPSNR, ΔVMAF, etc. as robustness measures without discussing whether these metrics adequately capture perceptual robustness or whether the adversarial threat model reflects realistic deployment conditions.",
    420           "source": "opus"
    421         },
    422         "scaffold_confound_addressed": {
    423           "applies": false,
    424           "answer": false,
    425           "justification": "No agentic scaffolding is involved in this study.",
    426           "source": "opus"
    427         }
    428       },
    429       "data_leakage": {
    430         "temporal_leakage_addressed": {
    431           "applies": true,
    432           "answer": false,
    433           "justification": "The evaluation datasets (KODAK 1991, CITYSCAPES 2016, BSDS 2011) all predate the NIC models being tested. The paper does not discuss whether models were tuned on these or similar datasets.",
    434           "source": "opus"
    435         },
    436         "feature_leakage_addressed": {
    437           "applies": true,
    438           "answer": false,
    439           "justification": "No discussion of whether the evaluation setup introduces information leakage. The white-box attack setting is intentional but the potential for evaluation-time information leakage beyond the intended threat model is not discussed.",
    440           "source": "opus"
    441         },
    442         "non_independence_addressed": {
    443           "applies": true,
    444           "answer": false,
    445           "justification": "No discussion of whether evaluation images overlap with or are similar to NIC training data. KODAK is ubiquitous in compression research and very likely in many models' training pipelines.",
    446           "source": "opus"
    447         },
    448         "leakage_detection_method": {
    449           "applies": true,
    450           "answer": false,
    451           "justification": "No leakage detection or prevention methods are used. No analysis of train/test overlap for any of the 10 codecs.",
    452           "source": "opus"
    453         }
    454       }
    455     }
    456   },
    457   "claims": [
    458     {
    459       "claim": "JPEG AI shows relatively high robustness compared to other NIC models against adversarial attacks",
    460       "evidence": "Figure 3 (∆VMAF across codecs and attacks) shows JPEG AI consistently outperforming CDC, HiFiC, and other open-source models on most attacks.",
    461       "supported": "strong"
    462     },
    463     {
    464       "claim": "Adversarial attacks increase the bitrate (BPP) of compressed images even when not explicitly optimized to do so",
    465       "evidence": "Figure 4 demonstrates increased bitrate for adversarially perturbed images across all codec-attack combinations, explained by 'more noise structure yields different rate-distortion trade-off.'",
    466       "supported": "strong"
    467     },
    468     {
    469       "claim": "High-operation point JPEG AI versions are less robust than base-operation point versions",
    470       "evidence": "Figure 3 shows JPEG AI HOP variants with consistently higher ∆VMAF (lower robustness) than corresponding BOP variants across multiple attacks.",
    471       "supported": "strong"
    472     },
    473     {
    474       "claim": "Adversarial attacks transfer across JPEG AI versions with varying effectiveness",
    475       "evidence": "Figure 7 transferability matrix shows generally positive transfer from lower to higher bitrates and between JPEG AI versions, with values ranging from -17.8 to 15.9.",
    476       "supported": "moderate"
    477     },
    478     {
    479       "claim": "Color artifacts rather than compression artifacts are the primary quality degradation mechanism under attack",
    480       "evidence": "Figure 5 shows Color metric correlation >0.5 with ∆-metrics, while Texture metric shows 'minimal correlation,' indicating color distortion drives reported quality drops.",
    481       "supported": "moderate"
    482     },
    483     {
    484       "claim": "Simple reversible defenses (flip, random roll, random rotate) can partially mitigate adversarial attacks on NICs",
    485       "evidence": "Figure 8 shows Flip, Random Ensemble, and Random Roll reducing ∆PSNR degradation by 5-10dB on average, though effectiveness varies by attack.",
    486       "supported": "moderate"
    487     }
    488   ],
    489   "methodology_tags": [
    490     "benchmark-eval",
    491     "comparative"
    492   ],
    493   "key_findings": "JPEG AI demonstrates superior adversarial robustness and compression efficiency (>50% bitrate savings vs mbt2018) compared to 9 other neural image codecs. Adversarial attacks increase both image quality degradation and bitrate requirements; different codecs exhibit vulnerability to different attack-loss function combinations. Simple reversible preprocessing defenses (geometric transforms) offer partial but limited mitigation. Color artifacts dominate quality loss under attack, with texture artifacts playing a minimal role.",
    494   "red_flags": [
    495     {
    496       "flag": "No statistical uncertainty quantification",
    497       "detail": "Results averaged across only 4 runs per configuration; no error bars, confidence intervals, or statistical significance tests reported. Cannot distinguish codec differences from noise."
    498     },
    499     {
    500       "flag": "White-box only evaluation",
    501       "detail": "Only white-box attacks tested; real-world threat model includes black-box attacks. Authors acknowledge compression is a purification defense against weak black-box attacks but don't evaluate this empirically."
    502     },
    503     {
    504       "flag": "No formal limitations section",
    505       "detail": "Critical threats to validity (white-box assumption, small dataset sizes, generalization to real-world attack scenarios) not systematically discussed. Scope boundaries not explicitly stated."
    506     },
    507     {
    508       "flag": "Code reproducibility blocked",
    509       "detail": "Paper states code 'link is hidden for a blind review.' Cannot verify implementation details, exact hyperparameters, or reproduce results during/after review."
    510     },
    511     {
    512       "flag": "Undersized datasets",
    513       "detail": "KODAK contains only 24 images and BSDS only 500. Statistical power for codec comparisons is limited; no power analysis provided."
    514     },
    515     {
    516       "flag": "Hyperparameters not fully specified",
    517       "detail": "Attack parameters (learning rate, iterations, perturbation bounds) stated as 'varied' but specific values not documented. Reproducibility compromised."
    518     },
    519     {
    520       "flag": "Metric reliability concerns",
    521       "detail": "Texture artifact metric shows 'minimal correlation' with quality metrics, suggesting a measurement validity issue. No discussion of metric agreement or why the texture metric fails."
    522     }
    523   ],
    524   "cited_papers": [
    525     {
    526       "title": "A survey on adversarial attacks and defences",
    527       "relevance": "Foundational overview of adversarial robustness methods; directly informs threat model and defense strategies for image compression."
    528     },
    529     {
    530       "title": "Toward robust neural image compression: Adversarial attack and model finetuning",
    531       "relevance": "Directly related work introducing ∆PSNR methodology and FTDA attack; this paper extends it to multi-metric evaluation."
    532     },
    533     {
    534       "title": "End-to-end optimized image compression",
    535       "relevance": "Foundational work on neural image compression (Ballé et al.); one of the baseline models evaluated."
    536     },
    537     {
    538       "title": "The JPEG AI standard: Providing efficient human and machine visual data consumption",
    539       "relevance": "Defines the JPEG AI standard being evaluated; critical reference for understanding codec design and operation points."
    540     },
    541     {
    542       "title": "Variational image compression with a scale hyperprior",
    543       "relevance": "Influential NIC architecture using hyperpriors; one of the evaluated baseline models."
    544     },
    545     {
    546       "title": "Learned image compression with discretized gaussian mixture likelihoods and attention modules",
    547       "relevance": "State-of-the-art NIC architecture (Cheng2020); baseline model showing particular vulnerability to I-FGSM attacks."
    548     }
    549   ],
    550   "engagement_factors": {
    551     "practical_relevance": {
    552       "score": 1,
    553       "justification": "Relevant to image compression researchers and standards bodies but not immediately actionable for general practitioners."
    554     },
    555     "surprise_contrarian": {
    556       "score": 1,
    557       "justification": "Confirms expected vulnerability of neural networks to adversarial attacks; the JPEG AI standard context adds moderate novelty but no fundamental surprise."
    558     },
    559     "fear_safety": {
    560       "score": 2,
    561       "justification": "Demonstrates that the first international neural compression standard (JPEG AI), intended for consumer devices, is vulnerable to adversarial attacks with transferable exploits across versions."
    562     },
    563     "drama_conflict": {
    564       "score": 0,
    565       "justification": "No controversy or conflict; a straightforward evaluation study with results favorable to JPEG AI's relative robustness."
    566     },
    567     "demo_ability": {
    568       "score": 0,
    569       "justification": "Code link is hidden for blind review, so no one can try the evaluation pipeline."
    570     },
    571     "brand_recognition": {
    572       "score": 1,
    573       "justification": "JPEG is a recognized standard but JPEG AI is not yet widely known outside the compression community."
    574     }
    575   },
    576   "hn_data": {
    577     "threads": [
    578       {
    579         "hn_id": "41947355",
    580         "title": "Universal optimality of Dijkstra via beyond-worst-case heaps",
    581         "points": 203,
    582         "comments": 47,
    583         "url": "https://news.ycombinator.com/item?id=41947355"
    584       },
    585       {
    586         "hn_id": "44742187",
    587         "title": "Deploying Large Language Models with Retrieval Augmented Generation (2024)",
    588         "points": 1,
    589         "comments": 0,
    590         "url": "https://news.ycombinator.com/item?id=44742187"
    591       },
    592       {
    593         "hn_id": "42185072",
    594         "title": "An Internet Voting System Fatally Flawed in Creative New Ways [pdf]",
    595         "points": 1,
    596         "comments": 0,
    597         "url": "https://news.ycombinator.com/item?id=42185072"
    598       },
    599       {
    600         "hn_id": "39198471",
    601         "title": "Image Conditioned Inpainting in Latent Diffusion Models for Virtual Try-All",
    602         "points": 1,
    603         "comments": 0,
    604         "url": "https://news.ycombinator.com/item?id=39198471"
    605       },
    606       {
    607         "hn_id": "39132573",
    608         "title": "ZkLogin: Privacy-Preserving Blockchain Authentication with Existing Credentials",
    609         "points": 1,
    610         "comments": 0,
    611         "url": "https://news.ycombinator.com/item?id=39132573"
    612       }
    613     ],
    614     "top_points": 203,
    615     "total_points": 207,
    616     "total_comments": 47
    617   }
    618 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs