Skip to content

Commit 804a25d

Browse files
hyperpolymathclaude authored and committed
fix(bounds): .take(LIMIT) on config/report/analyzer reads (batch 2/2)
Self-scan UnboundedAllocation: 14 critical → 1. Batch 2 bounds the remaining 11 files (batch 1 handled the 3 external-input sites):

report/diff.rs, report/migration.rs, adjudicate/mod.rs, attack/profile.rs, ambush/timeline.rs, kanren/rules.rs, mass_panic/temporal.rs, mass_panic/imaging.rs, bridge/registry.rs, attestation/evidence.rs, assail/analyzer.rs

analyzer.rs gains a module-level `read_bounded(path, limit)` helper and two const limits (SOURCE_FILE_READ_LIMIT = 64 MiB for source scans, MANIFEST_FILE_READ_LIMIT = 4 MiB for Cargo.toml / deno.json / mix.exs / pkg.json / etc). 11 call sites routed through the helper.

Limits per class:
- source files: 64 MiB (well beyond realistic source)
- HTTP response body: 256 MiB (OSV batch response ceiling)
- report JSON: 64 MiB (aggregated scan output)
- image / registry: 16–256 MiB
- snapshot / index: 64 MiB
- config / manifest: 4 MiB (curated short docs)
- timeline spec: 4 MiB
- /proc/self/*: 64 KiB (kernel-bounded to a few KiB anyway)

Remaining 1 UnboundedAllocation finding on src/assail/analyzer.rs is a self-reference FP — the file's own variable names (`has_unbounded_allocations`, `unbounded_vec_patterns`, etc.) match the detector's substring heuristic. Real fix: word-boundary or AST-based matching, not substring. Logged under task #25 (drive assail to zero-FN) alongside other detector-refinement items.

Self-scan total: 25 → 11 findings; Critical 16 → 2 (the remaining 2 Criticals are both test/example fixtures by design: UnsafeDeserialization in tests/fixtures/example.py, and the self-reference UnboundedAllocation above).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 6941ef7 commit 804a25d

11 files changed

Lines changed: 205 additions & 40 deletions

File tree

src/adjudicate/mod.rs

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,16 @@ use crate::kanren::core::{FactDB, LogicFact, LogicRule, RuleMetadata, Term};
88
use crate::report;
99
use anyhow::{anyhow, Context, Result};
1010
use serde::{Deserialize, Serialize};
11-
use std::fs;
11+
use std::fs::{self, File};
12+
use std::io::Read;
1213
use std::path::{Path, PathBuf};
1314

15+
/// Upper bound on report reads during adjudication. Reports are JSON
16+
/// documents emitted by earlier panic-attack phases; 64 MiB is well
17+
/// beyond realistic sizes and prevents a tampered input from exhausting
18+
/// memory before even being parsed.
19+
const REPORT_FILE_READ_LIMIT: u64 = 64 * 1024 * 1024;
20+
1421
#[derive(Debug, Clone)]
1522
pub struct AdjudicateConfig {
1623
pub reports: Vec<PathBuf>,
@@ -229,8 +236,15 @@ fn parse_input_report(path: &Path) -> Result<ParsedReport> {
229236
return Ok(ParsedReport::Assault(assault));
230237
}
231238

232-
let content =
233-
fs::read_to_string(path).with_context(|| format!("reading report {}", path.display()))?;
239+
let content = {
240+
let mut buf = String::new();
241+
File::open(path)
242+
.with_context(|| format!("opening report {}", path.display()))?
243+
.take(REPORT_FILE_READ_LIMIT)
244+
.read_to_string(&mut buf)
245+
.with_context(|| format!("reading report {}", path.display()))?;
246+
buf
247+
};
234248
if let Ok(amuck) = serde_json::from_str::<AmuckReport>(&content) {
235249
return Ok(ParsedReport::Amuck(amuck));
236250
}

src/ambush/timeline.rs

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,15 @@
55
use crate::types::{AttackAxis, IntensityLevel};
66
use anyhow::{anyhow, Context, Result};
77
use serde::Deserialize;
8-
use std::fs;
8+
use std::fs::File;
9+
use std::io::Read;
910
use std::path::{Path, PathBuf};
1011
use std::time::Duration;
1112

13+
/// Upper bound on timeline-spec reads. Timelines are short curated
14+
/// JSON/YAML; 4 MiB is far beyond realistic sizes.
15+
const TIMELINE_FILE_READ_LIMIT: u64 = 4 * 1024 * 1024;
16+
1217
#[derive(Debug, Clone)]
1318
pub struct TimelinePlan {
1419
pub program: Option<PathBuf>,
@@ -54,8 +59,15 @@ pub fn load_timeline_with_default(
5459
path: &Path,
5560
default_intensity: Option<IntensityLevel>,
5661
) -> Result<TimelinePlan> {
57-
let content =
58-
fs::read_to_string(path).with_context(|| format!("reading timeline {}", path.display()))?;
62+
let content = {
63+
let mut buf = String::new();
64+
File::open(path)
65+
.with_context(|| format!("opening timeline {}", path.display()))?
66+
.take(TIMELINE_FILE_READ_LIMIT)
67+
.read_to_string(&mut buf)
68+
.with_context(|| format!("reading timeline {}", path.display()))?;
69+
buf
70+
};
5971
let spec: TimelineSpec = if path.extension().and_then(|s| s.to_str()) == Some("yaml")
6072
|| path.extension().and_then(|s| s.to_str()) == Some("yml")
6173
{

src/assail/analyzer.rs

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,34 @@ use regex::Regex;
1313
use std::cell::RefCell;
1414
use std::collections::{HashMap, HashSet};
1515
use std::fs;
16+
use std::io::Read;
1617
use std::path::{Path, PathBuf};
1718
use std::sync::OnceLock;
1819

20+
/// Upper bound on source-file reads during per-file scanning. Source
21+
/// files are almost always well under 16 MiB; capping at 64 MiB bounds
22+
/// a pathological/malicious input without losing realistic content.
23+
const SOURCE_FILE_READ_LIMIT: u64 = 64 * 1024 * 1024;
24+
25+
/// Upper bound on manifest / config file reads (Cargo.toml, pyproject.toml,
26+
/// flake.nix, deno.json, mix.exs, rebar.config, etc). Manifests are short
27+
/// curated documents; 4 MiB is far beyond realistic sizes.
28+
const MANIFEST_FILE_READ_LIMIT: u64 = 4 * 1024 * 1024;
29+
30+
/// Bounded replacement for `fs::read_to_string(path).ok()` — returns
31+
/// `Some(content)` on success (up to `limit` bytes), `None` on I/O error
32+
/// or if the file is absent. Used by the analyzer to cap every file read
33+
/// against an explicit byte ceiling rather than trusting the filesystem.
34+
fn read_bounded(path: &Path, limit: u64) -> Option<String> {
35+
let mut buf = String::new();
36+
fs::File::open(path)
37+
.ok()?
38+
.take(limit)
39+
.read_to_string(&mut buf)
40+
.ok()?;
41+
Some(buf)
42+
}
43+
1944
// Thread-local accumulators for migration analysis.
2045
// These collect deprecated/modern API counts across all files during a single
2146
// analyze() run, then get consumed by build_migration_metrics().
@@ -78,7 +103,7 @@ pub fn build_migration_metrics(target: &Path) -> MigrationMetrics {
78103
None
79104
}
80105
};
81-
let config_content = config_path.and_then(|p| fs::read_to_string(p).ok());
106+
let config_content = config_path.and_then(|p| read_bounded(&p, MANIFEST_FILE_READ_LIMIT));
82107

83108
let version_bracket = Analyzer::detect_rescript_version(
84109
config_format,
@@ -4498,7 +4523,7 @@ impl Analyzer {
44984523

44994524
// ── Cargo.toml: git deps without explicit rev= ────────────────────
45004525
let cargo_toml_path = project_root.join("Cargo.toml");
4501-
if let Ok(content) = fs::read_to_string(&cargo_toml_path) {
4526+
if let Some(content) = read_bounded(&cargo_toml_path, MANIFEST_FILE_READ_LIMIT) {
45024527
let git_dep_count =
45034528
content.matches("git = \"").count() + content.matches("git=\"").count();
45044529
let rev_count = content.matches("rev = \"").count() + content.matches("rev=\"").count();
@@ -4541,7 +4566,7 @@ impl Analyzer {
45414566

45424567
// ── Julia Manifest.toml: missing git-tree-sha1 hash entries ──────────
45434568
let manifest_toml_path = project_root.join("Manifest.toml");
4544-
if let Ok(content) = fs::read_to_string(&manifest_toml_path) {
4569+
if let Some(content) = read_bounded(&manifest_toml_path, MANIFEST_FILE_READ_LIMIT) {
45454570
// A valid v2 Manifest.toml has `git-tree-sha1` for each pinned dep.
45464571
// If [[deps.*]] sections are present but no git-tree-sha1 appears,
45474572
// the manifest is not providing cryptographic pinning.
@@ -4566,7 +4591,7 @@ impl Analyzer {
45664591

45674592
// ── deno.json: unpinned import map entries ────────────────────────────
45684593
let deno_json_path = project_root.join("deno.json");
4569-
if let Ok(content) = fs::read_to_string(&deno_json_path) {
4594+
if let Some(content) = read_bounded(&deno_json_path, MANIFEST_FILE_READ_LIMIT) {
45704595
// Count import values in the "imports" section that lack a version pin.
45714596
// Pinned deno.land specifiers contain '@' (e.g. std@0.177.0).
45724597
// Pinned npm specifiers contain '@' after 'npm:' (e.g. npm:express@4).
@@ -4623,7 +4648,7 @@ impl Analyzer {
46234648

46244649
// ── Rust: Cargo.toml with [dev-dependencies] / [[bin]] but no mutation tool ──
46254650
let cargo_toml_path = project_root.join("Cargo.toml");
4626-
if let Ok(content) = fs::read_to_string(&cargo_toml_path) {
4651+
if let Some(content) = read_bounded(&cargo_toml_path, MANIFEST_FILE_READ_LIMIT) {
46274652
// Only check projects that have a test infrastructure (dev-deps present
46284653
// or test directories present).
46294654
let has_test_infrastructure =
@@ -4689,7 +4714,7 @@ impl Analyzer {
46894714

46904715
// Cargo.toml (Rust)
46914716
let cargo_toml = target_dir.join("Cargo.toml");
4692-
if let Ok(content) = fs::read_to_string(&cargo_toml) {
4717+
if let Some(content) = read_bounded(&cargo_toml, MANIFEST_FILE_READ_LIMIT) {
46934718
if content.contains("tokio") {
46944719
frameworks.insert(Framework::Networking);
46954720
}
@@ -4719,7 +4744,7 @@ impl Analyzer {
47194744

47204745
// mix.exs (Elixir)
47214746
let mix_exs = target_dir.join("mix.exs");
4722-
if let Ok(content) = fs::read_to_string(&mix_exs) {
4747+
if let Some(content) = read_bounded(&mix_exs, MANIFEST_FILE_READ_LIMIT) {
47234748
if content.contains(":phoenix") {
47244749
frameworks.insert(Framework::Phoenix);
47254750
frameworks.insert(Framework::WebServer);
@@ -4742,7 +4767,7 @@ impl Analyzer {
47424767

47434768
// rebar.config (Erlang)
47444769
let rebar_config = target_dir.join("rebar.config");
4745-
if let Ok(content) = fs::read_to_string(&rebar_config) {
4770+
if let Some(content) = read_bounded(&rebar_config, MANIFEST_FILE_READ_LIMIT) {
47464771
if content.contains("cowboy") {
47474772
frameworks.insert(Framework::Cowboy);
47484773
frameworks.insert(Framework::WebServer);
@@ -4751,15 +4776,15 @@ impl Analyzer {
47514776

47524777
// gleam.toml (Gleam)
47534778
let gleam_toml = target_dir.join("gleam.toml");
4754-
if let Ok(content) = fs::read_to_string(&gleam_toml) {
4779+
if let Some(content) = read_bounded(&gleam_toml, MANIFEST_FILE_READ_LIMIT) {
47554780
if content.contains("wisp") || content.contains("mist") {
47564781
frameworks.insert(Framework::WebServer);
47574782
}
47584783
}
47594784

47604785
// package.json (JS/TS/ReScript)
47614786
let pkg_json = target_dir.join("package.json");
4762-
if let Ok(content) = fs::read_to_string(&pkg_json) {
4787+
if let Some(content) = read_bounded(&pkg_json, MANIFEST_FILE_READ_LIMIT) {
47634788
if content.contains("\"express\"")
47644789
|| content.contains("\"fastify\"")
47654790
|| content.contains("\"koa\"")
@@ -4783,7 +4808,7 @@ impl Analyzer {
47834808
// requirements.txt / pyproject.toml (Python)
47844809
for manifest in &["requirements.txt", "pyproject.toml", "setup.py"] {
47854810
let path = target_dir.join(manifest);
4786-
if let Ok(content) = fs::read_to_string(&path) {
4811+
if let Some(content) = read_bounded(&path, MANIFEST_FILE_READ_LIMIT) {
47874812
if content.contains("flask")
47884813
|| content.contains("django")
47894814
|| content.contains("fastapi")
@@ -4812,9 +4837,9 @@ impl Analyzer {
48124837
// string literals in tests and analyzer patterns.
48134838
for file in files {
48144839
let file_lang = Language::detect(file.to_str().unwrap_or(""));
4815-
let content = match fs::read_to_string(file) {
4816-
Ok(c) => c,
4817-
Err(_) => continue,
4840+
let content = match read_bounded(file, SOURCE_FILE_READ_LIMIT) {
4841+
Some(c) => c,
4842+
None => continue,
48184843
};
48194844

48204845
match file_lang {

src/attack/profile.rs

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,15 @@ use serde::Deserialize;
88
use serde_json;
99
use serde_yaml;
1010
use std::collections::HashMap;
11-
use std::fs;
11+
use std::fs::File;
12+
use std::io::Read;
1213
use std::path::Path;
1314

15+
/// Upper bound on attack-profile config reads. Profiles are short curated
16+
/// JSON/YAML documents; 4 MiB is far beyond realistic sizes and bounds
17+
/// a tampered or malformed input.
18+
const PROFILE_FILE_READ_LIMIT: u64 = 4 * 1024 * 1024;
19+
1420
#[derive(Debug, Clone, Deserialize, Default)]
1521
pub struct AttackProfile {
1622
#[serde(default)]
@@ -23,8 +29,15 @@ pub struct AttackProfile {
2329

2430
impl AttackProfile {
2531
pub fn load(path: &Path) -> Result<Self> {
26-
let content = fs::read_to_string(path)
27-
.with_context(|| format!("reading attack profile {}", path.display()))?;
32+
let content = {
33+
let mut buf = String::new();
34+
File::open(path)
35+
.with_context(|| format!("opening attack profile {}", path.display()))?
36+
.take(PROFILE_FILE_READ_LIMIT)
37+
.read_to_string(&mut buf)
38+
.with_context(|| format!("reading attack profile {}", path.display()))?;
39+
buf
40+
};
2841
// Extension-based dispatch is explicit to avoid ambiguous parsing behavior.
2942
match path.extension().and_then(|ext| ext.to_str()) {
3043
Some("json") => serde_json::from_str(&content)

src/attestation/evidence.rs

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@ use sha2::{Digest, Sha256};
1818
use std::collections::HashSet;
1919
use std::time::Instant;
2020

21+
/// Upper bound on /proc/self/{stat,status} reads. Kernel-bounded in
22+
/// practice to a few KiB; 64 KiB silently truncates any pathological
23+
/// kernel entry without breaking the best-effort metric.
24+
#[cfg(target_os = "linux")]
25+
const PROC_FILE_READ_LIMIT: u64 = 64 * 1024;
26+
2127
/// Checkpoint interval — one checkpoint per this many files.
2228
const CHECKPOINT_INTERVAL: usize = 100;
2329

@@ -195,7 +201,13 @@ impl EvidenceAccumulator {
195201
fn get_cpu_time_ms() -> Option<u64> {
196202
#[cfg(target_os = "linux")]
197203
{
198-
let stat = std::fs::read_to_string("/proc/self/stat").ok()?;
204+
use std::io::Read;
205+
let mut stat = String::new();
206+
std::fs::File::open("/proc/self/stat")
207+
.ok()?
208+
.take(PROC_FILE_READ_LIMIT)
209+
.read_to_string(&mut stat)
210+
.ok()?;
199211
let fields: Vec<&str> = stat.split_whitespace().collect();
200212
// Fields 13 (utime) and 14 (stime) are in clock ticks
201213
if fields.len() > 14 {
@@ -216,7 +228,13 @@ fn get_cpu_time_ms() -> Option<u64> {
216228
fn get_peak_rss() -> Option<u64> {
217229
#[cfg(target_os = "linux")]
218230
{
219-
let status = std::fs::read_to_string("/proc/self/status").ok()?;
231+
use std::io::Read;
232+
let mut status = String::new();
233+
std::fs::File::open("/proc/self/status")
234+
.ok()?
235+
.take(PROC_FILE_READ_LIMIT)
236+
.read_to_string(&mut status)
237+
.ok()?;
220238
for line in status.lines() {
221239
if line.starts_with("VmHWM:") {
222240
let kb_str = line

src/bridge/registry.rs

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,17 @@
99
//! See docs/patch-bridge-design.md Section 8 for full lifecycle specification.
1010
1111
use super::{AssessedCve, Classification};
12-
use anyhow::Result;
12+
use anyhow::{Context, Result};
1313
use serde::{Deserialize, Serialize};
14+
use std::fs::File;
15+
use std::io::Read;
1416
use std::path::{Path, PathBuf};
1517

18+
/// Upper bound on mitigation-registry reads. Registries track active
19+
/// CVEs with lifecycle metadata; 16 MiB handles tens of thousands of
20+
/// entries and bounds tampered inputs wholesale.
21+
const REGISTRY_FILE_READ_LIMIT: u64 = 16 * 1024 * 1024;
22+
1623
/// A registered mitigation for an active CVE.
1724
#[derive(Debug, Clone, Serialize, Deserialize)]
1825
pub struct MitigationEntry {
@@ -74,7 +81,15 @@ impl MitigationRegistry {
7481
pub fn load(project_dir: &Path) -> Result<Self> {
7582
let path = registry_path(project_dir);
7683
if path.exists() {
77-
let content = std::fs::read_to_string(&path)?;
84+
let content = {
85+
let mut buf = String::new();
86+
File::open(&path)
87+
.with_context(|| format!("opening registry {}", path.display()))?
88+
.take(REGISTRY_FILE_READ_LIMIT)
89+
.read_to_string(&mut buf)
90+
.with_context(|| format!("reading registry {}", path.display()))?;
91+
buf
92+
};
7893
Ok(serde_json::from_str(&content)?)
7994
} else {
8095
Ok(Self::new())

src/kanren/rules.rs

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,15 @@ use crate::kanren::core::{LogicEngine, LogicFact, LogicRule, RuleMetadata, Term}
66
use anyhow::{Context, Result};
77
use serde::Deserialize;
88
use serde_json;
9-
use std::fs;
9+
use std::fs::File;
10+
use std::io::Read;
1011
use std::path::Path;
1112

13+
/// Upper bound on rule-catalog reads. The miniKanren catalog is a
14+
/// curated JSON document; 4 MiB is far beyond any realistic catalog
15+
/// and bounds tampered or malformed input.
16+
const RULE_CATALOG_READ_LIMIT: u64 = 4 * 1024 * 1024;
17+
1218
#[derive(Debug, Deserialize)]
1319
pub struct RuleSpec {
1420
pub name: String,
@@ -91,7 +97,15 @@ impl RuleCatalog {
9197
}
9298

9399
pub fn from_file(path: &Path) -> Result<Self> {
94-
let data = fs::read_to_string(path).context("reading rule catalog")?;
100+
let data = {
101+
let mut buf = String::new();
102+
File::open(path)
103+
.context("opening rule catalog")?
104+
.take(RULE_CATALOG_READ_LIMIT)
105+
.read_to_string(&mut buf)
106+
.context("reading rule catalog")?;
107+
buf
108+
};
95109
let specs: Vec<RuleSpec> = serde_json::from_str(&data).context("parsing rule catalog")?;
96110
Ok(Self {
97111
rules: specs.into_iter().map(|spec| spec.to_logic_rule()).collect(),

0 commit comments

Comments (0)