@@ -13,9 +13,34 @@ use regex::Regex;
1313use std:: cell:: RefCell ;
1414use std:: collections:: { HashMap , HashSet } ;
1515use std:: fs;
16+ use std:: io:: Read ;
1617use std:: path:: { Path , PathBuf } ;
1718use std:: sync:: OnceLock ;
1819
20+ /// Upper bound on source-file reads during per-file scanning. Source
21+ /// files are almost always well under 16 MiB; capping at 64 MiB bounds
22+ /// a pathological/malicious input without losing realistic content.
23+ const SOURCE_FILE_READ_LIMIT : u64 = 64 * 1024 * 1024 ;
24+
25+ /// Upper bound on manifest / config file reads (Cargo.toml, pyproject.toml,
26+ /// flake.nix, deno.json, mix.exs, rebar.config, etc). Manifests are short
27+ /// curated documents; 4 MiB is far beyond realistic sizes.
28+ const MANIFEST_FILE_READ_LIMIT : u64 = 4 * 1024 * 1024 ;
29+
/// Bounded replacement for `fs::read_to_string(path).ok()`.
///
/// Reads at most `limit` bytes from `path` and returns them as a `String`.
/// Used by the analyzer to cap every file read against an explicit byte
/// ceiling rather than trusting the filesystem.
///
/// Returns `None` when the file cannot be opened or read, or when the bytes
/// that were read are not valid UTF-8.
///
/// If the file is longer than `limit` and the cut lands inside a multi-byte
/// UTF-8 sequence, the partial trailing character is dropped so the result
/// is the longest valid prefix that fits in `limit` bytes. Without this,
/// whether an oversized file yields `Some` or `None` would depend on which
/// byte happens to sit at the limit.
fn read_bounded(path: &Path, limit: u64) -> Option<String> {
    let mut bytes = Vec::new();
    // Read one byte past the limit so real truncation can be told apart
    // from a file that is exactly `limit` bytes long.
    fs::File::open(path)
        .ok()?
        .take(limit.saturating_add(1))
        .read_to_end(&mut bytes)
        .ok()?;

    let max_len = usize::try_from(limit).unwrap_or(usize::MAX);
    let truncated = bytes.len() > max_len;
    if truncated {
        bytes.truncate(max_len);
    }

    match String::from_utf8(bytes) {
        Ok(text) => Some(text),
        // `error_len() == None` means the input ended in the middle of a
        // sequence; that is only forgiven when the limit caused the cut.
        Err(err) if truncated && err.utf8_error().error_len().is_none() => {
            let valid_len = err.utf8_error().valid_up_to();
            let mut prefix = err.into_bytes();
            prefix.truncate(valid_len);
            String::from_utf8(prefix).ok()
        }
        Err(_) => None,
    }
}
43+
1944// Thread-local accumulators for migration analysis.
2045// These collect deprecated/modern API counts across all files during a single
2146// analyze() run, then get consumed by build_migration_metrics().
@@ -78,7 +103,7 @@ pub fn build_migration_metrics(target: &Path) -> MigrationMetrics {
78103 None
79104 }
80105 } ;
81- let config_content = config_path. and_then ( |p| fs :: read_to_string ( p ) . ok ( ) ) ;
106+ let config_content = config_path. and_then ( |p| read_bounded ( & p , MANIFEST_FILE_READ_LIMIT ) ) ;
82107
83108 let version_bracket = Analyzer :: detect_rescript_version (
84109 config_format,
@@ -4498,7 +4523,7 @@ impl Analyzer {
44984523
44994524 // ── Cargo.toml: git deps without explicit rev= ────────────────────
45004525 let cargo_toml_path = project_root. join ( "Cargo.toml" ) ;
4501- if let Ok ( content) = fs :: read_to_string ( & cargo_toml_path) {
4526+ if let Some ( content) = read_bounded ( & cargo_toml_path, MANIFEST_FILE_READ_LIMIT ) {
45024527 let git_dep_count =
45034528 content. matches ( "git = \" " ) . count ( ) + content. matches ( "git=\" " ) . count ( ) ;
45044529 let rev_count = content. matches ( "rev = \" " ) . count ( ) + content. matches ( "rev=\" " ) . count ( ) ;
@@ -4541,7 +4566,7 @@ impl Analyzer {
45414566
45424567 // ── Julia Manifest.toml: missing git-tree-sha1 hash entries ──────────
45434568 let manifest_toml_path = project_root. join ( "Manifest.toml" ) ;
4544- if let Ok ( content) = fs :: read_to_string ( & manifest_toml_path) {
4569+ if let Some ( content) = read_bounded ( & manifest_toml_path, MANIFEST_FILE_READ_LIMIT ) {
45454570 // A valid v2 Manifest.toml has `git-tree-sha1` for each pinned dep.
45464571 // If [[deps.*]] sections are present but no git-tree-sha1 appears,
45474572 // the manifest is not providing cryptographic pinning.
@@ -4566,7 +4591,7 @@ impl Analyzer {
45664591
45674592 // ── deno.json: unpinned import map entries ────────────────────────────
45684593 let deno_json_path = project_root. join ( "deno.json" ) ;
4569- if let Ok ( content) = fs :: read_to_string ( & deno_json_path) {
4594+ if let Some ( content) = read_bounded ( & deno_json_path, MANIFEST_FILE_READ_LIMIT ) {
45704595 // Count import values in the "imports" section that lack a version pin.
45714596 // Pinned deno.land specifiers contain '@' (e.g. std@0.177.0).
45724597 // Pinned npm specifiers contain '@' after 'npm:' (e.g. npm:express@4).
@@ -4623,7 +4648,7 @@ impl Analyzer {
46234648
46244649 // ── Rust: Cargo.toml with [dev-dependencies] / [[bin]] but no mutation tool ──
46254650 let cargo_toml_path = project_root. join ( "Cargo.toml" ) ;
4626- if let Ok ( content) = fs :: read_to_string ( & cargo_toml_path) {
4651+ if let Some ( content) = read_bounded ( & cargo_toml_path, MANIFEST_FILE_READ_LIMIT ) {
46274652 // Only check projects that have a test infrastructure (dev-deps present
46284653 // or test directories present).
46294654 let has_test_infrastructure =
@@ -4689,7 +4714,7 @@ impl Analyzer {
46894714
46904715 // Cargo.toml (Rust)
46914716 let cargo_toml = target_dir. join ( "Cargo.toml" ) ;
4692- if let Ok ( content) = fs :: read_to_string ( & cargo_toml) {
4717+ if let Some ( content) = read_bounded ( & cargo_toml, MANIFEST_FILE_READ_LIMIT ) {
46934718 if content. contains ( "tokio" ) {
46944719 frameworks. insert ( Framework :: Networking ) ;
46954720 }
@@ -4719,7 +4744,7 @@ impl Analyzer {
47194744
47204745 // mix.exs (Elixir)
47214746 let mix_exs = target_dir. join ( "mix.exs" ) ;
4722- if let Ok ( content) = fs :: read_to_string ( & mix_exs) {
4747+ if let Some ( content) = read_bounded ( & mix_exs, MANIFEST_FILE_READ_LIMIT ) {
47234748 if content. contains ( ":phoenix" ) {
47244749 frameworks. insert ( Framework :: Phoenix ) ;
47254750 frameworks. insert ( Framework :: WebServer ) ;
@@ -4742,7 +4767,7 @@ impl Analyzer {
47424767
47434768 // rebar.config (Erlang)
47444769 let rebar_config = target_dir. join ( "rebar.config" ) ;
4745- if let Ok ( content) = fs :: read_to_string ( & rebar_config) {
4770+ if let Some ( content) = read_bounded ( & rebar_config, MANIFEST_FILE_READ_LIMIT ) {
47464771 if content. contains ( "cowboy" ) {
47474772 frameworks. insert ( Framework :: Cowboy ) ;
47484773 frameworks. insert ( Framework :: WebServer ) ;
@@ -4751,15 +4776,15 @@ impl Analyzer {
47514776
47524777 // gleam.toml (Gleam)
47534778 let gleam_toml = target_dir. join ( "gleam.toml" ) ;
4754- if let Ok ( content) = fs :: read_to_string ( & gleam_toml) {
4779+ if let Some ( content) = read_bounded ( & gleam_toml, MANIFEST_FILE_READ_LIMIT ) {
47554780 if content. contains ( "wisp" ) || content. contains ( "mist" ) {
47564781 frameworks. insert ( Framework :: WebServer ) ;
47574782 }
47584783 }
47594784
47604785 // package.json (JS/TS/ReScript)
47614786 let pkg_json = target_dir. join ( "package.json" ) ;
4762- if let Ok ( content) = fs :: read_to_string ( & pkg_json) {
4787+ if let Some ( content) = read_bounded ( & pkg_json, MANIFEST_FILE_READ_LIMIT ) {
47634788 if content. contains ( "\" express\" " )
47644789 || content. contains ( "\" fastify\" " )
47654790 || content. contains ( "\" koa\" " )
@@ -4783,7 +4808,7 @@ impl Analyzer {
47834808 // requirements.txt / pyproject.toml (Python)
47844809 for manifest in & [ "requirements.txt" , "pyproject.toml" , "setup.py" ] {
47854810 let path = target_dir. join ( manifest) ;
4786- if let Ok ( content) = fs :: read_to_string ( & path) {
4811+ if let Some ( content) = read_bounded ( & path, MANIFEST_FILE_READ_LIMIT ) {
47874812 if content. contains ( "flask" )
47884813 || content. contains ( "django" )
47894814 || content. contains ( "fastapi" )
@@ -4812,9 +4837,9 @@ impl Analyzer {
48124837 // string literals in tests and analyzer patterns.
48134838 for file in files {
48144839 let file_lang = Language :: detect ( file. to_str ( ) . unwrap_or ( "" ) ) ;
4815- let content = match fs :: read_to_string ( file) {
4816- Ok ( c) => c,
4817- Err ( _ ) => continue ,
4840+ let content = match read_bounded ( file, SOURCE_FILE_READ_LIMIT ) {
4841+ Some ( c) => c,
4842+ None => continue ,
48184843 } ;
48194844
48204845 match file_lang {
0 commit comments