Skip to content

Commit b8d177d

Browse files
hyperpolymathclaude
andcommitted
fix(assail): TODO-in-string-literal FP closed (Task #23)
UncheckedError detector was counting TODO/FIXME/HACK/XXX markers against raw `content` via `content.matches(\"TODO\").count()`. This inflated counts for every `.expect(\"TODO: handle error\")` call — observed on 007-lang where parser.rs has 155 such stubs, each double-counted as both PanicPath (correct) AND UncheckedError (FP). Fix: replace the substring count with a regex that requires a comment-starter on the same line. Comment-starters handled: // /* * # -- ;; %% Covers Rust/C/JS/Go/Zig (// + /*), Python/Ruby/Shell/Nix/Elixir (#), Haskell/Ada/SQL/Lua/Idris (--), Lisp/Scheme/Racket (;;), Erlang/Matlab (%%). OCaml (* *) and a few others are not yet handled — edge cases for later. Block-comment continuation lines starting with `*` also match (e.g. ` * TODO: …` inside a /** … */ block). Regex stored in OnceLock<Regex> — compiled once per process. Estate-wide impact: 007-lang self-scan: 24 findings -> 9 findings. UncheckedError: 15 -> 0 (all were TODO-in-.expect-string FPs) PanicPath: 4 -> 4 (155 .expect() in parser.rs is real debt) UnsafeCode: 4 -> 4 (legit — see audits/audit-ffi-unsafe.md) InsecureProtocol:1 -> 1 (integration_tests.rs — test context) Regression tests added: test_todo_in_string_literal_does_not_trigger_unchecked_error test_real_todo_comments_still_detected 190/190 lib tests + 12/12 unbounded corpus + new analyzer tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 8b12fec commit b8d177d

2 files changed

Lines changed: 101 additions & 5 deletions

File tree

src/assail/analyzer.rs

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,20 @@ static RE_SHELL_UNQUOTED_VAR: OnceLock<Regex> = OnceLock::new();
243243
static RE_HTTP_URL: OnceLock<Regex> = OnceLock::new();
244244
static RE_HTTP_LOCALHOST: OnceLock<Regex> = OnceLock::new();
245245
static RE_HARDCODED_SECRET: OnceLock<Regex> = OnceLock::new();
246+
/// Match TODO/FIXME/HACK/XXX markers only when preceded by a
247+
/// comment-starter on the same line. Excludes string-literal matches
248+
/// like `.expect("TODO: handle error")` which were previously
249+
/// inflating the UncheckedError count for every `.expect(...)` call
250+
/// that mentioned TODO in its message (observed on 007-lang:
251+
/// parser.rs has 155 `.expect("TODO: ...")` patterns, each of which
252+
/// was being double-counted as both PanicPath (correct) and
253+
/// UncheckedError (FP).
254+
///
255+
/// Comment-starters handled: `//` (Rust/C/JS/...), `/*` (block),
256+
/// `#` (Python/Ruby/Shell/Nix/Elixir-preamble), `--` (Haskell/Ada/
257+
/// SQL/Lua/Idris), `;;` (Lisp/Scheme/Racket), `%%` (Erlang/Matlab).
258+
/// Does not handle OCaml `(* *)` or Forth `\` — edge cases for later.
259+
static RE_TODO_COMMENT: OnceLock<Regex> = OnceLock::new();
246260

247261
pub struct Analyzer {
248262
target: PathBuf,
@@ -4543,11 +4557,15 @@ impl Analyzer {
45434557
});
45444558
}
45454559

4546-
// TODO/FIXME/HACK/XXX markers
4547-
let todo_count = content.matches("TODO").count()
4548-
+ content.matches("FIXME").count()
4549-
+ content.matches("HACK").count()
4550-
+ content.matches("XXX").count();
4560+
// TODO/FIXME/HACK/XXX markers — count only when the marker
4561+
// appears on a line that also contains a comment-starter, so
4562+
// string literals like `.expect("TODO: handle error")` don't
4563+
// inflate the count. See RE_TODO_COMMENT definition above.
4564+
let todo_re = RE_TODO_COMMENT.get_or_init(|| {
4565+
Regex::new(r"(?m)^[^\n]*?(//|/\*|\*|#|--|;;|%%)[^\n]*?\b(TODO|FIXME|HACK|XXX)\b")
4566+
.expect("static regex is valid")
4567+
});
4568+
let todo_count = todo_re.find_iter(content).count();
45514569
if todo_count > 10 {
45524570
weak_points.push(WeakPoint {
45534571
file: None,

tests/analyzer_tests.rs

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,84 @@ fn test_framework_detection_database() {
209209
);
210210
}
211211

212+
#[test]
213+
fn test_todo_in_string_literal_does_not_trigger_unchecked_error() {
214+
// Regression test for 007-lang false positive: parser.rs contained
215+
// 155 `.expect("TODO: handle error")` calls. The old detector did
216+
// `content.matches("TODO").count()` on raw content, so each string
217+
// literal incremented the TODO count. This pattern is common in
218+
// stub code and shouldn't be classified as UncheckedError.
219+
let dir = TempDir::new().unwrap();
220+
let content = r#"
221+
pub fn parse_stubbed(input: &str) -> String {
222+
let first = input.split(',').next().expect("TODO: handle error");
223+
let second = input.split('.').next().expect("TODO: handle error");
224+
let third = input.split('/').next().expect("TODO: handle error");
225+
let fourth = input.split(':').next().expect("TODO: handle error");
226+
let fifth = input.split(';').next().expect("TODO: handle error");
227+
let sixth = input.split('-').next().expect("TODO: handle error");
228+
let seventh = input.split('_').next().expect("TODO: handle error");
229+
let eighth = input.split('+').next().expect("TODO: handle error");
230+
let ninth = input.split('*').next().expect("TODO: handle error");
231+
let tenth = input.split('!').next().expect("TODO: handle error");
232+
let eleventh = input.split('?').next().expect("TODO: handle error");
233+
format!("{}{}{}{}{}{}{}{}{}{}{}",
234+
first, second, third, fourth, fifth, sixth,
235+
seventh, eighth, ninth, tenth, eleventh)
236+
}
237+
"#;
238+
let file = create_test_file(&dir, "stubby.rs", content);
239+
let report = assail::analyze(&file).expect("analysis should succeed");
240+
241+
let unchecked: Vec<_> = report
242+
.weak_points
243+
.iter()
244+
.filter(|wp| wp.category == WeakPointCategory::UncheckedError)
245+
.collect();
246+
247+
assert!(
248+
unchecked.is_empty(),
249+
"TODO inside .expect() string literals must not count as \
250+
UncheckedError markers: got {:?}",
251+
unchecked
252+
);
253+
}
254+
255+
#[test]
256+
fn test_real_todo_comments_still_detected() {
257+
// Sanity: actual `// TODO` comments in the 11+ threshold still fire.
258+
let dir = TempDir::new().unwrap();
259+
let content = r#"
260+
// TODO: implement proper error handling
261+
// TODO: add tests for edge cases
262+
// TODO: optimise hot path
263+
// TODO: document invariants
264+
// FIXME: this leaks memory on panic
265+
// FIXME: race condition in iterator
266+
// FIXME: buffer overflow possible
267+
// HACK: using unsafe pointer cast as workaround
268+
// HACK: bypassing type check with transmute
269+
// HACK: relying on undocumented behaviour
270+
// XXX: this block needs review
271+
// XXX: performance critical but correctness unclear
272+
pub fn stub() -> i32 { 42 }
273+
"#;
274+
let file = create_test_file(&dir, "debt.rs", content);
275+
let report = assail::analyze(&file).expect("analysis should succeed");
276+
277+
let unchecked: Vec<_> = report
278+
.weak_points
279+
.iter()
280+
.filter(|wp| wp.category == WeakPointCategory::UncheckedError)
281+
.collect();
282+
283+
assert!(
284+
!unchecked.is_empty(),
285+
"real // TODO / // FIXME / // HACK / // XXX comments above \
286+
the threshold should still fire the detector"
287+
);
288+
}
289+
212290
#[test]
213291
fn test_per_file_stats_populated() {
214292
let dir = TempDir::new().unwrap();

0 commit comments

Comments
 (0)