Skip to content

Commit 9de6d26

Browse files
committed
Add uninitialized read detection tool and tests for CWE-457
1 parent 30cc9a5 commit 9de6d26

4 files changed

Lines changed: 567 additions & 0 deletions

File tree

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,8 @@ Add the following:
168168
- `find_format_string_vulns`: Detect format string vulnerabilities (CWE-134) where non-literal format arguments are passed to printf-family functions.
169169
- `find_heap_overflow`: Detect heap overflow vulnerabilities (CWE-122) where writes to heap buffers may exceed their allocated size.
170170
- `find_stack_overflow`: Detect stack buffer overflow vulnerabilities (CWE-121) where writes to fixed-size local arrays (e.g. `char buf[64]`) may exceed their declared dimension.
171+
- `find_toctou`: Detect Time-of-Check-Time-of-Use race conditions (CWE-367) where a file is checked with `access()`/`stat()` and then opened or operated on in a separate step.
172+
- `find_uninitialized_reads`: Detect uninitialized variable reads (CWE-457) where local variables are used before being assigned a value.
171173

172174
## Contributing & Tests
173175

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
{
  import io.shiftleft.codepropertygraph.generated.nodes._
  import io.shiftleft.semanticcpg.language._
  import scala.collection.mutable

  // Uninitialized-read (CWE-457) query.
  //
  // For every non-array local of every method with a body, this script reports each
  // identifier occurrence that (by line number) precedes the variable's first
  // initialization point. An initialization point is either
  //   * an `<operator>.assignment` call whose first argument's code equals the variable
  //     name exactly (so member/element writes such as `s.a = 1` are NOT counted), or
  //   * an occurrence of the variable as the direct operand of `&` — the address escapes,
  //     so a callee may initialize it through the pointer (e.g. `scanf("%d", &x)`).
  // The comparison is purely line-based; control flow is not modelled.
  //
  // The result is a human-readable report wrapped in <codebadger_result> tags.

  // Template placeholders, substituted before the script is executed.
  val fileFilter = "{{filename}}"
  val maxResults = {{limit}}

  val output = new StringBuilder()

  val assignmentOp = "<operator>.assignment"
  val addressOfOp  = "<operator>.addressOf"
  val sizeOfOp     = "<operator>.sizeOf"

  // Builds a regex matching `f` literally, either as the whole path or as the part
  // following a '/' (so "parser.c" matches "src/parser.c" but not "src/myparser.c").
  def pathBoundaryRegex(f: String): String = {
    val escaped = java.util.regex.Pattern.quote(f)
    "(^|.*/)" + escaped + "$"
  }

  // The Call node directly above `ident` in the AST, if its parent is a call.
  def parentCall(ident: Identifier): Option[Call] = ident.astParent match {
    case call: Call => Some(call)
    case _          => None
  }

  // True when `ident` is itself the first argument (LHS) of a plain assignment.
  def isAssignmentTarget(ident: Identifier): Boolean =
    parentCall(ident).exists { call =>
      call.name == assignmentOp &&
      call.argument.order(1).l.headOption.exists(_.id == ident.id)
    }

  // True when `ident` is a direct argument of the operator call named `op`.
  def hasParentOperator(ident: Identifier, op: String): Boolean =
    parentCall(ident).exists(_.name == op)

  output.append("Uninitialized Read Analysis\n")
  output.append("=" * 60 + "\n\n")

  // Collect all methods, optionally filtered by file
  val allMethods = if (fileFilter.nonEmpty) {
    val pattern = pathBoundaryRegex(fileFilter)
    cpg.method.filter(m => m.file.name.headOption.exists(_.matches(pattern))).l
  } else {
    cpg.method.l
  }

  // Keep only methods that have a body block
  val candidateMethods = allMethods.filter(m => m.block.nonEmpty).l

  if (candidateMethods.isEmpty) {
    output.append("No methods found in the codebase.\n")
  } else {
    output.append(s"Analyzing ${candidateMethods.size} method(s) for uninitialized reads...\n\n")

    // (file, methodName, varName, varType, declLine, useLine, useCode, confidence, reason)
    val issues = mutable.ListBuffer[(String, String, String, String, Int, Int, String, String, String)]()

    candidateMethods.foreach { method =>
      val methName = method.name
      val methFile = method.file.name.headOption.getOrElse("unknown")

      // One pass over the method's assignments: LHS code -> line of its first assignment.
      // Computed once per method instead of once per local variable.
      val firstAssignLineByLhs: Map[String, Int] =
        method.call
          .nameExact(assignmentOp)
          .l
          .flatMap { call =>
            val lhsCode = call.argument.order(1).l.headOption.map(_.code.trim).getOrElse("")
            call.lineNumber.map(line => lhsCode -> line)
          }
          .groupBy(_._1)
          .map { case (lhsCode, pairs) => lhsCode -> pairs.map(_._2).min }

      method.local.l.foreach { local =>
        val varName  = local.name
        val varType  = local.typeFullName
        val declLine = local.lineNumber.getOrElse(-1)

        // Only array-typed locals are skipped (they are covered by the stack overflow
        // analysis). There is no special handling for pointer or static locals.
        val isArray = varType.matches(".*\\[\\d*\\].*")
        if (!isArray) {
          // Every identifier occurrence of this name inside the method.
          val idents = method.ast.isIdentifier.nameExact(varName).l

          val firstAssignLine: Option[Int] = firstAssignLineByLhs.get(varName)

          // First line on which the variable's address is taken directly (`&x`).
          val firstAddressOfLine: Option[Int] =
            idents
              .filter(hasParentOperator(_, addressOfOp))
              .flatMap(ident => ident.lineNumber.toList)
              .sorted
              .headOption

          // Earliest point at which the variable may have received a value.
          val firstInitLine: Option[Int] =
            (firstAssignLine.toList ++ firstAddressOfLine.toList).sorted.headOption

          // Value reads: everything except assignment targets and operands of & / sizeof,
          // none of which load the variable's value.
          val reads = idents.filterNot { ident =>
            isAssignmentTarget(ident) ||
            hasParentOperator(ident, addressOfOp) ||
            hasParentOperator(ident, sizeOfOp)
          }

          reads.foreach { ident =>
            val readLine = ident.lineNumber.getOrElse(-1)
            // No initialization point at all => every read is suspect.
            val precedesInit = firstInitLine.forall(initLn => readLine < initLn)

            if (readLine > 0 && declLine > 0 && precedesInit) {
              // Parent expression code gives the reader some context; capped at 80 chars.
              val parentCode = ident.astParent.code.trim
              val stmtCode =
                if (parentCode.length > 80) parentCode.take(77) + "..." else parentCode

              val confidence = "HIGH"
              val reason = firstInitLine match {
                case None =>
                  "Variable declared but never assigned before use"
                case Some(initLn) if firstAssignLine.contains(initLn) =>
                  s"Read at line $readLine precedes first assignment at line $initLn"
                case Some(initLn) =>
                  s"Read at line $readLine precedes first address-taken use (possible out-parameter initialization) at line $initLn"
              }

              issues += ((methFile, methName, varName, varType, declLine, readLine, stmtCode, confidence, reason))
            }
          }
        }
      }
    }

    // Deduplicate and sort by file + method + read line
    val dedupIssues = issues.toList.distinct.sortBy(i => (i._1, i._2, i._6))

    if (dedupIssues.isEmpty) {
      output.append("No uninitialized read issues detected.\n")
      output.append("\nNote: This analysis looks for:\n")
      output.append("  - Local variables that are read before any explicit assignment\n")
      output.append("  - Local variables declared but never assigned (used with garbage value)\n")
      output.append("\nFiltered out:\n")
      output.append("  - Fixed-size array declarations (tracked by stack overflow analysis)\n")
      output.append("  - Identifier reads that are the direct LHS of an assignment\n")
      output.append("  - Identifiers used as the direct operand of & (address-of) or sizeof\n")
    } else {
      output.append(s"Found ${dedupIssues.size} potential uninitialized read issue(s):\n\n")

      dedupIssues.take(maxResults).zipWithIndex.foreach { case ((file, meth, varName, varType, declLine, readLine, stmtCode, confidence, reason), idx) =>
        output.append(s"--- Issue ${idx + 1} ---\n")
        output.append(s"Confidence: $confidence\n")
        output.append(s"CWE: CWE-457 (Use of Uninitialized Variable)\n")
        output.append(s"Variable: $varName ($varType)\n")
        output.append(s"Declared: $file:$declLine in $meth()\n")
        output.append(s"Read at:  $file:$readLine\n")
        output.append(s"Context:  $stmtCode\n")
        output.append(s"Reason:   $reason\n")
        output.append("\n")
      }

      if (dedupIssues.size > maxResults)
        output.append(s"(Showing $maxResults of ${dedupIssues.size} issues. Use limit parameter to see more.)\n\n")

      output.append(s"Total: ${dedupIssues.size} potential uninitialized read issue(s) found\n")
    }
  }

  "<codebadger_result>\n" + output.toString() + "</codebadger_result>"
}

src/tools/taint_analysis_tools.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1814,4 +1814,85 @@ def _execute():
18141814
return f"Validation Error: {str(e)}"
18151815
except Exception as e:
18161816
logger.error(f"Unexpected error detecting TOCTOU: {e}", exc_info=True)
1817+
return f"Internal Error: {str(e)}"
1818+
1819+
@mcp.tool(
    description="""Detect uninitialized variable reads (CWE-457) — variables used before assignment.

Analyzes the codebase for local variables that are read before they have been
explicitly assigned a value, or that are declared but never assigned at all.
Reading an uninitialized variable causes undefined behavior in C/C++ and can
lead to information disclosure, incorrect control flow, or memory corruption.

Detection strategy:
1. Find every local variable declaration in each function
2. Locate the first explicit assignment to that variable (if any)
3. Flag any read of the variable that precedes the first assignment, or any
   read of a variable that is never assigned

Args:
    codebase_hash: The codebase hash from generate_cpg.
    filename: Optional file name or trailing path to filter results (e.g., 'parser.c').
        Matched literally against the end of each file path, not as a regex.
    limit: Maximum results to return (default 100).
    timeout: Query timeout in seconds (default 240).

Returns:
    Human-readable text showing each potential uninitialized read with:
    - Variable name, type, and declaration location
    - Line where the uninitialized read occurs
    - Surrounding code context
    - Reason (never assigned vs. read before first assignment)

Examples:
    find_uninitialized_reads(codebase_hash="abc")
    find_uninitialized_reads(codebase_hash="abc", filename="parser.c")""",
)
def find_uninitialized_reads(
    codebase_hash: Annotated[str, Field(description="The codebase hash from generate_cpg")],
    filename: Annotated[
        Optional[str],
        Field(description="Optional file name or trailing path (matched literally) to filter results"),
    ] = None,
    limit: Annotated[int, Field(description="Maximum results to return")] = 100,
    timeout: Annotated[int, Field(description="Query timeout in seconds")] = 240,
) -> str:
    """Detect uninitialized variable reads (CWE-457) in the codebase.

    Loads the ``uninitialized_read`` query template, runs it against the CPG of
    ``codebase_hash`` and returns the query's text report. Failures are returned
    as strings ("Validation Error: ...", "Internal Error: ...", "Error: ...")
    rather than raised.
    """
    try:
        validate_codebase_hash(codebase_hash)

        # The query template places the filename inside a double-quoted Scala string
        # literal, so reject characters that could terminate or escape that literal.
        # NOTE(review): QueryLoader may already escape substituted values — confirm;
        # this check is kept as defense in depth either way.
        if filename and any(ch in filename for ch in ('"', "\\", "\n", "\r")):
            raise ValidationError("filename must not contain quotes, backslashes, or line breaks")

        codebase_tracker = services["codebase_tracker"]
        query_executor = services["query_executor"]

        codebase_info = codebase_tracker.get_codebase(codebase_hash)
        if not codebase_info or not codebase_info.cpg_path:
            raise ValidationError(f"CPG not found for codebase {codebase_hash}. Generate it first using generate_cpg.")

        # Parameters that influence the result; passed to the cache helper below.
        cache_params = {"filename": filename, "limit": limit}

        def _execute():
            # An empty filename means "no file filter" in the query template.
            query = QueryLoader.load(
                "uninitialized_read",
                filename=filename or "",
                limit=limit,
            )
            result = query_executor.execute_query(
                codebase_hash=codebase_hash,
                cpg_path=codebase_info.cpg_path,
                query=query,
                timeout=timeout,
            )
            if not result.success:
                return f"Error: {result.error}"
            # The executor may hand back the report as a string or as a list whose
            # first element is the report.
            if isinstance(result.data, str):
                return result.data.strip()
            elif isinstance(result.data, list) and len(result.data) > 0:
                output = result.data[0] if isinstance(result.data[0], str) else str(result.data[0])
                return output.strip()
            else:
                return f"Query returned unexpected format: {type(result.data)}"

        return _cached_taint_query(services, "find_uninitialized_reads", codebase_hash, cache_params, _execute)

    except ValidationError as e:
        logger.error(f"Error detecting uninitialized reads: {e}")
        return f"Validation Error: {str(e)}"
    except Exception as e:
        logger.error(f"Unexpected error detecting uninitialized reads: {e}", exc_info=True)
        return f"Internal Error: {str(e)}"

0 commit comments

Comments
 (0)