|
{
  import io.shiftleft.codepropertygraph.generated.nodes._
  import io.shiftleft.semanticcpg.language._

  // Uninitialized-read analysis (CWE-457), line-number based heuristic.
  //
  // For every local variable of every candidate method, the script reports
  // identifier reads whose line number is lower than the line of the first
  // "initialization point" of that variable (or every read, when there is no
  // initialization point at all). An initialization point is either
  //   - an `<operator>.assignment` call whose first argument's code equals the
  //     variable name, or
  //   - an `<operator>.addressOf` applied to the variable (the variable may be
  //     written through the resulting pointer, e.g. `scanf("%d", &x)`).
  //
  // The comparison is purely line based; it is not control-flow sensitive.
  // The value of this block is the report text wrapped in <codebadger_result> tags.

  // Template parameters; the `{{...}}` placeholders are substituted before the script runs.
  val fileFilter = "{{filename}}"
  val maxResults = {{limit}}

  val assignmentOp = "<operator>.assignment"
  val addressOfOp  = "<operator>.addressOf"
  val sizeOfOp     = "<operator>.sizeOf"

  /** One reported read of a local variable before its first initialization point. */
  final case class Issue(
    file: String,
    method: String,
    varName: String,
    varType: String,
    declLine: Int,
    readLine: Int,
    context: String,
    confidence: String,
    reason: String
  )

  val output = new StringBuilder()

  /** Builds a regex matching `f` either as the whole path or as a suffix that
    * starts right after a `/`. `f` is quoted, so it is matched literally.
    */
  def pathBoundaryRegex(f: String): String = {
    val escaped = java.util.regex.Pattern.quote(f)
    "(^|.*/)" + escaped + "$"
  }

  /** Name of the call that is the direct AST parent of `ident`, if the parent is a call. */
  def parentCallName(ident: Identifier): Option[String] = ident.astParent match {
    case c: Call => Some(c.name)
    case _       => None
  }

  /** True when `ident` is the first argument (the LHS) of a plain assignment. */
  def isAssignmentTarget(ident: Identifier): Boolean = ident.astParent match {
    case c: Call if c.name == assignmentOp =>
      c.argument.order(1).l.headOption.exists(_.id == ident.id)
    case _ => false
  }

  /** True when `ident` is the operand of address-of. */
  def isAddressTaken(ident: Identifier): Boolean =
    parentCallName(ident).contains(addressOfOp)

  /** True for occurrences that do not read the variable's value: `&x` and `sizeof(x)`. */
  def isNonValueUse(ident: Identifier): Boolean =
    parentCallName(ident).exists(n => n == addressOfOp || n == sizeOfOp)

  /** Shortens statement code to at most 80 characters for display. */
  def truncate(code: String): String =
    if (code.length > 80) code.take(77) + "..." else code

  /** Collects all issues for one method, in local-declaration then identifier order. */
  def analyzeMethod(method: Method): List[Issue] = {
    val methName = method.name
    val methFile = method.file.name.headOption.getOrElse("unknown")

    // Collected once per method instead of once per local variable.
    val assignmentCalls = method.call.nameExact(assignmentOp).l
    val identsByName    = method.ast.isIdentifier.l.groupBy(_.name)

    method.local.l.flatMap { local =>
      val varName  = local.name
      val varType  = local.typeFullName
      val declLine = local.lineNumber.getOrElse(-1)

      // Arrays are skipped (the report text attributes them to the stack overflow analysis).
      val isArray = varType.matches(".*\\[\\d*\\].*")
      // Static locals are zero-initialized in C. Best effort: this only takes
      // effect if the frontend keeps the storage class in the local's code —
      // TODO confirm for the frontend in use.
      val isStatic = local.code.split("\\s+").contains("static")

      // Function parameters need no special handling here: only `method.local`
      // nodes are inspected.
      if (isArray || isStatic || declLine <= 0) Nil
      else {
        val idents = identsByName.getOrElse(varName, Nil)

        val assignLines = assignmentCalls
          .filter(c => c.argument.order(1).l.headOption.map(_.code.trim).getOrElse("") == varName)
          .flatMap(_.lineNumber.toList)
        val addrLines = idents.filter(isAddressTaken).flatMap(_.lineNumber.toList)

        // Earliest initialization point with a label for the report. The sort is
        // stable, so an assignment wins over an address-of on the same line.
        val firstInit: Option[(Int, String)] =
          (assignLines.map(_ -> "assignment") ++ addrLines.map(_ -> "address-of use"))
            .sortBy(_._1)
            .headOption

        idents
          .filterNot(i => isAssignmentTarget(i) || isNonValueUse(i))
          .flatMap { ident =>
            ident.lineNumber
              .filter(readLine => readLine > 0 && firstInit.forall(_._1 > readLine))
              .map { readLine =>
                val reason = firstInit match {
                  case None =>
                    "Variable declared but never assigned before use"
                  case Some((initLine, kind)) =>
                    s"Read at line $readLine precedes first $kind at line $initLine"
                }
                // Parent node code gives the surrounding expression for context.
                val context = truncate(ident.astParent.code.trim)
                Issue(methFile, methName, varName, varType, declLine, readLine, context, "HIGH", reason)
              }
              .toList
          }
      }
    }
  }

  output.append("Uninitialized Read Analysis\n")
  output.append("=" * 60 + "\n\n")

  // Collect all methods, optionally filtered by file
  val allMethods = if (fileFilter.nonEmpty) {
    val pattern = pathBoundaryRegex(fileFilter)
    cpg.method.filter(m => m.file.name.headOption.exists(_.matches(pattern))).l
  } else {
    cpg.method.l
  }

  // Keep only methods that have a block
  val candidateMethods = allMethods.filter(m => m.block.nonEmpty)

  if (candidateMethods.isEmpty) {
    // Tell the user when the filter, not the codebase, is the reason for the empty result.
    if (fileFilter.nonEmpty)
      output.append(s"No methods found matching file filter '$fileFilter'.\n")
    else
      output.append("No methods found in the codebase.\n")
  } else {
    output.append(s"Analyzing ${candidateMethods.size} method(s) for uninitialized reads...\n\n")

    // Deduplicate and sort by file + method + read line
    val dedupIssues = candidateMethods
      .flatMap(analyzeMethod)
      .distinct
      .sortBy(i => (i.file, i.method, i.readLine))

    if (dedupIssues.isEmpty) {
      output.append("No uninitialized read issues detected.\n")
      output.append("\nNote: This analysis looks for:\n")
      output.append("  - Local variables that are read before any explicit assignment\n")
      output.append("  - Local variables declared but never assigned (used with garbage value)\n")
      output.append("\nFiltered out:\n")
      output.append("  - Fixed-size array declarations (tracked by stack overflow analysis)\n")
      output.append("  - Identifier reads that are the direct LHS of an assignment\n")
      output.append("  - Address-of (&x) and sizeof(x) uses; &x counts as a possible initialization\n")
      output.append("  - Static locals (zero-initialized), when detectable\n")
    } else {
      output.append(s"Found ${dedupIssues.size} potential uninitialized read issue(s):\n\n")

      dedupIssues.take(maxResults).zipWithIndex.foreach { case (issue, idx) =>
        output.append(s"--- Issue ${idx + 1} ---\n")
        output.append(s"Confidence: ${issue.confidence}\n")
        output.append(s"CWE: CWE-457 (Use of Uninitialized Variable)\n")
        output.append(s"Variable: ${issue.varName} (${issue.varType})\n")
        output.append(s"Declared: ${issue.file}:${issue.declLine} in ${issue.method}()\n")
        output.append(s"Read at: ${issue.file}:${issue.readLine}\n")
        output.append(s"Context: ${issue.context}\n")
        output.append(s"Reason: ${issue.reason}\n")
        output.append("\n")
      }

      if (dedupIssues.size > maxResults)
        output.append(s"(Showing $maxResults of ${dedupIssues.size} issues. Use limit parameter to see more.)\n\n")

      output.append(s"Total: ${dedupIssues.size} potential uninitialized read issue(s) found\n")
    }
  }

  "<codebadger_result>\n" + output.toString() + "</codebadger_result>"
}
0 commit comments