diff --git a/sast-engine/cmd/ci.go b/sast-engine/cmd/ci.go index 86b0b7c2..98c62f4b 100644 --- a/sast-engine/cmd/ci.go +++ b/sast-engine/cmd/ci.go @@ -221,7 +221,7 @@ Examples: }) logger.FinishProgress() if len(codeGraph.Nodes) == 0 { - logger.Progress("No source files found in project") + reportEmptyProject(logger, codeGraph.ProjectStats) } else { logger.Statistic("Code graph built: %d nodes", len(codeGraph.Nodes)) } diff --git a/sast-engine/cmd/empty_project.go b/sast-engine/cmd/empty_project.go new file mode 100644 index 00000000..e46df3d6 --- /dev/null +++ b/sast-engine/cmd/empty_project.go @@ -0,0 +1,56 @@ +package cmd + +import ( + "fmt" + "strings" + + "github.com/shivasurya/code-pathfinder/sast-engine/graph" + "github.com/shivasurya/code-pathfinder/sast-engine/output" +) + +// emptyProjectTopLanguages caps the "Detected:" line so a polyglot repo with +// twenty extensions doesn't produce an unreadable scroll of language counts. +const emptyProjectTopLanguages = 5 + +// reportEmptyProject prints a user-facing explanation of why the scan ended +// up with zero analyzable files. Always visible (verbosity-independent) so a +// user pointed at the wrong directory or running pathfinder on a repo we +// don't support yet sees something concrete instead of a successful but +// silent run. +func reportEmptyProject(logger *output.Logger, stats graph.ProjectStats) { + logger.Info(emptyProjectMessage(stats)) +} + +// emptyProjectMessage formats the multi-line explanation. Split out from +// reportEmptyProject so unit tests can assert the exact output without +// piping a logger through a buffer. +func emptyProjectMessage(stats graph.ProjectStats) string { + var sb strings.Builder + unsupported := stats.UnsupportedFileCount() + + switch { + case stats.TotalFiles == 0: + // Walk found nothing: empty directory, or everything was filtered + // out by skip-dirs (vendor/, node_modules/, ...) and --exclude. + sb.WriteString("No files to analyze. Project directory is empty (after applying skip and exclude rules).") + + case unsupported > 0: + // Mixed or all-unsupported: this is the pathfinder-api case. + // "Scanned 0 of 47 files (47 unsupported)." + fmt.Fprintf(&sb, "Scanned 0 of %d files (%d unsupported).\n", stats.TotalFiles, unsupported) + if summary := stats.UnsupportedSummary(emptyProjectTopLanguages); summary != "" { + fmt.Fprintf(&sb, "Detected: %s\n", summary) + } + fmt.Fprintf(&sb, "Supported: %s\n", strings.Join(graph.SupportedLanguages(), ", ")) + sb.WriteString("\nNo files to analyze. (Pathfinder doesn't analyze these languages yet.)") + + default: + // Files existed and were all in supported languages, but the + // parsers still produced zero graph nodes (e.g. every file + // failed to parse, or the only files were tests excluded via + // --skip-tests). Surface the count so the user knows the walk + // did happen. + fmt.Fprintf(&sb, "Scanned %d file(s) but no analyzable content was produced.", stats.TotalFiles) + } + return sb.String() +} diff --git a/sast-engine/cmd/empty_project_test.go b/sast-engine/cmd/empty_project_test.go new file mode 100644 index 00000000..e1ed9fd4 --- /dev/null +++ b/sast-engine/cmd/empty_project_test.go @@ -0,0 +1,103 @@ +package cmd + +import ( + "bytes" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/shivasurya/code-pathfinder/sast-engine/graph" + "github.com/shivasurya/code-pathfinder/sast-engine/output" +) + +func TestEmptyProjectMessage_TotallyEmptyDir(t *testing.T) { + msg := emptyProjectMessage(graph.ProjectStats{}) + assert.Equal(t, "No files to analyze. Project directory is empty (after applying skip and exclude rules).", msg) +} + +func TestEmptyProjectMessage_OnlySupportedFilesButZeroNodes(t *testing.T) { + // Files existed, all in supported languages, but graph ended empty + // (e.g. every file failed to parse). Headline should reflect that + // the walk did happen, distinct from the "empty directory" case. + stats := graph.ProjectStats{ + TotalFiles: 3, + ScannedFiles: 3, + ByLanguage: map[string]int{"Java": 3}, + } + msg := emptyProjectMessage(stats) + assert.Equal(t, "Scanned 3 file(s) but no analyzable content was produced.", msg) +} + +func TestEmptyProjectMessage_OnlyUnsupportedFiles_MatchesDesignSpec(t *testing.T) { + // Matches the example from the brainstorm: + // Scanned 0 of 47 files (47 unsupported). + // Detected: TypeScript (32), JavaScript (8), JSON (5), Markdown (2) + // Supported: Java, Python, Go, C/C++, Dockerfile, docker-compose + // + // No files to analyze. (Pathfinder doesn't analyze these languages yet.) + stats := graph.ProjectStats{ + TotalFiles: 47, + ScannedFiles: 0, + ByLanguage: map[string]int{ + "TypeScript": 32, + "JavaScript": 8, + "JSON": 5, + "Markdown": 2, + }, + } + msg := emptyProjectMessage(stats) + want := "Scanned 0 of 47 files (47 unsupported).\n" + + "Detected: TypeScript (32), JavaScript (8), JSON (5), Markdown (2)\n" + + "Supported: Java, Python, Go, C/C++, Dockerfile, docker-compose\n" + + "\nNo files to analyze. (Pathfinder doesn't analyze these languages yet.)" + assert.Equal(t, want, msg) +} + +func TestEmptyProjectMessage_MixedButGraphEmpty(t *testing.T) { + // Walk found 50 files: 5 supported (parsed but produced 0 nodes) and 45 + // unsupported. Treated as "files unsupported > 0" branch since the user + // still has a noisy unsupported population to know about. + stats := graph.ProjectStats{ + TotalFiles: 50, + ScannedFiles: 5, + ByLanguage: map[string]int{ + "Java": 5, + "TypeScript": 45, + }, + } + msg := emptyProjectMessage(stats) + assert.Contains(t, msg, "Scanned 0 of 50 files (45 unsupported).") + assert.Contains(t, msg, "Detected: TypeScript (45)") + assert.Contains(t, msg, "Supported: Java, Python, Go, C/C++, Dockerfile, docker-compose") +} + +func TestEmptyProjectMessage_FilesExistButNoneCategorised(t *testing.T) { + // Walk saw N files but every one was a binary blob or had an + // unrecognised extension (languageOf returns ""). ScannedFiles is 0 + // and UnsupportedFileCount is also 0 because nothing got bucketed. + // Falls through to the default branch, which still surfaces that the + // walk happened. + stats := graph.ProjectStats{TotalFiles: 3} + msg := emptyProjectMessage(stats) + assert.Equal(t, "Scanned 3 file(s) but no analyzable content was produced.", msg) +} + +func TestReportEmptyProject_AlwaysVisibleAtDefaultVerbosity(t *testing.T) { + // Logger.Info must print regardless of verbosity (unlike Progress + // which is gated to verbose+). Verify by piping a logger through a + // buffer at the lowest verbosity and checking the output is non-empty. + var buf bytes.Buffer + logger := output.NewLoggerWithWriter(output.VerbosityDefault, &buf) + stats := graph.ProjectStats{ + TotalFiles: 1, + ScannedFiles: 0, + ByLanguage: map[string]int{"TypeScript": 1}, + } + reportEmptyProject(logger, stats) + got := buf.String() + assert.True(t, strings.Contains(got, "Scanned 0 of 1 files"), + "reportEmptyProject must write at default verbosity, got: %q", got) + assert.True(t, strings.Contains(got, "Supported: Java"), + "reportEmptyProject must include Supported line, got: %q", got) +} diff --git a/sast-engine/cmd/scan.go b/sast-engine/cmd/scan.go index 309ff171..7c4899ba 100644 --- a/sast-engine/cmd/scan.go +++ b/sast-engine/cmd/scan.go @@ -193,13 +193,16 @@ Examples: }) logger.FinishProgress() if len(codeGraph.Nodes) == 0 { - analytics.ReportEventWithProperties(analytics.ScanFailed, map[string]any{ - "error_type": "empty_project", - "phase": "graph_building", - }) - return fmt.Errorf("no source files found in project") + // No supported source under projectPath. Fall through to the + // formatter step so a valid (empty) output document is still + // written: downstream consumers like cpf-executor read the + // JSON regardless of finding count, and treating this as a + // hard error misclassifies "repo we don't analyze yet" as a + // scanner failure. + reportEmptyProject(logger, codeGraph.ProjectStats) + } else { + logger.Statistic("Code graph built: %d nodes", len(codeGraph.Nodes)) } - logger.Statistic("Code graph built: %d nodes", len(codeGraph.Nodes)) // Step 1.5: Execute container rules if Docker/Compose files are present var containerDetections []*dsl.EnrichedDetection diff --git a/sast-engine/graph/graph_test.go b/sast-engine/graph/graph_test.go index 7c519ebd..852bd3d9 100644 --- a/sast-engine/graph/graph_test.go +++ b/sast-engine/graph/graph_test.go @@ -226,7 +226,7 @@ func TestGetFiles(t *testing.T) { } // Run getFiles - files, err := getFiles(tempDir, nil) + files, _, err := getFiles(tempDir, nil) if err != nil { t.Fatalf("getFiles returned an error: %v", err) } @@ -258,7 +258,7 @@ func TestGetFilesEmptyDirectory(t *testing.T) { } defer os.RemoveAll(tempDir) - files, err := getFiles(tempDir, nil) + files, _, err := getFiles(tempDir, nil) if err != nil { t.Fatalf("getFiles returned an error: %v", err) } @@ -270,7 +270,7 @@ func TestGetFilesEmptyDirectory(t *testing.T) { func TestGetFilesNonExistentDirectory(t *testing.T) { nonExistentDir := "/path/to/non/existent/directory" - _, err := getFiles(nonExistentDir, nil) + _, _, err := getFiles(nonExistentDir, nil) if err == nil { t.Error("Expected an error for non-existent directory, but got nil") } @@ -297,7 +297,7 @@ func TestGetFilesWithSymlinks(t *testing.T) { t.Fatalf("Failed to create symlink: %v", err) } - files, err := getFiles(tempDir, nil) + files, _, err := getFiles(tempDir, nil) if err != nil { t.Fatalf("getFiles returned an error: %v", err) } @@ -992,7 +992,7 @@ func TestGetFilesMixedLanguages(t *testing.T) { } // Run getFiles - files, err := getFiles(tempDir, nil) + files, _, err := getFiles(tempDir, nil) if err != nil { t.Fatalf("getFiles returned an error: %v", err) } @@ -1412,7 +1412,7 @@ func TestGetFilesIncludesGo(t *testing.T) { } } - files, err := getFiles(tempDir, nil) + files, _, err := getFiles(tempDir, nil) if err != nil { t.Fatalf("getFiles returned an error: %v", err) } @@ -1468,7 +1468,7 @@ func TestGetFilesSkipsVendor(t *testing.T) { t.Fatalf("Failed to create vendor file: %v", err) } - files, err := getFiles(tempDir, nil) + files, _, err := getFiles(tempDir, nil) if err != nil { t.Fatalf("getFiles returned an error: %v", err) } @@ -1499,7 +1499,7 @@ func TestGetFilesSkipsTestdata(t *testing.T) { t.Fatalf("Failed to create testdata file: %v", err) } - files, err := getFiles(tempDir, nil) + files, _, err := getFiles(tempDir, nil) if err != nil { t.Fatalf("getFiles returned an error: %v", err) } @@ -1530,7 +1530,7 @@ func TestGetFilesSkipsUnderscoreDirs(t *testing.T) { t.Fatalf("Failed to create underscore dir file: %v", err) } - files, err := getFiles(tempDir, nil) + files, _, err := getFiles(tempDir, nil) if err != nil { t.Fatalf("getFiles returned an error: %v", err) } diff --git a/sast-engine/graph/initialize.go b/sast-engine/graph/initialize.go index 008ff5c6..b51b6a23 100644 --- a/sast-engine/graph/initialize.go +++ b/sast-engine/graph/initialize.go @@ -38,7 +38,8 @@ func Initialize(directory string, callbacks *ProgressCallbacks) *CodeGraph { if callbacks != nil { excludePatterns = callbacks.ExcludePatterns } - files, err := getFiles(directory, excludePatterns) + files, stats, err := getFiles(directory, excludePatterns) + codeGraph.ProjectStats = stats if err != nil { //nolint:all Log("Directory not found:", err) diff --git a/sast-engine/graph/project_stats.go b/sast-engine/graph/project_stats.go new file mode 100644 index 00000000..8a567919 --- /dev/null +++ b/sast-engine/graph/project_stats.go @@ -0,0 +1,197 @@ +package graph + +import ( + "fmt" + "path/filepath" + "sort" + "strings" +) + +// ProjectStats summarises the file population that a single getFiles walk +// observed. It is exposed on CodeGraph so callers (cmd/scan, cmd/ci) can +// render a meaningful empty-state message when the graph ended up with zero +// parseable nodes, instead of exiting with a generic "no source files" error. +// +// Counts only include files that survived the always-skipped directories +// (vendor, node_modules, .git, etc.) and the user's --exclude prefixes; the +// goal is files the user expected pathfinder to look at, not every regular +// file on disk. +type ProjectStats struct { + // TotalFiles is the count of every regular file walked, after applying + // the skip-directory list and --exclude patterns. + TotalFiles int + // ScannedFiles is the count of files routed to one of the supported + // tree-sitter parsers. Equals len(files) returned from getFiles. + ScannedFiles int + // ByLanguage is a language-display-name → file-count map covering every + // file walked, both supported and unsupported. Files in formats + // pathfinder cannot classify (binaries, lock files, dotfiles) are + // omitted entirely rather than bucketed as "Other"; this keeps the + // downstream "Detected: …" line honest about what's actually source. + ByLanguage map[string]int +} + +// supportedLanguageNames lists the display names of the languages this build +// of pathfinder can analyse, in stable presentation order. The values must +// match the keys produced by languageOf for supported files so the +// "Detected:" / "Supported:" split renders consistently. +var supportedLanguageNames = []string{ + "Java", "Python", "Go", "C/C++", "Dockerfile", "docker-compose", +} + +// SupportedLanguages returns a copy of the supported-language display names +// for use in user-facing messages. +func SupportedLanguages() []string { + out := make([]string, len(supportedLanguageNames)) + copy(out, supportedLanguageNames) + return out +} + +// recordFile increments TotalFiles, and ScannedFiles when supported is true, +// and bumps the per-language count if the file maps to a known language. +func (s *ProjectStats) recordFile(path string, supported bool) { + if s.ByLanguage == nil { + s.ByLanguage = make(map[string]int) + } + s.TotalFiles++ + if supported { + s.ScannedFiles++ + } + if lang := languageOf(path); lang != "" { + s.ByLanguage[lang]++ + } +} + +// languageOf returns the human-readable language name for path, or "" if the +// file should not be counted at all (binary blobs, lock files, anything we +// have no opinion about). Both supported and unsupported languages return a +// non-empty name so the "Detected:" line can show every language present. +func languageOf(path string) string { + base := strings.ToLower(filepath.Base(path)) + ext := filepath.Ext(base) + + if strings.HasPrefix(base, "dockerfile") { + return "Dockerfile" + } + if strings.Contains(base, "docker-compose") && (ext == ".yml" || ext == ".yaml") { + return "docker-compose" + } + + switch ext { + case ".java": + return "Java" + case ".py", ".pyi": + return "Python" + case ".go": + return "Go" + case ".c", ".h": + return "C/C++" + case ".cpp", ".cc", ".cxx", ".hpp", ".hh", ".hxx": + return "C/C++" + case ".ts", ".tsx": + return "TypeScript" + case ".js", ".jsx", ".mjs", ".cjs": + return "JavaScript" + case ".rb": + return "Ruby" + case ".php": + return "PHP" + case ".rs": + return "Rust" + case ".kt", ".kts": + return "Kotlin" + case ".swift": + return "Swift" + case ".scala": + return "Scala" + case ".cs": + return "C#" + case ".sh", ".bash", ".zsh": + return "Shell" + case ".yml", ".yaml": + return "YAML" + case ".json": + return "JSON" + case ".xml": + return "XML" + case ".toml": + return "TOML" + case ".md", ".markdown": + return "Markdown" + case ".html", ".htm": + return "HTML" + case ".css", ".scss", ".sass", ".less": + return "CSS" + case ".sql": + return "SQL" + case ".proto": + return "Protobuf" + case ".tf": + return "Terraform" + } + return "" +} + +// UnsupportedFileCount returns the total number of detected files whose +// language is not in the supported set. Used for the headline number in the +// "Scanned 0 of N files (M unsupported)" line. +func (s ProjectStats) UnsupportedFileCount() int { + if len(s.ByLanguage) == 0 { + return 0 + } + supported := supportedSet() + var n int + for name, count := range s.ByLanguage { + if _, ok := supported[name]; !ok { + n += count + } + } + return n +} + +// UnsupportedSummary returns the top-N unsupported languages formatted as +// "TypeScript (32), JavaScript (8), JSON (5), Markdown (2)". Returns "" if +// no unsupported files were detected. A topN of 0 or less means unbounded. +// Ties are broken by language name (alphabetical) for deterministic output. +func (s ProjectStats) UnsupportedSummary(topN int) string { + if len(s.ByLanguage) == 0 { + return "" + } + supported := supportedSet() + type pair struct { + name string + count int + } + rows := make([]pair, 0, len(s.ByLanguage)) + for name, count := range s.ByLanguage { + if _, ok := supported[name]; ok { + continue + } + rows = append(rows, pair{name, count}) + } + if len(rows) == 0 { + return "" + } + sort.SliceStable(rows, func(i, j int) bool { + if rows[i].count != rows[j].count { + return rows[i].count > rows[j].count + } + return rows[i].name < rows[j].name + }) + if topN > 0 && len(rows) > topN { + rows = rows[:topN] + } + parts := make([]string, 0, len(rows)) + for _, r := range rows { + parts = append(parts, fmt.Sprintf("%s (%d)", r.name, r.count)) + } + return strings.Join(parts, ", ") +} + +func supportedSet() map[string]struct{} { + m := make(map[string]struct{}, len(supportedLanguageNames)) + for _, n := range supportedLanguageNames { + m[n] = struct{}{} + } + return m +} diff --git a/sast-engine/graph/project_stats_test.go b/sast-engine/graph/project_stats_test.go new file mode 100644 index 00000000..90550b00 --- /dev/null +++ b/sast-engine/graph/project_stats_test.go @@ -0,0 +1,238 @@ +package graph + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +// --- languageOf ----------------------------------------------------------- + +func TestLanguageOf_SupportedExtensions(t *testing.T) { + cases := map[string]string{ + "/repo/Main.java": "Java", + "/repo/app.py": "Python", + "/repo/types.pyi": "Python", + "/repo/main.go": "Go", + "/repo/main.c": "C/C++", + "/repo/main.h": "C/C++", + "/repo/main.cpp": "C/C++", + "/repo/main.cc": "C/C++", + "/repo/main.cxx": "C/C++", + "/repo/main.hpp": "C/C++", + "/repo/main.hh": "C/C++", + "/repo/main.hxx": "C/C++", + "/repo/Dockerfile": "Dockerfile", + "/repo/Dockerfile.dev": "Dockerfile", + "/repo/dockerfile": "Dockerfile", + "/repo/docker-compose.yml": "docker-compose", + "/repo/docker-compose.yaml": "docker-compose", + "/repo/docker-compose.prod.yml": "docker-compose", + } + for path, want := range cases { + t.Run(path, func(t *testing.T) { + assert.Equal(t, want, languageOf(path)) + }) + } +} + +func TestLanguageOf_UnsupportedExtensions(t *testing.T) { + cases := map[string]string{ + "/repo/app.ts": "TypeScript", + "/repo/app.tsx": "TypeScript", + "/repo/app.js": "JavaScript", + "/repo/app.jsx": "JavaScript", + "/repo/app.mjs": "JavaScript", + "/repo/app.cjs": "JavaScript", + "/repo/app.rb": "Ruby", + "/repo/app.php": "PHP", + "/repo/app.rs": "Rust", + "/repo/App.kt": "Kotlin", + "/repo/build.kts": "Kotlin", + "/repo/App.swift": "Swift", + "/repo/App.scala": "Scala", + "/repo/App.cs": "C#", + "/repo/run.sh": "Shell", + "/repo/run.bash": "Shell", + "/repo/run.zsh": "Shell", + "/repo/conf.yaml": "YAML", + "/repo/conf.yml": "YAML", + "/repo/conf.json": "JSON", + "/repo/conf.xml": "XML", + "/repo/conf.toml": "TOML", + "/repo/README.md": "Markdown", + "/repo/index.html": "HTML", + "/repo/index.htm": "HTML", + "/repo/styles.css": "CSS", + "/repo/styles.scss": "CSS", + "/repo/q.sql": "SQL", + "/repo/api.proto": "Protobuf", + "/repo/main.tf": "Terraform", + } + for path, want := range cases { + t.Run(path, func(t *testing.T) { + assert.Equal(t, want, languageOf(path)) + }) + } +} + +func TestLanguageOf_Uncounted(t *testing.T) { + // Extensions we deliberately don't bucket so the "Detected:" line stays + // honest about what's actually source code. + cases := []string{ + "/repo/binary", // no extension + "/repo/image.png", // binary + "/repo/package-lock.json", // technically JSON; still rendered as JSON + "/repo/Makefile", // no opinion + "/repo/.gitignore", // dotfile, no opinion + } + for _, path := range cases { + t.Run(path, func(t *testing.T) { + got := languageOf(path) + // "package-lock.json" is intentionally still JSON; everything + // else is empty. + if path == "/repo/package-lock.json" { + assert.Equal(t, "JSON", got) + } else { + assert.Equal(t, "", got) + } + }) + } +} + +func TestLanguageOf_DockerfileMustBePrefix(t *testing.T) { + // "Dockerfile" prefix match — "Dockerfilexyz" still matches. That's + // fine because the actual file walker only feeds in real Dockerfile + // variants; the test pins the rule explicitly so a future refactor + // doesn't tighten it unintentionally without us noticing. + assert.Equal(t, "Dockerfile", languageOf("/repo/dockerfilexyz")) +} + +func TestLanguageOf_DockerComposeRequiresYamlExt(t *testing.T) { + // "docker-compose.json" or "docker-compose.txt" should NOT match the + // supported docker-compose bucket. + assert.Equal(t, "JSON", languageOf("/repo/docker-compose.json")) + assert.Equal(t, "", languageOf("/repo/docker-compose.txt")) +} + +// --- SupportedLanguages ---------------------------------------------------- + +func TestSupportedLanguages_ReturnsCopy(t *testing.T) { + a := SupportedLanguages() + b := SupportedLanguages() + assert.Equal(t, a, b) + // Mutate a; b should be unaffected. + a[0] = "MUTATED" + assert.NotEqual(t, a[0], b[0], "SupportedLanguages must return a defensive copy") +} + +func TestSupportedLanguages_Contents(t *testing.T) { + got := SupportedLanguages() + assert.Equal(t, []string{"Java", "Python", "Go", "C/C++", "Dockerfile", "docker-compose"}, got) +} + +// --- ProjectStats.recordFile ----------------------------------------------- + +func TestProjectStats_RecordFile_SupportedAndUnsupported(t *testing.T) { + var s ProjectStats + s.recordFile("/repo/Main.java", true) + s.recordFile("/repo/app.ts", false) + s.recordFile("/repo/conf.json", false) + s.recordFile("/repo/binary", false) // not counted into ByLanguage + + assert.Equal(t, 4, s.TotalFiles) + assert.Equal(t, 1, s.ScannedFiles) + assert.Equal(t, map[string]int{ + "Java": 1, + "TypeScript": 1, + "JSON": 1, + }, s.ByLanguage) +} + +func TestProjectStats_RecordFile_NilMapInitialised(t *testing.T) { + var s ProjectStats + assert.Nil(t, s.ByLanguage) + s.recordFile("/repo/foo.java", true) + assert.NotNil(t, s.ByLanguage, "first recordFile must initialise the map") +} + +// --- ProjectStats.UnsupportedFileCount ------------------------------------ + +func TestProjectStats_UnsupportedFileCount_Empty(t *testing.T) { + var s ProjectStats + assert.Equal(t, 0, s.UnsupportedFileCount()) +} + +func TestProjectStats_UnsupportedFileCount_AllSupported(t *testing.T) { + s := ProjectStats{ByLanguage: map[string]int{"Java": 5, "Python": 3, "Go": 2}} + assert.Equal(t, 0, s.UnsupportedFileCount()) +} + +func TestProjectStats_UnsupportedFileCount_MixedAndUnsupported(t *testing.T) { + s := ProjectStats{ByLanguage: map[string]int{ + "Java": 5, + "TypeScript": 32, + "JavaScript": 8, + "JSON": 5, + }} + assert.Equal(t, 45, s.UnsupportedFileCount()) +} + +// --- ProjectStats.UnsupportedSummary --------------------------------------- + +func TestProjectStats_UnsupportedSummary_Empty(t *testing.T) { + var s ProjectStats + assert.Equal(t, "", s.UnsupportedSummary(5)) +} + +func TestProjectStats_UnsupportedSummary_OnlySupported(t *testing.T) { + s := ProjectStats{ByLanguage: map[string]int{"Java": 5}} + assert.Equal(t, "", s.UnsupportedSummary(5)) +} + +func TestProjectStats_UnsupportedSummary_SortedByCountDesc(t *testing.T) { + s := ProjectStats{ByLanguage: map[string]int{ + "TypeScript": 32, + "JavaScript": 8, + "JSON": 5, + "Markdown": 2, + }} + assert.Equal(t, "TypeScript (32), JavaScript (8), JSON (5), Markdown (2)", s.UnsupportedSummary(0)) +} + +func TestProjectStats_UnsupportedSummary_TopNCap(t *testing.T) { + s := ProjectStats{ByLanguage: map[string]int{ + "TypeScript": 32, + "JavaScript": 8, + "JSON": 5, + "Markdown": 2, + "YAML": 1, + }} + assert.Equal(t, "TypeScript (32), JavaScript (8), JSON (5)", s.UnsupportedSummary(3)) +} + +func TestProjectStats_UnsupportedSummary_NegativeTopNMeansUnbounded(t *testing.T) { + s := ProjectStats{ByLanguage: map[string]int{"TypeScript": 1, "Ruby": 1, "Rust": 1}} + // 3 entries, all unsupported, topN <= 0 → all shown + got := s.UnsupportedSummary(-1) + assert.Contains(t, got, "TypeScript (1)") + assert.Contains(t, got, "Ruby (1)") + assert.Contains(t, got, "Rust (1)") +} + +func TestProjectStats_UnsupportedSummary_TiesBrokenAlphabetically(t *testing.T) { + s := ProjectStats{ByLanguage: map[string]int{ + "Ruby": 3, + "Rust": 3, + "TypeScript": 3, + }} + assert.Equal(t, "Ruby (3), Rust (3), TypeScript (3)", s.UnsupportedSummary(0)) +} + +func TestProjectStats_UnsupportedSummary_ExcludesSupported(t *testing.T) { + s := ProjectStats{ByLanguage: map[string]int{ + "Java": 100, // supported, must not appear + "TypeScript": 1, + }} + assert.Equal(t, "TypeScript (1)", s.UnsupportedSummary(0)) +} diff --git a/sast-engine/graph/types.go b/sast-engine/graph/types.go index a3e2ae31..763660c0 100644 --- a/sast-engine/graph/types.go +++ b/sast-engine/graph/types.go @@ -84,4 +84,8 @@ type Edge struct { type CodeGraph struct { Nodes map[string]*Node Edges []*Edge + // ProjectStats summarises what the file walk saw. Set by Initialize. + // Callers read it to render meaningful empty-state messages when + // len(Nodes) == 0 (e.g. "Detected: TypeScript (32), JavaScript (8)"). + ProjectStats ProjectStats } diff --git a/sast-engine/graph/utils.go b/sast-engine/graph/utils.go index bce445cc..4d872a46 100644 --- a/sast-engine/graph/utils.go +++ b/sast-engine/graph/utils.go @@ -252,14 +252,20 @@ func extractMethodName(node *sitter.Node, sourceCode []byte, filepath string) (s return methodName, methodID } -// getFiles walks through a directory and returns all source files (Java, Python, Go, C/C++, Dockerfile, docker-compose). +// getFiles walks through a directory and returns all source files (Java, Python, Go, C/C++, Dockerfile, docker-compose) +// along with a ProjectStats summary of every file the walk observed (both +// supported and unsupported). +// // It skips vendor/, testdata/, node_modules/, .git/, common C/C++ build artifact directories, -// directories starting with "_", and any path covered by excludePatterns. +// directories starting with "_", and any path covered by excludePatterns. Files inside skipped +// directories are not counted in ProjectStats either: the stats reflect "files the user expected +// pathfinder to look at," not every regular file on disk. // // excludePatterns is a list of normalized, repo-relative path prefixes (no leading or trailing slash). // A path is skipped when its repo-relative form starts with "/", or equals the prefix exactly. -func getFiles(directory string, excludePatterns []string) ([]string, error) { +func getFiles(directory string, excludePatterns []string) ([]string, ProjectStats, error) { var files []string + var stats ProjectStats err := filepath.Walk(directory, func(path string, info os.FileInfo, err error) error { if err != nil { return err @@ -298,24 +304,31 @@ func getFiles(directory string, excludePatterns []string) ([]string, error) { base := filepath.Base(path) baseLower := strings.ToLower(base) + var supported bool switch { case ext == ".java" || ext == ".py" || ext == ".go": files = append(files, path) + supported = true case ext == ".c" || ext == ".h": files = append(files, path) + supported = true case ext == ".cpp" || ext == ".cc" || ext == ".cxx" || ext == ".hpp" || ext == ".hh" || ext == ".hxx": files = append(files, path) + supported = true case strings.HasPrefix(baseLower, "dockerfile"): // Match Dockerfile, Dockerfile.dev, dockerfile, etc. files = append(files, path) + supported = true case strings.Contains(baseLower, "docker-compose") && (ext == ".yml" || ext == ".yaml"): // Match docker-compose.yml, docker-compose.yaml, etc. files = append(files, path) + supported = true } + stats.recordFile(path, supported) return nil }) - return files, err + return files, stats, err } // isExcludedPath reports whether relPath (forward-slash, repo-relative) is covered diff --git a/sast-engine/graph/utils_test.go b/sast-engine/graph/utils_test.go index 18ac9e97..ebb34944 100644 --- a/sast-engine/graph/utils_test.go +++ b/sast-engine/graph/utils_test.go @@ -233,7 +233,7 @@ func TestGetFilesComprehensive(t *testing.T) { } // Test getFiles - files, err := getFiles(tmpDir, nil) + files, _, err := getFiles(tmpDir, nil) if err != nil { t.Fatalf("getFiles failed: %v", err) } @@ -298,7 +298,7 @@ func TestGetFilesIncludesCAndCpp(t *testing.T) { } } - got, err := getFiles(dir, nil) + got, _, err := getFiles(dir, nil) if err != nil { t.Fatalf("getFiles: %v", err) } @@ -361,7 +361,7 @@ func TestGetFilesErrors(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - files, err := getFiles(tt.directory, nil) + files, _, err := getFiles(tt.directory, nil) if tt.wantError && err == nil { t.Error("Expected error but got none") @@ -769,7 +769,7 @@ func TestGetFilesWithExcludePatterns(t *testing.T) { } } - got, err := getFiles(dir, []string{"rules"}) + got, _, err := getFiles(dir, []string{"rules"}) if err != nil { t.Fatalf("getFiles: %v", err) } @@ -815,7 +815,7 @@ func TestGetFilesExcludeIndividualFile(t *testing.T) { } } - got, err := getFiles(dir, []string{"src/skip_me.py"}) + got, _, err := getFiles(dir, []string{"src/skip_me.py"}) if err != nil { t.Fatalf("getFiles: %v", err) } @@ -832,6 +832,132 @@ func TestGetFilesExcludeIndividualFile(t *testing.T) { } } +// --- ProjectStats integration coverage on getFiles ----------------------- + +// TestGetFiles_ProjectStats_OnlyUnsupported exercises the empty-graph +// branch's input: a directory where every regular file is in a language +// pathfinder doesn't analyze. ScannedFiles must be 0, TotalFiles must equal +// the number of real files seen, ByLanguage must bucket them. +func TestGetFiles_ProjectStats_OnlyUnsupported(t *testing.T) { + dir := t.TempDir() + files := []string{ + "src/app.ts", + "src/util.ts", + "src/index.js", + "README.md", + "package.json", + } + for _, rel := range files { + full := filepath.Join(dir, rel) + if err := os.MkdirAll(filepath.Dir(full), 0755); err != nil { + t.Fatalf("mkdir: %v", err) + } + if err := os.WriteFile(full, []byte("// stub\n"), 0644); err != nil { + t.Fatalf("write: %v", err) + } + } + + got, stats, err := getFiles(dir, nil) + if err != nil { + t.Fatalf("getFiles: %v", err) + } + if len(got) != 0 { + t.Errorf("expected zero scanned files, got %d (%v)", len(got), got) + } + if stats.ScannedFiles != 0 { + t.Errorf("ScannedFiles = %d, want 0", stats.ScannedFiles) + } + if stats.TotalFiles != len(files) { + t.Errorf("TotalFiles = %d, want %d", stats.TotalFiles, len(files)) + } + if got, want := stats.ByLanguage["TypeScript"], 2; got != want { + t.Errorf("TypeScript = %d, want %d", got, want) + } + if got, want := stats.ByLanguage["JavaScript"], 1; got != want { + t.Errorf("JavaScript = %d, want %d", got, want) + } + if got, want := stats.ByLanguage["Markdown"], 1; got != want { + t.Errorf("Markdown = %d, want %d", got, want) + } + if got, want := stats.ByLanguage["JSON"], 1; got != want { + t.Errorf("JSON = %d, want %d", got, want) + } +} + +// TestGetFiles_ProjectStats_Mixed verifies stats when supported and +// unsupported files coexist. ScannedFiles should reflect only supported. +func TestGetFiles_ProjectStats_Mixed(t *testing.T) { + dir := t.TempDir() + files := []string{ + "Main.java", // supported + "app/api.py", // supported + "web/index.ts", // unsupported + "web/styles.css", // unsupported + } + for _, rel := range files { + full := filepath.Join(dir, rel) + if err := os.MkdirAll(filepath.Dir(full), 0755); err != nil { + t.Fatalf("mkdir: %v", err) + } + if err := os.WriteFile(full, []byte("// stub\n"), 0644); err != nil { + t.Fatalf("write: %v", err) + } + } + got, stats, err := getFiles(dir, nil) + if err != nil { + t.Fatalf("getFiles: %v", err) + } + if len(got) != 2 { + t.Errorf("len(got) = %d, want 2", len(got)) + } + if stats.ScannedFiles != 2 { + t.Errorf("ScannedFiles = %d, want 2", stats.ScannedFiles) + } + if stats.TotalFiles != 4 { + t.Errorf("TotalFiles = %d, want 4", stats.TotalFiles) + } + if stats.UnsupportedFileCount() != 2 { + t.Errorf("UnsupportedFileCount = %d, want 2", stats.UnsupportedFileCount()) + } +} + +// TestGetFiles_ProjectStats_SkipDirsNotCounted confirms files inside the +// always-skipped directories (node_modules, vendor, .git, ...) do NOT +// inflate the stats, even though the walker descends into the parent path. +func TestGetFiles_ProjectStats_SkipDirsNotCounted(t *testing.T) { + dir := t.TempDir() + for _, rel := range []string{ + "src/app.ts", + "node_modules/foo/index.js", + "node_modules/foo/package.json", + "vendor/bar/lib.go", + ".git/HEAD", + } { + full := filepath.Join(dir, rel) + if err := os.MkdirAll(filepath.Dir(full), 0755); err != nil { + t.Fatalf("mkdir: %v", err) + } + if err := os.WriteFile(full, []byte("stub"), 0644); err != nil { + t.Fatalf("write: %v", err) + } + } + _, stats, err := getFiles(dir, nil) + if err != nil { + t.Fatalf("getFiles: %v", err) + } + // Only src/app.ts should be counted. + if stats.TotalFiles != 1 { + t.Errorf("TotalFiles = %d, want 1 (everything else lives under skip dirs)", stats.TotalFiles) + } + if stats.ByLanguage["TypeScript"] != 1 { + t.Errorf("TypeScript = %d, want 1", stats.ByLanguage["TypeScript"]) + } + if _, ok := stats.ByLanguage["JavaScript"]; ok { + t.Error("JavaScript must not be counted (lives in node_modules)") + } +} + + func BenchmarkGenerateMethodID(b *testing.B) { params := []string{"int", "String", "Object"} for i := 0; i < b.N; i++ { diff --git a/sast-engine/output/logger.go b/sast-engine/output/logger.go index 025f9603..fa12e23b 100644 --- a/sast-engine/output/logger.go +++ b/sast-engine/output/logger.go @@ -76,6 +76,13 @@ func (l *Logger) Debug(format string, args ...any) { } } +// Info logs informational messages that should always be visible to the +// user, regardless of verbosity. No prefix. Use for high-signal status that +// isn't a warning (e.g. the "no source files found" empty-project block). +func (l *Logger) Info(format string, args ...any) { + fmt.Fprintf(l.writer, format+"\n", args...) +} + // Warning logs warnings (always shown). func (l *Logger) Warning(format string, args ...any) { fmt.Fprintf(l.writer, "Warning: %s\n", fmt.Sprintf(format, args...)) diff --git a/sast-engine/output/logger_test.go b/sast-engine/output/logger_test.go index 045a8219..190f69f5 100644 --- a/sast-engine/output/logger_test.go +++ b/sast-engine/output/logger_test.go @@ -131,6 +131,22 @@ func TestLoggerWarningAlwaysShown(t *testing.T) { } } +func TestLoggerInfoAlwaysShown(t *testing.T) { + // Info is unconditional output (used for empty-project explanations + // where the user must see the message regardless of verbosity). Verify + // it prints at every level, with no "Info:" or other prefix. + verbosities := []VerbosityLevel{VerbosityDefault, VerbosityVerbose, VerbosityDebug} + for _, v := range verbosities { + var buf bytes.Buffer + l := NewLoggerWithWriter(v, &buf) + l.Info("hello %s", "world") + got := buf.String() + if got != "hello world\n" { + t.Errorf("verbosity %v: expected %q, got %q", v, "hello world\n", got) + } + } +} + func TestLoggerErrorAlwaysShown(t *testing.T) { verbosities := []VerbosityLevel{VerbosityDefault, VerbosityVerbose, VerbosityDebug}