Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions sast-engine/cmd/ci.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ Examples:
debug, _ := cmd.Flags().GetBool("debug")
failOnStr, _ := cmd.Flags().GetString("fail-on")
skipTests, _ := cmd.Flags().GetBool("skip-tests")
rawExcludes, _ := cmd.Flags().GetStringArray("exclude")
baseRef, _ := cmd.Flags().GetString("base")
headRef, _ := cmd.Flags().GetString("head")
noDiff, _ := cmd.Flags().GetBool("no-diff")
Expand Down Expand Up @@ -132,6 +133,11 @@ Examples:
return fmt.Errorf("--project flag is required")
}

excludes, err := validateExcludePatterns(rawExcludes)
if err != nil {
return err
}

if outputFormat != "sarif" && outputFormat != "json" && outputFormat != "csv" {
analytics.ReportEventWithProperties(analytics.CIFailed, map[string]any{
"error_type": "validation",
Expand Down Expand Up @@ -211,6 +217,7 @@ Examples:
OnProgress: func() {
logger.UpdateProgress(1)
},
ExcludePatterns: excludes,
})
logger.FinishProgress()
if len(codeGraph.Nodes) == 0 {
Expand Down Expand Up @@ -514,6 +521,7 @@ func init() {
ciCmd.Flags().Bool("debug", false, "Show detailed debug diagnostics with file-level progress and timestamps")
ciCmd.Flags().String("fail-on", "", "Fail with exit code 1 if findings match severities (e.g., critical,high)")
ciCmd.Flags().Bool("skip-tests", true, "Skip test files (test_*.py, *_test.py, conftest.py, etc.)")
ciCmd.Flags().StringArray("exclude", nil, "Exclude files or directories from the scan. Repo-relative path prefix; repeatable. e.g. --exclude rules/ --exclude sast-engine/test-fixtures")
ciCmd.Flags().String("base", "", "Base git ref for diff-aware scanning (auto-detected in CI)")
ciCmd.Flags().String("head", "HEAD", "Head git ref for diff-aware scanning")
ciCmd.Flags().Bool("no-diff", false, "Disable diff-aware scanning (scan all files)")
Expand Down
74 changes: 74 additions & 0 deletions sast-engine/cmd/ci_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import (
"path/filepath"
"testing"

"github.com/spf13/cobra"
"github.com/spf13/pflag"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
Expand Down Expand Up @@ -448,3 +450,75 @@ func TestCICmdEnableDBCacheOpenError(t *testing.T) {
err := ciCmd.RunE(ciCmd, []string{})
require.NoError(t, err)
}

// setCIExcludeFlag overwrites the value of the "exclude" flag on cmd with values
// instead of appending to it. pflag's StringArray.Set appends after the first
// call, so the SliceValue Replace method is used to reset the flag cleanly.
// Changed is set only when values is non-empty. If the flag's value does not
// implement pflag.SliceValue the flag is left untouched.
func setCIExcludeFlag(cmd *cobra.Command, values []string) {
	excludeFlag := cmd.Flags().Lookup("exclude")
	slice, isSlice := excludeFlag.Value.(pflag.SliceValue)
	if !isSlice {
		return
	}
	slice.Replace(values)
	excludeFlag.Changed = len(values) > 0
}

// TestCICmdExcludeFlag checks that the ci command registers --exclude and that
// RunE returns the validation error for malformed exclude patterns.
func TestCICmdExcludeFlag(t *testing.T) {
	t.Run("flag is registered", func(t *testing.T) {
		registered := ciCmd.Flags().Lookup("exclude")
		require.NotNil(t, registered, "exclude flag should be registered on ci command")
	})

	// baseline lists, in application order, the flag values that put the
	// command in a state where RunE reaches validateExcludePatterns.
	baseline := []struct{ name, value string }{
		{"rules", "/tmp/fake-rules.py"},
		{"project", "/tmp/fake-project"},
		{"output", "sarif"},
		{"output-file", ""},
		{"verbose", "false"},
		{"debug", "false"},
		{"fail-on", ""},
		{"skip-tests", "true"},
		{"no-diff", "true"},
		{"base", ""},
		{"head", "HEAD"},
		{"ruleset", ""},
		{"github-token", ""},
		{"github-repo", ""},
		{"github-pr", "0"},
		{"pr-comment", "false"},
		{"pr-inline", "false"},
		{"enable-db-cache", "false"},
	}
	resetForExclude := func(t *testing.T) {
		t.Helper()
		for _, kv := range baseline {
			ciCmd.Flags().Set(kv.name, kv.value)
		}
		setCIExcludeFlag(ciCmd, nil) // clear exclude before each test
	}

	rejections := []struct {
		name    string
		pattern string
		wantMsg string
	}{
		{"absolute pattern rejected", "/etc/passwd", "no leading slash"},
		{"traversal pattern rejected", "../outside", ".."},
		{"backslash pattern rejected", "foo\\bar", "backslash"},
	}
	for _, tc := range rejections {
		t.Run(tc.name, func(t *testing.T) {
			resetForExclude(t)
			setCIExcludeFlag(ciCmd, []string{tc.pattern})
			// Leave the shared command without excludes for whatever runs next.
			defer setCIExcludeFlag(ciCmd, nil)

			err := ciCmd.RunE(ciCmd, []string{})
			require.Error(t, err)
			assert.Contains(t, err.Error(), tc.wantMsg)
		})
	}
}
70 changes: 70 additions & 0 deletions sast-engine/cmd/exclude.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package cmd

import (
"fmt"
"path/filepath"
"strings"
)

// validateExcludePatterns normalizes and validates a list of repo-relative path prefixes.
// It returns the cleaned slice on success, or an error naming the first offending pattern.
//
// A pattern is rejected if it:
//   - exceeds 512 bytes (measured with len, so multi-byte UTF-8 runes count per byte)
//   - is absolute (starts with "/")
//   - contains a backslash (only forward slashes are allowed)
//   - contains a null byte
//   - contains ".." as a path component
//
// Accepted patterns are normalized by dropping empty and "." path components,
// so "rules/", "./rules" and "rules//" all become "rules", the slash-separated
// form that isExcluded compares against. Patterns that normalize to nothing
// (for example "" or "./") are dropped rather than returned as an empty string.
// Exact duplicates (after normalization) are dropped silently, keeping the first
// occurrence, so the caller can repeat --exclude flags without bloating the
// per-file check loop.
func validateExcludePatterns(patterns []string) ([]string, error) {
	cleaned := make([]string, 0, len(patterns))
	seen := make(map[string]struct{}, len(patterns))
	for _, p := range patterns {
		if len(p) > 512 {
			return nil, fmt.Errorf("--exclude pattern too long (>512 chars): %q", p)
		}
		if strings.HasPrefix(p, "/") {
			return nil, fmt.Errorf("--exclude pattern must be repo-relative (no leading slash): %q", p)
		}
		if strings.Contains(p, "\\") {
			return nil, fmt.Errorf("--exclude pattern must use forward slashes, not backslashes: %q", p)
		}
		if strings.ContainsRune(p, 0) {
			return nil, fmt.Errorf("--exclude pattern contains null byte: %q", p)
		}

		// Walk every path component: reject "..", and drop "" (from leading,
		// trailing or doubled slashes) and "." so equivalent spellings collapse
		// to one canonical prefix.
		segs := strings.Split(p, "/")
		kept := segs[:0] // in-place filter; writes never overtake reads
		for _, seg := range segs {
			switch seg {
			case "..":
				return nil, fmt.Errorf("--exclude pattern must not contain '..': %q", p)
			case "", ".":
				continue
			}
			kept = append(kept, seg)
		}
		norm := strings.Join(kept, "/")
		if norm == "" {
			// Nothing left to match on; an empty prefix must not reach matchers.
			continue
		}
		if _, dup := seen[norm]; dup {
			continue
		}
		seen[norm] = struct{}{}
		cleaned = append(cleaned, norm)
	}
	return cleaned, nil
}

// isExcluded reports whether relPath is covered by at least one of patterns.
// relPath is converted to forward slashes before comparison. A pattern covers
// the path when the two are equal, or when the path continues with "/" right
// after the pattern; "rules" therefore covers "rules" and "rules/foo.py" but
// not "rulesx/foo.py". Empty patterns never match.
func isExcluded(relPath string, patterns []string) bool {
	target := filepath.ToSlash(relPath)
	for i := range patterns {
		prefix := patterns[i]
		switch {
		case prefix == "":
			// An empty pattern is a no-op, not a match-everything wildcard.
		case target == prefix, strings.HasPrefix(target, prefix+"/"):
			return true
		}
	}
	return false
}
162 changes: 162 additions & 0 deletions sast-engine/cmd/exclude_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
package cmd

import (
"strings"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// --- validateExcludePatterns ---

// TestValidateExcludePatterns_EmptyList checks that a nil slice and an empty
// slice both validate without error and yield no patterns.
func TestValidateExcludePatterns_EmptyList(t *testing.T) {
	for _, input := range [][]string{nil, {}} {
		result, err := validateExcludePatterns(input)
		require.NoError(t, err)
		assert.Empty(t, result)
	}
}

// TestValidateExcludePatterns_SingleValid checks that one valid pattern is
// accepted and returned without its trailing slash.
func TestValidateExcludePatterns_SingleValid(t *testing.T) {
	input := []string{"rules/"}
	want := []string{"rules"} // trailing slash must be stripped

	result, err := validateExcludePatterns(input)
	require.NoError(t, err)
	assert.Equal(t, want, result)
}

// TestValidateExcludePatterns_MultipleValid checks that several valid patterns
// are all returned, normalized, in input order.
func TestValidateExcludePatterns_MultipleValid(t *testing.T) {
	input := []string{"rules/", "sast-engine/test-fixtures"}
	want := []string{"rules", "sast-engine/test-fixtures"}

	result, err := validateExcludePatterns(input)
	require.NoError(t, err)
	assert.Equal(t, want, result)
}

// TestValidateExcludePatterns_TrailingSlashStripped checks that a trailing
// slash on a nested pattern is removed during normalization.
func TestValidateExcludePatterns_TrailingSlashStripped(t *testing.T) {
	want := []string{"foo/bar"}

	result, err := validateExcludePatterns([]string{"foo/bar/"})
	require.NoError(t, err)
	assert.Equal(t, want, result)
}

// TestValidateExcludePatterns_AbsoluteRejected checks that a pattern starting
// with "/" is refused with the "no leading slash" message.
func TestValidateExcludePatterns_AbsoluteRejected(t *testing.T) {
	absolute := []string{"/etc/passwd"}

	_, err := validateExcludePatterns(absolute)
	require.Error(t, err)
	assert.Contains(t, err.Error(), "no leading slash")
}

// TestValidateExcludePatterns_TraversalRejected checks that ".." is refused
// whether it appears as the first, a middle, or the last path component.
func TestValidateExcludePatterns_TraversalRejected(t *testing.T) {
	for _, pattern := range []string{"../secret", "foo/../bar", "foo/.."} {
		t.Run(pattern, func(t *testing.T) {
			_, err := validateExcludePatterns([]string{pattern})
			require.Error(t, err, "pattern %q should be rejected", pattern)
			assert.Contains(t, err.Error(), "..")
		})
	}
}

// TestValidateExcludePatterns_BackslashRejected checks that a pattern
// containing a backslash is refused with a message mentioning "backslash".
func TestValidateExcludePatterns_BackslashRejected(t *testing.T) {
	withBackslash := []string{"foo\\bar"}

	_, err := validateExcludePatterns(withBackslash)
	require.Error(t, err)
	assert.Contains(t, err.Error(), "backslash")
}

// TestValidateExcludePatterns_NullByteRejected checks that a pattern with an
// embedded NUL byte is refused with a message mentioning "null byte".
func TestValidateExcludePatterns_NullByteRejected(t *testing.T) {
	withNUL := []string{"foo\x00bar"}

	_, err := validateExcludePatterns(withNUL)
	require.Error(t, err)
	assert.Contains(t, err.Error(), "null byte")
}

// TestValidateExcludePatterns_TooLongRejected checks that a 513-byte pattern,
// one past the limit, is refused with a message mentioning "too long".
func TestValidateExcludePatterns_TooLongRejected(t *testing.T) {
	overLimit := []string{strings.Repeat("a", 513)}

	_, err := validateExcludePatterns(overLimit)
	require.Error(t, err)
	assert.Contains(t, err.Error(), "too long")
}

// TestValidateExcludePatterns_ExactlyMaxLengthAllowed checks the boundary: a
// pattern of exactly 512 bytes is accepted and returned unchanged.
func TestValidateExcludePatterns_ExactlyMaxLengthAllowed(t *testing.T) {
	atLimit := strings.Repeat("a", 512)
	want := []string{atLimit}

	result, err := validateExcludePatterns([]string{atLimit})
	require.NoError(t, err)
	assert.Equal(t, want, result)
}

// TestValidateExcludePatterns_UnicodeAllowed checks that non-ASCII path
// components pass validation and are returned unchanged.
func TestValidateExcludePatterns_UnicodeAllowed(t *testing.T) {
	const unicodePath = "src/testi18n/世界"

	result, err := validateExcludePatterns([]string{unicodePath})
	require.NoError(t, err)
	assert.Equal(t, []string{unicodePath}, result)
}

// TestValidateExcludePatterns_ExactDuplicatesDropped checks that a pattern
// repeated verbatim appears only once in the result.
func TestValidateExcludePatterns_ExactDuplicatesDropped(t *testing.T) {
	input := []string{"rules", "vendor", "rules"}
	want := []string{"rules", "vendor"}

	result, err := validateExcludePatterns(input)
	require.NoError(t, err)
	assert.Equal(t, want, result)
}

// TestValidateExcludePatterns_DuplicatesAfterNormalization checks that
// deduplication happens on the normalized form, not the raw input.
func TestValidateExcludePatterns_DuplicatesAfterNormalization(t *testing.T) {
	// "rules/" and "rules" both normalize to "rules"; only one survives.
	// "vendor" and "vendor/" both normalize to "vendor".
	input := []string{"rules/", "rules", "vendor", "vendor/"}
	want := []string{"rules", "vendor"}

	result, err := validateExcludePatterns(input)
	require.NoError(t, err)
	assert.Equal(t, want, result)
}

// TestValidateExcludePatterns_DedupPreservesFirstOccurrenceOrder checks that
// deduplication keeps each pattern at the position of its first occurrence.
func TestValidateExcludePatterns_DedupPreservesFirstOccurrenceOrder(t *testing.T) {
	input := []string{"c", "a", "b", "a", "c"}
	want := []string{"c", "a", "b"}

	result, err := validateExcludePatterns(input)
	require.NoError(t, err)
	assert.Equal(t, want, result)
}

// --- isExcluded ---

// TestIsExcluded_EmptyPatterns checks that nothing is excluded when the
// pattern list is nil or empty.
func TestIsExcluded_EmptyPatterns(t *testing.T) {
	const file = "rules/foo.py"

	assert.False(t, isExcluded(file, nil))
	assert.False(t, isExcluded(file, []string{}))
}

func TestIsExcluded_SinglePrefixMatch(t *testing.T) {
assert.True(t, isExcluded("rules/foo.py", []string{"rules"}))
}

func TestIsExcluded_SinglePrefixNoMatch(t *testing.T) {
// "rules" must NOT match "rulesx/foo.py" (no separator boundary).
assert.False(t, isExcluded("rulesx/foo.py", []string{"rules"}))
}

func TestIsExcluded_ExactDirMatch(t *testing.T) {
// Exact match of the directory name itself is excluded.
assert.True(t, isExcluded("rules", []string{"rules"}))
}

func TestIsExcluded_NestedPrefix(t *testing.T) {
assert.True(t, isExcluded("sast-engine/test-fixtures/java/Main.java", []string{"sast-engine/test-fixtures"}))
}

// TestIsExcluded_MultiplePatterns checks that any one of several patterns is
// enough to exclude a path, and that paths matching none are kept.
func TestIsExcluded_MultiplePatterns(t *testing.T) {
	patterns := []string{"rules", "sast-engine/test-fixtures"}

	for _, covered := range []string{"rules/owasp.py", "sast-engine/test-fixtures/x.java"} {
		assert.True(t, isExcluded(covered, patterns))
	}
	assert.False(t, isExcluded("sast-engine/cmd/scan.go", patterns))
}

// TestIsExcluded_EmptyPattern checks that an empty string in the pattern list
// is a no-op rather than a wildcard that excludes everything.
func TestIsExcluded_EmptyPattern(t *testing.T) {
	onlyEmpty := []string{""}
	assert.False(t, isExcluded("anything.py", onlyEmpty))
}

// TestIsExcluded_CaseSensitive checks that matching is case-sensitive: no
// lowercasing is applied to either the path or the pattern.
func TestIsExcluded_CaseSensitive(t *testing.T) {
	const file = "Rules/foo.py"

	assert.False(t, isExcluded(file, []string{"rules"}))
	assert.True(t, isExcluded(file, []string{"Rules"}))
}

func TestIsExcluded_WindowsPathNormalized(t *testing.T) {
// Even if relPath uses OS separator, forward-slash comparison must work.
// filepath.ToSlash is applied inside isExcluded.
assert.True(t, isExcluded("rules/foo.py", []string{"rules"}))
}
8 changes: 8 additions & 0 deletions sast-engine/cmd/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ Examples:
outputFormat, _ := cmd.Flags().GetString("output")
outputFile, _ := cmd.Flags().GetString("output-file")
skipTests, _ := cmd.Flags().GetBool("skip-tests")
rawExcludes, _ := cmd.Flags().GetStringArray("exclude")
diffAware, _ := cmd.Flags().GetBool("diff-aware")
baseRef, _ := cmd.Flags().GetString("base")
headRef, _ := cmd.Flags().GetString("head")
Expand Down Expand Up @@ -98,6 +99,11 @@ Examples:
return fmt.Errorf("--project flag is required")
}

excludes, err := validateExcludePatterns(rawExcludes)
if err != nil {
return err
}

// Setup logger with appropriate verbosity
verbosity := output.VerbosityDefault
if debug {
Expand Down Expand Up @@ -183,6 +189,7 @@ Examples:
OnProgress: func() {
logger.UpdateProgress(1)
},
ExcludePatterns: excludes,
})
logger.FinishProgress()
if len(codeGraph.Nodes) == 0 {
Expand Down Expand Up @@ -1132,6 +1139,7 @@ func init() {
scanCmd.Flags().Bool("debug", false, "Show detailed debug diagnostics with file-level progress and timestamps")
scanCmd.Flags().String("fail-on", "", "Fail with exit code 1 if findings match severities (e.g., critical,high)")
scanCmd.Flags().Bool("skip-tests", true, "Skip test files (test_*.py, *_test.py, conftest.py, etc.)")
scanCmd.Flags().StringArray("exclude", nil, "Exclude files or directories from the scan. Repo-relative path prefix; repeatable. e.g. --exclude rules/ --exclude sast-engine/test-fixtures")
scanCmd.Flags().Bool("diff-aware", false, "Enable diff-aware scanning (only report findings in changed files)")
scanCmd.Flags().String("base", "", "Base git ref for diff-aware scanning (required with --diff-aware)")
scanCmd.Flags().String("head", "HEAD", "Head git ref for diff-aware scanning")
Expand Down
Loading
Loading