Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 27 additions & 13 deletions sast-engine/cmd/ci.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,9 +267,32 @@ Examples:
logger.Debug("Skipping test files (use --skip-tests=false to include)")
}

// Open the analysis index (shared by the Python and Go builders), then
// build the call graph. The index persists the Python FQN surface as a
// side effect; opening it is best-effort and never blocks the scan.
var analysisCache *builder.AnalysisCache
if enableDBCache, _ := cmd.Flags().GetBool("enable-db-cache"); enableDBCache {
indexPath, _ := cmd.Flags().GetString("index-path")
rebuildIndex, _ := cmd.Flags().GetBool("rebuild-index")
var cacheErr error
analysisCache, cacheErr = builder.OpenAnalysisCacheWithOptions(builder.CacheOptions{
ProjectRoot: projectPath,
IndexPath: indexPath,
EngineVersion: Version,
ForceRebuild: rebuildIndex,
})
if cacheErr != nil {
logger.Warning("Could not open analysis cache: %v. Running full analysis instead.", cacheErr)
analysisCache = nil
} else {
logger.Debug("Analysis index: %s", analysisCache.DBPath())
defer analysisCache.Close()
}
}

// Build callgraph
logger.StartProgress("Building callgraph", -1)
cg, err := builder.BuildCallGraph(codeGraph, moduleRegistry, projectPath, logger)
cg, err := builder.BuildCallGraphWithCache(codeGraph, moduleRegistry, projectPath, logger, analysisCache)
logger.FinishProgress()
if err != nil {
analytics.ReportEventWithProperties(analytics.CIFailed, map[string]any{
Expand All @@ -294,18 +317,7 @@ Examples:
builder.InitGoStdlibLoader(goRegistry, projectPath, logger)
goTypeEngine := resolution.NewGoTypeInferenceEngine(goRegistry)

enableDBCache, _ := cmd.Flags().GetBool("enable-db-cache")
var analysisCache *builder.AnalysisCache
if enableDBCache {
var cacheErr error
analysisCache, cacheErr = builder.OpenAnalysisCache(projectPath)
if cacheErr != nil {
logger.Warning("Could not open analysis cache: %v — running full analysis", cacheErr)
} else {
defer analysisCache.Close()
}
}

// Reuses the analysis cache opened above for the call-graph build.
goCG, err := builder.BuildGoCallGraph(codeGraph, goRegistry, goTypeEngine, logger, analysisCache)
if err != nil {
logger.Warning("Failed to build Go call graph: %v", err)
Expand Down Expand Up @@ -560,5 +572,7 @@ func init() {
ciCmd.Flags().Bool("pr-comment", false, "Post summary comment on the pull request")
ciCmd.Flags().Bool("pr-inline", false, "Post inline review comments for critical/high findings")
ciCmd.Flags().Bool("enable-db-cache", false, "Enable SQLite-backed incremental analysis cache (experimental)")
ciCmd.Flags().String("index-path", "", "Override the analysis index location (default $HOME/.codepathfinder/<project-hash>.sqlite; also reads CODEPATHFINDER_INDEX_PATH)")
ciCmd.Flags().Bool("rebuild-index", false, "Drop and rebuild the analysis index before scanning")
ciCmd.MarkFlagRequired("project")
}
47 changes: 25 additions & 22 deletions sast-engine/cmd/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,9 +246,31 @@ Examples:
logger.Debug("Skipping test files (use --skip-tests=false to include)")
}

// Step 3: Build callgraph
// Step 3: Open the analysis index (shared by the Python and Go builders),
// then build the call graph. The index persists the Python FQN surface as
// a side effect; opening it is best-effort and never blocks the scan.
var analysisCache *builder.AnalysisCache
if enableDBCache, _ := cmd.Flags().GetBool("enable-db-cache"); enableDBCache {
indexPath, _ := cmd.Flags().GetString("index-path")
rebuildIndex, _ := cmd.Flags().GetBool("rebuild-index")
var cacheErr error
analysisCache, cacheErr = builder.OpenAnalysisCacheWithOptions(builder.CacheOptions{
ProjectRoot: projectPath,
IndexPath: indexPath,
EngineVersion: Version,
ForceRebuild: rebuildIndex,
})
if cacheErr != nil {
logger.Warning("Could not open analysis cache: %v. Running full analysis instead.", cacheErr)
analysisCache = nil
} else {
logger.Debug("Analysis index: %s", analysisCache.DBPath())
defer analysisCache.Close()
}
}

logger.StartProgress("Building callgraph", -1)
cg, err := builder.BuildCallGraph(codeGraph, moduleRegistry, projectPath, logger)
cg, err := builder.BuildCallGraphWithCache(codeGraph, moduleRegistry, projectPath, logger, analysisCache)
logger.FinishProgress()
if err != nil {
analytics.ReportEventWithProperties(analytics.ScanFailed, map[string]any{
Expand Down Expand Up @@ -278,26 +300,7 @@ Examples:

goTypeEngine := resolution.NewGoTypeInferenceEngine(goRegistry)

enableDBCache, _ := cmd.Flags().GetBool("enable-db-cache")
var analysisCache *builder.AnalysisCache
if enableDBCache {
indexPath, _ := cmd.Flags().GetString("index-path")
rebuildIndex, _ := cmd.Flags().GetBool("rebuild-index")
var cacheErr error
analysisCache, cacheErr = builder.OpenAnalysisCacheWithOptions(builder.CacheOptions{
ProjectRoot: projectPath,
IndexPath: indexPath,
EngineVersion: Version,
ForceRebuild: rebuildIndex,
})
if cacheErr != nil {
logger.Warning("Could not open analysis cache: %v. Running full analysis instead.", cacheErr)
} else {
logger.Debug("Analysis index: %s", analysisCache.DBPath())
defer analysisCache.Close()
}
}

// Reuses the analysis cache opened above for the call-graph build.
goCG, err := builder.BuildGoCallGraph(codeGraph, goRegistry, goTypeEngine, logger, analysisCache)
if err != nil {
logger.Warning("Failed to build Go call graph: %v", err)
Expand Down
3 changes: 3 additions & 0 deletions sast-engine/graph/callgraph/builder/analysis_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ const (
pass4Version = "1"
fqnIndexVersion = "1"
callSitesVersion = "1"
indexedFilesVersion = "1"
)

// currentSchemaVersion is the global on-disk schema version. It is written into
Expand Down Expand Up @@ -219,6 +220,7 @@ var tableVersions = []tableVersion{
{"pass4_version", pass4Version, "pass4_results"},
{"fqn_index_version", fqnIndexVersion, "fqn_index"},
{"call_sites_version", callSitesVersion, "call_sites"},
{"indexed_files_version", indexedFilesVersion, "indexed_files"},
}

// reconcileVersions decides how much cached data to discard, assuming the
Expand Down Expand Up @@ -358,6 +360,7 @@ func stampMeta(db *sql.DB, opts CacheOptions) error {
"pass4_version": pass4Version,
"fqn_index_version": fqnIndexVersion,
"call_sites_version": callSitesVersion,
"indexed_files_version": indexedFilesVersion,
}
if opts.EngineVersion != "" {
stamps[metaEngineVersion] = opts.EngineVersion
Expand Down
23 changes: 23 additions & 0 deletions sast-engine/graph/callgraph/builder/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,18 @@ func getOptimalWorkerCount() int {
// edges: {"myapp.views.get_user": ["myapp.utils.sanitize"]}
// reverseEdges: {"myapp.utils.sanitize": ["myapp.views.get_user"]}
// callSites: {"myapp.views.get_user": [CallSite{Target: "sanitize", ...}]}
//
// BuildCallGraph is the cache-free entry point: it produces the call graph
// without persisting an FQN index. Callers that have not opted into the
// analysis cache (serve, the integration wrappers, tests) use this form.
func BuildCallGraph(codeGraph *graph.CodeGraph, registry *core.ModuleRegistry, projectRoot string, logger *output.Logger) (*core.CallGraph, error) {
	// A nil cache makes BuildCallGraphWithCache skip the index write.
	var noCache *AnalysisCache
	return BuildCallGraphWithCache(codeGraph, registry, projectRoot, logger, noCache)
}

// BuildCallGraphWithCache builds the call graph and, when cache is non-nil,
// writes every discovered Python definition into the persisted fqn_index as a
// side effect of the same analysis. The in-memory call graph it returns is
// identical whether or not a cache is supplied.
func BuildCallGraphWithCache(codeGraph *graph.CodeGraph, registry *core.ModuleRegistry, projectRoot string, logger *output.Logger, cache *AnalysisCache) (*core.CallGraph, error) {
callGraph := core.NewCallGraph()

// Initialize import map cache for performance
Expand Down Expand Up @@ -473,6 +484,18 @@ func BuildCallGraph(codeGraph *graph.CodeGraph, registry *core.ModuleRegistry, p
callGraph.ThirdPartyRemote = typeEngine.ThirdPartyRemote
callGraph.StdlibRemote = typeEngine.StdlibRemote

// Persist the Python FQN index as a side effect of the same analysis. This
// is best-effort: a write failure is logged but never fails the scan, since
// the in-memory call graph is already complete and correct.
if cache != nil {
entries, files := collectPythonFqnEntries(callGraph, codeGraph, registry)
if err := cache.ReplacePythonFqnIndex(entries, files); err != nil {
logger.Warning("Failed to persist Python FQN index: %v", err)
} else {
logger.Debug("Persisted %d Python FQN entries across %d files", len(entries), len(files))
}
}

return callGraph, nil
}

Expand Down
56 changes: 56 additions & 0 deletions sast-engine/graph/callgraph/builder/file_mtime.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package builder

import (
"context"
"database/sql"
"errors"
"fmt"
"os"
)

// IndexedFile pairs a file path with the modification time that file had when
// its definitions were written into fqn_index. A later query compares the
// stored value against the file's current mtime to detect staleness.
type IndexedFile struct {
	// Path identifies the indexed file.
	Path string
	// ModTimeUnix is the file's mtime at index time, in Unix seconds.
	ModTimeUnix int64
}

// GetIndexedFileMtime looks up the mtime that was recorded for path when it was
// indexed.
//
// The boolean separates "no row" from failure: a file that has never been
// indexed yields (0, false, nil), while a SQL error yields (0, false, err).
func (c *AnalysisCache) GetIndexedFileMtime(path string) (int64, bool, error) {
	const query = `SELECT indexed_at_mtime FROM indexed_files WHERE file_path=?`

	var indexedAt int64
	row := c.db.QueryRowContext(context.Background(), query, path)
	switch scanErr := row.Scan(&indexedAt); {
	case scanErr == nil:
		return indexedAt, true, nil
	case errors.Is(scanErr, sql.ErrNoRows):
		// Absence of a row is an expected outcome, not an error.
		return 0, false, nil
	default:
		return 0, false, fmt.Errorf("analysis cache: read indexed mtime for %s: %w", path, scanErr)
	}
}

// IsStale reports whether a file must be re-indexed before a query trusts its
// fqn_index rows. A file is stale when it has never been indexed or when its
// current mtime differs from the indexed mtime in either direction: an mtime
// that moved backwards (a restore from backup, a timestamp-preserving copy or
// archive extraction) still means the content may no longer match the index.
//
// When os.Stat fails (the file was deleted or is unreadable) IsStale returns
// (true, err): the stored rows can no longer be trusted, but the caller decides
// whether to re-parse (it cannot, for a deleted file) or drop the rows. A
// failure reading the stored mtime is likewise returned as (true, err).
//
// NOTE: mtimes are compared at one-second resolution, so an edit that lands in
// the same second as the index stamp is not detected.
func (c *AnalysisCache) IsStale(path string) (bool, error) {
	info, err := os.Stat(path)
	if err != nil {
		return true, fmt.Errorf("analysis cache: stat %s: %w", path, err)
	}
	indexedMtime, exists, err := c.GetIndexedFileMtime(path)
	if err != nil {
		return true, err
	}
	if !exists {
		return true, nil
	}
	// Any mismatch invalidates the rows; "newer than" alone would keep trusting
	// the index after the file was replaced by a version with an older mtime.
	return info.ModTime().Unix() != indexedMtime, nil
}
83 changes: 83 additions & 0 deletions sast-engine/graph/callgraph/builder/file_mtime_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
package builder

import (
"os"
"path/filepath"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// TestGetIndexedFileMtime_NeverIndexed checks that looking up a path with no
// indexed_files row reports "not found" rather than an error.
func TestGetIndexedFileMtime_NeverIndexed(t *testing.T) {
	const unseen = "/never/seen.py"

	c := openTempCache(t)
	_, found, lookupErr := c.GetIndexedFileMtime(unseen)

	require.NoError(t, lookupErr)
	assert.False(t, found)
}

// TestIsStale_NeverIndexed checks that a file present on disk but absent from
// the index is reported stale without an error.
func TestIsStale_NeverIndexed(t *testing.T) {
	c := openTempCache(t)

	target := filepath.Join(t.TempDir(), "fresh.py")
	writeErr := os.WriteFile(target, []byte("x = 1\n"), 0o644)
	require.NoError(t, writeErr)

	isStale, staleErr := c.IsStale(target)
	require.NoError(t, staleErr)
	assert.True(t, isStale, "a file never indexed is stale")
}

// TestIsStale_FreshThenTouched walks one file through both states: fresh right
// after being indexed at its current mtime, then stale once its mtime advances.
func TestIsStale_FreshThenTouched(t *testing.T) {
	c := openTempCache(t)

	target := filepath.Join(t.TempDir(), "m.py")
	require.NoError(t, os.WriteFile(target, []byte("x = 1\n"), 0o644))

	// Stamp the index with the file's current mtime.
	fi, statErr := os.Stat(target)
	require.NoError(t, statErr)
	stamp := []IndexedFile{{Path: target, ModTimeUnix: fi.ModTime().Unix()}}
	require.NoError(t, c.ReplacePythonFqnIndex(nil, stamp))

	isStale, staleErr := c.IsStale(target)
	require.NoError(t, staleErr)
	assert.False(t, isStale, "a file indexed at its current mtime is fresh")

	// Move the mtime two seconds ahead so it clears the one-second granularity
	// of the stored stamp.
	later := time.Now().Add(2 * time.Second)
	require.NoError(t, os.Chtimes(target, later, later))

	isStale, staleErr = c.IsStale(target)
	require.NoError(t, staleErr)
	assert.True(t, isStale, "a file touched after indexing is stale")
}

// TestIsStale_DeletedFile checks that a path which cannot be stat'ed is
// reported stale and that the stat error is surfaced to the caller.
func TestIsStale_DeletedFile(t *testing.T) {
	cache := openTempCache(t)

	// A never-created path inside a fresh temp dir is guaranteed to be missing,
	// so the test does not depend on what exists on the host filesystem.
	missing := filepath.Join(t.TempDir(), "deleted.py")

	stale, err := cache.IsStale(missing)
	assert.True(t, stale, "a missing file is reported stale")
	assert.Error(t, err, "stat failure is surfaced so the caller can drop the rows")
}

// TestGetIndexedFileMtime_ClosedDBErrors checks that a lookup against a cache
// whose database has been closed returns an error.
func TestGetIndexedFileMtime_ClosedDBErrors(t *testing.T) {
	c, openErr := OpenAnalysisCache(t.TempDir())
	require.NoError(t, openErr)

	closeErr := c.Close()
	require.NoError(t, closeErr)

	_, _, lookupErr := c.GetIndexedFileMtime("/a.py")
	assert.Error(t, lookupErr)
}

// TestIsStale_DBErrorPropagates checks the failure path where the file exists
// but the stored mtime cannot be read: IsStale must return stale together with
// the error.
func TestIsStale_DBErrorPropagates(t *testing.T) {
	root := t.TempDir()
	c, openErr := OpenAnalysisCache(root)
	require.NoError(t, openErr)

	target := filepath.Join(root, "live.py")
	writeErr := os.WriteFile(target, []byte("x = 1\n"), 0o644)
	require.NoError(t, writeErr)

	// Closing the cache makes the mtime lookup fail while stat still succeeds.
	closeErr := c.Close()
	require.NoError(t, closeErr)

	isStale, staleErr := c.IsStale(target)
	assert.True(t, isStale)
	assert.Error(t, staleErr, "a DB error during staleness check must surface, defaulting to stale")
}
Loading
Loading