Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions sast-engine/graph/callgraph/registry/c_stdlib_remote.go
Original file line number Diff line number Diff line change
Expand Up @@ -266,13 +266,19 @@ func (r *CStdlibRegistryRemote) fetchHeaderFromHTTP(entry *core.CStdlibHeaderEnt
return &h, nil
}

// headerURL constructs the URL for a per-header JSON by joining the
// loader's configured baseURL with <platform>/c/v1/<entry.File>.
//
// entry.URL is deliberately NOT consulted, even when present. The reason:
// manifests are generated against a default `--base-url` (the production
// CDN), so the embedded URLs always point at the prod CDN. Honoring
// entry.URL would silently bypass `--stdlib-base-url` overrides — every
// test, local server, or staging deploy would still hit the production
// CDN. The loader's baseURL is the single source of truth.
//
// entry.URL stays in the schema for forward compatibility (mirrors,
// CDN inspection) but the loader ignores it.
func (r *CStdlibRegistryRemote) headerURL(entry *core.CStdlibHeaderEntry) string {
	return joinURL(r.baseURL, r.platform, "c", "v1", entry.File)
}

Expand Down
26 changes: 18 additions & 8 deletions sast-engine/graph/callgraph/registry/clike_http_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,19 +229,27 @@ func TestCStdlibRegistry_HTTP_ChecksumUnsupportedFormat(t *testing.T) {
assert.Contains(t, err.Error(), "unsupported checksum format")
}

func TestCStdlibRegistry_HTTP_ManifestEmbeddedURL(t *testing.T) {
// TestCStdlibRegistry_HTTP_ManifestEmbeddedURLIgnored pins the post-bugfix
// contract: even when the manifest's per-entry URL points elsewhere, the
// loader ignores it and constructs the fetch URL from its own baseURL.
//
// The bug this guards: the generator stamps every manifest with the
// production CDN URL by default, so any `--stdlib-base-url` override (or
// local HTTP server) would silently bypass the override if entry.URL won.
func TestCStdlibRegistry_HTTP_ManifestEmbeddedURLIgnored(t *testing.T) {
f := newCFixture()
srv := serveFixture(t, f)

// Override the entry's URL to something at a different path; the loader
// must follow it instead of constructing one from baseURL.
f.manifestC.Headers[0].URL = srv.URL + "/registries/linux/c/v1/stdio_stdlib.json"
// Embed a URL on a host the test fixture doesn't even know about.
// If the loader followed entry.URL, the fetch would fail (or hang).
// The loader must ignore it and use baseURL + entry.File.
f.manifestC.Headers[0].URL = "https://nowhere.test/this/would/not/work.json"

r := NewCStdlibRegistryRemote(srv.URL+"/registries", core.PlatformLinux)
withTempCacheRoot(t, r)
require.NoError(t, r.LoadManifest(noopLogger{}))
h, err := r.GetHeader("stdio.h")
require.NoError(t, err)
require.NoError(t, err, "loader must use its own baseURL, not entry.URL")
require.Contains(t, h.Functions, "printf")
}

Expand Down Expand Up @@ -345,16 +353,18 @@ func TestCppStdlibRegistry_HTTP_ChecksumMismatch(t *testing.T) {
assert.Contains(t, err.Error(), "digest mismatch")
}

func TestCppStdlibRegistry_HTTP_ManifestEmbeddedURL(t *testing.T) {
// TestCppStdlibRegistry_HTTP_ManifestEmbeddedURLIgnored — C++ counterpart
// to the C-loader test. Same contract: entry.URL is ignored.
func TestCppStdlibRegistry_HTTP_ManifestEmbeddedURLIgnored(t *testing.T) {
f := newCFixture()
srv := serveFixture(t, f)
f.manifestCpp.Headers[0].URL = srv.URL + "/registries/linux/cpp/v1/vector_stdlib.json"
f.manifestCpp.Headers[0].URL = "https://nowhere.test/would/not/work.json"

r := NewCppStdlibRegistryRemote(srv.URL+"/registries", core.PlatformLinux)
withTempCacheRootCpp(t, r)
require.NoError(t, r.LoadManifest(noopLogger{}))
_, err := r.GetClass("vector", "std::vector")
require.NoError(t, err)
require.NoError(t, err, "loader must use its own baseURL, not entry.URL")
}

// --- helpers --------------------------------------------------------------
Expand Down
9 changes: 3 additions & 6 deletions sast-engine/graph/callgraph/registry/cpp_stdlib_remote.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,13 +213,10 @@ func (r *CppStdlibRegistryRemote) fetchHeaderFromHTTP(entry *core.CStdlibHeaderE
return &h, nil
}

// headerURL constructs the URL for a per-header JSON by joining the
// loader's configured baseURL with <platform>/cpp/v1/<entry.File>.
//
// entry.URL is deliberately ignored, even when present: honoring a
// manifest-embedded URL would let it override the loader's configured
// baseURL, so the baseURL is treated as the single source of truth. See
// the C loader's headerURL comment for the full rationale.
func (r *CppStdlibRegistryRemote) headerURL(entry *core.CStdlibHeaderEntry) string {
	return joinURL(r.baseURL, r.platform, "cpp", "v1", entry.File)
}

Expand Down
2 changes: 1 addition & 1 deletion sast-engine/tools/internal/clikeextract/normalize.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ func collapseWhitespace(s string) string {
// so we should never see it here — but the keep rule means even if we do, we
// don't accidentally drop something that's just an oddly-named keyword.
func IsPrivateSymbol(name string) bool {
if len(name) == 0 {
if len(name) < 2 {
return false
}
if !strings.HasPrefix(name, "_") {
Expand Down
115 changes: 95 additions & 20 deletions sast-engine/tools/internal/clikeextract/walker_xplat.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,39 @@ import (
"os"
"path/filepath"
"sort"
"strings"

"github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/core"
)

// Windows headers, accessed cross-platform via mingw-w64 on Ubuntu.
//
// `apt install mingw-w64` on ubuntu-latest places the Win32 + MSVCRT headers
// under /usr/x86_64-w64-mingw32/include. Using mingw on Linux beats running
// a Windows GitHub Actions runner for cost and simplicity, and gives us a
// faithful Win32 surface for stdlib resolution.
//
// The mingw libstdc++ tree can live in two different places depending on
// the packaging:
//
//   - /usr/x86_64-w64-mingw32/include/c++/<version> (mingw upstream)
//   - /usr/lib/gcc/x86_64-w64-mingw32/<ver>-<thread>/include/c++ (Ubuntu)
//
// Ubuntu's apt-packaged g++-mingw-w64 splits the C++ headers under the
// gcc tree instead of the mingw sysroot. We probe both layouts so the
// generator works regardless of distro packaging.
//
// All paths exposed as package vars (rather than literals) so tests can
// override them to exercise both the hit and miss branches without depending
// on whether the host actually has mingw installed.
var (
// windowsMingwRoot is the canonical mingw-w64 install root. Subdirectories
// `include` (C) and `include/c++/<version>` (C++) live underneath.
// windowsMingwRoot is the canonical mingw-w64 install root. Subdirectory
// `include` (C) lives underneath; C++ may also live here for upstream
// mingw installs.
windowsMingwRoot = "/usr/x86_64-w64-mingw32"

// ubuntuMingwGccRoot is the directory holding `<ver>-<thread>` subdirs
// (e.g. `13-posix`, `13-win32`) on Debian/Ubuntu. The C++ headers live at
// <root>/<ver>-<thread>/include/c++. We probe this only when the
// upstream layout under windowsMingwRoot doesn't carry C++ headers.
ubuntuMingwGccRoot = "/usr/lib/gcc/x86_64-w64-mingw32"

// darwinSDKRoots is the ordered list of macOS SDK include directories the
// generator probes. Command Line Tools first because that's the lighter
// install used in CI; Xcode.app is a fallback for full developer setups.
Expand Down Expand Up @@ -61,28 +74,89 @@ func windowsCSource() (HeaderSource, error) {
}, nil
}

// windowsCppSource finds the mingw libstdc++ header tree. Returns an error
// when no version directory exists — without one, the C++ surface is
// unrecoverable (the directory name encodes the version).
// windowsCppSource finds the mingw libstdc++ header tree. Probes both the
// upstream mingw layout (headers under the sysroot) and the Debian/Ubuntu
// layout (headers split under /usr/lib/gcc). Returns an error only when
// neither layout yields a versioned directory.
//
// The walk lists the C++ STL tree only. Win32 C headers are exposed via
// windowsCSource; mixing both into one source would conflate languages.
func windowsCppSource() (HeaderSource, error) {
root := filepath.Join(windowsMingwRoot, "include", "c++")
dir, version := findVersionedDir(root)
dir, version, layout := findWindowsMingwCppDir()
if dir == "" {
return HeaderSource{}, fmt.Errorf("windowsCppSource: no mingw libstdc++ headers under %s "+
"(install with: apt install g++-mingw-w64)", root)
"or %s (install with: apt install g++-mingw-w64)",
filepath.Join(windowsMingwRoot, "include", "c++"), ubuntuMingwGccRoot)
}
return HeaderSource{
Platform: core.PlatformWindows,
Language: core.LanguageCpp,
SearchDirs: []string{dir},
HeaderExts: []string{".h", ".hpp", ".hxx", ""},
SystemTag: "mingw-w64-libstdc++-" + version,
SystemTag: "mingw-w64-libstdc++-" + version + "-" + layout,
}, nil
}

// findWindowsMingwCppDir locates the mingw libstdc++ header tree. Returns
// (dir, version, layout-tag) where layout-tag identifies which packaging
// shape matched ("upstream" or "ubuntu") so the SystemTag downstream can
// disambiguate. Returns ("","","") when no layout matches.
//
// Layout precedence:
//
// 1. Upstream mingw: <root>/include/c++/<ver> (version-keyed subdirs)
// 2. Ubuntu split: /usr/lib/gcc/x86_64-w64-mingw32/<ver>-<thread>/include/c++
// prefers the posix threading variant (the libstdc++ shipped under win32
// threading is functionally identical but the posix one is the platform
// default for general apps).
func findWindowsMingwCppDir() (dir, version, layout string) {
upstream := filepath.Join(windowsMingwRoot, "include", "c++")
if d, v := findVersionedDir(upstream); d != "" {
return d, v, "upstream"
}
if d, v := findUbuntuMingwCppDir(); d != "" {
return d, v, "ubuntu"
}
return "", "", ""
}

// findUbuntuMingwCppDir resolves the Debian/Ubuntu mingw libstdc++ tree.
// Scans ubuntuMingwGccRoot for `<ver>-<thread>` subdirs, prefers
// `<ver>-posix`, falls back to `<ver>-win32`.
//
// Within each threading variant only the newest version directory is
// considered. "Newest" is decided by mingwVersionLess, which compares
// digit runs numerically: a plain lexical sort would rank "9-posix"
// above "13-posix" and silently select the older toolchain.
//
// Returns (dir, version) where dir is <root>/<name>/include/c++ and
// version is the full subdirectory name including the threading suffix
// (e.g. "13-posix"). Returns ("","") when the root cannot be read, no
// candidate exists, or the chosen candidates don't carry an include/c++
// subtree.
func findUbuntuMingwCppDir() (dir, version string) {
	entries, err := os.ReadDir(ubuntuMingwGccRoot)
	if err != nil {
		// Missing or unreadable root simply means this layout is absent.
		return "", ""
	}
	var posix, win32 []string
	for _, e := range entries {
		if !e.IsDir() {
			continue
		}
		name := e.Name()
		switch {
		case strings.HasSuffix(name, "-posix"):
			posix = append(posix, name)
		case strings.HasSuffix(name, "-win32"):
			win32 = append(win32, name)
		}
	}
	// posix first: it is the preferred threading variant.
	for _, candidates := range [][]string{posix, win32} {
		if len(candidates) == 0 {
			continue
		}
		sort.Slice(candidates, func(i, j int) bool {
			return mingwVersionLess(candidates[i], candidates[j])
		})
		newest := candidates[len(candidates)-1]
		cppDir := filepath.Join(ubuntuMingwGccRoot, newest, "include", "c++")
		if dirExists(cppDir) {
			return cppDir, newest
		}
	}
	return "", ""
}

// mingwVersionLess reports whether directory name a orders before b when
// runs of ASCII digits are compared by numeric value and every other byte
// is compared as-is ("9.3-posix" < "10-posix" < "13-posix").
func mingwVersionLess(a, b string) bool {
	isDigit := func(c byte) bool { return '0' <= c && c <= '9' }
	i, j := 0, 0
	for i < len(a) && j < len(b) {
		if isDigit(a[i]) && isDigit(b[j]) {
			si, sj := i, j
			for i < len(a) && isDigit(a[i]) {
				i++
			}
			for j < len(b) && isDigit(b[j]) {
				j++
			}
			// With leading zeros stripped, a longer digit run is a
			// larger number; equal lengths compare lexically.
			da := strings.TrimLeft(a[si:i], "0")
			db := strings.TrimLeft(b[sj:j], "0")
			if len(da) != len(db) {
				return len(da) < len(db)
			}
			if da != db {
				return da < db
			}
			continue
		}
		if a[i] != b[j] {
			return a[i] < b[j]
		}
		i++
		j++
	}
	// One name is a prefix of the other: the shorter remainder sorts first.
	return len(a)-i < len(b)-j
}

// darwinCSource probes the canonical macOS SDK include locations and uses
// the first one that exists. Apple ships Command Line Tools and full Xcode
// installs in different subtrees; the loader tries CLT first because that's
Expand Down Expand Up @@ -165,14 +239,15 @@ func firstExistingDir(candidates []string) string {
return ""
}

// detectMingwVersion derives a version string from the libstdc++ directory
// name embedded under the mingw root. Returns "unknown" when the tree has
// not been probed yet (windowsCppSource hits this path before the C source
// builder runs). Lightweight on purpose: parsing `gcc --version` would add
// an exec dependency that complicates testing for marginal accuracy gain.
// detectMingwVersion derives a short version tag for the C HeaderSource's
// SystemTag. Uses the libstdc++ directory under whichever mingw layout the
// host actually has installed, so the tag stays consistent across
// upstream-mingw and Ubuntu packaging. Returns "unknown" when neither
// layout has C++ headers (only the C side is installed). Lightweight on
// purpose: parsing `gcc --version` would add an exec dependency that
// complicates testing for marginal accuracy gain.
func detectMingwVersion() string {
root := filepath.Join(windowsMingwRoot, "include", "c++")
_, v := findVersionedDir(root)
_, v, _ := findWindowsMingwCppDir()
if v == "" {
return "unknown"
}
Expand Down
64 changes: 59 additions & 5 deletions sast-engine/tools/internal/clikeextract/walker_xplat_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,28 @@ import (
// independent of order.
func withTempMingwRoot(t *testing.T, root string) {
	t.Helper()
	// Snapshot both probe roots once; a single cleanup restores them.
	origMingw := windowsMingwRoot
	origUbuntu := ubuntuMingwGccRoot
	windowsMingwRoot = root
	// Point the Ubuntu probe at a path that does not exist (a fresh temp
	// dir plus a never-created child) so tests on a host with real mingw
	// installed don't see Ubuntu's libstdc++ tree as a "phantom" fallback.
	ubuntuMingwGccRoot = filepath.Join(t.TempDir(), "ubuntu-mingw-absent")
	t.Cleanup(func() {
		windowsMingwRoot = origMingw
		ubuntuMingwGccRoot = origUbuntu
	})
}

// withTempUbuntuMingwRoot redirects the Ubuntu mingw probe
// (ubuntuMingwGccRoot) to root until t finishes, then restores the value
// that was in effect when it was called. It leaves windowsMingwRoot
// untouched, so tests can combine it with withTempMingwRoot to set up any
// mix of upstream / Ubuntu probe state.
func withTempUbuntuMingwRoot(t *testing.T, root string) {
	t.Helper()
	saved := ubuntuMingwGccRoot
	t.Cleanup(func() {
		ubuntuMingwGccRoot = saved
	})
	ubuntuMingwGccRoot = root
}

// withTempDarwinRoots replaces both the C and C++ Darwin probe lists for t.
Expand Down Expand Up @@ -64,7 +83,7 @@ func TestWindowsCSource_VersionUnknownWhenCppTreeMissing(t *testing.T) {
assert.Equal(t, "mingw-w64-unknown", src.SystemTag)
}

func TestWindowsCppSource_Found(t *testing.T) {
func TestWindowsCppSource_Found_UpstreamLayout(t *testing.T) {
root := t.TempDir()
require.NoError(t, os.MkdirAll(filepath.Join(root, "include", "c++", "13"), 0o755))
require.NoError(t, os.MkdirAll(filepath.Join(root, "include", "c++", "12"), 0o755))
Expand All @@ -76,7 +95,42 @@ func TestWindowsCppSource_Found(t *testing.T) {
assert.Equal(t, core.LanguageCpp, src.Language)
assert.Equal(t, []string{filepath.Join(root, "include", "c++", "13")}, src.SearchDirs,
"freshest version directory must win")
assert.Equal(t, "mingw-w64-libstdc++-13", src.SystemTag)
assert.Equal(t, "mingw-w64-libstdc++-13-upstream", src.SystemTag)
}

// TestWindowsCppSource_Found_UbuntuLayout covers the Debian/Ubuntu
// packaging shape, where libstdc++ sits under
// <gcc-root>/<ver>-<thread>/include/c++ rather than in the mingw sysroot.
// With both threading variants on disk, the posix one must be selected.
func TestWindowsCppSource_Found_UbuntuLayout(t *testing.T) {
	// Make the upstream probe miss so the Ubuntu fallback is exercised.
	withTempMingwRoot(t, filepath.Join(t.TempDir(), "no-upstream"))

	gccRoot := t.TempDir()
	posixCpp := filepath.Join(gccRoot, "13-posix", "include", "c++")
	win32Cpp := filepath.Join(gccRoot, "13-win32", "include", "c++")
	require.NoError(t, os.MkdirAll(posixCpp, 0o755))
	require.NoError(t, os.MkdirAll(win32Cpp, 0o755))
	withTempUbuntuMingwRoot(t, gccRoot)

	src, err := windowsCppSource()
	require.NoError(t, err)

	wantDirs := []string{posixCpp}
	assert.Equal(t, wantDirs, src.SearchDirs,
		"posix threading variant must win over win32")
	assert.Equal(t, "mingw-w64-libstdc++-13-posix-ubuntu", src.SystemTag)
}

// TestWindowsCppSource_Found_UbuntuLayout_Win32Fallback checks that the
// win32 threading variant is picked up when it is the only Ubuntu-layout
// directory present.
func TestWindowsCppSource_Found_UbuntuLayout_Win32Fallback(t *testing.T) {
	// Upstream probe must miss so the Ubuntu layout is consulted.
	withTempMingwRoot(t, filepath.Join(t.TempDir(), "no-upstream"))

	gccRoot := t.TempDir()
	win32Cpp := filepath.Join(gccRoot, "13-win32", "include", "c++")
	require.NoError(t, os.MkdirAll(win32Cpp, 0o755))
	withTempUbuntuMingwRoot(t, gccRoot)

	src, err := windowsCppSource()
	require.NoError(t, err)
	assert.Equal(t, "mingw-w64-libstdc++-13-win32-ubuntu", src.SystemTag)
}

func TestWindowsCppSource_Missing(t *testing.T) {
Expand Down Expand Up @@ -187,7 +241,7 @@ func TestDiscoverHeaderSources_DarwinAndWindowsDispatched(t *testing.T) {
target, language, wantTag string
}{
{core.PlatformWindows, core.LanguageC, "mingw-w64-13"},
{core.PlatformWindows, core.LanguageCpp, "mingw-w64-libstdc++-13"},
{core.PlatformWindows, core.LanguageCpp, "mingw-w64-libstdc++-13-upstream"},
{core.PlatformDarwin, core.LanguageC, "darwin-MacOSX.sdk"},
{core.PlatformDarwin, core.LanguageCpp, "libc++-darwin-v1"},
} {
Expand Down
Loading