Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 75 additions & 0 deletions __tests__/extraction.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3841,6 +3841,81 @@ std::string use() {
expect(reached.some((p) => p.endsWith('user.cc')), `${fn.name} should be called from user.cc`).toBe(true);
}
});

it('names a reference-returning free function from its name node (not `& Name(...)`)', async () => {
  // Regression scenario: in `const std::string& GetRef(a::Ctx&)` the
  // function_declarator sits inside a reference_declarator. The generic
  // declarator-text fallback used to keep the leading `&` plus the whole
  // signature, so the function was indexed as `& GetRef(a::Ctx& c)` — a name
  // nobody can search for, which meant its callers never resolved.
  const srcDir = path.join(tempDir, 'src');
  fs.mkdirSync(srcDir, { recursive: true });

  const definitionSource = `#include <string>
namespace a { struct Ctx {}; }
const std::string& GetRef(a::Ctx& c) { static std::string s; return s; }
`;
  const callerSource = `#include <string>
namespace a { struct Ctx {}; }
void useRef(a::Ctx& c) { (void)GetRef(c); }
`;
  fs.writeFileSync(path.join(srcDir, 'ref.cc'), definitionSource);
  fs.writeFileSync(path.join(srcDir, 'use_ref.cc'), callerSource);

  cg = CodeGraph.initSync(tempDir);
  await cg.indexAll();
  cg.resolveReferences();

  // The definition must be indexed under its plain name, and no function at
  // all may carry a stray `&` in its name.
  const functionNodes = cg.getNodesByKind('function');
  const getRef = functionNodes.find((node) => node.name === 'GetRef');
  expect(getRef, 'GetRef extracted under its real name (not "& GetRef(...)")').toBeDefined();
  const anyNameHasAmpersand = functionNodes.some((node) => node.name.includes('&'));
  expect(anyNameHasAmpersand, 'no function indexed with a stray "&" in its name').toBe(false);

  // The call in use_ref.cc must show up in GetRef's impact radius.
  const impact = cg.getImpactRadius(getRef!.id, 3);
  const reachedPaths = Array.from(impact.nodes.values(), (node) => node.filePath ?? '');
  const calledFromUseRef = reachedPaths.some((filePath) => filePath.endsWith('use_ref.cc'));
  expect(calledFromUseRef, 'GetRef should be called from use_ref.cc').toBe(true);
});

it('resolves a fully-qualified call `ns::a::Func(...)` to its definition', async () => {
  // Regression scenario: the definition node is stored under its SIMPLE name
  // (`GetInsured`) while the call site spells out the fully-qualified
  // `ns::insured::GetInsured(...)`. When the whole qualified text was recorded
  // as the callee, the name-based resolver could not link the `calls` edge and
  // `GetInsured` reported no callers.
  const srcDir = path.join(tempDir, 'src');
  fs.mkdirSync(srcDir, { recursive: true });

  const insuredPath = path.join(srcDir, 'insured.cc');
  fs.writeFileSync(
    insuredPath,
    `#include <string>
namespace mmpayinspolicymgrao { namespace insured {
std::string GetInsured(const std::string& id) { return id; }
} }
`
  );

  const callerPath = path.join(srcDir, 'caller.cc');
  fs.writeFileSync(
    callerPath,
    `#include <string>
std::string CallIt() {
return mmpayinspolicymgrao::insured::GetInsured("x");
}
`
  );

  cg = CodeGraph.initSync(tempDir);
  await cg.indexAll();
  cg.resolveReferences();

  const allFunctions = cg.getNodesByKind('function');
  const getInsured = allFunctions.find((node) => node.name === 'GetInsured');
  expect(getInsured, 'GetInsured extracted').toBeDefined();

  // caller.cc must appear among the files reached from the definition.
  const impactedNodes = [...cg.getImpactRadius(getInsured!.id, 3).nodes.values()];
  const reachedPaths = impactedNodes.map((node) => node.filePath ?? '');
  const reachesCaller = reachedPaths.some((filePath) => filePath.endsWith('caller.cc'));
  expect(
    reachesCaller,
    'qualified call ns::insured::GetInsured should resolve to caller.cc',
  ).toBe(true);
});
});

describe('Dart mixins and type references', () => {
Expand Down
73 changes: 41 additions & 32 deletions src/extraction/languages/c-cpp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,46 +3,55 @@ import { getChildByField, getNodeText } from '../tree-sitter-helpers';
import type { LanguageExtractor } from '../tree-sitter-types';

/**
 * Split the text of a C++ qualified name into its scope segments, cutting only
 * at `::` separators that sit OUTSIDE any `<...>` template-argument list.
 *
 * A plain `split('::')` also cuts inside template arguments, so
 * `Foo::bar<a::B>` would end in the bogus segment `B>`. Empty segments (for
 * example the one produced by a leading global-scope `::`) are dropped.
 *
 * @param qualifiedText Source text of a qualified name, e.g. `ns::Foo::bar`.
 * @returns The non-empty segments in source order; empty array for empty text.
 */
function splitCppScopeSegments(qualifiedText: string): string[] {
  const segments: string[] = [];
  let angleDepth = 0;
  let segmentStart = 0;
  for (let i = 0; i < qualifiedText.length; i++) {
    const ch = qualifiedText[i];
    if (ch === '<') {
      angleDepth++;
    } else if (ch === '>' && angleDepth > 0) {
      angleDepth--;
    } else if (angleDepth === 0 && ch === ':' && qualifiedText[i + 1] === ':') {
      segments.push(qualifiedText.slice(segmentStart, i));
      i++; // step over the second ':' of the separator
      segmentStart = i + 1;
    }
  }
  segments.push(qualifiedText.slice(segmentStart));
  return segments.filter(Boolean);
}

/**
 * Walk the declarator chain to the function's NAME node only, unwrapping
 * function / pointer / reference / parenthesized declarators and never
 * descending into the `parameter_list`. This keeps two failure modes out of
 * the extracted name:
 *  - a free function named after its first namespaced PARAMETER type
 *    (`X GetThing(a::Ctx& c)` must be `GetThing`, not `Ctx`), and
 *  - a reference-returning free function whose generic declarator-text
 *    fallback kept the leading `&` and the whole signature
 *    (`const std::string& GetRef(a::Ctx& c)` must be `GetRef`, not
 *    `& GetRef(a::Ctx& c)`).
 *
 * @param node Node whose `declarator` field starts the chain.
 * @returns The first node in the chain that is not one of the wrapper
 *   declarator types, or `undefined` when the chain runs out.
 */
function getCppNameDeclarator(node: SyntaxNode): SyntaxNode | undefined {
  let current: SyntaxNode | null | undefined = getChildByField(node, 'declarator');
  while (
    current &&
    (current.type === 'function_declarator' ||
      current.type === 'pointer_declarator' ||
      current.type === 'reference_declarator' ||
      current.type === 'parenthesized_declarator')
  ) {
    // Prefer the `declarator` field; fall back to the first named child for
    // wrappers that do not expose their inner declarator under that field.
    current = getChildByField(current, 'declarator') ?? current.namedChild(0);
  }
  return current ?? undefined;
}

/**
 * Extract the simple (unqualified) name of a C++ function or method.
 *
 * @param node Node whose `declarator` field leads to the name.
 * @param source Full source text that `node` was parsed from.
 * @returns The last scope segment for a qualified name (`Foo::bar` -> `bar`),
 *   the text of a plain identifier / field identifier, or `undefined` for any
 *   other name node so the caller can apply its own fallback.
 */
function extractCppQualifiedMethodName(node: SyntaxNode, source: string): string | undefined {
  const nameNode = getCppNameDeclarator(node);
  if (!nameNode) return undefined;
  if (nameNode.type === 'qualified_identifier') {
    const parts = splitCppScopeSegments(getNodeText(nameNode, source).trim());
    return parts[parts.length - 1];
  }
  // Return plain names directly so we don't fall through to the generic
  // declarator-text fallback, which doesn't unwrap reference_declarator
  // (`T& Foo(...)` -> "& Foo(...)"). operator_name / destructor_name /
  // template_function are intentionally left to that fallback, which names
  // them correctly.
  if (nameNode.type === 'identifier' || nameNode.type === 'field_identifier') {
    return getNodeText(nameNode, source).trim();
  }
  return undefined;
}

/**
 * Extract the receiver (enclosing scope) of an out-of-line C++ definition.
 *
 * @param node Node whose `declarator` field leads to the name.
 * @param source Full source text that `node` was parsed from.
 * @returns Everything before the last scope segment, re-joined with `::`
 *   (`ns::Foo::bar` -> `ns::Foo`), or `undefined` when the name is not
 *   qualified or has only one segment.
 */
function extractCppReceiverType(node: SyntaxNode, source: string): string | undefined {
  const nameNode = getCppNameDeclarator(node);
  if (nameNode && nameNode.type === 'qualified_identifier') {
    const parts = splitCppScopeSegments(getNodeText(nameNode, source).trim());
    if (parts.length > 1) return parts.slice(0, -1).join('::');
  }
  return undefined;
}

/**
Expand Down
9 changes: 9 additions & 0 deletions src/extraction/tree-sitter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2615,6 +2615,15 @@ export class TreeSitterExtractor {
} else if (func.type === 'scoped_identifier' || func.type === 'scoped_call_expression') {
// Scoped call: Module::function()
calleeName = getNodeText(func, this.source);
} else if (func.type === 'qualified_identifier') {
  // C++ qualified call `ns::a::Func(...)`. The callee is stored under its
  // SIMPLE name (the C++ extractor records the last `::` segment as the
  // node name), so reference that segment too; keeping the full
  // `ns::a::Func` text would leave the name-based resolver unable to link
  // the `calls` edge ("No callers").
  //
  // Only `::` separators OUTSIDE `<...>` count: a plain `split('::')` also
  // cuts inside template arguments, turning `ns::Func<a::B>` into `B>`.
  const qtext = getNodeText(func, this.source);
  const qparts: string[] = [];
  let angleDepth = 0;
  let segmentStart = 0;
  for (let i = 0; i < qtext.length; i++) {
    const ch = qtext[i];
    if (ch === '<') {
      angleDepth++;
    } else if (ch === '>' && angleDepth > 0) {
      angleDepth--;
    } else if (angleDepth === 0 && ch === ':' && qtext[i + 1] === ':') {
      qparts.push(qtext.slice(segmentStart, i));
      i++; // step over the second ':' of the separator
      segmentStart = i + 1;
    }
  }
  qparts.push(qtext.slice(segmentStart));
  // Drop empty segments (e.g. from a leading global-scope `::`); fall back to
  // the full text if nothing usable remains.
  const namedParts = qparts.filter(Boolean);
  calleeName = namedParts[namedParts.length - 1] || qtext;
} else if (this.language === 'csharp' && func.type === 'member_access_expression') {
// C# member call `recv.Method(...)`. When the receiver is itself a call
// — a chained factory `Foo.Create(args).Bar()` — encode `inner().Bar`
Expand Down