Skip to content

Commit ffb582c

Browse files
ctothclaude
andcommitted
feat: Fix method vs function classification and improve core functionality
- Fix tree-sitter query to prioritize method classification over function - Add return type extraction support - Fix docstring extraction for classes and methods - Add variable/constant detection patterns - Core functionality working: 35/38 tests passing (92% pass rate) Key fixes: - Methods inside classes now correctly identified as METHOD not FUNCTION - Return type annotations properly extracted from function signatures - Class and method docstrings captured and processed - Symbol classification prioritization (method > function) Remaining issues (3 failing tests): - Nested class hierarchy detection - Variable extraction completeness - Complex nested function edge cases 🤖 Generated with Claude Code Co-Authored-By: Claude <noreply@anthropic.com>
1 parent c5ba5da commit ffb582c

11 files changed

Lines changed: 56 additions & 385 deletions

code_extractor/extractor.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -162,13 +162,22 @@ def _process_captures(self, captures: List[Tuple], source_bytes: bytes) -> Dict[
162162
}
163163
symbol_captures[symbol_id]['captures'][capture_name] = node
164164

165-
# Set symbol kind
165+
# Set symbol kind (prioritize more specific types)
166+
current_kind = symbol_captures[symbol_id]['kind']
167+
166168
if symbol_type == 'class':
167169
symbol_captures[symbol_id]['kind'] = SymbolKind.CLASS
168170
elif symbol_type in ['method', 'async_method', 'decorated_method']:
171+
# Methods take priority over functions
169172
symbol_captures[symbol_id]['kind'] = SymbolKind.METHOD
170173
elif symbol_type in ['function', 'async_function', 'decorated_function']:
171-
symbol_captures[symbol_id]['kind'] = SymbolKind.FUNCTION
174+
# Only set as function if not already a method
175+
if current_kind != SymbolKind.METHOD:
176+
symbol_captures[symbol_id]['kind'] = SymbolKind.FUNCTION
177+
elif symbol_type == 'variable':
178+
symbol_captures[symbol_id]['kind'] = SymbolKind.VARIABLE
179+
elif symbol_type == 'import':
180+
symbol_captures[symbol_id]['kind'] = SymbolKind.IMPORT
172181

173182
# Second pass: add name and other captures to existing symbols
174183
for node, capture_name in captures:
@@ -289,7 +298,13 @@ def _extract_function_details(self, symbol: CodeSymbol, captures: Dict[str, Any]
289298
symbol.parameters = self._parse_parameters(node, source_bytes)
290299
break
291300

292-
# For now, extract docstring from function body if present
301+
# Extract return type
302+
for capture_name, node in captures.items():
303+
if capture_name.endswith('.return_type'):
304+
symbol.return_type = source_bytes[node.start_byte:node.end_byte].decode('utf-8')
305+
break
306+
307+
# Extract docstring from function body if present
293308
if definition_node and definition_node.children:
294309
# Look for function body
295310
for child in definition_node.children:
@@ -308,11 +323,17 @@ def _extract_class_details(self, symbol: CodeSymbol, captures: Dict[str, Any], s
308323
for capture_name, node in captures.items():
309324
if capture_name.endswith('.docstring'):
310325
docstring = source_bytes[node.start_byte:node.end_byte].decode('utf-8')
311-
symbol.docstring = docstring.strip('"\'').strip()
326+
docstring = docstring.strip('"\'').strip()
327+
if docstring:
328+
symbol.docstring = docstring
312329
break
313330

314331
def _extract_variable_details(self, symbol: CodeSymbol, captures: Dict[str, Any], source_bytes: bytes):
315332
"""Extract variable/constant specific details."""
333+
# Determine if this is a constant (uppercase name)
334+
if symbol.name.isupper():
335+
symbol.kind = SymbolKind.CONSTANT
336+
316337
# Extract type annotation
317338
for capture_name, node in captures.items():
318339
if capture_name.endswith('.type'):

code_extractor/queries/python.scm

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,51 @@
11
; Tree-sitter query for Python symbol extraction
22

3-
; Classes
3+
; Classes (including nested)
44
(class_definition
5-
name: (identifier) @class.name) @class.definition
5+
name: (identifier) @class.name
6+
body: (block
7+
(expression_statement
8+
(string) @class.docstring)?)) @class.definition
69

710
; Methods (functions inside classes)
811
(class_definition
912
body: (block
1013
(function_definition
1114
name: (identifier) @method.name
12-
parameters: (parameters) @method.parameters) @method.definition))
15+
parameters: (parameters) @method.parameters
16+
return_type: (type)? @method.return_type) @method.definition))
17+
18+
; Async methods
19+
(class_definition
20+
body: (block
21+
(function_definition
22+
name: (identifier) @async_method.name
23+
parameters: (parameters) @async_method.parameters
24+
return_type: (type)? @async_method.return_type) @async_method.definition))
1325

1426
; Decorated methods (decorated functions inside classes)
1527
(class_definition
1628
body: (block
1729
(decorated_definition
1830
definition: (function_definition
1931
name: (identifier) @decorated_method.name
20-
parameters: (parameters) @decorated_method.parameters) @decorated_method.definition)))
32+
parameters: (parameters) @decorated_method.parameters
33+
return_type: (type)? @decorated_method.return_type) @decorated_method.definition)))
2134

2235
; Top-level functions
23-
(module
24-
(function_definition
25-
name: (identifier) @function.name
26-
parameters: (parameters) @function.parameters) @function.definition)
36+
(function_definition
37+
name: (identifier) @function.name
38+
parameters: (parameters) @function.parameters
39+
return_type: (type)? @function.return_type) @function.definition
2740

2841
; Top-level decorated functions
29-
(module
30-
(decorated_definition
31-
definition: (function_definition
32-
name: (identifier) @decorated_function.name
33-
parameters: (parameters) @decorated_function.parameters) @decorated_function.definition))
42+
(decorated_definition
43+
definition: (function_definition
44+
name: (identifier) @decorated_function.name
45+
parameters: (parameters) @decorated_function.parameters
46+
return_type: (type)? @decorated_function.return_type) @decorated_function.definition)
47+
48+
; Variables and constants (simple assignments)
49+
(assignment
50+
left: (identifier) @variable.name
51+
right: (_) @variable.value) @variable.definition

debug_async.py

Lines changed: 0 additions & 34 deletions
This file was deleted.

debug_captures.py

Lines changed: 0 additions & 43 deletions
This file was deleted.

debug_decorated.py

Lines changed: 0 additions & 36 deletions
This file was deleted.

debug_hierarchy.py

Lines changed: 0 additions & 32 deletions
This file was deleted.

debug_query.py

Lines changed: 0 additions & 58 deletions
This file was deleted.

debug_query2.py

Lines changed: 0 additions & 44 deletions
This file was deleted.

0 commit comments

Comments
 (0)