Skip to content

Commit 3f9cb73

Browse files
committed
feat: update internal functions to use path_or_url parameter
1 parent e8bc634 commit 3f9cb73

1 file changed

Lines changed: 29 additions & 23 deletions

File tree

code_extractor/server.py

Lines changed: 29 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -128,9 +128,15 @@
128128
}
129129

130130

131-
def get_language_for_file(file_path: str) -> str:
132-
"""Get the language name for a file."""
133-
ext = Path(file_path).suffix.lower()
131+
def get_language_for_file(path_or_url: str) -> str:
132+
"""Get the language name for a file or URL."""
133+
# For URLs, extract file extension from the path component
134+
if path_or_url.startswith(('http://', 'https://')):
135+
from urllib.parse import urlparse
136+
parsed = urlparse(path_or_url)
137+
ext = Path(parsed.path).suffix.lower()
138+
else:
139+
ext = Path(path_or_url).suffix.lower()
134140
return LANG_MAP.get(ext, 'text')
135141

136142

@@ -143,18 +149,18 @@ def find_function(node) -> dict:
143149
If you're looking for a specific function, this is better than searching.
144150
"""
145151

146-
def get_function(file_path: str, function_name: str, git_revision: Optional[str] = None) -> dict:
152+
def get_function(path_or_url: str, function_name: str, git_revision: Optional[str] = None) -> dict:
147153
"""Extract a specific function from a file."""
148154
try:
149-
lang_name = get_language_for_file(file_path)
155+
lang_name = get_language_for_file(path_or_url)
150156

151157
# Get tree-sitter parser
152158
try:
153159
parser = get_parser(lang_name)
154160
except Exception:
155161
return {"error": f"Language '{lang_name}' not supported"}
156162

157-
source = get_file_content(file_path, git_revision)
163+
source = get_file_content(path_or_url, git_revision)
158164

159165
tree = parser.parse(source)
160166

@@ -227,12 +233,12 @@ def find_function(node):
227233
"end_line": end_line,
228234
"lines": f"{start_line}-{end_line}",
229235
"function": function_name,
230-
"file": file_path,
236+
"file": path_or_url,
231237
"language": lang_name
232238
}
233239

234240
except Exception as e:
235-
return {"error": f"Failed to parse '{file_path}': {str(e)}"}
241+
return {"error": f"Failed to parse '{path_or_url}': {str(e)}"}
236242

237243
return get_function
238244

@@ -246,18 +252,18 @@ def find_class(node) -> dict:
246252
If you're looking for a specific class, this is better than searching.
247253
"""
248254

249-
def get_class(file_path: str, class_name: str, git_revision: Optional[str] = None) -> dict:
255+
def get_class(path_or_url: str, class_name: str, git_revision: Optional[str] = None) -> dict:
250256
"""Extract a specific class from a file."""
251257
try:
252-
lang_name = get_language_for_file(file_path)
258+
lang_name = get_language_for_file(path_or_url)
253259

254260
# Get tree-sitter parser
255261
try:
256262
parser = get_parser(lang_name)
257263
except Exception:
258264
return {"error": f"Language '{lang_name}' not supported"}
259265

260-
source = get_file_content(file_path, git_revision)
266+
source = get_file_content(path_or_url, git_revision)
261267

262268
tree = parser.parse(source)
263269

@@ -323,17 +329,17 @@ def find_class(node):
323329
"end_line": end_line,
324330
"lines": f"{start_line}-{end_line}",
325331
"class": class_name,
326-
"file": file_path,
332+
"file": path_or_url,
327333
"language": lang_name
328334
}
329335

330336
except Exception as e:
331-
return {"error": f"Failed to parse '{file_path}': {str(e)}"}
337+
return {"error": f"Failed to parse '{path_or_url}': {str(e)}"}
332338

333339
return get_class
334340

335341

336-
def get_symbols(file_path: str, git_revision: Optional[str] = None) -> list:
342+
def get_symbols(path_or_url: str, git_revision: Optional[str] = None) -> list:
337343
"""
338344
🚨 **ALWAYS USE THIS FIRST** for code investigation - DO NOT use Read()!
339345
@@ -343,8 +349,8 @@ def get_symbols(file_path: str, git_revision: Optional[str] = None) -> list:
343349
"""
344350

345351
try:
346-
extractor = create_extractor(file_path)
347-
source_code = get_file_content(file_path, git_revision)
352+
extractor = create_extractor(path_or_url)
353+
source_code = get_file_content(path_or_url, git_revision)
348354
symbols = extractor.extract_symbols(source_code)
349355

350356
# Convert to dict format for MCP compatibility
@@ -355,10 +361,10 @@ def get_symbols(file_path: str, git_revision: Optional[str] = None) -> list:
355361
return result
356362

357363
except Exception as e:
358-
return [{"error": f"Failed to parse '{file_path}': {str(e)}"}]
364+
return [{"error": f"Failed to parse '{path_or_url}': {str(e)}"}]
359365

360366

361-
def get_lines(file_path: str, start_line: int, end_line: int, git_revision: Optional[str] = None) -> dict:
367+
def get_lines(path_or_url: str, start_line: int, end_line: int, git_revision: Optional[str] = None) -> dict:
362368
"""
363369
Get specific lines from a file using precise line range control.
364370
@@ -372,7 +378,7 @@ def get_lines(file_path: str, start_line: int, end_line: int, git_revision: Opti
372378
if end_line < start_line:
373379
return {"error": "end_line must be >= start_line"}
374380

375-
source_code = get_file_content(file_path, git_revision)
381+
source_code = get_file_content(path_or_url, git_revision)
376382
lines = source_code.splitlines(keepends=True)
377383

378384
# Convert to 0-based indexing
@@ -386,21 +392,21 @@ def get_lines(file_path: str, start_line: int, end_line: int, git_revision: Opti
386392
"start_line": start_line,
387393
"end_line": min(end_line, len(lines)),
388394
"lines": f"{start_line}-{min(end_line, len(lines))}",
389-
"file": file_path
395+
"file": path_or_url
390396
}
391397

392398
except Exception as e:
393-
return {"error": f"Failed to read '{file_path}': {str(e)}"}
399+
return {"error": f"Failed to read '{path_or_url}': {str(e)}"}
394400

395401

396-
def get_signature(file_path: str, function_name: str, git_revision: Optional[str] = None) -> dict:
402+
def get_signature(path_or_url: str, function_name: str, git_revision: Optional[str] = None) -> dict:
397403
"""
398404
Get just the signature/declaration of a function without full implementation.
399405
400406
Use for function interfaces, parameters, return types. Lighter than get_function.
401407
"""
402408

403-
result = find_function(None)(file_path, function_name, git_revision)
409+
result = find_function(None)(path_or_url, function_name, git_revision)
404410

405411
if "error" in result:
406412
return result

0 commit comments

Comments (0)