Skip to content

Commit e850ba0

Browse files
shantanu patilshantanu patil
authored and committed
Merge branch 'worktree-agent-a07f6a5d'
2 parents 29f6faf + b7dcb52 commit e850ba0

1 file changed

Lines changed: 195 additions & 74 deletions

File tree

api/api.py

Lines changed: 195 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@
55
from fastapi import FastAPI, HTTPException, Query, Request, WebSocket
66
from fastapi.middleware.cors import CORSMiddleware
77
from fastapi.responses import JSONResponse, Response
8+
from starlette.middleware.gzip import GZipMiddleware
89
from typing import List, Optional, Dict, Any, Literal
910
import json
11+
import hashlib
1012
from datetime import datetime
1113
from pydantic import BaseModel, Field
1214
import google.generativeai as genai
@@ -115,6 +117,10 @@ def _custom_rate_limit_exceeded_handler(request: Request, exc: RateLimitExceeded
115117
allow_headers=["*"], # Allows all headers
116118
)
117119

120+
# GZip compression for responses of at least 1000 bytes. Added after CORS, so
121+
# it wraps the CORS middleware — Starlette makes the last-added middleware the outermost.
122+
app.add_middleware(GZipMiddleware, minimum_size=1000)
123+
118124
# Helper function to get adalflow root path
def get_adalflow_default_root_path():
    """Return the ``.adalflow`` directory under the current user's home directory."""
    home_relative = os.path.join("~", ".adalflow")
    return os.path.expanduser(home_relative)
@@ -263,14 +269,21 @@ def _load_wiki_templates() -> Dict[str, Any]:
263269
async def get_wiki_templates():
    """Serve the wiki template configurations with a one-hour public cache header.

    Raises:
        HTTPException: status 500 when loading or serialising the templates fails.
    """
    try:
        templates = _load_wiki_templates()
        cache_headers = {"Cache-Control": "public, max-age=3600"}
        return JSONResponse(content=templates, headers=cache_headers)
    except Exception as e:
        logger.error(f"Error loading wiki templates: {e}")
        raise HTTPException(status_code=500, detail="Failed to load wiki templates")
270280

271281
@app.get("/lang/config")
async def get_lang_config():
    """Return the language configuration with a one-hour public cache header."""
    cache_headers = {"Cache-Control": "public, max-age=3600"}
    return JSONResponse(content=configs["lang_config"], headers=cache_headers)
274287

275288
@app.get("/auth/status")
276289
async def get_auth_status():
@@ -327,12 +340,15 @@ async def get_model_config():
327340
providers=providers,
328341
defaultProvider=default_provider
329342
)
330-
return config
343+
return JSONResponse(
344+
content=config.model_dump(),
345+
headers={"Cache-Control": "public, max-age=3600"}
346+
)
331347

332348
except Exception as e:
333349
logger.error(f"Error creating model configuration: {str(e)}")
334350
# Return some default configuration in case of error
335-
return ModelConfig(
351+
fallback = ModelConfig(
336352
providers=[
337353
Provider(
338354
id="google",
@@ -345,6 +361,10 @@ async def get_model_config():
345361
],
346362
defaultProvider="google"
347363
)
364+
return JSONResponse(
365+
content=fallback.model_dump(),
366+
headers={"Cache-Control": "public, max-age=3600"}
367+
)
348368

349369
@app.post("/export/wiki")
350370
async def export_wiki(request: WikiExportRequest):
@@ -411,27 +431,27 @@ async def get_local_repo_structure(path: str = Query(None, description="Path to
411431

412432
try:
413433
logger.info(f"Processing local repository at: {path}")
414-
file_tree_lines = []
415-
readme_content = ""
416-
417-
for root, dirs, files in os.walk(path):
418-
# Exclude hidden dirs/files and virtual envs
419-
dirs[:] = [d for d in dirs if not d.startswith('.') and d != '__pycache__' and d != 'node_modules' and d != '.venv']
420-
for file in files:
421-
if file.startswith('.') or file == '__init__.py' or file == '.DS_Store':
422-
continue
423-
rel_dir = os.path.relpath(root, path)
424-
rel_file = os.path.join(rel_dir, file) if rel_dir != '.' else file
425-
file_tree_lines.append(rel_file)
426-
# Find README.md (case-insensitive)
427-
if file.lower() == 'readme.md' and not readme_content:
428-
try:
429-
with open(os.path.join(root, file), 'r', encoding='utf-8') as f:
430-
readme_content = f.read()
431-
except Exception as e:
432-
logger.warning(f"Could not read README.md: {str(e)}")
433-
readme_content = ""
434434

435+
def _scan_local_repo(repo_path):
436+
file_tree_lines = []
437+
readme_content = ""
438+
for root, dirs, files in os.walk(repo_path):
439+
dirs[:] = [d for d in dirs if not d.startswith('.') and d != '__pycache__' and d != 'node_modules' and d != '.venv']
440+
for file in files:
441+
if file.startswith('.') or file == '__init__.py' or file == '.DS_Store':
442+
continue
443+
rel_dir = os.path.relpath(root, repo_path)
444+
rel_file = os.path.join(rel_dir, file) if rel_dir != '.' else file
445+
file_tree_lines.append(rel_file)
446+
if file.lower() == 'readme.md' and not readme_content:
447+
try:
448+
with open(os.path.join(root, file), 'r', encoding='utf-8') as f:
449+
readme_content = f.read()
450+
except Exception as e:
451+
readme_content = ""
452+
return file_tree_lines, readme_content
453+
454+
file_tree_lines, readme_content = await asyncio.to_thread(_scan_local_repo, path)
435455
file_tree_str = '\n'.join(sorted(file_tree_lines))
436456
return {"file_tree": file_tree_str, "readme": readme_content}
437457
except Exception as e:
@@ -642,19 +662,104 @@ async def parse_wiki_structure_endpoint(request: ParseStructureRequest):
642662
WIKI_CACHE_DIR = os.path.join(get_adalflow_default_root_path(), "wikicache")
643663
os.makedirs(WIKI_CACHE_DIR, exist_ok=True)
644664

665+
# --- Async File I/O Helpers ---
666+
667+
async def _read_json_async(path: str):
668+
"""Read and parse a JSON file without blocking the event loop."""
669+
def _read():
670+
with open(path, 'r', encoding='utf-8') as f:
671+
return json.load(f)
672+
return await asyncio.to_thread(_read)
673+
674+
async def _write_json_async(path: str, data, indent: int = 2):
675+
"""Write data as JSON to a file without blocking the event loop."""
676+
def _write():
677+
with open(path, 'w', encoding='utf-8') as f:
678+
json.dump(data, f, indent=indent, ensure_ascii=False)
679+
await asyncio.to_thread(_write)
680+
681+
async def _read_file_async(path: str) -> str:
682+
"""Read a text file without blocking the event loop."""
683+
def _read():
684+
with open(path, 'r', encoding='utf-8') as f:
685+
return f.read()
686+
return await asyncio.to_thread(_read)
687+
688+
# --- Projects Index Helpers ---
689+
690+
_PROJECTS_INDEX_PATH = os.path.join(WIKI_CACHE_DIR, "_index.json")
691+
692+
async def _update_projects_index(owner: str, repo: str, repo_type: str, language: str, filename: str):
    """Synchronise one entry of the projects index with the cache file on disk.

    Call after a wiki cache file has been saved or deleted. If ``filename``
    exists in ``WIKI_CACHE_DIR`` its entry is (re)written with the supplied
    metadata and the file's modification time in milliseconds; otherwise the
    entry is removed.

    This is best-effort: any failure is logged as a warning and never raised.

    Args:
        owner: Repository owner recorded in the entry.
        repo: Repository name recorded in the entry.
        repo_type: Repository type recorded in the entry.
        language: Wiki language recorded in the entry.
        filename: Base name of the cache file; used as the index key.
    """
    try:
        index = None
        if os.path.exists(_PROJECTS_INDEX_PATH):
            try:
                loaded = await _read_json_async(_PROJECTS_INDEX_PATH)
            except (OSError, ValueError) as e:
                logger.warning(f"Projects index unreadable, rebuilding from directory scan: {e}")
            else:
                if isinstance(loaded, dict):
                    index = loaded

        if index is None:
            # Starting from an empty dict would persist an index that lists only this
            # one project; because the listing endpoint trusts an existing index,
            # every cache file created before the index existed would vanish from it.
            index = await _rebuild_projects_index()

        file_path = os.path.join(WIKI_CACHE_DIR, filename)
        try:
            stats = await asyncio.to_thread(os.stat, file_path)
        except FileNotFoundError:
            # File was deleted — remove from index
            index.pop(filename, None)
        else:
            # Caller-supplied metadata is authoritative (filename parsing is lossy).
            index[filename] = {
                "owner": owner,
                "repo": repo,
                "repo_type": repo_type,
                "language": language,
                "submittedAt": int(stats.st_mtime * 1000),
            }

        await _write_json_async(_PROJECTS_INDEX_PATH, index)
    except Exception as e:
        logger.warning(f"Failed to update projects index: {e}")
716+
717+
async def _rebuild_projects_index() -> dict:
    """Rebuild the projects index from a full directory scan (fallback).

    Every file in ``WIKI_CACHE_DIR`` named
    ``deepwiki_cache_{repo_type}_{owner}_{repo}_{language}.json`` becomes one
    entry keyed by its filename. The rebuilt index is written to
    ``_PROJECTS_INDEX_PATH`` on a best-effort basis.

    Returns:
        Mapping of cache filename to a dict with ``owner``, ``repo``,
        ``repo_type``, ``language`` and ``submittedAt`` (file mtime in ms).
        Empty when the cache directory does not exist.
    """
    prefix = "deepwiki_cache_"
    suffix = ".json"

    index = {}
    if not os.path.exists(WIKI_CACHE_DIR):
        return index

    filenames = await asyncio.to_thread(os.listdir, WIKI_CACHE_DIR)
    for filename in filenames:
        if not (filename.startswith(prefix) and filename.endswith(suffix)):
            continue

        # Strip only the leading prefix and the trailing extension. str.replace
        # would also remove occurrences inside the repo name (e.g. "my.json-lib").
        parts = filename[len(prefix):-len(suffix)].split('_')
        if len(parts) < 4:
            logger.warning(f"Could not parse project details from filename: {filename}")
            continue

        file_path = os.path.join(WIKI_CACHE_DIR, filename)
        try:
            stats = await asyncio.to_thread(os.stat, file_path)
        except OSError as e:
            logger.error(f"Error processing file {file_path} during index rebuild: {e}")
            continue

        index[filename] = {
            "owner": parts[1],
            # The repo name may itself contain underscores; language is always last.
            "repo": "_".join(parts[2:-1]),
            "repo_type": parts[0],
            "language": parts[-1],
            "submittedAt": int(stats.st_mtime * 1000),
        }

    # Persist the rebuilt index; the in-memory result is still returned on failure.
    try:
        await _write_json_async(_PROJECTS_INDEX_PATH, index)
    except Exception as e:
        logger.warning(f"Failed to persist rebuilt projects index: {e}")
    return index
750+
645751
def get_wiki_cache_path(owner: str, repo: str, repo_type: str, language: str) -> str:
    """Build the path of the cache file for one repository/language pair in ``WIKI_CACHE_DIR``."""
    cache_name = "deepwiki_cache_{}_{}_{}_{}.json".format(repo_type, owner, repo, language)
    return os.path.join(WIKI_CACHE_DIR, cache_name)
649755

650756
async def read_wiki_cache(owner: str, repo: str, repo_type: str, language: str) -> Optional[WikiCacheData]:
651-
"""Reads wiki cache data from the file system."""
757+
"""Reads wiki cache data from the file system (non-blocking)."""
652758
cache_path = get_wiki_cache_path(owner, repo, repo_type, language)
653759
if os.path.exists(cache_path):
654760
try:
655-
with open(cache_path, 'r', encoding='utf-8') as f:
656-
data = json.load(f)
657-
return WikiCacheData(**data)
761+
data = await _read_json_async(cache_path)
762+
return WikiCacheData(**data)
658763
except Exception as e:
659764
logger.error(f"Error reading wiki cache from {cache_path}: {e}")
660765
return None
@@ -693,9 +798,15 @@ async def save_wiki_cache(data: WikiCacheRequest) -> bool:
693798

694799

695800
logger.info(f"Writing cache file to: {cache_path}")
696-
with open(cache_path, 'w', encoding='utf-8') as f:
697-
json.dump(payload.model_dump(), f, indent=2)
801+
await _write_json_async(cache_path, payload.model_dump())
698802
logger.info(f"Wiki cache successfully saved to {cache_path}")
803+
804+
# Update the lightweight projects index
805+
filename = os.path.basename(cache_path)
806+
await _update_projects_index(
807+
data.repo.owner, data.repo.repo, data.repo.type, data.language, filename
808+
)
809+
699810
return True
700811
except IOError as e:
701812
logger.error(f"IOError saving wiki cache to {cache_path}: {e.strerror} (errno: {e.errno})", exc_info=True)
@@ -708,13 +819,15 @@ async def save_wiki_cache(data: WikiCacheRequest) -> bool:
708819

709820
@app.get("/api/wiki_cache", response_model=Optional[WikiCacheData])
710821
async def get_cached_wiki(
822+
request: Request,
711823
owner: str = Query(..., description="Repository owner"),
712824
repo: str = Query(..., description="Repository name"),
713825
repo_type: str = Query(..., description="Repository type (e.g., github, gitlab)"),
714826
language: str = Query(..., description="Language of the wiki content")
715827
):
716828
"""
717829
Retrieves cached wiki data (structure and generated pages) for a repository.
830+
Supports ETag/If-None-Match for conditional caching.
718831
"""
719832
# Language validation
720833
supported_langs = configs["lang_config"]["supported_languages"]
@@ -724,10 +837,25 @@ async def get_cached_wiki(
724837
logger.info(f"Attempting to retrieve wiki cache for {owner}/{repo} ({repo_type}), lang: {language}")
725838
cached_data = await read_wiki_cache(owner, repo, repo_type, language)
726839
if cached_data:
727-
return cached_data
840+
# Compute ETag from serialized content
841+
data_json = json.dumps(cached_data.model_dump(), sort_keys=True, default=str)
842+
content_hash = hashlib.md5(data_json.encode()).hexdigest()
843+
etag = f'"{content_hash}"'
844+
845+
# Check If-None-Match header for conditional requests
846+
if_none_match = request.headers.get("if-none-match")
847+
if if_none_match and if_none_match == etag:
848+
return Response(status_code=304, headers={"ETag": etag})
849+
850+
return JSONResponse(
851+
content=cached_data.model_dump(),
852+
headers={
853+
"ETag": etag,
854+
"Cache-Control": "public, max-age=300",
855+
}
856+
)
728857
else:
729858
# Return 200 with null body if not found, as frontend expects this behavior
730-
# Or, raise HTTPException(status_code=404, detail="Wiki cache not found") if preferred
731859
logger.info(f"Wiki cache not found for {owner}/{repo} ({repo_type}), lang: {language}")
732860
return None
733861

@@ -775,8 +903,13 @@ async def delete_wiki_cache(
775903

776904
if os.path.exists(cache_path):
777905
try:
778-
os.remove(cache_path)
906+
await asyncio.to_thread(os.remove, cache_path)
779907
logger.info(f"Successfully deleted wiki cache: {cache_path}")
908+
909+
# Update the projects index to remove this entry
910+
filename = os.path.basename(cache_path)
911+
await _update_projects_index(owner, repo, repo_type, language, filename)
912+
780913
return {"message": f"Wiki cache for {owner}/{repo} ({language}) deleted successfully"}
781914
except Exception as e:
782915
logger.error(f"Error deleting wiki cache {cache_path}: {e}")
@@ -1029,8 +1162,7 @@ def generate_file_url(file_path: str) -> str:
10291162
try:
10301163
cache_dict = cached.model_dump()
10311164
cache_dict["generated_at"] = datetime.now().isoformat()
1032-
with open(cache_path, "w", encoding="utf-8") as f:
1033-
json.dump(cache_dict, f, indent=2)
1165+
await _write_json_async(cache_path, cache_dict)
10341166
logger.info(f"Cache updated after page regeneration: {cache_path}")
10351167
except Exception as e:
10361168
logger.error(f"Failed to update cache after regeneration: {e}")
@@ -1082,51 +1214,40 @@ async def root():
10821214
async def get_processed_projects():
10831215
"""
10841216
Lists all processed projects found in the wiki cache directory.
1085-
Projects are identified by files named like: deepwiki_cache_{repo_type}_{owner}_{repo}_{language}.json
1217+
Uses a lightweight _index.json file for fast lookups. Falls back to
1218+
a full directory scan if the index is missing or unreadable, then rebuilds it.
10861219
"""
1087-
project_entries: List[ProcessedProjectEntry] = []
1088-
# WIKI_CACHE_DIR is already defined globally in the file
1089-
10901220
try:
10911221
if not os.path.exists(WIKI_CACHE_DIR):
10921222
logger.info(f"Cache directory {WIKI_CACHE_DIR} not found. Returning empty list.")
10931223
return []
10941224

1095-
logger.info(f"Scanning for project cache files in: {WIKI_CACHE_DIR}")
1096-
filenames = await asyncio.to_thread(os.listdir, WIKI_CACHE_DIR) # Use asyncio.to_thread for os.listdir
1097-
1098-
for filename in filenames:
1099-
if filename.startswith("deepwiki_cache_") and filename.endswith(".json"):
1100-
file_path = os.path.join(WIKI_CACHE_DIR, filename)
1101-
try:
1102-
stats = await asyncio.to_thread(os.stat, file_path) # Use asyncio.to_thread for os.stat
1103-
parts = filename.replace("deepwiki_cache_", "").replace(".json", "").split('_')
1104-
1105-
# Expecting repo_type_owner_repo_language
1106-
# Example: deepwiki_cache_github_AsyncFuncAI_deepwiki-open_en.json
1107-
# parts = [github, AsyncFuncAI, deepwiki-open, en]
1108-
if len(parts) >= 4:
1109-
repo_type = parts[0]
1110-
owner = parts[1]
1111-
language = parts[-1] # language is the last part
1112-
repo = "_".join(parts[2:-1]) # repo can contain underscores
1113-
1114-
project_entries.append(
1115-
ProcessedProjectEntry(
1116-
id=filename,
1117-
owner=owner,
1118-
repo=repo,
1119-
name=f"{owner}/{repo}",
1120-
repo_type=repo_type,
1121-
submittedAt=int(stats.st_mtime * 1000), # Convert to milliseconds
1122-
language=language
1123-
)
1124-
)
1125-
else:
1126-
logger.warning(f"Could not parse project details from filename: {filename}")
1127-
except Exception as e:
1128-
logger.error(f"Error processing file {file_path}: {e}")
1129-
continue # Skip this file on error
1225+
# Try reading the pre-built index first
1226+
index: Optional[dict] = None
1227+
if os.path.exists(_PROJECTS_INDEX_PATH):
1228+
try:
1229+
index = await _read_json_async(_PROJECTS_INDEX_PATH)
1230+
except Exception as e:
1231+
logger.warning(f"Could not read projects index, will rebuild: {e}")
1232+
1233+
# Fallback: rebuild from directory scan
1234+
if index is None:
1235+
logger.info("Projects index not found or unreadable — rebuilding from directory scan.")
1236+
index = await _rebuild_projects_index()
1237+
1238+
project_entries: List[ProcessedProjectEntry] = []
1239+
for filename, meta in index.items():
1240+
project_entries.append(
1241+
ProcessedProjectEntry(
1242+
id=filename,
1243+
owner=meta["owner"],
1244+
repo=meta["repo"],
1245+
name=f"{meta['owner']}/{meta['repo']}",
1246+
repo_type=meta["repo_type"],
1247+
submittedAt=meta["submittedAt"],
1248+
language=meta["language"],
1249+
)
1250+
)
11301251

11311252
# Sort by most recent first
11321253
project_entries.sort(key=lambda p: p.submittedAt, reverse=True)

0 commit comments

Comments
 (0)