55from fastapi import FastAPI , HTTPException , Query , Request , WebSocket
66from fastapi .middleware .cors import CORSMiddleware
77from fastapi .responses import JSONResponse , Response
8+ from starlette .middleware .gzip import GZipMiddleware
89from typing import List , Optional , Dict , Any , Literal
910import json
11+ import hashlib
1012from datetime import datetime
1113from pydantic import BaseModel , Field
1214import google .generativeai as genai
@@ -115,6 +117,10 @@ def _custom_rate_limit_exceeded_handler(request: Request, exc: RateLimitExceeded
115117 allow_headers = ["*" ], # Allows all headers
116118)
117119
# GZip compression for responses >= 1KB. Note: Starlette's add_middleware()
# prepends to the middleware stack, so GZip (added after CORS) is the OUTER
# layer — it compresses responses after CORS headers have been applied.
122+ app .add_middleware (GZipMiddleware , minimum_size = 1000 )
123+
118124# Helper function to get adalflow root path
def get_adalflow_default_root_path():
    """Return the default adalflow root directory, i.e. ``~/.adalflow`` expanded."""
    home_dir = os.path.expanduser("~")
    return os.path.join(home_dir, ".adalflow")
@@ -263,14 +269,21 @@ def _load_wiki_templates() -> Dict[str, Any]:
async def get_wiki_templates():
    """Return available wiki template configurations.

    Responses carry a one-hour public Cache-Control header since templates
    change rarely. Any loading failure is logged and surfaced as HTTP 500.
    """
    try:
        templates = _load_wiki_templates()
        return JSONResponse(
            content=templates,
            headers={"Cache-Control": "public, max-age=3600"},
        )
    except Exception as e:
        logger.error(f"Error loading wiki templates: {e}")
        raise HTTPException(status_code=500, detail="Failed to load wiki templates")
270280
@app.get("/lang/config")
async def get_lang_config():
    """Expose the language configuration with a one-hour public cache header."""
    cache_headers = {"Cache-Control": "public, max-age=3600"}
    return JSONResponse(content=configs["lang_config"], headers=cache_headers)
274287
275288@app .get ("/auth/status" )
276289async def get_auth_status ():
@@ -327,12 +340,15 @@ async def get_model_config():
327340 providers = providers ,
328341 defaultProvider = default_provider
329342 )
330- return config
343+ return JSONResponse (
344+ content = config .model_dump (),
345+ headers = {"Cache-Control" : "public, max-age=3600" }
346+ )
331347
332348 except Exception as e :
333349 logger .error (f"Error creating model configuration: { str (e )} " )
334350 # Return some default configuration in case of error
335- return ModelConfig (
351+ fallback = ModelConfig (
336352 providers = [
337353 Provider (
338354 id = "google" ,
@@ -345,6 +361,10 @@ async def get_model_config():
345361 ],
346362 defaultProvider = "google"
347363 )
364+ return JSONResponse (
365+ content = fallback .model_dump (),
366+ headers = {"Cache-Control" : "public, max-age=3600" }
367+ )
348368
349369@app .post ("/export/wiki" )
350370async def export_wiki (request : WikiExportRequest ):
@@ -411,27 +431,27 @@ async def get_local_repo_structure(path: str = Query(None, description="Path to
411431
412432 try :
413433 logger .info (f"Processing local repository at: { path } " )
414- file_tree_lines = []
415- readme_content = ""
416-
417- for root , dirs , files in os .walk (path ):
418- # Exclude hidden dirs/files and virtual envs
419- dirs [:] = [d for d in dirs if not d .startswith ('.' ) and d != '__pycache__' and d != 'node_modules' and d != '.venv' ]
420- for file in files :
421- if file .startswith ('.' ) or file == '__init__.py' or file == '.DS_Store' :
422- continue
423- rel_dir = os .path .relpath (root , path )
424- rel_file = os .path .join (rel_dir , file ) if rel_dir != '.' else file
425- file_tree_lines .append (rel_file )
426- # Find README.md (case-insensitive)
427- if file .lower () == 'readme.md' and not readme_content :
428- try :
429- with open (os .path .join (root , file ), 'r' , encoding = 'utf-8' ) as f :
430- readme_content = f .read ()
431- except Exception as e :
432- logger .warning (f"Could not read README.md: { str (e )} " )
433- readme_content = ""
434434
435+ def _scan_local_repo (repo_path ):
436+ file_tree_lines = []
437+ readme_content = ""
438+ for root , dirs , files in os .walk (repo_path ):
439+ dirs [:] = [d for d in dirs if not d .startswith ('.' ) and d != '__pycache__' and d != 'node_modules' and d != '.venv' ]
440+ for file in files :
441+ if file .startswith ('.' ) or file == '__init__.py' or file == '.DS_Store' :
442+ continue
443+ rel_dir = os .path .relpath (root , repo_path )
444+ rel_file = os .path .join (rel_dir , file ) if rel_dir != '.' else file
445+ file_tree_lines .append (rel_file )
446+ if file .lower () == 'readme.md' and not readme_content :
447+ try :
448+ with open (os .path .join (root , file ), 'r' , encoding = 'utf-8' ) as f :
449+ readme_content = f .read ()
450+ except Exception as e :
451+ readme_content = ""
452+ return file_tree_lines , readme_content
453+
454+ file_tree_lines , readme_content = await asyncio .to_thread (_scan_local_repo , path )
435455 file_tree_str = '\n ' .join (sorted (file_tree_lines ))
436456 return {"file_tree" : file_tree_str , "readme" : readme_content }
437457 except Exception as e :
@@ -642,19 +662,104 @@ async def parse_wiki_structure_endpoint(request: ParseStructureRequest):
# Directory where generated wiki caches are persisted; created at import time.
WIKI_CACHE_DIR = os.path.join(get_adalflow_default_root_path(), "wikicache")
os.makedirs(WIKI_CACHE_DIR, exist_ok=True)
644664
665+ # --- Async File I/O Helpers ---
666+
async def _read_json_async(path: str):
    """Load and deserialize a JSON file off the event loop.

    Both the disk read and the parse run in a worker thread via
    ``asyncio.to_thread`` so concurrent requests are never stalled.
    """
    def _load_from_disk():
        with open(path, 'r', encoding='utf-8') as handle:
            return json.loads(handle.read())
    return await asyncio.to_thread(_load_from_disk)
673+
async def _write_json_async(path: str, data, indent: int = 2):
    """Serialize *data* as JSON and write it to *path* off the event loop."""
    def _dump_to_disk():
        serialized = json.dumps(data, indent=indent, ensure_ascii=False)
        with open(path, 'w', encoding='utf-8') as handle:
            handle.write(serialized)
    await asyncio.to_thread(_dump_to_disk)
680+
async def _read_file_async(path: str) -> str:
    """Return the full text contents of *path* without blocking the event loop."""
    def _slurp():
        with open(path, 'r', encoding='utf-8') as handle:
            return handle.read()
    return await asyncio.to_thread(_slurp)
687+
# --- Projects Index Helpers ---

# Lightweight index of processed projects keyed by cache filename; avoids a
# full directory rescan on every listing request.
_PROJECTS_INDEX_PATH = os.path.join(WIKI_CACHE_DIR, "_index.json")
691+
async def _update_projects_index(owner: str, repo: str, repo_type: str, language: str, filename: str):
    """Keep the lightweight projects index in sync after a cache save/delete.

    Best-effort: any failure is logged as a warning and never propagates,
    because the index can always be rebuilt from a directory scan.
    """
    try:
        index = (
            await _read_json_async(_PROJECTS_INDEX_PATH)
            if os.path.exists(_PROJECTS_INDEX_PATH)
            else {}
        )

        cache_file = os.path.join(WIKI_CACHE_DIR, filename)
        if not os.path.exists(cache_file):
            # Cache file is gone (deleted) — drop its entry from the index.
            index.pop(filename, None)
        else:
            stats = await asyncio.to_thread(os.stat, cache_file)
            index[filename] = {
                "owner": owner,
                "repo": repo,
                "repo_type": repo_type,
                "language": language,
                "submittedAt": int(stats.st_mtime * 1000),
            }

        await _write_json_async(_PROJECTS_INDEX_PATH, index)
    except Exception as e:
        logger.warning(f"Failed to update projects index: {e}")
716+
async def _rebuild_projects_index() -> dict:
    """Rebuild the projects index from a full directory scan (fallback).

    Scans WIKI_CACHE_DIR for files named
    ``deepwiki_cache_{repo_type}_{owner}_{repo}_{language}.json``, parses the
    metadata from each filename, and persists the rebuilt index (best-effort).

    Returns:
        Mapping of cache filename -> metadata dict with keys owner, repo,
        repo_type, language, and submittedAt (epoch milliseconds).
    """
    index = {}
    if not os.path.exists(WIKI_CACHE_DIR):
        return index
    filenames = await asyncio.to_thread(os.listdir, WIKI_CACHE_DIR)
    for filename in filenames:
        if filename.startswith("deepwiki_cache_") and filename.endswith(".json"):
            file_path = os.path.join(WIKI_CACHE_DIR, filename)
            try:
                stats = await asyncio.to_thread(os.stat, file_path)
                # Use removeprefix/removesuffix rather than str.replace():
                # replace() strips EVERY occurrence of the substring, so a repo
                # whose name embeds "deepwiki_cache_" or ".json" would be
                # silently corrupted during parsing.
                stem = filename.removeprefix("deepwiki_cache_").removesuffix(".json")
                parts = stem.split('_')
                if len(parts) >= 4:
                    repo_type = parts[0]
                    owner = parts[1]
                    language = parts[-1]
                    # Repo names may themselves contain underscores.
                    repo = "_".join(parts[2:-1])
                    index[filename] = {
                        "owner": owner,
                        "repo": repo,
                        "repo_type": repo_type,
                        "language": language,
                        "submittedAt": int(stats.st_mtime * 1000),
                    }
            except Exception as e:
                logger.error(f"Error processing file {file_path} during index rebuild: {e}")
                continue
    # Persist the rebuilt index so subsequent lookups are fast (best-effort).
    try:
        await _write_json_async(_PROJECTS_INDEX_PATH, index)
    except Exception:
        pass
    return index
750+
def get_wiki_cache_path(owner: str, repo: str, repo_type: str, language: str) -> str:
    """Generates the file path for a given wiki cache.

    All four components originate from request query parameters and flow into
    a path that is later opened and deleted, so they are sanitized to prevent
    path traversal out of WIKI_CACHE_DIR (e.g. an owner of "../../etc").
    Legitimate identifiers contain no path separators and are unaffected.
    """
    def _sanitize(component: str) -> str:
        # Neutralize path separators and parent-directory references.
        return component.replace("/", "_").replace("\\", "_").replace("..", "_")

    filename = (
        f"deepwiki_cache_{_sanitize(repo_type)}_{_sanitize(owner)}_"
        f"{_sanitize(repo)}_{_sanitize(language)}.json"
    )
    return os.path.join(WIKI_CACHE_DIR, filename)
649755
async def read_wiki_cache(owner: str, repo: str, repo_type: str, language: str) -> Optional[WikiCacheData]:
    """Reads wiki cache data from the file system (non-blocking).

    Returns None when no cache file exists or when the file cannot be
    read/parsed (the error is logged).
    """
    cache_path = get_wiki_cache_path(owner, repo, repo_type, language)
    if not os.path.exists(cache_path):
        return None
    try:
        payload = await _read_json_async(cache_path)
        return WikiCacheData(**payload)
    except Exception as e:
        logger.error(f"Error reading wiki cache from {cache_path}: {e}")
        return None
@@ -693,9 +798,15 @@ async def save_wiki_cache(data: WikiCacheRequest) -> bool:
693798
694799
695800 logger .info (f"Writing cache file to: { cache_path } " )
696- with open (cache_path , 'w' , encoding = 'utf-8' ) as f :
697- json .dump (payload .model_dump (), f , indent = 2 )
801+ await _write_json_async (cache_path , payload .model_dump ())
698802 logger .info (f"Wiki cache successfully saved to { cache_path } " )
803+
804+ # Update the lightweight projects index
805+ filename = os .path .basename (cache_path )
806+ await _update_projects_index (
807+ data .repo .owner , data .repo .repo , data .repo .type , data .language , filename
808+ )
809+
699810 return True
700811 except IOError as e :
701812 logger .error (f"IOError saving wiki cache to { cache_path } : { e .strerror } (errno: { e .errno } )" , exc_info = True )
@@ -708,13 +819,15 @@ async def save_wiki_cache(data: WikiCacheRequest) -> bool:
708819
709820@app .get ("/api/wiki_cache" , response_model = Optional [WikiCacheData ])
710821async def get_cached_wiki (
822+ request : Request ,
711823 owner : str = Query (..., description = "Repository owner" ),
712824 repo : str = Query (..., description = "Repository name" ),
713825 repo_type : str = Query (..., description = "Repository type (e.g., github, gitlab)" ),
714826 language : str = Query (..., description = "Language of the wiki content" )
715827):
716828 """
717829 Retrieves cached wiki data (structure and generated pages) for a repository.
830+ Supports ETag/If-None-Match for conditional caching.
718831 """
719832 # Language validation
720833 supported_langs = configs ["lang_config" ]["supported_languages" ]
@@ -724,10 +837,25 @@ async def get_cached_wiki(
724837 logger .info (f"Attempting to retrieve wiki cache for { owner } /{ repo } ({ repo_type } ), lang: { language } " )
725838 cached_data = await read_wiki_cache (owner , repo , repo_type , language )
726839 if cached_data :
727- return cached_data
840+ # Compute ETag from serialized content
841+ data_json = json .dumps (cached_data .model_dump (), sort_keys = True , default = str )
842+ content_hash = hashlib .md5 (data_json .encode ()).hexdigest ()
843+ etag = f'"{ content_hash } "'
844+
845+ # Check If-None-Match header for conditional requests
846+ if_none_match = request .headers .get ("if-none-match" )
847+ if if_none_match and if_none_match == etag :
848+ return Response (status_code = 304 , headers = {"ETag" : etag })
849+
850+ return JSONResponse (
851+ content = cached_data .model_dump (),
852+ headers = {
853+ "ETag" : etag ,
854+ "Cache-Control" : "public, max-age=300" ,
855+ }
856+ )
728857 else :
729858 # Return 200 with null body if not found, as frontend expects this behavior
730- # Or, raise HTTPException(status_code=404, detail="Wiki cache not found") if preferred
731859 logger .info (f"Wiki cache not found for { owner } /{ repo } ({ repo_type } ), lang: { language } " )
732860 return None
733861
@@ -775,8 +903,13 @@ async def delete_wiki_cache(
775903
776904 if os .path .exists (cache_path ):
777905 try :
778- os .remove ( cache_path )
906+ await asyncio . to_thread ( os .remove , cache_path )
779907 logger .info (f"Successfully deleted wiki cache: { cache_path } " )
908+
909+ # Update the projects index to remove this entry
910+ filename = os .path .basename (cache_path )
911+ await _update_projects_index (owner , repo , repo_type , language , filename )
912+
780913 return {"message" : f"Wiki cache for { owner } /{ repo } ({ language } ) deleted successfully" }
781914 except Exception as e :
782915 logger .error (f"Error deleting wiki cache { cache_path } : { e } " )
@@ -1029,8 +1162,7 @@ def generate_file_url(file_path: str) -> str:
10291162 try :
10301163 cache_dict = cached .model_dump ()
10311164 cache_dict ["generated_at" ] = datetime .now ().isoformat ()
1032- with open (cache_path , "w" , encoding = "utf-8" ) as f :
1033- json .dump (cache_dict , f , indent = 2 )
1165+ await _write_json_async (cache_path , cache_dict )
10341166 logger .info (f"Cache updated after page regeneration: { cache_path } " )
10351167 except Exception as e :
10361168 logger .error (f"Failed to update cache after regeneration: { e } " )
@@ -1082,51 +1214,40 @@ async def root():
10821214async def get_processed_projects ():
10831215 """
10841216 Lists all processed projects found in the wiki cache directory.
1085- Projects are identified by files named like: deepwiki_cache_{repo_type}_{owner}_{repo}_{language}.json
1217+ Uses a lightweight _index.json file for fast lookups. Falls back to
1218+ a full directory scan if the index is missing or stale, then rebuilds it.
10861219 """
1087- project_entries : List [ProcessedProjectEntry ] = []
1088- # WIKI_CACHE_DIR is already defined globally in the file
1089-
10901220 try :
10911221 if not os .path .exists (WIKI_CACHE_DIR ):
10921222 logger .info (f"Cache directory { WIKI_CACHE_DIR } not found. Returning empty list." )
10931223 return []
10941224
1095- logger .info (f"Scanning for project cache files in: { WIKI_CACHE_DIR } " )
1096- filenames = await asyncio .to_thread (os .listdir , WIKI_CACHE_DIR ) # Use asyncio.to_thread for os.listdir
1097-
1098- for filename in filenames :
1099- if filename .startswith ("deepwiki_cache_" ) and filename .endswith (".json" ):
1100- file_path = os .path .join (WIKI_CACHE_DIR , filename )
1101- try :
1102- stats = await asyncio .to_thread (os .stat , file_path ) # Use asyncio.to_thread for os.stat
1103- parts = filename .replace ("deepwiki_cache_" , "" ).replace (".json" , "" ).split ('_' )
1104-
1105- # Expecting repo_type_owner_repo_language
1106- # Example: deepwiki_cache_github_AsyncFuncAI_deepwiki-open_en.json
1107- # parts = [github, AsyncFuncAI, deepwiki-open, en]
1108- if len (parts ) >= 4 :
1109- repo_type = parts [0 ]
1110- owner = parts [1 ]
1111- language = parts [- 1 ] # language is the last part
1112- repo = "_" .join (parts [2 :- 1 ]) # repo can contain underscores
1113-
1114- project_entries .append (
1115- ProcessedProjectEntry (
1116- id = filename ,
1117- owner = owner ,
1118- repo = repo ,
1119- name = f"{ owner } /{ repo } " ,
1120- repo_type = repo_type ,
1121- submittedAt = int (stats .st_mtime * 1000 ), # Convert to milliseconds
1122- language = language
1123- )
1124- )
1125- else :
1126- logger .warning (f"Could not parse project details from filename: { filename } " )
1127- except Exception as e :
1128- logger .error (f"Error processing file { file_path } : { e } " )
1129- continue # Skip this file on error
1225+ # Try reading the pre-built index first
1226+ index : Optional [dict ] = None
1227+ if os .path .exists (_PROJECTS_INDEX_PATH ):
1228+ try :
1229+ index = await _read_json_async (_PROJECTS_INDEX_PATH )
1230+ except Exception as e :
1231+ logger .warning (f"Could not read projects index, will rebuild: { e } " )
1232+
1233+ # Fallback: rebuild from directory scan
1234+ if index is None :
1235+ logger .info ("Projects index not found or unreadable — rebuilding from directory scan." )
1236+ index = await _rebuild_projects_index ()
1237+
1238+ project_entries : List [ProcessedProjectEntry ] = []
1239+ for filename , meta in index .items ():
1240+ project_entries .append (
1241+ ProcessedProjectEntry (
1242+ id = filename ,
1243+ owner = meta ["owner" ],
1244+ repo = meta ["repo" ],
1245+ name = f"{ meta ['owner' ]} /{ meta ['repo' ]} " ,
1246+ repo_type = meta ["repo_type" ],
1247+ submittedAt = meta ["submittedAt" ],
1248+ language = meta ["language" ],
1249+ )
1250+ )
11301251
11311252 # Sort by most recent first
11321253 project_entries .sort (key = lambda p : p .submittedAt , reverse = True )
0 commit comments