Skip to content

Commit b836664

Browse files
shantanu patil and claude
authored and committed
Add Phase 9: robust large-repo ingestion with retry, backoff, and filtering
- ingest.py: add --delay, --retry-failed, --max-retries for reliable page generation
- ingest.py: add --max-files, --include-dirs, --exclude-dirs for large repo filtering
- google_embedder_client: exponential backoff (1-16s + jitter) on 429/503 errors
- google_embedder_client: 200ms inter-batch cooldown to respect rate limits
- prompts.py: improve diagram quality for large repos (3-5 node simplified diagrams)
- data_pipeline/rag/websocket_wiki: propagate max_files filter through full pipeline
- useWikiGeneration.ts: matching frontend prompt improvements
- repos.json: trim to claude-code, gemini-cli, vigilant-sanderson

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent b86b94b commit b836664

8 files changed

Lines changed: 350 additions & 73 deletions

File tree

api/data_pipeline.py

Lines changed: 20 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -227,9 +227,10 @@ def download_repo(repo_url: str, local_path: str, repo_type: str = None, access_
227227
# Alias for backward compatibility
228228
download_github_repo = download_repo
229229

230-
def read_all_documents(path: str, embedder_type: str = None, is_ollama_embedder: bool = None,
230+
def read_all_documents(path: str, embedder_type: str = None, is_ollama_embedder: bool = None,
231231
excluded_dirs: List[str] = None, excluded_files: List[str] = None,
232-
included_dirs: List[str] = None, included_files: List[str] = None):
232+
included_dirs: List[str] = None, included_files: List[str] = None,
233+
max_files: int = 0):
233234
"""
234235
Recursively reads all documents in a directory and its subdirectories.
235236
@@ -247,6 +248,8 @@ def read_all_documents(path: str, embedder_type: str = None, is_ollama_embedder:
247248
When provided, only files in these directories will be processed.
248249
included_files (List[str], optional): List of file patterns to include exclusively.
249250
When provided, only files matching these patterns will be processed.
251+
max_files (int, optional): Maximum number of files to process (0 = unlimited).
252+
When set, only the top N files by priority are included after sorting.
250253
251254
Returns:
252255
list: A list of Document objects with metadata.
@@ -398,6 +401,11 @@ def should_process_file(file_path: str, use_inclusion: bool, included_dirs: List
398401
candidate_files.sort(key=lambda fp: _file_priority(os.path.relpath(fp, path)))
399402
logger.info(f"Found {len(candidate_files)} candidate files after filtering (sorted by priority)")
400403

404+
# Apply max_files limit if set (keep only top N by priority)
405+
if max_files > 0 and len(candidate_files) > max_files:
406+
logger.info(f"Applying max_files limit: {max_files} (from {len(candidate_files)} candidates)")
407+
candidate_files = candidate_files[:max_files]
408+
401409
# Process files with cumulative token budget enforcement
402410
cumulative_tokens = 0
403411

@@ -823,7 +831,8 @@ def __init__(self):
823831
def prepare_database(self, repo_url_or_path: str, repo_type: str = None, access_token: str = None,
824832
embedder_type: str = None, is_ollama_embedder: bool = None,
825833
excluded_dirs: List[str] = None, excluded_files: List[str] = None,
826-
included_dirs: List[str] = None, included_files: List[str] = None) -> List[Document]:
834+
included_dirs: List[str] = None, included_files: List[str] = None,
835+
max_files: int = 0) -> List[Document]:
827836
"""
828837
Create a new database from the repository.
829838
@@ -839,6 +848,7 @@ def prepare_database(self, repo_url_or_path: str, repo_type: str = None, access_
839848
excluded_files (List[str], optional): List of file patterns to exclude from processing
840849
included_dirs (List[str], optional): List of directories to include exclusively
841850
included_files (List[str], optional): List of file patterns to include exclusively
851+
max_files (int, optional): Maximum number of files to process (0 = unlimited)
842852
843853
Returns:
844854
List[Document]: List of Document objects
@@ -850,7 +860,7 @@ def prepare_database(self, repo_url_or_path: str, repo_type: str = None, access_
850860
self.reset_database()
851861
self._create_repo(repo_url_or_path, repo_type, access_token)
852862
return self.prepare_db_index(embedder_type=embedder_type, excluded_dirs=excluded_dirs, excluded_files=excluded_files,
853-
included_dirs=included_dirs, included_files=included_files)
863+
included_dirs=included_dirs, included_files=included_files, max_files=max_files)
854864

855865
def reset_database(self):
856866
"""
@@ -929,9 +939,10 @@ def _create_repo(self, repo_url_or_path: str, repo_type: str = None, access_toke
929939
logger.error(f"Failed to create repository structure: {e}")
930940
raise
931941

932-
def prepare_db_index(self, embedder_type: str = None, is_ollama_embedder: bool = None,
942+
def prepare_db_index(self, embedder_type: str = None, is_ollama_embedder: bool = None,
933943
excluded_dirs: List[str] = None, excluded_files: List[str] = None,
934-
included_dirs: List[str] = None, included_files: List[str] = None) -> List[Document]:
944+
included_dirs: List[str] = None, included_files: List[str] = None,
945+
max_files: int = 0) -> List[Document]:
935946
"""
936947
Prepare the indexed database for the repository.
937948
@@ -944,6 +955,7 @@ def prepare_db_index(self, embedder_type: str = None, is_ollama_embedder: bool =
944955
excluded_files (List[str], optional): List of file patterns to exclude from processing
945956
included_dirs (List[str], optional): List of directories to include exclusively
946957
included_files (List[str], optional): List of file patterns to include exclusively
958+
max_files (int, optional): Maximum number of files to process (0 = unlimited)
947959
948960
Returns:
949961
List[Document]: List of Document objects
@@ -1003,7 +1015,8 @@ def _embedding_vector_length(doc: Document) -> int:
10031015
excluded_dirs=excluded_dirs,
10041016
excluded_files=excluded_files,
10051017
included_dirs=included_dirs,
1006-
included_files=included_files
1018+
included_files=included_files,
1019+
max_files=max_files,
10071020
)
10081021
self.db = transform_documents_and_save_to_db(
10091022
documents, self.repo_paths["save_db_file"], embedder_type=embedder_type

api/google_embedder_client.py

Lines changed: 101 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,8 @@
22

33
import os
44
import logging
5-
import backoff
5+
import random
6+
import time
67
from typing import Dict, Any, Optional, List, Sequence
78

89
from adalflow.core.model_client import ModelClient
@@ -54,16 +55,21 @@ def __init__(
5455
self,
5556
api_key: Optional[str] = None,
5657
env_api_key_name: str = "GOOGLE_API_KEY",
58+
inter_batch_delay: float = 0.2,
5759
):
5860
"""Initialize Google AI Embeddings client.
59-
61+
6062
Args:
6163
api_key: Google AI API key. If not provided, uses environment variable.
6264
env_api_key_name: Name of environment variable containing API key.
65+
inter_batch_delay: Seconds to sleep after each successful embedding
66+
API call to avoid burst-hitting rate limits (default: 0.2s).
67+
Set to 0 to disable.
6368
"""
6469
super().__init__()
6570
self._api_key = api_key
6671
self._env_api_key_name = env_api_key_name
72+
self._inter_batch_delay = inter_batch_delay
6773
self._initialize_client()
6874

6975
def _initialize_client(self):
@@ -205,27 +211,57 @@ def convert_inputs_to_api_kwargs(
205211

206212
return final_model_kwargs
207213

208-
@backoff.on_exception(
209-
backoff.expo,
210-
(Exception,), # Google AI may raise various exceptions
211-
max_time=5,
212-
)
214+
# Retry configuration for rate-limit and transient errors
215+
_MAX_RETRIES = 5
216+
_BASE_DELAY = 1.0 # seconds
217+
_MAX_DELAY = 16.0 # seconds (cap for exponential backoff)
218+
_JITTER_MAX = 1.0 # max random jitter in seconds
219+
220+
@staticmethod
221+
def _is_retryable(exc: Exception) -> bool:
222+
"""Return True if the exception indicates a retryable error (429 / 503)."""
223+
exc_str = str(exc).lower()
224+
# google.api_core.exceptions.ResourceExhausted (429)
225+
if "resourceexhausted" in type(exc).__name__.lower():
226+
return True
227+
# google.api_core.exceptions.ServiceUnavailable (503)
228+
if "serviceunavailable" in type(exc).__name__.lower():
229+
return True
230+
# Catch by HTTP status code mentions in the message
231+
if "429" in exc_str or "resource exhausted" in exc_str:
232+
return True
233+
if "503" in exc_str or "service unavailable" in exc_str:
234+
return True
235+
# google.generativeai may raise a generic exception wrapping these
236+
if hasattr(exc, "code"):
237+
code = getattr(exc, "code", None)
238+
if code in (429, 503):
239+
return True
240+
if hasattr(exc, "status_code"):
241+
status = getattr(exc, "status_code", None)
242+
if status in (429, 503):
243+
return True
244+
return False
245+
213246
def call(self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINED):
214-
"""Call Google AI embedding API.
215-
247+
"""Call Google AI embedding API with retry + exponential backoff.
248+
249+
Retries on 429 (ResourceExhausted) and 503 (ServiceUnavailable) errors
250+
with exponential backoff: 1s, 2s, 4s, 8s, 16s plus random jitter.
251+
216252
Args:
217253
api_kwargs: API parameters
218254
model_type: Should be ModelType.EMBEDDER
219-
255+
220256
Returns:
221257
Google AI embedding response
222258
"""
223259
if model_type != ModelType.EMBEDDER:
224260
raise ValueError(f"GoogleEmbedderClient only supports EMBEDDER model type")
225-
261+
226262
# DEBUG LOGGING (Simplified)
227263
log.info(f"DEBUG: GoogleEmbedderClient.call received api_kwargs keys: {list(api_kwargs.keys())}")
228-
264+
229265
safe_log_kwargs = {k: v for k, v in api_kwargs.items() if k not in {"content", "contents"}}
230266
if "content" in api_kwargs:
231267
safe_log_kwargs["content_chars"] = len(str(api_kwargs.get("content", "")))
@@ -236,28 +272,59 @@ def call(self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINE
236272
except Exception:
237273
safe_log_kwargs["contents_count"] = None
238274
log.info("Google AI Embeddings call kwargs (sanitized): %s", safe_log_kwargs)
239-
240-
try:
241-
# Use embed_content for single text or batch embedding
242-
# CRITICAL FIX: Do not modify api_kwargs in place as it breaks backoff retries!
243-
call_kwargs = api_kwargs.copy()
244-
245-
if "content" in call_kwargs:
246-
# Single embedding
247-
response = genai.embed_content(**call_kwargs)
248-
elif "contents" in call_kwargs:
249-
# Batch embedding - Google AI supports batch natively
250-
contents = call_kwargs.pop("contents")
251-
# pass as 'content' argument which handles both single and batch in newer SDKs
252-
response = genai.embed_content(content=contents, **call_kwargs)
253-
else:
254-
raise ValueError(f"Either 'content' or 'contents' must be provided. Got kwargs: {list(api_kwargs.keys())}")
255-
256-
return response
257-
258-
except Exception as e:
259-
log.error(f"Error calling Google AI Embeddings API: {e}")
260-
raise
275+
276+
last_exception: Optional[Exception] = None
277+
278+
for attempt in range(self._MAX_RETRIES + 1):
279+
try:
280+
# CRITICAL FIX: Do not modify api_kwargs in place as it breaks retries!
281+
call_kwargs = api_kwargs.copy()
282+
283+
if "content" in call_kwargs:
284+
# Single embedding
285+
response = genai.embed_content(**call_kwargs)
286+
elif "contents" in call_kwargs:
287+
# Batch embedding - Google AI supports batch natively
288+
contents = call_kwargs.pop("contents")
289+
# pass as 'content' argument which handles both single and batch
290+
response = genai.embed_content(content=contents, **call_kwargs)
291+
else:
292+
raise ValueError(
293+
f"Either 'content' or 'contents' must be provided. "
294+
f"Got kwargs: {list(api_kwargs.keys())}"
295+
)
296+
297+
# Inter-batch cooldown to avoid burst-hitting rate limits
298+
if self._inter_batch_delay > 0:
299+
time.sleep(self._inter_batch_delay)
300+
301+
return response
302+
303+
except Exception as e:
304+
last_exception = e
305+
306+
if not self._is_retryable(e) or attempt >= self._MAX_RETRIES:
307+
log.error(
308+
"Google AI Embeddings API call failed (attempt %d/%d, non-retryable or max retries): %s",
309+
attempt + 1, self._MAX_RETRIES + 1, e,
310+
)
311+
raise
312+
313+
# Exponential backoff with jitter
314+
delay = min(self._BASE_DELAY * (2 ** attempt), self._MAX_DELAY)
315+
jitter = random.uniform(0, self._JITTER_MAX)
316+
sleep_time = delay + jitter
317+
318+
log.warning(
319+
"Google AI Embeddings API returned retryable error (attempt %d/%d): %s. "
320+
"Retrying in %.1fs ...",
321+
attempt + 1, self._MAX_RETRIES + 1, e, sleep_time,
322+
)
323+
time.sleep(sleep_time)
324+
325+
# Should not be reached, but just in case
326+
if last_exception:
327+
raise last_exception
261328

262329
async def acall(self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINED):
263330
"""Async call to Google AI embedding API.

api/prompts.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -248,14 +248,21 @@
248248
- The JSON must be valid — if you are unsure, omit it rather than produce invalid JSON
249249
250250
Rules for "simplifiedMermaidSource" (the simplified overview diagram):
251-
- MUST contain a valid Mermaid diagram with MAXIMUM 5-8 nodes
251+
- MUST contain a valid Mermaid diagram with MAXIMUM 3-5 nodes showing major subsystems
252252
- Show ONLY the highest-level architectural components (think "executive summary")
253253
- Use clear, short labels (2-4 words each, e.g., "User Interface", "API Layer", "Database")
254254
- Use simple relationships WITHOUT detailed edge labels (just arrows, no labels)
255255
- Collapse related sub-components into a single node (e.g., merge "Auth Service" + "User Service" into "Backend Services")
256256
- The simplified diagram must be immediately understandable at a glance by a non-technical person
257257
- If the full diagram already has 8 or fewer nodes, simplifiedMermaidSource can match mermaidSource
258258
- Do NOT include implementation details, file names, or technical jargon in the simplified version
259+
260+
Diagram complexity guidelines:
261+
- For repositories with many files (100+), focus diagrams on architectural subsystems and module boundaries rather than individual files
262+
- Group related components into logical clusters. A good diagram has 5-8 nodes at the top level
263+
- Each node should represent a subsystem or module, not an individual file
264+
- Show data flow and dependencies between modules, not class hierarchies within a single module
265+
- Prefer fewer, clearer nodes over many detailed ones — users can click nodes for details
259266
</structured_diagram_data>
260267
"""
261268

api/rag.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -345,7 +345,8 @@ def _validate_and_filter_embeddings(self, documents: List) -> List:
345345

346346
def prepare_retriever(self, repo_url_or_path: str, type: str = "github", access_token: str = None,
347347
excluded_dirs: List[str] = None, excluded_files: List[str] = None,
348-
included_dirs: List[str] = None, included_files: List[str] = None):
348+
included_dirs: List[str] = None, included_files: List[str] = None,
349+
max_files: int = 0):
349350
"""
350351
Prepare the retriever for a repository.
351352
Will load database from local storage if available.
@@ -357,6 +358,7 @@ def prepare_retriever(self, repo_url_or_path: str, type: str = "github", access_
357358
excluded_files: Optional list of file patterns to exclude from processing
358359
included_dirs: Optional list of directories to include exclusively
359360
included_files: Optional list of file patterns to include exclusively
361+
max_files: Maximum number of files to process (0 = unlimited)
360362
"""
361363
self.initialize_db_manager()
362364
self.repo_url_or_path = repo_url_or_path
@@ -368,7 +370,8 @@ def prepare_retriever(self, repo_url_or_path: str, type: str = "github", access_
368370
excluded_dirs=excluded_dirs,
369371
excluded_files=excluded_files,
370372
included_dirs=included_dirs,
371-
included_files=included_files
373+
included_files=included_files,
374+
max_files=max_files,
372375
)
373376
logger.info(f"Loaded {len(self.transformed_docs)} documents for retrieval")
374377

api/websocket_wiki.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,7 @@ class ChatCompletionRequest(BaseModel):
6161
excluded_files: Optional[str] = Field(None, description="Comma-separated list of file patterns to exclude from processing")
6262
included_dirs: Optional[str] = Field(None, description="Comma-separated list of directories to include exclusively")
6363
included_files: Optional[str] = Field(None, description="Comma-separated list of file patterns to include exclusively")
64+
max_files: Optional[int] = Field(0, description="Maximum number of files to process for embedding (0 = unlimited)")
6465

6566
async def generate_with_retry(rag, query, context_docs, provider, model, language="en", max_retries=3):
6667
"""Generate content with retry and context reduction on failure.
@@ -178,8 +179,12 @@ async def handle_websocket_chat(websocket: WebSocket):
178179
included_files = [unquote(file_pattern) for file_pattern in request.included_files.split('\n') if file_pattern.strip()]
179180
logger.info(f"Using custom included files: {included_files}")
180181

182+
max_files = request.max_files or 0
183+
if max_files > 0:
184+
logger.info(f"Using max_files limit: {max_files}")
185+
181186
# Check for a cached RAG session (only when no custom file filters)
182-
has_custom_filters = any([excluded_dirs, excluded_files, included_dirs, included_files])
187+
has_custom_filters = any([excluded_dirs, excluded_files, included_dirs, included_files, max_files > 0])
183188
from api.config import get_embedder_type
184189
embedder_type = get_embedder_type()
185190
session_key = rag_session_manager.get_session_key(request.repo_url, embedder_type) if not has_custom_filters else None
@@ -195,7 +200,7 @@ async def handle_websocket_chat(websocket: WebSocket):
195200
else:
196201
# Create a new RAG instance
197202
request_rag = RAG(provider=request.provider, model=request.model)
198-
request_rag.prepare_retriever(request.repo_url, request.type, request.token, excluded_dirs, excluded_files, included_dirs, included_files)
203+
request_rag.prepare_retriever(request.repo_url, request.type, request.token, excluded_dirs, excluded_files, included_dirs, included_files, max_files=max_files)
199204
# Cache the session if no custom filters were used
200205
if session_key:
201206
rag_session_manager.put(session_key, request_rag)

0 commit comments

Comments (0)