feat: Implement keyword-based routing fallback and enhance error handling in routing service

samadpls · samadpls · commit fb5cd57eeff1 · 2025-08-08T01:10:10.000+05:00
diff --git a/src/services/routing_service.py b/src/services/routing_service.py
@@ -75,8 +75,9 @@ def determine_agent(
             return result.output
 
         except Exception as e:
-            # Fallback to simple keyword-based routing
-            return self._fallback_routing(user_query, csv_loaded)
+            print(f"Routing failed with error: {e}")
+            # Use simple keyword-based routing as fallback
+            return self._keyword_based_routing(user_query, csv_loaded)
 
     def handle_conversation_query(self, user_query: str) -> str:
         """Handle conversational queries."""
@@ -131,7 +132,7 @@ def handle_csv_query(
 
         except Exception as e:
             # If LLM fails, provide a graceful response without showing errors
-            return WORST_CASE_SCENARIO 
+            return WORST_CASE_SCENARIO
 
     def _execute_csv_analysis(
         self, python_code: str, csv_info: Dict[str, Any], explanation: str
@@ -224,15 +225,15 @@ def install_package(package):
                             current_code = fixed_code
                             continue  # Try again with fixed code
 
-                    return WORST_CASE_SCENARIO 
+                    return WORST_CASE_SCENARIO
 
         except Exception as e:
-            return WORST_CASE_SCENARIO 
+            return WORST_CASE_SCENARIO
 
         except Exception as e:
-            return WORST_CASE_SCENARIO 
+            return WORST_CASE_SCENARIO
 
-        return WORST_CASE_SCENARIO 
+        return WORST_CASE_SCENARIO
 
     def _fix_python_code(
         self, original_code: str, error_message: str, csv_info: Dict[str, Any]
@@ -375,6 +376,68 @@ def _format_sql_response(self, sql_response) -> str:
 
         return "\n\n".join(response_parts)
 
+    def _keyword_based_routing(self, user_query: str, csv_loaded: bool) -> RoutingDecision:
+        """Route a query by whole-word keyword matching when LLM routing fails.
+
+        Precedence: CSV keywords (only while a CSV is loaded), then SQL, then
+        conversation; with no match, CSV_AGENT if a CSV is loaded, else conversation.
+        """
+        import re  # function-scope import keeps this fallback self-contained
+        query_lower = user_query.lower()
+
+        def mentions(keywords) -> bool:
+            # Whole words only: "hi" must not fire inside "this", nor "data" inside
+            # "database". Keywords longer than 3 chars also accept a plural "s".
+            options = (re.escape(k) + ("s?" if len(k) > 3 else "") for k in keywords)
+            return re.search(rf"\b(?:{'|'.join(options)})\b", query_lower) is not None
+
+        csv_keywords = (
+            "csv", "analyze", "chart", "plot", "graph", "average", "mean", "sum",
+            "count", "max", "min", "statistics", "data", "visualization", "top",
+            "bottom", "highest", "lowest", "distribution", "correlation",
+        )
+        sql_keywords = (
+            "select", "insert", "update", "delete", "sql", "query", "table",
+            "database", "users", "customers", "orders", "products", "where",
+            "join", "group by", "order by", "from",
+        )
+        conversation_keywords = (
+            "hello", "hi", "hey", "how are you", "what can you do", "help",
+            "thanks", "thank you", "goodbye", "bye", "good morning", "good evening",
+        )
+
+        # A loaded CSV takes priority: analysis wording beats SQL wording.
+        if csv_loaded and mentions(csv_keywords):
+            return RoutingDecision(
+                agent="CSV_AGENT",
+                confidence=0.8,
+                reasoning="Keyword-based routing detected CSV analysis request",
+            )
+        if mentions(sql_keywords):
+            return RoutingDecision(
+                agent="SQL_AGENT",
+                confidence=0.8,
+                reasoning="Keyword-based routing detected SQL request",
+            )
+        if mentions(conversation_keywords):
+            return RoutingDecision(
+                agent="CONVERSATION_AGENT",
+                confidence=0.9,
+                reasoning="Keyword-based routing detected conversation request",
+            )
+        # Nothing matched: fall back on whether a CSV is available.
+        if csv_loaded:
+            return RoutingDecision(
+                agent="CSV_AGENT",
+                confidence=0.6,
+                reasoning="CSV loaded, defaulting to CSV analysis",
+            )
+        return RoutingDecision(
+            agent="CONVERSATION_AGENT",
+            confidence=0.5,
+            reasoning="No clear intent detected, defaulting to conversation",
+        )
+
     def _fallback_routing(self, user_query: str, csv_loaded: bool) -> RoutingDecision:
         """Fallback routing when LLM routing fails - let LLM decide, not hardcoded keywords."""
         # Default to conversation - let the LLM handle all decisions
@@ -386,4 +449,4 @@ def _fallback_routing(self, user_query: str, csv_loaded: bool) -> RoutingDecisio
 
     def _get_fallback_conversation_response(self, user_query: str) -> str:
         """Get fallback conversation response when LLM fails."""
-        return WORST_CASE_SCENARIO 
+        return WORST_CASE_SCENARIO
diff --git a/test_routing_fix.py b/test_routing_fix.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python3
+"""Test script to verify the routing fix is working."""
+
+import sys
+import os
+import tempfile
+import pandas as pd
+
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+
+from src.services.routing_service import IntelligentRoutingService
+from src.backend.orchestrator import BackendOrchestrator
+from src.schemas.requests import NewChatRequest
+
+
+def test_routing_only():
+    """Print expected vs. actual agent for fixed queries; reports only, never asserts."""
+    print("🧠 Testing Routing Mechanism")
+    print("=" * 40)
+
+    # Each scenario is (query, csv_loaded, expected agent).
+    scenarios = (
+        ("Hello", False, "CONVERSATION_AGENT"),
+        ("What is the average salary?", True, "CSV_AGENT"),
+        ("Show me all users", False, "SQL_AGENT"),
+        ("Create a chart", True, "CSV_AGENT"),
+        ("SELECT * FROM users", False, "SQL_AGENT"),
+    )
+    router = IntelligentRoutingService()
+
+    for text, has_csv, wanted in scenarios:
+        print(f"\nQuery: '{text}' (CSV loaded: {has_csv})")
+        outcome = router.determine_agent(text, [], csv_loaded=has_csv)
+        print(f"Expected: {wanted}")
+        print(f"Actual: {outcome.agent}")
+        print(f"Confidence: {outcome.confidence}")
+        print(f"Reasoning: {outcome.reasoning}")
+
+        verdict = "❌ FAIL" if outcome.agent != wanted else "✅ PASS"
+        print(f"Status: {verdict}")
+
+
+def test_csv_analysis_with_real_data():
+    """Load a small CSV into a session and print the answers to sample queries.
+
+    The temporary CSV is removed in a ``finally`` block, so it does not leak
+    when session creation, CSV loading, or anything else below raises.
+    """
+    print("\n📊 Testing CSV Analysis with Real Data")
+    print("=" * 40)
+
+    # Five sample rows spanning three departments.
+    data = {
+        'name': ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
+        'age': [25, 30, 35, 28, 32],
+        'salary': [50000, 60000, 70000, 55000, 65000],
+        'department': ['IT', 'HR', 'IT', 'Finance', 'HR']
+    }
+    df = pd.DataFrame(data)
+
+    # delete=False: the file has to outlive this handle so it can be re-read below.
+    with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f:
+        df.to_csv(f.name, index=False)
+        csv_path = f.name
+
+    try:
+        print(f"Created test CSV: {csv_path}")
+        print("CSV Content:")
+        print(df.to_string(index=False))
+
+        # Fresh orchestrator and session for this run.
+        orchestrator = BackendOrchestrator()
+        session_info = orchestrator.create_new_session(
+            NewChatRequest(session_name="Test Session")
+        )
+        session_id = session_info.session_id
+        print(f"\nCreated session: {session_id}")
+
+        # load_csv_data is given the file's text, not its path.
+        with open(csv_path, 'r') as f:
+            csv_content = f.read()
+        result = orchestrator.load_csv_data(session_id, csv_content)
+        print(f"CSV Load Result: {result['status']}")
+
+        test_queries = [
+            "What is the average salary?",
+            "How many people are in each department?",
+            "Who has the highest salary?",
+        ]
+
+        for query in test_queries:
+            print(f"\n--- Testing Query: '{query}' ---")
+            try:
+                response = orchestrator.generate_intelligent_response(session_id, query)
+                print(f"Response: {response.content}")
+                print(f"Response Type: {type(response.content)}")
+
+                # Check if this is raw Python code (the old problem)
+                if "import" in response.content or "pd.read_csv" in response.content:
+                    print("❌ ISSUE: Response contains raw Python code!")
+                else:
+                    print("✅ SUCCESS: Response is clean human-readable text!")
+            except Exception as e:
+                # Report and continue so one failing query does not hide the rest.
+                print(f"❌ ERROR: {str(e)}")
+    finally:
+        # Always remove the temp file, even when a step above raised.
+        os.unlink(csv_path)
+
+
+def main():
+    """Run the routing check, then the CSV analysis check, between two banners."""
+    print("🚀 Testing Routing Fix")
+    print("=" * 50)
+
+    # Order fixes the printed report: routing first, CSV analysis second.
+    checks = (test_routing_only, test_csv_analysis_with_real_data)
+    for check in checks:
+        check()
+
+    # Reached only when neither check raised.
+    print("\n🎉 Testing completed!")
+
+
+if __name__ == "__main__":  # run the checks only when executed as a script
+    main()