Skip to content

Commit 383a903

Browse files
Fixed API endpoint URL and updated .env.example
1 parent f4a1061 commit 383a903

140 files changed

Lines changed: 9 additions & 18404 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

sample_solutions/MultiAgentQnA/api/.env.example

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@
77
# - The API key is the litellm_master_key value from the generated vault.yml file
88
#
99
# **APISIX Gateway**: Provide your APISIX Gateway URL and authentication token
10-
# - For embedding: https://apisix-gateway.example.com/bge-base-en-v1.5
11-
# - For inference: https://apisix-gateway.example.com/Llama-3.1-8B-Instruct
12-
# - Note: APISIX requires the model name in the URL path
10+
# - For APISIX, include the model name in the INFERENCE_API_ENDPOINT path
11+
# - Example: https://apisix-gateway.example.com/Llama-3.1-8B-Instruct
12+
# - Set EMBEDDING_API_ENDPOINT separately for the embedding model
13+
# - Example: https://apisix-gateway.example.com/bge-base-en-v1.5
1314
# - To generate the APISIX authentication token, use the [generate-token.sh](https://github.com/opea-project/Enterprise-Inference/blob/main/core/scripts/generate-token.sh) script
1415
# - The token is generated using Keycloak client credentials
15-
# - Set EMBEDDING_API_ENDPOINT and INFERENCE_MODEL_ENDPOINT when using APISIX
1616
#
1717
# INFERENCE_API_TOKEN: Authentication token/API key for the inference service
1818
INFERENCE_API_ENDPOINT=https://api.example.com
@@ -24,8 +24,10 @@ INFERENCE_MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct
2424

2525
# APISIX Gateway Endpoints
2626
# Uncomment and set these when using APISIX Gateway:
27-
# EMBEDDING_API_ENDPOINT=https://api.example.com/bge-base-en-v1.5
28-
# INFERENCE_MODEL_ENDPOINT=https://api.example.com/Llama-3.1-8B-Instruct
27+
# IMPORTANT: Use exact APISIX route paths:
28+
# Example routes: /bge-base-en-v1.5-vllmcpu/* and /Llama-3.1-8B-Instruct-vllmcpu/*
29+
# INFERENCE_API_ENDPOINT=https://api.example.com/Llama-3.1-8B-Instruct-vllmcpu
30+
# EMBEDDING_API_ENDPOINT=https://api.example.com/bge-base-en-v1.5-vllmcpu
2931

3032
# Local URL Endpoint (only needed for non-public domains)
3133
# If using a local domain like api.example.com mapped to localhost:

sample_solutions/MultiAgentQnA/api/config.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,9 @@
1212
INFERENCE_API_TOKEN = os.getenv("INFERENCE_API_TOKEN")
1313

1414
EMBEDDING_API_ENDPOINT = os.getenv("EMBEDDING_API_ENDPOINT")
15-
INFERENCE_MODEL_ENDPOINT = os.getenv("INFERENCE_MODEL_ENDPOINT")
1615

1716
if not EMBEDDING_API_ENDPOINT:
1817
EMBEDDING_API_ENDPOINT = INFERENCE_API_ENDPOINT
19-
if not INFERENCE_MODEL_ENDPOINT:
20-
INFERENCE_MODEL_ENDPOINT = INFERENCE_API_ENDPOINT
2118

2219
EMBEDDING_MODEL_NAME = os.getenv("EMBEDDING_MODEL_NAME", "bge-base-en-v1.5")
2320
INFERENCE_MODEL_NAME = os.getenv("INFERENCE_MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct")

sample_solutions/MultiAgentQnA/api/services/api_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ class APIClient:
1919

2020
def __init__(self):
2121
self.embedding_base_url = config.EMBEDDING_API_ENDPOINT
22-
self.inference_base_url = config.INFERENCE_MODEL_ENDPOINT
22+
self.inference_base_url = config.INFERENCE_API_ENDPOINT
2323
self.token = config.INFERENCE_API_TOKEN
2424
self.http_client = httpx.Client(verify=False)
2525
logger.info(f"✓ API Client initialized - Embedding: {self.embedding_base_url}, Inference: {self.inference_base_url}")

sample_solutions/PDFToPodcast/.env.example

Lines changed: 0 additions & 20 deletions
This file was deleted.

sample_solutions/PDFToPodcast/.gitignore

Lines changed: 0 additions & 79 deletions
This file was deleted.

sample_solutions/PDFToPodcast/Dockerfile

Lines changed: 0 additions & 18 deletions
This file was deleted.

0 commit comments

Comments
 (0)