Commit e6e5e7d (parent: 6c2a6ac)
Addressed PR comments

6 files changed: 61 additions, 34 deletions

sample_solutions/CodeTranslation/.env.example (7 additions, 3 deletions)

@@ -4,7 +4,8 @@ BACKEND_PORT=5001
 # Inference API Configuration
 # INFERENCE_API_ENDPOINT: URL to your inference service (without /v1 suffix)
 # - For GenAI Gateway: https://genai-gateway.example.com
-# - For APISIX Gateway: https://apisix-gateway.example.com/inference
+# - For APISIX Gateway: https://apisix-gateway.example.com/CodeLlama-34b-Instruct-hf
+# Note: APISIX Gateway requires the model name in the URL path
 #
 # INFERENCE_API_TOKEN: Authentication token/API key for the inference service
 # - For GenAI Gateway: Your GenAI Gateway API key

@@ -18,13 +19,16 @@ LLM_TEMPERATURE=0.2
 LLM_MAX_TOKENS=4096

 # Code Translation Settings
-MAX_CODE_LENGTH=10000
+# MAX_CODE_LENGTH: Maximum input code length in characters
+# Note: For Enterprise Inference with CodeLlama-34b (max tokens: 5196)
+# Recommended value is 8000-12000 characters (~4000-5000 tokens with prompt overhead)
+MAX_CODE_LENGTH=8000
 MAX_FILE_SIZE=10485760

 # CORS Configuration
 CORS_ALLOW_ORIGINS=["http://localhost:5173", "http://localhost:3000"]

 # Local URL Endpoint (only needed for non-public domains)
-# If using a local domain like inference.example.com mapped to localhost, set to the domain without https://
+# If using a local domain like api.example.com mapped to localhost, set to the domain without https://
 # Otherwise, set to: not-needed
 LOCAL_URL_ENDPOINT=not-needed

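The new `MAX_CODE_LENGTH` comment above (8000-12000 characters ≈ 4000-5000 tokens with prompt overhead) implies roughly 2 characters per token plus a fixed prompt budget. A rough sketch of that arithmetic — the ratio and the 500-token overhead figure are back-of-envelope assumptions derived from the comment, not real tokenizer output:

```python
# Back-of-envelope token budgeting for MAX_CODE_LENGTH.
# Assumptions (inferred from the .env comment, not a real tokenizer):
#   ~2 characters per token for source code, ~500 tokens of prompt overhead.
CHARS_PER_TOKEN = 2
PROMPT_OVERHEAD_TOKENS = 500
MODEL_MAX_TOKENS = 5196  # CodeLlama-34b limit cited in the comment


def estimated_tokens(code: str) -> int:
    """Estimate total request tokens: code tokens plus fixed prompt overhead."""
    return len(code) // CHARS_PER_TOKEN + PROMPT_OVERHEAD_TOKENS


def fits_budget(code: str) -> bool:
    """True if the code plausibly fits within the model's token limit."""
    return estimated_tokens(code) <= MODEL_MAX_TOKENS


print(estimated_tokens("x" * 8000))  # 4500, under the 5196 limit
```

Under these assumptions an 8000-character input leaves roughly 700 tokens of headroom, which is why 8000 is the conservative default while 12000 already overshoots the cited limit.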
sample_solutions/CodeTranslation/README.md (35 additions, 16 deletions)

@@ -5,12 +5,22 @@ The system integrates a FastAPI backend, alongside a modern React + Vite + Tailw

 ## Table of Contents

+- [Code Translation](#code-translation)
+- [Table of Contents](#table-of-contents)
 - [Project Overview](#project-overview)
 - [Features](#features)
 - [Architecture](#architecture)
 - [Prerequisites](#prerequisites)
+  - [System Requirements](#system-requirements)
+  - [Required API Configuration](#required-api-configuration)
+  - [Local Development Configuration](#local-development-configuration)
+  - [Verify Docker Installation](#verify-docker-installation)
 - [Quick Start Deployment](#quick-start-deployment)
+  - [Clone the Repository](#clone-the-repository)
+  - [Set up the Environment](#set-up-the-environment)
+  - [Running the Application](#running-the-application)
 - [User Interface](#user-interface)
+  - [Stopping the Application](#stopping-the-application)
 - [Troubleshooting](#troubleshooting)
 - [Additional Info](#additional-info)

@@ -124,21 +134,22 @@ Before you begin, ensure you have the following installed:
 This application supports multiple inference deployment patterns:

 - **GenAI Gateway**: Provide your GenAI Gateway URL and API key
+  - To generate the GenAI Gateway API key, use the [generate-vault-secrets.sh](https://github.com/opea-project/Enterprise-Inference/blob/main/core/scripts/generate-vault-secrets.sh) script
+  - The API key is the `litellm_master_key` value from the generated `vault.yml` file
+
 - **APISIX Gateway**: Provide your APISIX Gateway URL and authentication token
-
-Configuration requirements:
-- INFERENCE_API_ENDPOINT: URL to your inference service (GenAI Gateway, APISIX Gateway, etc.)
-- INFERENCE_API_TOKEN: Authentication token/API key for your chosen service
+  - To generate the APISIX authentication token, use the [generate-token.sh](https://github.com/opea-project/Enterprise-Inference/blob/main/core/scripts/generate-token.sh) script
+  - The token is generated using Keycloak client credentials

 ### Local Development Configuration

 **For Local Testing Only (Optional)**

-If you're testing with a local inference endpoint using a custom domain (e.g., `inference.example.com` mapped to localhost in your hosts file):
+If you're testing with a local inference endpoint using a custom domain (e.g., `api.example.com` mapped to localhost in your hosts file):

 1. Edit `.env` and set:
 ```bash
-LOCAL_URL_ENDPOINT=inference.example.com
+LOCAL_URL_ENDPOINT=api.example.com
 ```
 (Use the domain name from your INFERENCE_API_ENDPOINT without `https://`)

@@ -165,8 +176,8 @@ docker ps
 ### Clone the Repository

 ```bash
-git clone https://github.com/opea-project/Enterprise-Inference.git
-cd Enterprise-Inference/sample_solutions/CodeTranslation
+git clone https://github.com/cld2labs/GenAISamples.git
+cd GenAISamples/code-translation
 ```

 ### Set up the Environment
@@ -182,7 +193,8 @@ BACKEND_PORT=5001
 # Inference API Configuration
 # INFERENCE_API_ENDPOINT: URL to your inference service (without /v1 suffix)
 # - For GenAI Gateway: https://genai-gateway.example.com
-# - For APISIX Gateway: https://apisix-gateway.example.com/inference
+# - For APISIX Gateway: https://apisix-gateway.example.com/CodeLlama-34b-Instruct-hf
+# Note: APISIX Gateway requires the model name in the URL path
 #
 # INFERENCE_API_TOKEN: Authentication token/API key for the inference service
 # - For GenAI Gateway: Your GenAI Gateway API key
@@ -196,14 +208,17 @@ LLM_TEMPERATURE=0.2
 LLM_MAX_TOKENS=4096

 # Code Translation Settings
-MAX_CODE_LENGTH=10000
+# MAX_CODE_LENGTH: Maximum input code length in characters
+# Note: For Enterprise Inference with CodeLlama-34b (max tokens: 5196)
+# Recommended value is 8000-12000 characters (~4000-5000 tokens with prompt overhead)
+MAX_CODE_LENGTH=8000
 MAX_FILE_SIZE=10485760

 # CORS Configuration
 CORS_ALLOW_ORIGINS=["http://localhost:5173", "http://localhost:3000"]

 # Local URL Endpoint (only needed for non-public domains)
-# If using a local domain like inference.example.com mapped to localhost, set to the domain without https://
+# If using a local domain like api.example.com mapped to localhost, set to the domain without https://
 # Otherwise, set to: not-needed
 LOCAL_URL_ENDPOINT=not-needed
 EOF
@@ -218,7 +233,8 @@ BACKEND_PORT=5001
 # Inference API Configuration
 # INFERENCE_API_ENDPOINT: URL to your inference service (without /v1 suffix)
 # - For GenAI Gateway: https://genai-gateway.example.com
-# - For APISIX Gateway: https://apisix-gateway.example.com/inference
+# - For APISIX Gateway: https://apisix-gateway.example.com/CodeLlama-34b-Instruct-hf
+# Note: APISIX Gateway requires the model name in the URL path
 #
 # INFERENCE_API_TOKEN: Authentication token/API key for the inference service
 # - For GenAI Gateway: Your GenAI Gateway API key
@@ -232,14 +248,17 @@ LLM_TEMPERATURE=0.2
 LLM_MAX_TOKENS=4096

 # Code Translation Settings
-MAX_CODE_LENGTH=10000
+# MAX_CODE_LENGTH: Maximum input code length in characters
+# Note: For Enterprise Inference with CodeLlama-34b (max tokens: 5196)
+# Recommended value is 8000-12000 characters (~4000-5000 tokens with prompt overhead)
+MAX_CODE_LENGTH=8000
 MAX_FILE_SIZE=10485760

 # CORS Configuration
 CORS_ALLOW_ORIGINS=["http://localhost:5173", "http://localhost:3000"]

 # Local URL Endpoint (only needed for non-public domains)
-# If using a local domain like inference.example.com mapped to localhost, set to the domain without https://
+# If using a local domain like api.example.com mapped to localhost, set to the domain without https://
 # Otherwise, set to: not-needed
 LOCAL_URL_ENDPOINT=not-needed
 ```
@@ -255,7 +274,7 @@ LOCAL_URL_ENDPOINT=not-needed
 Start both API and UI services together with Docker Compose:

 ```bash
-# From the CodeTranslation directory
+# From the code-translation directory
 docker compose up --build

 # Or run in detached mode (background)
@@ -343,7 +362,7 @@ For comprehensive troubleshooting guidance, common issues, and solutions, refer

 ## Additional Info

-The following models have been validated with CodeTranslation:
+The following models have been validated with code-translation:

 | Model | Hardware |
 |-------|----------|

sample_solutions/CodeTranslation/TROUBLESHOOTING.md (5 additions, 3 deletions)

@@ -22,14 +22,16 @@ This document contains all common issues encountered during development and thei
 ```
 3. Restart the server

-#### "Code too long. Maximum length is 10000 characters"
+#### "Code too long. Maximum length is 8000 characters"

 **Solution**:

 - The limit exists due to model context window constraints
+- CodeLlama-34b on Enterprise Inference has a max token limit of 5196
+- 8000 characters ≈ 4000-5000 tokens including prompt overhead
 - Break your code into smaller modules
 - Translate one class or function at a time
-- Or adjust `MAX_CODE_LENGTH` in `.env` if needed
+- Or adjust `MAX_CODE_LENGTH` in `.env` if your deployment supports higher limits (up to ~12000 characters max)

 #### "Source language not supported"

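The "break your code into smaller modules / translate one class or function at a time" advice above can be automated. A minimal sketch, assuming Python input and using only the standard library — `split_by_top_level_defs` is an illustrative helper, not part of the repo:

```python
import ast

MAX_CODE_LENGTH = 8000  # mirrors the .env setting


def split_by_top_level_defs(source: str, limit: int = MAX_CODE_LENGTH) -> list[str]:
    """Greedily pack consecutive top-level statements into chunks <= limit.

    Splits only at top-level def/class/statement boundaries, so no function
    is cut in half. Comments between top-level nodes are dropped -- good
    enough for a sketch, not a faithful source transform.
    """
    lines = source.splitlines(keepends=True)
    pieces = [
        "".join(lines[node.lineno - 1 : node.end_lineno])
        for node in ast.parse(source).body
    ]
    chunks: list[str] = []
    current = ""
    for piece in pieces:
        if current and len(current) + len(piece) > limit:
            chunks.append(current)
            current = ""
        current += piece
    if current:
        chunks.append(current)
    return chunks
```

Each chunk can then be submitted as a separate translation request; a single definition longer than the limit would still need manual splitting.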
@@ -120,7 +122,7 @@ npm run dev

 ### Character Counter Not Updating

-**Problem**: Character counter shows 0 / 10,000 even with code
+**Problem**: Character counter shows 0 / 8,000 even with code

 **Solution**:

sample_solutions/CodeTranslation/api/config.py (5 additions, 3 deletions)

@@ -24,9 +24,11 @@

 # Code Translation Settings
 SUPPORTED_LANGUAGES = ["java", "c", "cpp", "python", "rust", "go"]
-MAX_CODE_LENGTH = 10000 # characters
-LLM_TEMPERATURE = 0.2 # Lower temperature for more deterministic code generation
-LLM_MAX_TOKENS = 4096
+# MAX_CODE_LENGTH: For Enterprise Inference with CodeLlama-34b (max tokens: 5196)
+# Recommended: 8000-12000 characters (~4000-5000 tokens with prompt overhead)
+MAX_CODE_LENGTH = int(os.getenv("MAX_CODE_LENGTH", "8000")) # characters
+LLM_TEMPERATURE = float(os.getenv("LLM_TEMPERATURE", "0.2")) # Lower temperature for more deterministic code generation
+LLM_MAX_TOKENS = int(os.getenv("LLM_MAX_TOKENS", "4096"))

 # CORS Settings
 CORS_ALLOW_ORIGINS = ["*"] # Update with specific origins in production

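One caveat with the new `int(os.getenv(...))` calls in config.py: if an env var is set to a non-numeric value, the conversion raises `ValueError` at import time. A defensive variant one might layer on top — `env_int` is a hypothetical helper, not something the repo defines:

```python
import os


def env_int(name: str, default: int) -> int:
    """Read an integer env var; fall back to the default if unset or malformed.

    env_int is an illustrative helper, not part of the repo's config.py.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        return default


# Example: a safer equivalent of int(os.getenv("MAX_CODE_LENGTH", "8000"))
MAX_CODE_LENGTH = env_int("MAX_CODE_LENGTH", 8000)
```

Whether silently falling back is better than failing fast is a design choice; for a container that should refuse to start on bad config, the original raising behavior is arguably correct.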
sample_solutions/CodeTranslation/docker-compose.yaml (1 addition, 1 deletion)

@@ -14,7 +14,7 @@ services:
 - INFERENCE_MODEL_NAME=${INFERENCE_MODEL_NAME}
 - LLM_TEMPERATURE=${LLM_TEMPERATURE:-0.2}
 - LLM_MAX_TOKENS=${LLM_MAX_TOKENS:-4096}
-- MAX_CODE_LENGTH=${MAX_CODE_LENGTH:-10000}
+- MAX_CODE_LENGTH=${MAX_CODE_LENGTH:-8000}
 - MAX_FILE_SIZE=${MAX_FILE_SIZE:-10485760}
 networks:
 - code-trans-network

sample_solutions/CodeTranslation/ui/src/components/CodeTranslator.jsx (8 additions, 8 deletions)

@@ -127,21 +127,21 @@ export default function CodeTranslator({
 <label className="block text-sm font-medium text-gray-700">
 Source Code ({LANGUAGE_LABELS[sourceLanguage]})
 </label>
-<span className={`text-xs ${sourceCode.length > 10000 ? 'text-red-600 font-semibold' : 'text-gray-500'}`}>
-{sourceCode.length.toLocaleString()} / 10,000 characters
+<span className={`text-xs ${sourceCode.length > 8000 ? 'text-red-600 font-semibold' : 'text-gray-500'}`}>
+{sourceCode.length.toLocaleString()} / 8,000 characters
 </span>
 </div>
 <textarea
 value={sourceCode}
 onChange={(e) => setSourceCode(e.target.value)}
 placeholder={`Enter your ${sourceLanguage} code here...`}
 className={`w-full h-96 px-3 py-2 border rounded-lg focus:ring-2 focus:ring-blue-500 focus:border-transparent font-mono text-sm resize-none ${
-sourceCode.length > 10000 ? 'border-red-500' : 'border-gray-300'
+sourceCode.length > 8000 ? 'border-red-500' : 'border-gray-300'
 }`}
 />
-{sourceCode.length > 10000 && (
+{sourceCode.length > 8000 && (
 <p className="text-xs text-red-600 mt-1">
-Code exceeds maximum length. Please reduce to 10,000 characters or less.
+Code exceeds maximum length. Please reduce to 8,000 characters or less.
 </p>
 )}
 </div>
@@ -183,7 +183,7 @@ export default function CodeTranslator({
 {/* Translate Button */}
 <button
 onClick={handleTranslate}
-disabled={isTranslating || !sourceCode.trim() || sourceCode.length > 10000}
+disabled={isTranslating || !sourceCode.trim() || sourceCode.length > 8000}
 className="w-full bg-gradient-to-r from-blue-500 to-purple-600 text-white py-3 rounded-lg font-medium hover:from-blue-600 hover:to-purple-700 transition-all disabled:opacity-50 disabled:cursor-not-allowed flex items-center justify-center space-x-2"
 >
 {isTranslating ? (
@@ -202,8 +202,8 @@ export default function CodeTranslator({
 {/* Info Note */}
 <div className="mt-4 p-3 bg-blue-50 border border-blue-200 rounded-lg">
 <p className="text-xs text-gray-600">
-<span className="font-semibold">Note:</span> The 10,000 character limit is due to CodeLlama-34b's
-context window (16K tokens). This ensures optimal translation quality and prevents timeouts.
+<span className="font-semibold">Note:</span> The 8,000 character limit is due to CodeLlama-34b's
+max token limit (5196 tokens) on Enterprise Inference. This ensures optimal translation quality and prevents errors.
 For larger files, consider breaking them into smaller modules.
 </p>
 </div>
