Release v1.3.1 (#34)

AhmedSeemalK · vhpintel · web-flow · commit f5caf92e8f63 · 2025-11-19T10:59:10.000+05:30
Signed-off-by: amberjain1 <amber.jain@intel.com>
Signed-off-by: psurabh <pradeep.surabhi@intel.com>
Signed-off-by: mdfaheem-intel <mohammad.faheem@intel.com>
Co-authored-by: vhpintel <vijay.kumar.h.p@intel.com>
diff --git a/core/version.txt b/core/version.txt
@@ -1 +1,4 @@
-v1.3.0
+{
+  "version": "1.3.1",
+  "release_date": "2025-11-18"
+}
diff --git a/docs/examples/single-node/inference-config.cfg b/docs/examples/single-node/inference-config.cfg
@@ -4,11 +4,8 @@ key_file=~/certs/key.pem
 keycloak_client_id=my-client-id
 keycloak_admin_user=your-keycloak-admin-user
 keycloak_admin_password=changeme
-hugging_face_token=your_hugging_face_token
-hugging_face_token_falcon3=your_hugging_face_token
-models=
-cpu_or_gpu=
 vault_pass_code=place-holder-123
+model=
 deploy_kubernetes_fresh=on
 deploy_ingress_controller=on
 deploy_keycloak_apisix=on
@@ -17,4 +14,4 @@ deploy_observability=off
 deploy_llm_models=on
 deploy_ceph=off
 deploy_istio=off
-uninstall_ceph=off
+uninstall_ceph=off
diff --git a/ibm_catalog.json b/ibm_catalog.json
@@ -3,7 +3,7 @@
         {
             "label": "Intel® AI for Enterprise Inference",
             "name": "da-enterprise-inference",
-            "version": "1.3.0",
+            "version": "1.3.1",
             "product_kind": "solution",
 			"tags": [
               "opea",
@@ -274,12 +274,6 @@
                             "display_name": "Gaudi worker names",
                             "default_value": "[]",
 			                "description": "Custom names for Gaudi worker nodes. If not provided, defaults to 'inference-workload-gaudi-node-01', etc."
-                        },
-			            {
-                            "key": "image",
-                            "type": "string",
-                            "default_value": "gaudi3-os-u22-01-22-0",
-                            "description": "This variable image name to be used for VSI bringup"
                         },
 			            {
                             "key": "instance_profile",
diff --git a/third_party/IBM/docs/assets/quickstart/quota_error.png b/third_party/IBM/docs/assets/quickstart/quota_error.png
diff --git a/third_party/IBM/docs/assets/quickstart/quota_request.png b/third_party/IBM/docs/assets/quickstart/quota_request.png
diff --git a/third_party/IBM/docs/assets/quickstart/support_request.png b/third_party/IBM/docs/assets/quickstart/support_request.png
diff --git a/third_party/IBM/docs/getting-started.md b/third_party/IBM/docs/getting-started.md
@@ -77,6 +77,9 @@ The following large language models are supported for deployment:
 | Qwen/Qwen2.5-32B-Instruct					| 1       					| 70GB    | 6 		 |
 
 > **Note:** Additional models can be deployed or existing models can be removed after initial deployment by accessing the deployment instance.
+>
+> For Gaudi 3 quota requirements and the steps to request a quota increase, see [Quota Increase for Gaudi 3 - IBM Cloud Quick Start Guide](./quota-increase.md).
+>
 
 ### Required Components
 
diff --git a/third_party/IBM/docs/quickstart-prerequisites.md b/third_party/IBM/docs/quickstart-prerequisites.md
@@ -161,13 +161,13 @@ ibmcloud resource groups
 ### 8. Model Selection (models)
 **What it is:** Which AI model you want to deploy
 
-| Model Name 								        | Cards Required       		| Storage | Model ID |
+| Model Name 								| Cards Required       		| Storage | Model ID |
 |-------------------------------------------|---------------------------|---------|----------|
-| meta-llama/Llama-3.1-8B-Instruct  		  | 1 	  					      | 20GB    | 1  		|
-| meta-llama/Llama-3.1-405B-Instruct 		  | 8       					   | 900GB   | 3		   |
-| meta-llama/Llama-3.3-70B-Instruct			  | 4       					   | 150GB   | 4 		   |
-| meta-llama/Llama-4-Scout-17B-16E-Instruct | 4       					   | 200GB   | 5 		   |
-| Qwen/Qwen2.5-32B-Instruct					  | 1       					   | 70GB    | 6 		   |
+| meta-llama/Llama-3.1-8B-Instruct  		| 1 	  					| 20GB    | 1  		 |
+| meta-llama/Llama-3.1-405B-Instruct 		| 8       					| 900GB   | 3		 |
+| meta-llama/Llama-3.3-70B-Instruct			| 4       					| 150GB   | 4 		 |
+| meta-llama/Llama-4-Scout-17B-16E-Instruct | 4       					| 200GB   | 5 		 |
+| Qwen/Qwen2.5-32B-Instruct					| 1       					| 70GB    | 6 		 |
 
 **For CLI deployment:** Use the Model ID as a string value (`"1"`, `"3"`, `"4"`, `"5"`, or `"6"`) in your terraform.tfvars file.
 **For UI deployment:** Select from the dropdown - the values will be mapped automatically.
diff --git a/third_party/IBM/docs/quota-increase.md b/third_party/IBM/docs/quota-increase.md
@@ -0,0 +1,51 @@
+# Quota Increase for Gaudi 3 - IBM Cloud Quick Start Guide
+
+## Problem:
+When attempting to provision a Gaudi 3 Virtual Server Instance (VSI), you may encounter the following error: 
+```
+"over_quota" 
+"A non-dedicated host VSI with this profile (XXX vCPUs) will put user over quota…." 
+```
+
+You may also see the following alert when going through the provisioning steps:  
+![Quota error](assets/quickstart/quota_error.png)
+
+## Solution:
+To successfully provision a single Gaudi 3 node, your IBM Cloud account must have the following minimum resource quotas: 
+
+1. **vCPUs**: 160 
+2. **Memory**: 1792 GiB 
+3. **Instance Storage**: 26 TB 
+4. **GPUs**: 8 
+
+If you’d like to provision more than one Gaudi 3 node (each node provides 8 Gaudi 3 AI Accelerators) under the same IBM Cloud Account, multiply the above values by the number of nodes. 
+
+### Example: Provisioning 2 nodes of Gaudi 3 would require a minimum quota of: 
+
+1. **vCPUs**: 320 (160 x 2) 
+2. **Memory**: 3584 GiB (1792 x 2)  
+3. **Instance Storage**: 52 TB (26 x 2)  
+4. **GPUs**: 16 (8 x 2)  
+
+## Steps to Request a Quota Increase
+
+Familiarize yourself with the IBM Cloud documentation on increasing account limits: [Increasing Account Limits – IBM Cloud Documentation](https://cloud.ibm.com/docs/account?topic=account-account-limits&interface=cli)
+
+Follow this link, [Opening a Support Case](https://cloud.ibm.com/unifiedsupport/cases/add), to open the IBM Cloud Console and find the appropriate page.  
+
+Fill in the support request:
+1. For ‘Topic’, choose ‘Virtual Private Cloud (VPC)’ 
+2. For ‘Subtopic’, choose ‘Quota request’ 
+	![Quota request](assets/quickstart/quota_request.png)
+3. For ‘Subject’, type a title that best describes your ask; e.g., “Requesting quota increase for Gaudi 3 resources” 
+4. For ‘Description’, fill in the requested information based on your specific use case and needs
+   - Account number: XXX 
+   - Account name: XXX 
+   - Region: XXX 
+   - Environment: ‘Production’  
+   - Justification REQUIRED: XXX 
+   ![Support request](assets/quickstart/support_request.png)
+5. Fill out any additional information pertinent to the support case, then click ‘Next’. 
+6. On the next page, review the support case and click ‘Submit case’ when finished. 
+7. Reach out to your IBM Tech Seller with the IBM Support Ticket Number. Your Tech Seller will then reach out to the Product Team to expedite the completion of the requested changes.  
+8. Once the quota changes have been made to the IBM Cloud Account, the Tech Seller will notify the customer. The customer can now provision the necessary Gaudi 3 resources.  
diff --git a/third_party/IBM/docs/standard-prerequisites.md b/third_party/IBM/docs/standard-prerequisites.md
@@ -104,13 +104,13 @@ ibmcloud resource groups
 ### 7. Model Selection (models)
 **What it is:** Which AI model you want to deploy
 
-| Model Name 								        | Cards Required       		| Storage | Model ID |
+| Model Name 								| Cards Required       		| Storage | Model ID |
 |-------------------------------------------|---------------------------|---------|----------|
-| meta-llama/Llama-3.1-8B-Instruct  		  | 1 	  					      | 20GB    | 1  		|
-| meta-llama/Llama-3.1-405B-Instruct 		  | 8       					   | 900GB   | 3		   |
-| meta-llama/Llama-3.3-70B-Instruct			  | 4       					   | 150GB   | 4 		   |
-| meta-llama/Llama-4-Scout-17B-16E-Instruct | 4       					   | 200GB   | 5 		   |
-| Qwen/Qwen2.5-32B-Instruct					  | 1       					   | 70GB    | 6 		   |
+| meta-llama/Llama-3.1-8B-Instruct  		| 1 	  					| 20GB    | 1  		 |
+| meta-llama/Llama-3.1-405B-Instruct 		| 8       					| 900GB   | 3		 |
+| meta-llama/Llama-3.3-70B-Instruct			| 4       					| 150GB   | 4 		 |
+| meta-llama/Llama-4-Scout-17B-16E-Instruct | 4       					| 200GB   | 5 		 |
+| Qwen/Qwen2.5-32B-Instruct					| 1       					| 70GB    | 6 		 |
 
 **For CLI deployment:** Use the Model ID as a string value (`"1"`, `"3"`, `"4"`, `"5"`, or `"6"`) in your terraform.tfvars file.
 **For UI deployment:** Select from the dropdown - the values will be mapped automatically.
diff --git a/third_party/IBM/patterns/quickstart/main.tf b/third_party/IBM/patterns/quickstart/main.tf
@@ -20,20 +20,11 @@ data "ibm_is_subnet" "existing_subnet" {
     name = var.subnet
 }
 
-data "ibm_is_image" "ubuntu" {
-    name = var.image
-}
-
 data "ibm_is_image" "xeon_image" {
     count = local.is_multi_node ? 1 : 0
     name = var.xeon_image
 }
 
-data "ibm_is_image" "gaudi_image" {
-    count = local.is_multi_node ? 1 : 0
-    name = var.gaudi_image
-}
-
 data "ibm_resource_group" "target_rg" {
   name = var.resource_group
 }
@@ -67,13 +58,14 @@ resource "ibm_is_instance" "instance_name" {
     vpc     = data.ibm_is_vpc.existing_vpc.id
     zone    = var.instance_zone
     keys    = [data.ibm_is_ssh_key.ssh_key_id.id]
-    image   = data.ibm_is_image.ubuntu.id
     profile = var.instance_profile
     resource_group = data.ibm_resource_group.target_rg.id
-
+	catalog_offering {
+      version_crn = "crn:v1:bluemix:public:globalcatalog-collection:global::1082e7d2-5e2f-0a11-a3bc-f88a8e1931fc:version:68d4fbbe-3984-4c62-bf27-bd938b9bef8e-global/f61fc831-f7da-485e-8d80-b94cea311960-global"
+    }
     primary_network_interface {
-        subnet          = data.ibm_is_subnet.existing_subnet.id
-        security_groups = [data.ibm_is_security_group.existing_sg.id]
+	  subnet          = data.ibm_is_subnet.existing_subnet.id
+      security_groups = [data.ibm_is_security_group.existing_sg.id]
     }
 }
 
@@ -101,13 +93,14 @@ resource "ibm_is_instance" "worker_gaudi_nodes" {
     vpc     = data.ibm_is_vpc.existing_vpc.id
     zone    = var.instance_zone
     keys    = [data.ibm_is_ssh_key.ssh_key_id.id]
-    image   = data.ibm_is_image.gaudi_image[0].id
     profile = var.instance_profile  # Uses same profile as single-node
     resource_group = data.ibm_resource_group.target_rg.id
-
+	catalog_offering {
+      version_crn = "crn:v1:bluemix:public:globalcatalog-collection:global::1082e7d2-5e2f-0a11-a3bc-f88a8e1931fc:version:68d4fbbe-3984-4c62-bf27-bd938b9bef8e-global/f61fc831-f7da-485e-8d80-b94cea311960-global"
+    }
     primary_network_interface {
-        subnet          = data.ibm_is_subnet.existing_subnet.id
-        security_groups = [data.ibm_is_security_group.existing_sg.id]
+      subnet          = data.ibm_is_subnet.existing_subnet.id
+      security_groups = [data.ibm_is_security_group.existing_sg.id]
     }
 }
 
diff --git a/third_party/IBM/patterns/quickstart/run_script.sh b/third_party/IBM/patterns/quickstart/run_script.sh
@@ -155,7 +155,7 @@ fi
 
 cd ~
 rm -rf /home/ubuntu/Enterprise-Inference
-git clone https://github.com/opea-project/Enterprise-Inference.git /home/ubuntu/Enterprise-Inference
+git clone -b release-1.3.1 --single-branch https://github.com/opea-project/Enterprise-Inference.git /home/ubuntu/Enterprise-Inference
 cd /home/ubuntu/Enterprise-Inference
 
 # Copy appropriate hosts.yaml based on deployment mode
diff --git a/third_party/IBM/patterns/quickstart/variables.tf b/third_party/IBM/patterns/quickstart/variables.tf
@@ -58,23 +58,12 @@ variable "resource_group" {
   default     = ""
 }
 
-variable "image" {
-  description = "IBM Cloud instance image (for single-node or default multi-node image)"
-  type        = string
-  default     = ""
-}
-
 variable "xeon_image" {
   description = "IBM Cloud instance image for Xeon/CPU nodes in multi-node deployment"
   type        = string
   default     = "ibm-ubuntu-22-04-5-minimal-amd64-2"  # Default Ubuntu image for CPU nodes
 }
 
-variable "gaudi_image" {
-  description = "IBM Cloud instance image for Gaudi nodes in multi-node deployment"
-  type        = string
-  default     = "gaudi3-os-u22-01-22-0"
-}
 variable "ssh_private_key" {
   default     = null
   description = "Provide the private SSH key (named id_rsa) used during the creation and configuration of the bastion server to securely authenticate and connect to the bastion server. This allows access to internal network resources from a secure entry point. Note: The corresponding public SSH key (named id_rsa.pub) must already be available in the ~/.ssh/authorized_keys file on the bastion host to establish authentication."
diff --git a/third_party/IBM/patterns/standard/main.tf b/third_party/IBM/patterns/standard/main.tf
@@ -28,20 +28,11 @@ data "ibm_resource_group" "target_rg" {
   name = var.resource_group
 }
 
-data "ibm_is_image" "packer_image" {
-    name = var.image
-}
-
 data "ibm_is_image" "xeon_image" {
     count = local.is_multi_node ? 1 : 0
     name = var.xeon_image
 }
 
-data "ibm_is_image" "gaudi_image" {
-    count = local.is_multi_node ? 1 : 0
-    name = var.gaudi_image
-}
-
 resource "ibm_is_vpc" "new_vpc" {
     name = "${local.BASENAME}-vpc-${random_string.suffix.result}"
     resource_group = data.ibm_resource_group.target_rg.id
@@ -185,19 +176,20 @@ data "ibm_is_ssh_key" "ssh_key_id" {
 
 # Single-node instance (when deployment_mode is single-node)
 resource "ibm_is_instance" "vsi" {
-    count   = local.is_multi_node ? 0 : 1
-    name    = "${local.BASENAME}-vsi-${random_string.suffix.result}"
-    vpc     = ibm_is_vpc.new_vpc.id
-    zone    = var.instance_zone
-    keys    = [data.ibm_is_ssh_key.ssh_key_id.id]
-    image   = data.ibm_is_image.packer_image.id
-    resource_group = data.ibm_resource_group.target_rg.id
-    profile = var.instance_profile
-
-    primary_network_interface {
-        subnet          = ibm_is_subnet.new_subnet.id
-        security_groups = [ibm_is_security_group.new_sg.id]
-    }
+  count   = local.is_multi_node ? 0 : 1
+  name    = "${local.BASENAME}-vsi-${random_string.suffix.result}"
+  vpc     = ibm_is_vpc.new_vpc.id
+  zone    = var.instance_zone
+  keys    = [data.ibm_is_ssh_key.ssh_key_id.id]
+  resource_group = data.ibm_resource_group.target_rg.id
+  profile = var.instance_profile
+  catalog_offering {
+    version_crn = "crn:v1:bluemix:public:globalcatalog-collection:global::1082e7d2-5e2f-0a11-a3bc-f88a8e1931fc:version:68d4fbbe-3984-4c62-bf27-bd938b9bef8e-global/f61fc831-f7da-485e-8d80-b94cea311960-global"
+  }
+  primary_network_interface {
+    subnet          = ibm_is_subnet.new_subnet.id
+    security_groups = [ibm_is_security_group.new_sg.id]
+  }
 }
 
 # Multi-node Control Plane instances (Xeon) - only when deployment_mode is multi-node
@@ -224,7 +216,9 @@ resource "ibm_is_instance" "worker_gaudi_nodes" {
     vpc     = ibm_is_vpc.new_vpc.id
     zone    = var.instance_zone
     keys    = [data.ibm_is_ssh_key.ssh_key_id.id]
-    image   = data.ibm_is_image.gaudi_image[0].id
+    catalog_offering {
+      version_crn = "crn:v1:bluemix:public:globalcatalog-collection:global::1082e7d2-5e2f-0a11-a3bc-f88a8e1931fc:version:68d4fbbe-3984-4c62-bf27-bd938b9bef8e-global/f61fc831-f7da-485e-8d80-b94cea311960-global"
+    }
     resource_group = data.ibm_resource_group.target_rg.id
     profile = var.instance_profile  # Uses same profile as single-node
 
diff --git a/third_party/IBM/patterns/standard/run_script.sh b/third_party/IBM/patterns/standard/run_script.sh
@@ -155,7 +155,7 @@ fi
 
 cd ~
 rm -rf /home/ubuntu/Enterprise-Inference
-git clone https://github.com/opea-project/Enterprise-Inference.git /home/ubuntu/Enterprise-Inference
+git clone -b release-1.3.1 --single-branch https://github.com/opea-project/Enterprise-Inference.git /home/ubuntu/Enterprise-Inference
 cd /home/ubuntu/Enterprise-Inference
 
 # Copy appropriate hosts.yaml based on deployment mode
diff --git a/third_party/IBM/patterns/standard/variables.tf b/third_party/IBM/patterns/standard/variables.tf
@@ -74,11 +74,7 @@ variable "user_key" {
   description = "The contents of the TLS private key (PEM format)"
   type        = string
 }
-variable "image" {
-  description = "IBM Cloud instance image"
-  type        = string
-  default     = "gaudi3-os-u22-01-22-0"
-}
+
 variable "hugging_face_token" {
   description = "This variable specifies the hf token."
   type        = string
@@ -170,11 +166,6 @@ variable "xeon_image" {
   default     = "ibm-ubuntu-22-04-5-minimal-amd64-2"  # Default Ubuntu image for CPU nodes
 }
 
-variable "gaudi_image" {
-  description = "IBM Cloud instance image for Gaudi nodes in multi-node deployment"
-  type        = string
-  default     = "gaudi3-os-u22-01-22-0"
-}
 
 variable "control_plane_names" {
   description = "Optional custom names for control plane nodes. If not provided, defaults to 'inference-control-plane-01', etc."

-Original file line number
+Diff line change
@@ @@ -1 +1,4 @@ @@
 -v1.3.0
 +{
 +  "version": "1.3.1",
 +  "release_date": "2025-11-18"
 +}