@@ -1,8 +1,4 @@
-<<<<<<< HEAD
 # Copyright (C) 2025-2026 Intel Corporation
-=======
-# Copyright (C) 2024-2025 Intel Corporation
->>>>>>> dell-deploy-1.4-nv
 # SPDX-License-Identifier: Apache-2.0

 deploy_inference_llm_models_playbook() {
@@ -14,10 +10,7 @@ deploy_inference_llm_models_playbook() {
     cpu_playbook="true"
     gpu_playbook="false"
     gaudi_deployment="false"
-<<<<<<< HEAD
-=======
     nvidia_deployment="false"
->>>>>>> dell-deploy-1.4-nv
     enable_cpu_balloons="true" # Enable NRI balloons for CPU deployments
     huggingface_model_deployment_name="${huggingface_model_deployment_name}-cpu"
     if [ "$balloon_policy_cpu" == "enabled" ]; then
@@ -27,17 +20,13 @@ deploy_inference_llm_models_playbook() {
     if [ "$cpu_or_gpu" == "g" ]; then
         cpu_playbook="false"
         gpu_playbook="true"
-<<<<<<< HEAD
-        gaudi_deployment="true"
-=======
         if [ "$gpu_vendor" == "nvidia" ]; then
             gaudi_deployment="false"
             nvidia_deployment="true"
         else
             gaudi_deployment="true"
             nvidia_deployment="false"
         fi
->>>>>>> dell-deploy-1.4-nv
         enable_cpu_balloons="false"
     fi
     if [ "$deploy_apisix" == "no" ]; then
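This hunk is where the resolution takes the dell-deploy side: gpu_vendor now selects between Gaudi and NVIDIA rather than hard-coding gaudi_deployment="true". A minimal runnable sketch of the resulting flag matrix, with cpu_or_gpu and gpu_vendor taken as positional parameters purely for illustration (the real script sources them elsewhere):

    #!/usr/bin/env bash
    # Sketch only, not the repository's code: reproduce the accelerator
    # dispatch that the merged script performs.
    cpu_or_gpu="${1:-c}"   # "c" = CPU deployment, "g" = GPU deployment
    gpu_vendor="${2:-}"    # "nvidia" selects NVIDIA; anything else means Gaudi
    cpu_playbook="true";  gpu_playbook="false"
    gaudi_deployment="false"; nvidia_deployment="false"
    if [ "$cpu_or_gpu" == "g" ]; then
        cpu_playbook="false"; gpu_playbook="true"
        if [ "$gpu_vendor" == "nvidia" ]; then
            nvidia_deployment="true"
        else
            gaudi_deployment="true"
        fi
    fi
    echo "cpu=$cpu_playbook gpu=$gpu_playbook gaudi=$gaudi_deployment nvidia=$nvidia_deployment"

Running it with arguments "g nvidia" prints cpu=false gpu=true gaudi=false nvidia=true, matching the echo summary printed later in the script.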
@@ -61,19 +50,13 @@ deploy_inference_llm_models_playbook() {
     elif [[ "$gaudi_platform" == "gaudi3" ]]; then
         gaudi_values_file=$gaudi3_values_file_path
     fi
-<<<<<<< HEAD
-=======
     nvidia_values_file=$nvidia_values_file_path
->>>>>>> dell-deploy-1.4-nv

     echo "Ingress based Deployment: $ingress_enabled"
     echo "APISIX Enabled: $apisix_enabled"
     echo "Keycloak Enabled: $deploy_keycloak"
     echo "Gaudi based: $gaudi_deployment"
-<<<<<<< HEAD
-=======
     echo "NVIDIA based: $nvidia_deployment"
->>>>>>> dell-deploy-1.4-nv
     echo "Model Metrics Enabled: $vllm_metrics_enabled"
     echo "CPU NRI Balloons: $enable_cpu_balloons"

@@ -103,11 +86,7 @@ deploy_inference_llm_models_playbook() {
     fi

     ansible-playbook -i "${INVENTORY_PATH}" playbooks/deploy-inference-models.yml \
-<<<<<<< HEAD
-        --extra-vars "kubernetes_platform=${kubernetes_platform} secret_name=${cluster_url} cert_file=${cert_file} key_file=${key_file} keycloak_admin_user=${keycloak_admin_user} keycloak_admin_password=${keycloak_admin_password} keycloak_client_id=${keycloak_client_id} hugging_face_token=${hugging_face_token} install_true=${install_true} model_name_list='${model_name_list//\ /,}' cpu_playbook=${cpu_playbook} gpu_playbook=${gpu_playbook} hugging_face_token_falcon3=${hugging_face_token_falcon3} deploy_keycloak=${deploy_keycloak} apisix_enabled=${apisix_enabled} ingress_enabled=${ingress_enabled} gaudi_deployment=${gaudi_deployment} huggingface_model_id=${huggingface_model_id} hugging_face_model_deployment=${hugging_face_model_deployment} huggingface_model_deployment_name=${huggingface_model_deployment_name} deploy_inference_llm_models_playbook=${deploy_inference_llm_models_playbook} huggingface_tensor_parellel_size=${huggingface_tensor_parellel_size} deploy_genai_gateway=${deploy_genai_gateway} vllm_metrics_enabled=${vllm_metrics_enabled} gaudi_values_file=${gaudi_values_file} xeon_values_file=${xeon_values_file_path} deploy_ceph=${deploy_ceph} enable_cpu_balloons=${enable_cpu_balloons} balloon_policy_cpu=${balloon_policy_cpu} aws_certificate_arn=${aws_certificate_arn}" --tags "$tags" --vault-password-file "$vault_pass_file"
-=======
         --extra-vars "kubernetes_platform=${kubernetes_platform} secret_name=${cluster_url} cert_file=${cert_file} key_file=${key_file} keycloak_admin_user=${keycloak_admin_user} keycloak_admin_password=${keycloak_admin_password} keycloak_client_id=${keycloak_client_id} hugging_face_token=${hugging_face_token} install_true=${install_true} model_name_list='${model_name_list//\ /,}' cpu_playbook=${cpu_playbook} gpu_playbook=${gpu_playbook} hugging_face_token_falcon3=${hugging_face_token_falcon3} deploy_keycloak=${deploy_keycloak} apisix_enabled=${apisix_enabled} ingress_enabled=${ingress_enabled} gaudi_deployment=${gaudi_deployment} nvidia_deployment=${nvidia_deployment} huggingface_model_id=${huggingface_model_id} hugging_face_model_deployment=${hugging_face_model_deployment} huggingface_model_deployment_name=${huggingface_model_deployment_name} deploy_inference_llm_models_playbook=${deploy_inference_llm_models_playbook} huggingface_tensor_parellel_size=${huggingface_tensor_parellel_size} deploy_genai_gateway=${deploy_genai_gateway} vllm_metrics_enabled=${vllm_metrics_enabled} gaudi_values_file=${gaudi_values_file} nvidia_values_file=${nvidia_values_file} xeon_values_file=${xeon_values_file_path} deploy_ceph=${deploy_ceph} enable_cpu_balloons=${enable_cpu_balloons} balloon_policy_cpu=${balloon_policy_cpu} aws_certificate_arn=${aws_certificate_arn}" --tags "$tags" --vault-password-file "$vault_pass_file"
->>>>>>> dell-deploy-1.4-nv

 }

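The only functional change in the kept ansible-playbook invocation is the addition of nvidia_deployment=${nvidia_deployment} and nvidia_values_file=${nvidia_values_file} to the --extra-vars string. As an aside, a space-separated key=value string this long can be assembled from a bash array so each pair stays reviewable; a sketch over a subset of the variables above, not the repository's code:

    # Build the --extra-vars payload from an array, one pair per line.
    extra_vars=(
        "gaudi_deployment=${gaudi_deployment}"
        "nvidia_deployment=${nvidia_deployment}"
        "gaudi_values_file=${gaudi_values_file}"
        "nvidia_values_file=${nvidia_values_file}"
    )
    # "${extra_vars[*]}" joins the entries with single spaces, the format
    # ansible-playbook accepts for space-separated key=value extra vars.
    ansible-playbook -i "${INVENTORY_PATH}" playbooks/deploy-inference-models.yml \
        --extra-vars "${extra_vars[*]}" --tags "$tags" --vault-password-file "$vault_pass_file"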
@@ -138,27 +117,7 @@ add_model() {
         setup_bastion "$@"
         INVENTORY_PATH=$brownfield_deployment_host_file
     fi
-<<<<<<< HEAD
-    invoke_prereq_workflows "$@"
-
-    # Deploy NRI CPU Balloons for CPU deployments (after all infrastructure, before models)
-    if [[ "$deploy_nri_balloon_policy" == "yes" ]]; then
-        # Ensure this is a CPU deployment
-        if [[ "$cpu_or_gpu" != "c" ]]; then
-            echo "${RED}Error: NRI Balloon Policy can only be deployed for CPU deployments (cpu_or_gpu='c')${NC}"
-            echo "${RED}Current cpu_or_gpu setting: '$cpu_or_gpu'${NC}"
-            echo "${RED}Please set cpu_or_gpu to 'c' or disable NRI balloon policy deployment. Exiting!${NC}"
-            exit 1
-        fi
-        execute_and_check "Deploying CPU Optimization (NRI Balloons & Topology Detection)..." deploy_nri_balloons_playbook "$@" \
-            "CPU optimization deployed successfully." \
-            "Failed to deploy CPU optimization. Exiting!."
-    else
-        echo "Skipping CPU optimization deployment..."
-    fi
-=======
     invoke_prereq_workflows "$@"
->>>>>>> dell-deploy-1.4-nv
     execute_and_check "Deploying Inference LLM Models..." deploy_inference_llm_models_playbook "$@" \
         "Inference LLM Model is deployed successfully." \
         "Failed to deploy Inference LLM Model Exiting!."
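execute_and_check is defined elsewhere in this script; its call sites here imply the contract "banner, function, function arguments, success message, failure message". A hypothetical stand-in consistent with those call sites (the real helper may differ):

    # Hypothetical: run a step, echo the matching message, abort on failure.
    execute_and_check () {
        local banner="$1" fn="$2"; shift 2
        local fail_msg="${@: -1}"      # last argument
        local ok_msg="${@: -2:1}"      # second-to-last argument
        echo "$banner"
        if "$fn" "${@:1:$#-2}"; then   # everything between fn and the two messages
            echo "$ok_msg"
        else
            echo "$fail_msg"
            exit 1
        fi
    }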
@@ -175,8 +134,4 @@ add_model() {
         echo "Please refer to this comprehensive guide for detailed instructions."
         echo ""
     fi
-<<<<<<< HEAD
-}
-=======
 }
->>>>>>> dell-deploy-1.4-nv
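Once a resolution commit like this lands, it is worth confirming that no conflict markers survived anywhere in the tree; two standard checks, assuming a git checkout:

    # git's built-in check flags leftover conflict markers (and whitespace
    # errors) in uncommitted changes:
    git diff --check
    # Belt-and-braces scan of shell sources for marker lines:
    grep -rnE '^(<{7} |={7}$|>{7} )' --include='*.sh' .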