# Copyright (C) 2025-2026 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
73{{- $modelName := (default .Values.LLM_MODEL_ID .Values.SERVED_MODEL_NAME) }}
84apiVersion : apps/v1
2723 balloon.balloons.resource-policy.nri.io : {{ .Values.cpu_balloon_annotation | quote }}
2824 {{- end }}
2925 {{- end }}
30- <<<<<<< HEAD
31- =======
3226 {{- with .Values.podAnnotations }}
3327 {{- toYaml . | nindent 8 }}
3428 {{- end }}
35- >>>>>>> dell-deploy-1.4-nv
3629 labels :
3730 {{- include "vllm.selectorLabels" . | nindent 8 }}
3831 {{- if not .Values.accelDevice }}
@@ -65,20 +58,12 @@ spec:
6558 {{- if .Values.image.pullPolicy }}
6659 imagePullPolicy : {{ .Values.image.pullPolicy }}
6760 {{- end }}
68- <<<<<<< HEAD
69- # command:
70- # - /bin/bash
71- # - -c
72- # - |
73- # python3 -m vllm.entrypoints.openai.api_server --dtype {{ .Values.d_type }} --model {{ .Values.LLM_MODEL_ID }} --port {{ .Values.port }} --tensor-parallel-size {{ .Values.tensor_parallel_size }} --block-size {{ .Values.block_size }} --max-model-len {{ .Values.max_model_len }} --disable-log-requests
74- =======
7561 {{- if eq .Values.accelDevice "nvidia" }}
7662 command :
7763 - python3
7864 - -m
7965 - vllm.entrypoints.openai.api_server
8066 {{- end }}
81- >>>>>>> dell-deploy-1.4-nv
8267 args :
8368 {{- $modelConfig := (index .Values.modelConfigs $modelName | default dict) }}
8469 {{- $modelArgs := $modelConfig.extraCmdArgs | default .Values.defaultModelConfigs.extraCmdArgs }}
@@ -136,11 +121,6 @@ spec:
136121 memory : {{ .Values.memory | quote }}
137122 {{- end }}
138123 {{- end }}
139- <<<<<<< HEAD
140- {{- else }}
141- limits :
142- habana.ai/gaudi : {{ .Values.tensor_parallel_size | default (index .Values.modelConfigs .Values.LLM_MODEL_ID | default dict).tensor_parallel_size | default .Values.defaultModelConfigs.tensor_parallel_size | quote}}
143- =======
144124 {{- else if eq .Values.accelDevice "gaudi" }}
145125 limits :
146126 habana.ai/gaudi : {{ .Values.tensor_parallel_size | default (index .Values.modelConfigs .Values.LLM_MODEL_ID | default dict).tensor_parallel_size | default .Values.defaultModelConfigs.tensor_parallel_size | quote}}
@@ -149,7 +129,6 @@ spec:
149129 nvidia.com/gpu : {{ .Values.gpuCount | default 1 | quote }}
150130 {{- else }}
151131 {{- toYaml .Values.resources | nindent 12 }}
152- >>>>>>> dell-deploy-1.4-nv
153132 {{- end }}
154133 {{- end }}
155134
@@ -195,8 +174,4 @@ spec:
195174 {{- if not .Values.accelDevice }}
196175 # extra time to finish processing buffered requests on CPU before pod is forcibly terminated
197176 terminationGracePeriodSeconds : 120
198- <<<<<<< HEAD
199- {{- end }}
200- =======
201177 {{- end }}
202- >>>>>>> dell-deploy-1.4-nv
0 commit comments