|
1 | | -<<<<<<< HEAD |
2 | 1 | # Copyright (C) 2025-2026 Intel Corporation |
3 | | -======= |
4 | | -# Copyright (C) 2024-2025 Intel Corporation |
5 | | ->>>>>>> dell-deploy-1.4-nv |
6 | 2 | # SPDX-License-Identifier: Apache-2.0 |
7 | 3 |
|
8 | 4 | --- |
|
32 | 28 | requiredDuringSchedulingIgnoredDuringExecution: |
33 | 29 | nodeSelectorTerms: |
34 | 30 | - matchExpressions: |
35 | | -<<<<<<< HEAD |
36 | | - - key: ei-inference-eligible |
37 | | - operator: In |
38 | | - values: ["true"] |
39 | | -======= |
40 | 31 | - key: role |
41 | 32 | operator: In |
42 | 33 | values: ["inference"] |
|
48 | 39 | - key: role |
49 | 40 | operator: In |
50 | 41 | values: ["inference-cpu"] |
51 | | ->>>>>>> dell-deploy-1.4-nv |
52 | 42 | tolerations: |
53 | 43 | - operator: Exists # Allow scheduling on any node including master |
54 | 44 | securityContext: |
|
235 | 225 | tags: |
236 | 226 | - install |
237 | 227 |
|
238 | | -<<<<<<< HEAD |
239 | | -- name: Refresh node-topology-optimizer pod name (avoid stale pod references) |
240 | | - ansible.builtin.command: > |
241 | | - kubectl -n kube-system get pods -l app=node-topology-optimizer |
242 | | - -o jsonpath='{.items[0].metadata.name}' |
243 | | - register: optimizer_pod_name_refresh |
244 | | - changed_when: false |
245 | | - |
246 | | -- name: Set refreshed optimizer_pod_name |
247 | | - ansible.builtin.set_fact: |
248 | | - optimizer_pod_name: "{{ optimizer_pod_name_refresh.stdout | trim }}" |
249 | | - |
250 | | -======= |
251 | | ->>>>>>> dell-deploy-1.4-nv |
252 | 228 | - name: Get detailed CPU information |
253 | 229 | kubernetes.core.k8s_exec: |
254 | 230 | pod: "{{ optimizer_pod_name }}" |
|
301 | 277 | for s in $(seq 0 $((total_sockets - 1))); do |
302 | 278 | socket_totals[$s]=0; |
303 | 279 | done; |
304 | | -<<<<<<< HEAD |
305 | | - |
306 | | -======= |
307 | 280 | |
308 | | ->>>>>>> dell-deploy-1.4-nv |
309 | 281 | for i in $(seq 0 $((total_numa - 1))); do |
310 | 282 | numa_cpus=$(lscpu | grep "NUMA node$i CPU(s):" | cut -d: -f2 | xargs); |
311 | 283 | if [[ "$numa_cpus" != "" ]]; then |
|
340 | 312 | tags: |
341 | 313 | - install |
342 | 314 |
|
343 | | -<<<<<<< HEAD |
344 | | -- name: Read CPU reservation script content |
345 | | - ansible.builtin.set_fact: |
346 | | - cpu_script_content: "{{ lookup('file', role_path + '/files/compute_reserved_cpus.sh') }}" |
347 | | - tags: |
348 | | - - install |
349 | | - |
350 | | -- name: Execute CPU reservation computation directly from script file |
351 | | - ansible.builtin.shell: |
352 | | - cmd: echo {{ cpu_script_content | quote }} | kubectl exec -n kube-system {{ optimizer_pod_name | quote }} -c ubuntu -i -- bash -s {{ nri_total_reserved_cpus | default(8) | int }} |
353 | | - register: computed_reserved_cpu_list |
354 | | - changed_when: false |
355 | | - tags: |
356 | | - - install |
357 | | - |
358 | | -- name: Set nri_reserved_cpu_list fact |
359 | | - ansible.builtin.set_fact: |
360 | | - nri_reserved_cpu_list: >- |
361 | | - {{ |
362 | | - (computed_reserved_cpu_list.stdout | default('') | trim) |
363 | | - | regex_replace('^.*NRI_RESERVED_CPU_LIST=', '') |
364 | | - | regex_replace('\\s.*$', '') |
365 | | - | regex_replace(',+$', '') |
366 | | - }} |
367 | | -
|
368 | | -- name: Assert nri_reserved_cpu_list computed |
369 | | - ansible.builtin.assert: |
370 | | - that: |
371 | | - - nri_reserved_cpu_list | length > 0 |
372 | | - fail_msg: >- |
373 | | - Failed to compute nri_reserved_cpu_list. |
374 | | - stdout={{ computed_reserved_cpu_list.stdout | default('') }} |
375 | | - stderr={{ computed_reserved_cpu_list.stderr | default('') }} |
376 | | -
|
377 | | -- name: Debug final nri_reserved_cpu_list |
378 | | - ansible.builtin.debug: |
379 | | - msg: "nri_reserved_cpu_list={{ nri_reserved_cpu_list }}" |
380 | | - |
381 | | -======= |
382 | | ->>>>>>> dell-deploy-1.4-nv |
383 | 315 | - name: Parse accurate CPU topology results |
384 | 316 | ansible.builtin.set_fact: |
385 | 317 | numa_cpu_counts: >- |
|
594 | 526 | tags: |
595 | 527 | - install |
596 | 528 |
|
597 | | -<<<<<<< HEAD |
598 | | -- name: Calculate workload CPUs |
599 | | - ansible.builtin.set_fact: |
600 | | - workload_cpus: >- |
601 | | - {%- set total_cpus = (cpus_per_socket | int) * (total_sockets.stdout | int) -%} |
602 | | - {%- set reserved_cpu_count = nri_reserved_cpu_list.split(',') | length -%} |
603 | | - {%- if reserved_cpu_count >= total_cpus -%}{%- set reserved_cpu_count = total_cpus -%}{%- endif -%} |
604 | | - {{ total_cpus - reserved_cpu_count }} |
605 | | - tags: |
606 | | - - install |
607 | | - |
608 | | -- name: Add workload_cpus to balloon configuration |
609 | | - ansible.builtin.set_fact: |
610 | | - optimal_balloon_config: "{{ optimal_balloon_config | combine({'workload_cpus': workload_cpus | int}) }}" |
611 | | - tags: |
612 | | - - install |
613 | | - |
614 | | -======= |
615 | | ->>>>>>> dell-deploy-1.4-nv |
616 | 529 | - name: Check for AMX support |
617 | 530 | kubernetes.core.k8s_exec: |
618 | 531 | pod: "{{ optimizer_pod_name }}" |
|
660 | 573 | {% endfor %} |
661 | 574 |
|
662 | 575 | === CPU Features === |
663 | | -<<<<<<< HEAD |
664 | | - AMX: {{ 'SUPPORTED' if (amx_check is defined and amx_check.rc is defined and amx_check.rc == 0) else 'NOT AVAILABLE' }} |
665 | | - AVX-512: {{ 'SUPPORTED' if (avx512_check is defined and avx512_check.rc is defined and avx512_check.rc == 0) else 'NOT AVAILABLE' }} |
666 | | -======= |
667 | 576 | AMX: {{ 'SUPPORTED' if amx_check.rc == 0 else 'NOT AVAILABLE' }} |
668 | 577 | AVX-512: {{ 'SUPPORTED' if avx512_check.rc == 0 else 'NOT AVAILABLE' }} |
669 | | ->>>>>>> dell-deploy-1.4-nv |
670 | 578 |
|
671 | 579 | === Parallelism Strategy === |
672 | 580 | Strategy: {{ optimal_balloon_config.strategy }} |
|
0 commit comments