Skip to content

Commit 377d70a

Browse files
xkd9Harika
authored and committed
updated
Signed-off-by: Harika <codewith3@gmail.com>
1 parent 2c60cfd commit 377d70a

9 files changed

Lines changed: 0 additions & 224 deletions

File tree

core/playbooks/deploy-keycloak-tls-cert.yml

Lines changed: 0 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,4 @@
1-
<<<<<<< HEAD
21
# Copyright (C) 2025-2026 Intel Corporation
3-
=======
4-
# Copyright (C) 2024-2025 Intel Corporation
5-
>>>>>>> dell-deploy-1.4-nv
62
# SPDX-License-Identifier: Apache-2.0
73
---
84
- name: Create Keycloak TLS secret
@@ -59,18 +55,6 @@
5955
- name: Display the output
6056
debug:
6157
msg: "Secret {{ secret_name }} created."
62-
<<<<<<< HEAD
63-
- name: Retrieve number of Infrastructure Nodes (ei-infra-eligible)
64-
ansible.builtin.shell: kubectl get nodes --selector=ei-infra-eligible=true --no-headers | wc -l
65-
register: inference_node_count
66-
changed_when: false
67-
- name: Set Keycloak Replicas based upon Infrastructure Nodes
68-
set_fact:
69-
inference_infra_replica_count: "{{ inference_node_count.stdout | int if inference_node_count.stdout | int > 0 else 1 }}"
70-
- name: Display the number of Infrastructure Nodes
71-
debug:
72-
msg: "Number of ei-infra-eligible nodes configured: {{ inference_infra_replica_count }}"
73-
=======
7458
- name: Retrieve number of Inference Infrastructure Nodes
7559
ansible.builtin.shell: kubectl get nodes --selector=role=inference-infra --no-headers | wc -l
7660
register: inference_node_count
@@ -81,7 +65,6 @@
8165
- name: Display the number of Inference Infrastructure Nodes
8266
debug:
8367
msg: "Number of inference-infra node configured: {{ inference_infra_replica_count }}"
84-
>>>>>>> dell-deploy-1.4-nv
8568
run_once: true
8669
when: deploy_keycloak == "yes"
8770
- name: Delete genai-gateway-ingress resource
@@ -119,41 +102,10 @@
119102
persistence:
120103
enabled: true
121104
size: 8Gi
122-
<<<<<<< HEAD
123-
tolerations:
124-
- key: node-role.kubernetes.io/control-plane
125-
operator: Exists
126-
effect: NoSchedule
127-
- key: node-role.kubernetes.io/master
128-
operator: Exists
129-
effect: NoSchedule
130-
affinity:
131-
nodeAffinity:
132-
requiredDuringSchedulingIgnoredDuringExecution:
133-
nodeSelectorTerms:
134-
- matchExpressions:
135-
- key: ei-infra-eligible
136-
operator: In
137-
values: ["true"]
138-
=======
139-
>>>>>>> dell-deploy-1.4-nv
140105
volumePermissions:
141106
enabled: true
142107
image:
143108
repository: bitnamilegacy/os-shell
144-
<<<<<<< HEAD
145-
tag: 12-debian-12-r48
146-
production: true
147-
proxy: edge
148-
replicaCount: "{{ inference_infra_replica_count | int }}"
149-
resources:
150-
requests:
151-
memory: "2Gi"
152-
cpu: "500m"
153-
limits:
154-
memory: "4Gi"
155-
cpu: "1500m"
156-
=======
157109
tag: 12-debian-12-r48
158110
tolerations:
159111
- key: node-role.kubernetes.io/control-plane
@@ -176,7 +128,6 @@
176128
production: true
177129
proxy: edge
178130
replicaCount: "{{ inference_infra_replica_count | int }}"
179-
>>>>>>> dell-deploy-1.4-nv
180131
extraEnvVars:
181132
- name: KC_CACHE
182133
value: ispn
@@ -232,15 +183,9 @@
232183
requiredDuringSchedulingIgnoredDuringExecution:
233184
nodeSelectorTerms:
234185
- matchExpressions:
235-
<<<<<<< HEAD
236-
- key: ei-infra-eligible
237-
operator: In
238-
values: ["true"]
239-
=======
240186
- key: role
241187
operator: In
242188
values: ["infra"]
243-
>>>>>>> dell-deploy-1.4-nv
244189
- matchExpressions:
245190
- key: node-role.kubernetes.io/control-plane
246191
operator: Exists
@@ -346,18 +291,10 @@
346291
annotations:
347292
alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_301"}}'
348293
alb.ingress.kubernetes.io/certificate-arn: "{{ aws_certificate_arn | default('') }}"
349-
<<<<<<< HEAD
350-
alb.ingress.kubernetes.io/group.name: ei-eks
351-
alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]'
352-
alb.ingress.kubernetes.io/scheme: internet-facing
353-
alb.ingress.kubernetes.io/target-type: ip
354-
alb.ingress.kubernetes.io/group.order: '3'
355-
=======
356294
alb.ingress.kubernetes.io/group.name: keycloak-apisix
357295
alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]'
358296
alb.ingress.kubernetes.io/scheme: internet-facing
359297
alb.ingress.kubernetes.io/target-type: ip
360-
>>>>>>> dell-deploy-1.4-nv
361298
spec:
362299
ingressClassName: alb
363300
rules:

core/roles/utils/tasks/get_optimized_cpu_topology.yaml

Lines changed: 0 additions & 92 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,4 @@
1-
<<<<<<< HEAD
21
# Copyright (C) 2025-2026 Intel Corporation
3-
=======
4-
# Copyright (C) 2024-2025 Intel Corporation
5-
>>>>>>> dell-deploy-1.4-nv
62
# SPDX-License-Identifier: Apache-2.0
73

84
---
@@ -32,11 +28,6 @@
3228
requiredDuringSchedulingIgnoredDuringExecution:
3329
nodeSelectorTerms:
3430
- matchExpressions:
35-
<<<<<<< HEAD
36-
- key: ei-inference-eligible
37-
operator: In
38-
values: ["true"]
39-
=======
4031
- key: role
4132
operator: In
4233
values: ["inference"]
@@ -48,7 +39,6 @@
4839
- key: role
4940
operator: In
5041
values: ["inference-cpu"]
51-
>>>>>>> dell-deploy-1.4-nv
5242
tolerations:
5343
- operator: Exists # Allow scheduling on any node including master
5444
securityContext:
@@ -235,20 +225,6 @@
235225
tags:
236226
- install
237227

238-
<<<<<<< HEAD
239-
- name: Refresh node-topology-optimizer pod name (avoid stale pod references)
240-
ansible.builtin.command: >
241-
kubectl -n kube-system get pods -l app=node-topology-optimizer
242-
-o jsonpath='{.items[0].metadata.name}'
243-
register: optimizer_pod_name_refresh
244-
changed_when: false
245-
246-
- name: Set refreshed optimizer_pod_name
247-
ansible.builtin.set_fact:
248-
optimizer_pod_name: "{{ optimizer_pod_name_refresh.stdout | trim }}"
249-
250-
=======
251-
>>>>>>> dell-deploy-1.4-nv
252228
- name: Get detailed CPU information
253229
kubernetes.core.k8s_exec:
254230
pod: "{{ optimizer_pod_name }}"
@@ -301,11 +277,7 @@
301277
for s in $(seq 0 $((total_sockets - 1))); do
302278
socket_totals[$s]=0;
303279
done;
304-
<<<<<<< HEAD
305-
306-
=======
307280
308-
>>>>>>> dell-deploy-1.4-nv
309281
for i in $(seq 0 $((total_numa - 1))); do
310282
numa_cpus=$(lscpu | grep "NUMA node$i CPU(s):" | cut -d: -f2 | xargs);
311283
if [[ "$numa_cpus" != "" ]]; then
@@ -340,46 +312,6 @@
340312
tags:
341313
- install
342314

343-
<<<<<<< HEAD
344-
- name: Read CPU reservation script content
345-
ansible.builtin.set_fact:
346-
cpu_script_content: "{{ lookup('file', role_path + '/files/compute_reserved_cpus.sh') }}"
347-
tags:
348-
- install
349-
350-
- name: Execute CPU reservation computation directly from script file
351-
ansible.builtin.shell:
352-
cmd: echo {{ cpu_script_content | quote }} | kubectl exec -n kube-system {{ optimizer_pod_name | quote }} -c ubuntu -i -- bash -s {{ nri_total_reserved_cpus | default(8) | int }}
353-
register: computed_reserved_cpu_list
354-
changed_when: false
355-
tags:
356-
- install
357-
358-
- name: Set nri_reserved_cpu_list fact
359-
ansible.builtin.set_fact:
360-
nri_reserved_cpu_list: >-
361-
{{
362-
(computed_reserved_cpu_list.stdout | default('') | trim)
363-
| regex_replace('^.*NRI_RESERVED_CPU_LIST=', '')
364-
| regex_replace('\\s.*$', '')
365-
| regex_replace(',+$', '')
366-
}}
367-
368-
- name: Assert nri_reserved_cpu_list computed
369-
ansible.builtin.assert:
370-
that:
371-
- nri_reserved_cpu_list | length > 0
372-
fail_msg: >-
373-
Failed to compute nri_reserved_cpu_list.
374-
stdout={{ computed_reserved_cpu_list.stdout | default('') }}
375-
stderr={{ computed_reserved_cpu_list.stderr | default('') }}
376-
377-
- name: Debug final nri_reserved_cpu_list
378-
ansible.builtin.debug:
379-
msg: "nri_reserved_cpu_list={{ nri_reserved_cpu_list }}"
380-
381-
=======
382-
>>>>>>> dell-deploy-1.4-nv
383315
- name: Parse accurate CPU topology results
384316
ansible.builtin.set_fact:
385317
numa_cpu_counts: >-
@@ -594,25 +526,6 @@
594526
tags:
595527
- install
596528

597-
<<<<<<< HEAD
598-
- name: Calculate workload CPUs
599-
ansible.builtin.set_fact:
600-
workload_cpus: >-
601-
{%- set total_cpus = (cpus_per_socket | int) * (total_sockets.stdout | int) -%}
602-
{%- set reserved_cpu_count = nri_reserved_cpu_list.split(',') | length -%}
603-
{%- if reserved_cpu_count >= total_cpus -%}{%- set reserved_cpu_count = total_cpus -%}{%- endif -%}
604-
{{ total_cpus - reserved_cpu_count }}
605-
tags:
606-
- install
607-
608-
- name: Add workload_cpus to balloon configuration
609-
ansible.builtin.set_fact:
610-
optimal_balloon_config: "{{ optimal_balloon_config | combine({'workload_cpus': workload_cpus | int}) }}"
611-
tags:
612-
- install
613-
614-
=======
615-
>>>>>>> dell-deploy-1.4-nv
616529
- name: Check for AMX support
617530
kubernetes.core.k8s_exec:
618531
pod: "{{ optimizer_pod_name }}"
@@ -660,13 +573,8 @@
660573
{% endfor %}
661574
662575
=== CPU Features ===
663-
<<<<<<< HEAD
664-
AMX: {{ 'SUPPORTED' if (amx_check is defined and amx_check.rc is defined and amx_check.rc == 0) else 'NOT AVAILABLE' }}
665-
AVX-512: {{ 'SUPPORTED' if (avx512_check is defined and avx512_check.rc is defined and avx512_check.rc == 0) else 'NOT AVAILABLE' }}
666-
=======
667576
AMX: {{ 'SUPPORTED' if amx_check.rc == 0 else 'NOT AVAILABLE' }}
668577
AVX-512: {{ 'SUPPORTED' if avx512_check.rc == 0 else 'NOT AVAILABLE' }}
669-
>>>>>>> dell-deploy-1.4-nv
670578
671579
=== Parallelism Strategy ===
672580
Strategy: {{ optimal_balloon_config.strategy }}

core/scripts/generate-vault-secrets.sh

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -33,11 +33,6 @@ MINIO_SECRET=$(generate_password 20)
3333
POSTGRES_USER="postgres"
3434
POSTGRES_PASSWORD=$(generate_password 20)
3535
GRAFANA_ADMIN_PASSWORD=$(generate_password 20)
36-
<<<<<<< HEAD
37-
AGENTICAI_POSTGRES_PASSWORD=$(generate_password 20)
38-
AGENTICAI_REDIS_PASSWORD=$(generate_password 20)
39-
=======
40-
>>>>>>> dell-deploy-1.4-nv
4136

4237
# Generate connection strings
4338
DATABASE_URL="postgresql://admin:${POSTGRESQL_PASSWORD}@genai-gateway-postgresql:5432/litellm"
@@ -72,11 +67,6 @@ minio_user: "$MINIO_USER"
7267
postgres_user: "$POSTGRES_USER"
7368
postgres_password: "$POSTGRES_PASSWORD"
7469
grafana_admin_password: "$GRAFANA_ADMIN_PASSWORD"
75-
<<<<<<< HEAD
76-
agenticai_postgres_password: "$AGENTICAI_POSTGRES_PASSWORD"
77-
agenticai_redis_password: "$AGENTICAI_REDIS_PASSWORD"
78-
=======
79-
>>>>>>> dell-deploy-1.4-nv
8070
EOF
8171

8272
# Set appropriate permissions

docs/README.md

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,10 +12,6 @@ Complete all [prerequisites](./prerequisites.md).
1212

1313
| Deployment Type | Description |
1414
|-----------------------------------------|--------------------------------------------------------------|
15-
<<<<<<< HEAD
16-
| **Single Node (vLLM, non‑production)** | For Quick Testing on Intel® Xeon® processors using vLLM Docker ([Guide](../core/scripts/vllm-quickstart/README.md)) |
17-
=======
18-
>>>>>>> dell-deploy-1.4-nv
1915
| **Single Node** | Quick start for testing or lightweight workloads ([Guide](./single-node-deployment.md)) |
2016
| **Single Master, Multiple Workers** | For higher throughput workloads ([Guide](./inventory-design-guide.md#single-master-multiple-workload-node-deployment)) |
2117
| **Multi-Master, Multiple Workers** | Recommended for HA enterprise clusters ([Guide](./inventory-design-guide.md#multi-master-multi-workload-node-deployment)) |

docs/cpu-optimization-guide.md

Lines changed: 0 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -7,21 +7,6 @@ The system automatically optimizes CPU allocation for AI models using balloon po
77
## Automatic Features
88

99
### CPU Allocation
10-
<<<<<<< HEAD
11-
12-
**System CPU Reservation**: A total of **8 vCPUs** is reserved for infrastructure components (Keycloak, APISIX, observability, kube-system), distributed evenly across NUMA nodes.
13-
14-
**Intelligent CPU Selection**:
15-
- Automatically detects NUMA topology and hyperthreading configuration
16-
- For hyperthreaded systems: Balances reservations between physical cores and HT siblings
17-
- Example (48 cores with HT): Reserves from both physical cores (0-23) and HT cores (24-47)
18-
- For non-segmented CPUs (e.g., "0-47"): Creates virtual segments at the midpoint
19-
- For segmented CPUs (e.g., "0-23,48-71"): Uses existing segment boundaries
20-
21-
**Model CPU Allocation**:
22-
- Remaining CPUs (after reservation) are allocated to LLM models
23-
- Assigns dedicated CPU cores to each model for optimal performance
24-
=======
2510
- System automatically detects available CPU cores
2611
- Reserves 18% of CPUs for system processes
2712
- Allocates remaining CPUs to AI models
@@ -31,7 +16,6 @@ The system automatically optimizes CPU allocation for AI models using balloon po
3116
- System automatically detects available memory
3217
- Reserves 18% of memory for system processes
3318
- Allocates remaining memory to AI models
34-
>>>>>>> dell-deploy-1.4-nv
3519

3620
### Hardware Detection
3721
- Automatically detects NUMA topology
@@ -53,18 +37,6 @@ labels:
5337
resources:
5438
requests:
5539
cpu: 40 # Automatically calculated
56-
<<<<<<< HEAD
57-
# for tp1, tp2 system should have minimum 128Gi and for tp>=4 minimum 256Gi memory available for the model's pod
58-
memory: 128Gi
59-
```
60-
61-
## System Component Deployment Recommendations
62-
63-
For single-node Xeon clusters, **Keycloak** and **APISIX** are recommended.
64-
65-
For Gaudi or large multi-node Xeon clusters, the GenAI Gateway is well-suited.
66-
67-
=======
6840
memory: 4G
6941
```
7042
@@ -76,7 +48,6 @@ For single node clusters (e.g., systems with 48 CPU cores), only Keycloak and AP
7648
- For clusters with limited CPU resources, deploy only Keycloak and APISIX.
7749
- GenAI Gateway deployment requires at least 96 CPU cores.
7850
79-
>>>>>>> dell-deploy-1.4-nv
8051
## Status Verification
8152
8253
### Check System Status

docs/examples/multi-node/inference-config.cfg

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -17,11 +17,4 @@ deploy_observability=off
1717
deploy_llm_models=on
1818
deploy_ceph=off
1919
deploy_istio=off
20-
<<<<<<< HEAD
2120
uninstall_ceph=off
22-
23-
# Agentic AI Plugin
24-
deploy_agenticai_plugin=off
25-
=======
26-
uninstall_ceph=off
27-
>>>>>>> dell-deploy-1.4-nv

docs/examples/single-node/inference-config.cfg

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -15,9 +15,3 @@ deploy_llm_models=on
1515
deploy_ceph=off
1616
deploy_istio=off
1717
uninstall_ceph=off
18-
<<<<<<< HEAD
19-
20-
# Agentic AI Plugin
21-
deploy_agenticai_plugin=off
22-
=======
23-
>>>>>>> dell-deploy-1.4-nv

docs/prerequisites.md

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -15,11 +15,7 @@ The first step is to get access to the hardware platforms. This guide assumes th
1515

1616
| Category | Details |
1717
|---------------------|-------------------------------------------------------------------------------------------------------------------|
18-
<<<<<<< HEAD
19-
| Operating System | Ubuntu 22.04, Ubuntu 24.04 |
20-
=======
2118
| Operating System | Ubuntu 22.04 |
22-
>>>>>>> dell-deploy-1.4-nv
2319
| Hardware Platforms | 4th Gen Intel® Xeon® Scalable processors<br>5th Gen Intel® Xeon® Scalable processors<br>6th Gen Intel® Xeon® Scalable processors<br>3rd Gen Intel® Xeon® Scalable processors and Intel® Gaudi® 2 AI Accelerator<br>4th Gen Intel® Xeon® Scalable processors and Intel® Gaudi® 2 AI Accelerator <br>6th Gen Intel® Xeon® Scalable processors and Intel® Gaudi® 3 AI Accelerator|
2420
| Gaudi Firmware Version | 1.20.0 or newer
2521

0 commit comments

Comments
 (0)