|
15 | 15 | """ |
16 | 16 |
|
17 | 17 | import urllib |
| 18 | +import argparse |
| 19 | +from ..core.system_characteristics import SystemCharacteristics |
18 | 20 | from ..core.blueprint.blueprint_generator import ( |
19 | 21 | a3high_device_type, |
20 | 22 | a4x_device_types, |
|
41 | 43 | ) |
42 | 44 | from ..core.network import get_cluster_subnetworks |
43 | 45 | from ..core.pathways import ( |
44 | | - append_custom_colocated_python_sidecar, |
45 | | - append_custom_pathways_proxy_server, |
46 | | - append_custom_pathways_server, |
47 | | - append_custom_pathways_worker, |
48 | 46 | check_if_pathways_job_is_installed, |
49 | 47 | ensure_pathways_workload_prerequisites, |
50 | 48 | get_pathways_unified_query_link, |
51 | | - get_user_workload_for_pathways, |
52 | 49 | try_to_delete_pathwaysjob_first, |
53 | 50 | ) |
54 | 51 | from ..core.resources import get_cluster_capacity_type, get_cluster_system_characteristics_from_config_map |
|
58 | 55 | ONE_TO_ONE_REPLICA_NODE_POOL_ASSIGNMENT_ANNOTATION, |
59 | 56 | WorkloadScheduling, |
60 | 57 | check_if_workload_can_schedule, |
61 | | - create_tpu_machine_type, |
62 | 58 | create_tpu_slice_topology_annotation, |
63 | | - create_tpu_topology, |
64 | 59 | get_cpu_affinity, |
65 | 60 | get_gpu_scheduler, |
66 | 61 | create_sub_slicing_annotations, |
|
106 | 101 | from jinja2 import Environment, FileSystemLoader |
107 | 102 | from ..utils.templates import get_templates_absolute_path |
108 | 103 |
|
| 104 | +_PATHWAYS_WORKLOAD_TEMPLATE = 'pathways_workload_create.yaml.j2' |
| 105 | + |
109 | 106 | _SUPER_SLICING_WORKLOAD_NAME_LIMIT = 28 |
110 | 107 | """Maximum safe workload name length to avoid exceeding GCE's 63-character limit. |
111 | 108 |
|
|
263 | 260 | containers: |
264 | 261 | {container} |
265 | 262 | """ |
266 | | -# The indentation of PW_WORKLOAD_CREATE_YAML is intentional to allow reusing the user workload container YAML. |
267 | | -PW_WORKLOAD_CREATE_YAML = """apiVersion: jobset.x-k8s.io/v1alpha2 |
268 | | -kind: JobSet |
269 | | -metadata: |
270 | | - name: {args.workload} |
271 | | - labels: |
272 | | - kueue.x-k8s.io/queue-name: {local_queue_name} # Name of the LocalQueue |
273 | | - xpk.google.com/workload: {args.workload} |
274 | | -spec: |
275 | | - coordinator: |
276 | | - replicatedJob: pathways-head |
277 | | - network: |
278 | | - enableDNSHostnames: true |
279 | | - publishNotReadyAddresses: true |
280 | | - failurePolicy: |
281 | | - restartStrategy: Recreate |
282 | | - replicatedJobs: |
283 | | - - name: pathways-head |
284 | | - replicas: 1 |
285 | | - template: |
286 | | - spec: |
287 | | - backoffLimit: 0 |
288 | | - completionMode: Indexed |
289 | | - completions: 1 |
290 | | - parallelism: 1 |
291 | | - template: |
292 | | - metadata: |
293 | | - annotations: |
294 | | - alpha.jobset.sigs.k8s.io/exclusive-topology: kubernetes.io/hostname |
295 | | - spec: |
296 | | - hostNetwork: true |
297 | | - dnsPolicy: ClusterFirstWithHostNet |
298 | | - nodeSelector: |
299 | | - cloud.google.com/gke-nodepool: cpu-np |
300 | | - {autoprovisioning_args} |
301 | | -{pathways_head_containers} |
302 | | - restartPolicy: Never |
303 | | - volumes: |
304 | | - - hostPath: |
305 | | - path: /tmp |
306 | | - type: DirectoryOrCreate |
307 | | - name: shared-tmp |
308 | | - - name: worker |
309 | | - replicas: {args.num_slices} |
310 | | - template: |
311 | | - spec: |
312 | | - backoffLimit: {worker_backoff_limit} |
313 | | - completionMode: Indexed |
314 | | - completions: {vms_per_slice} |
315 | | - parallelism: {vms_per_slice} |
316 | | - template: |
317 | | - metadata: |
318 | | - labels: |
319 | | - xpk.google.com/workload: {args.workload} |
320 | | - annotations: |
321 | | - alpha.jobset.sigs.k8s.io/exclusive-topology: cloud.google.com/gke-nodepool |
322 | | - spec: |
323 | | - hostNetwork: true |
324 | | - dnsPolicy: ClusterFirstWithHostNet |
325 | | - terminationGracePeriodSeconds: {args.termination_grace_period_seconds} |
326 | | - priorityClassName: {args.priority} |
327 | | - nodeSelector: |
328 | | - {accelerator_label} |
329 | | - {node_selector_machine_label} |
330 | | - {placement_policy_label} |
331 | | - {autoprovisioning_args} |
332 | | - containers: |
333 | | - {custom_pathways_worker} |
334 | | - restartPolicy: OnFailure |
335 | | - volumes: |
336 | | - - hostPath: |
337 | | - path: /tmp |
338 | | - type: DirectoryOrCreate |
339 | | - name: shared-tmp |
340 | | - startupPolicy: |
341 | | - startupPolicyOrder: InOrder |
342 | | - {success_policy} |
343 | | - suspend: false |
344 | | -""" |
345 | 263 |
|
346 | 264 | ARM_GPU_WORKLOAD_CREATE_JINJA_FILE = 'arm_gpu_workload_crate.yaml.j2' |
347 | 265 |
|
348 | 266 |
|
def _generate_pathways_workload_yaml(
    args: argparse.Namespace,
    workload_system: SystemCharacteristics,
    parallel_containers: int,
    placement_policy_label: str,
    autoprovisioning_args: str | None,
) -> str:
  """Render the Pathways workload-create manifest from its Jinja2 template.

  Args:
    args: Parsed CLI namespace (workload name, image overrides, headless
      flag, priority, etc.).
    workload_system: Characteristics of the target accelerator system.
    parallel_containers: Number of parallel user-workload containers.
    placement_policy_label: nodeSelector snippet for the placement policy.
    autoprovisioning_args: nodeSelector snippet for autoprovisioning, or
      None when autoprovisioning is not used.

  Returns:
    The rendered workload YAML as a string.
  """
  # Elastic slices get a per-slice restart budget; otherwise allow four
  # restarts per VM in the slice.
  if getattr(args, 'elastic_slices', 0) > 0:
    worker_backoff_limit = (
        args.max_slice_restarts * workload_system.vms_per_slice
    )
  else:
    worker_backoff_limit = workload_system.vms_per_slice * 4

  def _image_or_default(attr: str, default: str) -> str:
    # Fall back to the default image when the flag is absent or empty.
    return getattr(args, attr, None) or default

  proxy_server_image = _image_or_default(
      'proxy_server_image',
      'us-docker.pkg.dev/cloud-tpu-v2-images/pathways/proxy_server:latest',
  )
  server_image = _image_or_default(
      'server_image',
      'us-docker.pkg.dev/cloud-tpu-v2-images/pathways/server:latest',
  )
  # Workers reuse the server image unless an explicit worker image is given.
  worker_image = _image_or_default('worker_image', server_image)

  if workload_system.pathways_tpu_version:
    instance_type = (
        f'{workload_system.pathways_tpu_version}:{workload_system.topology}'
    )
  else:
    instance_type = workload_system.gce_machine_type

  if args.headless:
    # Headless mode runs no user container on the Pathways head.
    user_workload_container = ''
    user_workload_env_vars = []
  else:
    # NOTE(review): the second return value of get_user_workload_container
    # is intentionally discarded here — confirm nothing downstream needs it.
    user_workload_container, _ = get_user_workload_container(
        args, workload_system, parallel_containers
    )
    # NOTE(review): 'valueFrom' carries a bare fieldPath string here —
    # presumably the template expands it into a fieldRef; verify.
    user_workload_env_vars = [
        {
            'name': 'PATHWAYS_HEAD',
            'valueFrom': "metadata.labels['jobset.sigs.k8s.io/coordinator']",
        },
        {'name': 'JAX_PLATFORMS', 'value': 'proxy'},
        {'name': 'XCLOUD_ENVIRONMENT', 'value': 'GCP'},
        {
            'name': 'JAX_BACKEND_TARGET',
            'value': 'grpc://$(PATHWAYS_HEAD):29000',
        },
    ]

  jinja_env = Environment(
      loader=FileSystemLoader(searchpath=get_templates_absolute_path()),
      trim_blocks=True,
      lstrip_blocks=True,
      keep_trailing_newline=True,
  )
  template = jinja_env.get_template(_PATHWAYS_WORKLOAD_TEMPLATE)
  return template.render(
      args=args,
      local_queue_name=LOCAL_QUEUE_NAME,
      proxy_server_image=proxy_server_image,
      server_image=server_image,
      instance_type=instance_type,
      user_workload_container=user_workload_container,
      user_workload_env_vars=user_workload_env_vars,
      worker_backoff_limit=worker_backoff_limit,
      vms_per_slice=workload_system.vms_per_slice,
      workload_system=workload_system,
      accelerator_label=create_accelerator_label(workload_system),
      node_selector_machine_label=create_machine_label(workload_system),
      placement_policy_label=placement_policy_label,
      autoprovisioning_args=autoprovisioning_args,
      worker_image=worker_image,
  )
| 346 | + |
349 | 347 | def workload_create_pathways(args) -> None: |
350 | 348 | """Run jobset apply command for a file, specifically for Pathways. |
351 | 349 |
|
@@ -695,46 +693,12 @@ def workload_create(args) -> None: |
695 | 693 | elif args.use_pathways and ensure_pathways_workload_prerequisites( |
696 | 694 | args, workload_system |
697 | 695 | ): |
698 | | - if args.headless: |
699 | | - pathways_head_containers = f""" containers: |
700 | | -{append_custom_pathways_proxy_server(args)} |
701 | | -{append_custom_pathways_server(args, workload_system)} |
702 | | -{append_custom_colocated_python_sidecar(args)}""" |
703 | | - success_policy = '' |
704 | | - else: |
705 | | - pathways_head_containers = f""" initContainers: |
706 | | -{append_custom_pathways_proxy_server(args)} |
707 | | -{append_custom_pathways_server(args, workload_system)} |
708 | | -{append_custom_colocated_python_sidecar(args)} |
709 | | - containers: |
710 | | -{get_user_workload_for_pathways(args, workload_system, parallel_containers)}""" |
711 | | - success_policy = """successPolicy: |
712 | | - operator: All |
713 | | - targetReplicatedJobs: |
714 | | - - pathways-head""" |
715 | | - |
716 | | - worker_backoff_limit = ( |
717 | | - (args.max_slice_restarts * workload_system.vms_per_slice) |
718 | | - if getattr(args, 'elastic_slices', 0) > 0 |
719 | | - else (workload_system.vms_per_slice * 4) |
720 | | - ) |
721 | | - |
722 | | - yml_string = PW_WORKLOAD_CREATE_YAML.format( |
| 696 | + yml_string = _generate_pathways_workload_yaml( |
723 | 697 | args=args, |
724 | | - topology=create_tpu_topology(workload_system), |
725 | | - machine_type=create_tpu_machine_type(workload_system), |
726 | | - pathways_head_containers=pathways_head_containers, |
727 | | - custom_pathways_worker=append_custom_pathways_worker( |
728 | | - args, workload_system |
729 | | - ), |
730 | | - worker_backoff_limit=worker_backoff_limit, |
731 | | - success_policy=success_policy, |
732 | | - local_queue_name=LOCAL_QUEUE_NAME, |
733 | | - autoprovisioning_args=autoprovisioning_args, |
| 698 | + workload_system=workload_system, |
| 699 | + parallel_containers=parallel_containers, |
734 | 700 | placement_policy_label=placement_policy_label, |
735 | | - vms_per_slice=workload_system.vms_per_slice, |
736 | | - accelerator_label=create_accelerator_label(workload_system), |
737 | | - node_selector_machine_label=create_machine_label(workload_system), |
| 701 | + autoprovisioning_args=autoprovisioning_args, |
738 | 702 | ) |
739 | 703 | else: |
740 | 704 | if use_sub_slicing: |
|
0 commit comments