File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 77 - name : gpu-container
88 # torch113_cuda117_ds076
99 # image: docker.io/deepspeed/deepspeed:v072_torch112_cu117
10- image : docker.io/zihaokevinzhou/deepspeed:torch113_cuda117_ds076
10+ image : gitlab-registry.nrp-nautilus.io/zihaozhou/nautilus_tutorial:torch113_cuda117_ds076
1111 imagePullPolicy : Always
1212 command : ["sleep", "infinity"]
1313 volumeMounts :
3434 operator : In
3535 values :
3636 - NVIDIA-GeForce-RTX-3090
37- - key : kubernetes.io/hostname
38- operator : In
39- values :
40- - k8s-3090-02.clemson.edu
4137 volumes :
4238 - name : stpp-vol
4339 persistentVolumeClaim :
Original file line number Diff line number Diff line change @@ -5,13 +5,12 @@ apiVersion: batch/v1
55kind : Job
66metadata :
77 name : mnist
8- namespace : deep-forecast
98spec :
109 template :
1110 spec :
1211 containers :
1312 - name : gpu-container
14- image : docker.io/horovod/horovod:sha-811cf67
13+ image : gitlab-registry.nrp-nautilus.io/zihaozhou/nautilus_tutorial:hovorod
1514 command : ["/bin/bash","-c"]
1615 # NCCL_DEBUG=INFO
1716 args : ["git clone https://github.com/Rose-STL-Lab/nautilus_tutorial.git;
4140 operator : In
4241 values :
4342 - NVIDIA-GeForce-RTX-3090
44- - key : kubernetes.io/hostname
45- operator : In
46- values :
47- - k8s-3090-02.clemson.edu
4843 volumes :
4944 - name : stpp-vol
5045 persistentVolumeClaim :
Original file line number Diff line number Diff line change @@ -5,13 +5,12 @@ apiVersion: batch/v1
55kind : Job
66metadata :
77 name : mnist
8- namespace : deep-forecast
98spec :
109 template :
1110 spec :
1211 containers :
1312 - name : gpu-container
14- image : horovod/horovod:sha-811cf67
13+ image : gitlab-registry.nrp-nautilus.io/zihaozhou/nautilus_tutorial:hovorod
1514 command : ["/bin/bash","-c"]
1615 # NCCL_DEBUG=INFO
1716 args : ["git clone https://github.com/Rose-STL-Lab/nautilus_tutorial.git;
4140 operator : In
4241 values :
4342 - NVIDIA-GeForce-RTX-3090
44- - key : kubernetes.io/hostname
45- operator : In
46- values :
47- - k8s-3090-02.clemson.edu
4843 volumes :
4944 - name : stpp-vol
5045 persistentVolumeClaim :
Original file line number Diff line number Diff line change @@ -5,13 +5,12 @@ apiVersion: batch/v1
55kind : Job
66metadata :
77 name : mnist
8- namespace : deep-forecast
98spec :
109 template :
1110 spec :
1211 containers :
1312 - name : gpu-container
14- image : docker.io/zihaokevinzhou/deepspeed:torch113_cuda117_ds076
13+ image : gitlab-registry.nrp-nautilus.io/zihaozhou/nautilus_tutorial:torch113_cuda117_ds076
1514 command : ["/bin/bash","-c"]
1615 # NCCL_DEBUG=INFO
1716 args : ['git clone https://github.com/Rose-STL-Lab/nautilus_tutorial.git;
4140 operator : In
4241 values :
4342 - NVIDIA-GeForce-RTX-3090
44- - key : kubernetes.io/hostname
45- operator : In
46- values :
47- - k8s-3090-02.clemson.edu
4843 volumes :
4944 - name : stpp-vol
5045 persistentVolumeClaim :
Original file line number Diff line number Diff line change @@ -5,13 +5,12 @@ apiVersion: batch/v1
55kind : Job
66metadata :
77 name : mnist
8- namespace : deep-forecast
98spec :
109 template :
1110 spec :
1211 containers :
1312 - name : gpu-container
14- image : horovod/horovod:sha-811cf67
13+ image : gitlab-registry.nrp-nautilus.io/zihaozhou/nautilus_tutorial:hovorod
1514 command : ["/bin/bash","-c"]
1615 # NCCL_DEBUG=INFO
1716 args : ['git clone https://github.com/Rose-STL-Lab/nautilus_tutorial.git;
4342 operator : In
4443 values :
4544 - NVIDIA-GeForce-RTX-3090
46- - key : kubernetes.io/hostname
47- operator : In
48- values :
49- - k8s-3090-02.clemson.edu
5045 volumes :
5146 - name : stpp-vol
5247 persistentVolumeClaim :
Original file line number Diff line number Diff line change @@ -5,13 +5,12 @@ apiVersion: batch/v1
55kind : Job
66metadata :
77 name : mnist
8- namespace : deep-forecast
98spec :
109 template :
1110 spec :
1211 containers :
1312 - name : gpu-container
14- image : docker.io/horovod/horovod:sha-811cf67
13+ image : gitlab-registry.nrp-nautilus.io/zihaozhou/nautilus_tutorial:hovorod
1514 command : ["/bin/bash","-c"]
1615 # NCCL_DEBUG=INFO
1716 args : ["git clone https://github.com/Rose-STL-Lab/nautilus_tutorial.git;
4140 operator : In
4241 values :
4342 - NVIDIA-GeForce-RTX-3090
44- - key : kubernetes.io/hostname
45- operator : In
46- values :
47- - k8s-3090-02.clemson.edu
4843 volumes :
4944 - name : stpp-vol
5045 persistentVolumeClaim :
You can’t perform that action at this time.
0 commit comments