-
Notifications
You must be signed in to change notification settings - Fork 17
Expand file tree
/
Copy pathclusters.py
More file actions
executable file
·109 lines (88 loc) · 3.2 KB
/
clusters.py
File metadata and controls
executable file
·109 lines (88 loc) · 3.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/usr/bin/env python3
"""
Cluster definitions used by the tests.
Adapted from https://github.com/stackrox/stackrox/blob/master/.openshift-ci/clusters.py
"""
import os
import signal
import subprocess
import time
from common import popen_graceful_kill
class NullCluster:
    """Cluster placeholder whose ``provision`` and ``teardown`` do nothing.

    It offers the same two entry points as ``GKECluster`` so that code which
    drives a cluster can be handed either class.
    """

    def provision(self):
        """Do nothing; there is no cluster to create. Returns ``None``."""

    def teardown(self, canceled=False):
        """Do nothing; there is no cluster to delete. Returns ``None``.

        Args:
            canceled: Accepted and ignored. It mirrors the signature of
                ``GKECluster.teardown`` so that ``teardown(canceled=True)``
                works on either cluster type instead of raising ``TypeError``.
        """
class GKECluster:
    """A GKE cluster driven through the ``scripts/ci/gke.sh`` helper script.

    ``provision`` creates the cluster, waits for it and starts a background
    token refresh process; ``teardown`` stops that process and deletes the
    cluster. Every step is a sub-command of the script named by the matching
    ``*_PATH`` class attribute.
    """

    # Timeouts are in seconds (they are handed to subprocess as ``timeout``).
    # Provisioning timeout is tightly coupled to the time it may take gke.sh to
    # create a cluster.
    PROVISION_TIMEOUT = 140 * 60
    WAIT_TIMEOUT = 20 * 60
    TEARDOWN_TIMEOUT = 5 * 60

    # separate script names used for testability - test_clusters.py
    PROVISION_PATH = "scripts/ci/gke.sh"
    WAIT_PATH = "scripts/ci/gke.sh"
    REFRESH_PATH = "scripts/ci/gke.sh"
    TEARDOWN_PATH = "scripts/ci/gke.sh"

    def __init__(self, cluster_id, num_nodes=None, machine_type=None, disk_gb=None):
        """Record the cluster settings; nothing is created until ``provision``.

        Args:
            cluster_id: Passed to the provisioning script as its last argument.
            num_nodes: If not ``None``, exported as ``NUM_NODES``.
            machine_type: If not ``None``, exported as ``MACHINE_TYPE``.
            disk_gb: If not ``None``, exported as ``DISK_SIZE_GB``.
        """
        self.cluster_id = cluster_id
        self.num_nodes = num_nodes
        self.machine_type = machine_type
        self.disk_gb = disk_gb
        # Popen handle of the token refresh process, set by provision().
        self.refresh_token_cmd = None

    def provision(self):
        """Create the cluster, wait for it and start the token refresh.

        Also installs ``sigint_handler`` as the process-wide SIGINT handler
        once the cluster has been created.

        Returns:
            This instance.

        Raises:
            RuntimeError: The provisioning script exited with a non-zero status.
            subprocess.TimeoutExpired: Provisioning or waiting exceeded its
                timeout.
            subprocess.CalledProcessError: The wait step exited non-zero.
        """
        # Settings are written to this process's environment, so every child
        # process started from here on inherits them.
        if self.num_nodes is not None:
            os.environ["NUM_NODES"] = str(self.num_nodes)
        if self.machine_type is not None:
            os.environ["MACHINE_TYPE"] = str(self.machine_type)
        if self.disk_gb is not None:
            os.environ["DISK_SIZE_GB"] = str(self.disk_gb)

        with subprocess.Popen(
            [
                self.PROVISION_PATH,
                "provision_gke_cluster",
                self.cluster_id,
            ]
        ) as cmd:
            # Only the wait can time out, so only the wait is guarded.
            try:
                exitstatus = cmd.wait(self.PROVISION_TIMEOUT)
            except subprocess.TimeoutExpired:
                popen_graceful_kill(cmd)
                # Bare raise re-raises the active exception unchanged.
                raise
            if exitstatus != 0:
                raise RuntimeError(
                    f"Cluster provision failed: exit {exitstatus}")

        # OpenShift CI sends a SIGINT when tests are canceled
        signal.signal(signal.SIGINT, self.sigint_handler)

        subprocess.run(
            [self.WAIT_PATH, "wait_for_cluster"],
            check=True,
            timeout=self.WAIT_TIMEOUT,
        )

        # The refresh process has to outlive this method, so it cannot be
        # wrapped in a ``with`` block; teardown() is what stops it.
        # pylint: disable=consider-using-with
        self.refresh_token_cmd = subprocess.Popen(
            [self.REFRESH_PATH, "refresh_gke_token"]
        )

        return self

    def teardown(self, canceled=False):
        """Stop the token refresh and delete the cluster.

        Blocks, re-checking once a minute, for as long as the file
        ``/tmp/hold-cluster`` exists.

        Args:
            canceled: When true, the token refresh process is left untouched
                and ``"true"`` is appended to the teardown script arguments.

        Returns:
            This instance.

        Raises:
            subprocess.CalledProcessError: The teardown script exited non-zero.
            subprocess.TimeoutExpired: Teardown exceeded ``TEARDOWN_TIMEOUT``.
        """
        while os.path.exists("/tmp/hold-cluster"):
            print("Pausing teardown because /tmp/hold-cluster exists")
            time.sleep(60)

        if self.refresh_token_cmd is not None and not canceled:
            print("Terminating GKE token refresh")
            # Best effort: a failure to stop the refresh process is reported
            # but must not prevent the cluster itself from being deleted.
            try:
                popen_graceful_kill(self.refresh_token_cmd)
            except Exception as err:
                print(f"Could not terminate the token refresh: {err}")

        args = [self.TEARDOWN_PATH, "teardown_gke_cluster"]
        if canceled:
            args.append("true")

        subprocess.run(
            args,
            check=True,
            timeout=self.TEARDOWN_TIMEOUT,
        )

        return self

    def sigint_handler(self, signum, frame):
        """SIGINT handler installed by ``provision``: tear the cluster down.

        Args:
            signum: Signal number supplied by the ``signal`` module.
            frame: Interrupted stack frame supplied by the ``signal`` module.
        """
        print("Tearing down the cluster due to SIGINT", signum, frame)
        self.teardown(canceled=True)