Skip to content

Commit abbafff

Browse files
committed
MON-4036: Add NodeExporterConfig to ClusterMonitoring API
Add configuration for the node-exporter agent that runs as a DaemonSet in openshift-monitoring, collecting hardware and OS-level metrics from every node in the cluster.

Signed-off-by: Daniel Mellado <dmellado@fedoraproject.org>
1 parent f50e695 commit abbafff

8 files changed

Lines changed: 3281 additions & 1 deletion

File tree

config/v1alpha1/tests/clustermonitorings.config.openshift.io/ClusterMonitoringConfig.yaml

Lines changed: 371 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1094,6 +1094,377 @@ tests:
1094 1094
operator: "Exists"
1095 1095
effect: "NoSchedule"
1096 1096
expectedError: 'spec.telemeterClientConfig.tolerations: Too many: 11: must have at most 10 items'
1097+
- name: Should be able to create NodeExporterConfig with valid resources
1098+
initial: |
1099+
apiVersion: config.openshift.io/v1alpha1
1100+
kind: ClusterMonitoring
1101+
spec:
1102+
nodeExporterConfig:
1103+
resources:
1104+
- name: "cpu"
1105+
request: "50m"
1106+
limit: "200m"
1107+
- name: "memory"
1108+
request: "50Mi"
1109+
limit: "200Mi"
1110+
expected: |
1111+
apiVersion: config.openshift.io/v1alpha1
1112+
kind: ClusterMonitoring
1113+
spec:
1114+
nodeExporterConfig:
1115+
resources:
1116+
- name: "cpu"
1117+
request: "50m"
1118+
limit: "200m"
1119+
- name: "memory"
1120+
request: "50Mi"
1121+
limit: "200Mi"
1122+
- name: Should be able to create NodeExporterConfig with valid tolerations
1123+
initial: |
1124+
apiVersion: config.openshift.io/v1alpha1
1125+
kind: ClusterMonitoring
1126+
spec:
1127+
nodeExporterConfig:
1128+
tolerations:
1129+
- operator: "Exists"
1130+
expected: |
1131+
apiVersion: config.openshift.io/v1alpha1
1132+
kind: ClusterMonitoring
1133+
spec:
1134+
nodeExporterConfig:
1135+
tolerations:
1136+
- operator: "Exists"
1137+
- name: Should be able to create NodeExporterConfig with collectors
1138+
initial: |
1139+
apiVersion: config.openshift.io/v1alpha1
1140+
kind: ClusterMonitoring
1141+
spec:
1142+
nodeExporterConfig:
1143+
collectors:
1144+
cpuFreq:
1145+
collectionPolicy: Collect
1146+
tcpStat:
1147+
collectionPolicy: DoNotCollect
1148+
netDev:
1149+
collectionPolicy: Collect
1150+
netClass:
1151+
collectionPolicy: Collect
1152+
collect:
1153+
statsGatherer: Netlink
1154+
systemd:
1155+
collectionPolicy: Collect
1156+
collect:
1157+
units:
1158+
- "kubelet.service"
1159+
- "crio.service"
1160+
expected: |
1161+
apiVersion: config.openshift.io/v1alpha1
1162+
kind: ClusterMonitoring
1163+
spec:
1164+
nodeExporterConfig:
1165+
collectors:
1166+
cpuFreq:
1167+
collectionPolicy: Collect
1168+
tcpStat:
1169+
collectionPolicy: DoNotCollect
1170+
netDev:
1171+
collectionPolicy: Collect
1172+
netClass:
1173+
collectionPolicy: Collect
1174+
collect:
1175+
statsGatherer: Netlink
1176+
systemd:
1177+
collectionPolicy: Collect
1178+
collect:
1179+
units:
1180+
- "kubelet.service"
1181+
- "crio.service"
1182+
- name: Should be able to create NodeExporterConfig with all fields
1183+
initial: |
1184+
apiVersion: config.openshift.io/v1alpha1
1185+
kind: ClusterMonitoring
1186+
spec:
1187+
nodeExporterConfig:
1188+
nodeSelector:
1189+
kubernetes.io/os: linux
1190+
resources:
1191+
- name: "cpu"
1192+
request: "50m"
1193+
limit: "200m"
1194+
tolerations:
1195+
- operator: "Exists"
1196+
collectors:
1197+
cpuFreq:
1198+
collectionPolicy: Collect
1199+
buddyInfo:
1200+
collectionPolicy: DoNotCollect
1201+
maxProcs: 4
1202+
ignoredNetworkDevices:
1203+
- "^veth.*$"
1204+
- "^docker.*$"
1205+
expected: |
1206+
apiVersion: config.openshift.io/v1alpha1
1207+
kind: ClusterMonitoring
1208+
spec:
1209+
nodeExporterConfig:
1210+
nodeSelector:
1211+
kubernetes.io/os: linux
1212+
resources:
1213+
- name: "cpu"
1214+
request: "50m"
1215+
limit: "200m"
1216+
tolerations:
1217+
- operator: "Exists"
1218+
collectors:
1219+
cpuFreq:
1220+
collectionPolicy: Collect
1221+
buddyInfo:
1222+
collectionPolicy: DoNotCollect
1223+
maxProcs: 4
1224+
ignoredNetworkDevices:
1225+
- "^veth.*$"
1226+
- "^docker.*$"
1227+
- name: Should reject NodeExporterConfig with empty object
1228+
initial: |
1229+
apiVersion: config.openshift.io/v1alpha1
1230+
kind: ClusterMonitoring
1231+
spec:
1232+
nodeExporterConfig: {}
1233+
expectedError: 'spec.nodeExporterConfig: Invalid value: 0: spec.nodeExporterConfig in body should have at least 1 properties'
1234+
- name: Should reject NodeExporterConfig with too many resources
1235+
initial: |
1236+
apiVersion: config.openshift.io/v1alpha1
1237+
kind: ClusterMonitoring
1238+
spec:
1239+
nodeExporterConfig:
1240+
resources:
1241+
- name: "cpu"
1242+
request: "100m"
1243+
- name: "memory"
1244+
request: "64Mi"
1245+
- name: "hugepages-2Mi"
1246+
request: "32Mi"
1247+
- name: "hugepages-1Gi"
1248+
request: "1Gi"
1249+
- name: "ephemeral-storage"
1250+
request: "1Gi"
1251+
- name: "nvidia.com/gpu"
1252+
request: "1"
1253+
expectedError: 'spec.nodeExporterConfig.resources: Too many'
1254+
- name: Should reject NodeExporterConfig with duplicate resource names
1255+
initial: |
1256+
apiVersion: config.openshift.io/v1alpha1
1257+
kind: ClusterMonitoring
1258+
spec:
1259+
nodeExporterConfig:
1260+
resources:
1261+
- name: "cpu"
1262+
request: "100m"
1263+
- name: "cpu"
1264+
request: "200m"
1265+
expectedError: 'spec.nodeExporterConfig.resources[1]: Duplicate value: map[string]interface {}{"name":"cpu"}'
1266+
- name: Should reject NodeExporterConfig with limit less than request
1267+
initial: |
1268+
apiVersion: config.openshift.io/v1alpha1
1269+
kind: ClusterMonitoring
1270+
spec:
1271+
nodeExporterConfig:
1272+
resources:
1273+
- name: "cpu"
1274+
request: "500m"
1275+
limit: "200m"
1276+
expectedError: 'spec.nodeExporterConfig.resources[0]: Invalid value: "object": limit must be greater than or equal to request'
1277+
- name: Should reject NodeExporterConfig with empty resources array
1278+
initial: |
1279+
apiVersion: config.openshift.io/v1alpha1
1280+
kind: ClusterMonitoring
1281+
spec:
1282+
nodeExporterConfig:
1283+
resources: []
1284+
expectedError: 'spec.nodeExporterConfig.resources: Invalid value: 0: spec.nodeExporterConfig.resources in body should have at least 1 items'
1285+
- name: Should reject NodeExporterConfig with empty collectors object
1286+
initial: |
1287+
apiVersion: config.openshift.io/v1alpha1
1288+
kind: ClusterMonitoring
1289+
spec:
1290+
nodeExporterConfig:
1291+
collectors: {}
1292+
expectedError: 'spec.nodeExporterConfig.collectors: Invalid value: 0: spec.nodeExporterConfig.collectors in body should have at least 1 properties'
1293+
- name: Should accept NodeExporterConfig with empty ignoredNetworkDevices list
1294+
initial: |
1295+
apiVersion: config.openshift.io/v1alpha1
1296+
kind: ClusterMonitoring
1297+
spec:
1298+
nodeExporterConfig:
1299+
ignoredNetworkDevices: []
1300+
expected: |
1301+
apiVersion: config.openshift.io/v1alpha1
1302+
kind: ClusterMonitoring
1303+
spec:
1304+
nodeExporterConfig:
1305+
ignoredNetworkDevices: []
1306+
- name: Should reject NodeExporterConfig with maxProcs below minimum
1307+
initial: |
1308+
apiVersion: config.openshift.io/v1alpha1
1309+
kind: ClusterMonitoring
1310+
spec:
1311+
nodeExporterConfig:
1312+
maxProcs: -1
1313+
expectedError: 'spec.nodeExporterConfig.maxProcs'
1314+
- name: Should reject NodeExporterConfig with maxProcs exceeding maximum
1315+
initial: |
1316+
apiVersion: config.openshift.io/v1alpha1
1317+
kind: ClusterMonitoring
1318+
spec:
1319+
nodeExporterConfig:
1320+
maxProcs: 1025
1321+
expectedError: 'spec.nodeExporterConfig.maxProcs'
1322+
- name: Should reject netClass with collect set when collector is DoNotCollect
1323+
initial: |
1324+
apiVersion: config.openshift.io/v1alpha1
1325+
kind: ClusterMonitoring
1326+
spec:
1327+
nodeExporterConfig:
1328+
collectors:
1329+
netClass:
1330+
collectionPolicy: DoNotCollect
1331+
collect:
1332+
statsGatherer: Netlink
1333+
expectedError: 'collect configuration is forbidden when collectionPolicy is DoNotCollect'
1334+
- name: Should accept netClass DoNotCollect without collect
1335+
initial: |
1336+
apiVersion: config.openshift.io/v1alpha1
1337+
kind: ClusterMonitoring
1338+
spec:
1339+
nodeExporterConfig:
1340+
collectors:
1341+
netClass:
1342+
collectionPolicy: DoNotCollect
1343+
expected: |
1344+
apiVersion: config.openshift.io/v1alpha1
1345+
kind: ClusterMonitoring
1346+
spec:
1347+
nodeExporterConfig:
1348+
collectors:
1349+
netClass:
1350+
collectionPolicy: DoNotCollect
1351+
- name: Should reject systemd with collect set when collector is DoNotCollect
1352+
initial: |
1353+
apiVersion: config.openshift.io/v1alpha1
1354+
kind: ClusterMonitoring
1355+
spec:
1356+
nodeExporterConfig:
1357+
collectors:
1358+
systemd:
1359+
collectionPolicy: DoNotCollect
1360+
collect:
1361+
units:
1362+
- "kubelet.service"
1363+
expectedError: 'collect configuration is forbidden when collectionPolicy is DoNotCollect'
1364+
- name: Should accept systemd DoNotCollect without collect
1365+
initial: |
1366+
apiVersion: config.openshift.io/v1alpha1
1367+
kind: ClusterMonitoring
1368+
spec:
1369+
nodeExporterConfig:
1370+
collectors:
1371+
systemd:
1372+
collectionPolicy: DoNotCollect
1373+
expected: |
1374+
apiVersion: config.openshift.io/v1alpha1
1375+
kind: ClusterMonitoring
1376+
spec:
1377+
nodeExporterConfig:
1378+
collectors:
1379+
systemd:
1380+
collectionPolicy: DoNotCollect
1381+
- name: Should reject NodeExporterConfig with empty nodeSelector
1382+
initial: |
1383+
apiVersion: config.openshift.io/v1alpha1
1384+
kind: ClusterMonitoring
1385+
spec:
1386+
nodeExporterConfig:
1387+
nodeSelector: {}
1388+
expectedError: 'spec.nodeExporterConfig.nodeSelector: Invalid value: 0: spec.nodeExporterConfig.nodeSelector in body should have at least 1 properties'
1389+
- name: Should reject NodeExporterConfig with too many nodeSelector entries
1390+
initial: |
1391+
apiVersion: config.openshift.io/v1alpha1
1392+
kind: ClusterMonitoring
1393+
spec:
1394+
nodeExporterConfig:
1395+
nodeSelector:
1396+
key1: val1
1397+
key2: val2
1398+
key3: val3
1399+
key4: val4
1400+
key5: val5
1401+
key6: val6
1402+
key7: val7
1403+
key8: val8
1404+
key9: val9
1405+
key10: val10
1406+
key11: val11
1407+
expectedError: 'spec.nodeExporterConfig.nodeSelector: Too many: 11: must have at most 10 items'
1408+
- name: Should reject NodeExporterConfig with empty tolerations array
1409+
initial: |
1410+
apiVersion: config.openshift.io/v1alpha1
1411+
kind: ClusterMonitoring
1412+
spec:
1413+
nodeExporterConfig:
1414+
tolerations: []
1415+
expectedError: 'spec.nodeExporterConfig.tolerations: Invalid value: 0: spec.nodeExporterConfig.tolerations in body should have at least 1 items'
1416+
- name: Should reject NodeExporterConfig with too many tolerations
1417+
initial: |
1418+
apiVersion: config.openshift.io/v1alpha1
1419+
kind: ClusterMonitoring
1420+
spec:
1421+
nodeExporterConfig:
1422+
tolerations:
1423+
- key: "key1"
1424+
operator: "Exists"
1425+
- key: "key2"
1426+
operator: "Exists"
1427+
- key: "key3"
1428+
operator: "Exists"
1429+
- key: "key4"
1430+
operator: "Exists"
1431+
- key: "key5"
1432+
operator: "Exists"
1433+
- key: "key6"
1434+
operator: "Exists"
1435+
- key: "key7"
1436+
operator: "Exists"
1437+
- key: "key8"
1438+
operator: "Exists"
1439+
- key: "key9"
1440+
operator: "Exists"
1441+
- key: "key10"
1442+
operator: "Exists"
1443+
- key: "key11"
1444+
operator: "Exists"
1445+
expectedError: 'spec.nodeExporterConfig.tolerations: Too many: 11: must have at most 10 items'
1446+
- name: Should reject systemd collect with empty object
1447+
initial: |
1448+
apiVersion: config.openshift.io/v1alpha1
1449+
kind: ClusterMonitoring
1450+
spec:
1451+
nodeExporterConfig:
1452+
collectors:
1453+
systemd:
1454+
collectionPolicy: Collect
1455+
collect: {}
1456+
expectedError: 'spec.nodeExporterConfig.collectors.systemd.collect: Invalid value'
1457+
- name: Should reject netClass collect with empty object
1458+
initial: |
1459+
apiVersion: config.openshift.io/v1alpha1
1460+
kind: ClusterMonitoring
1461+
spec:
1462+
nodeExporterConfig:
1463+
collectors:
1464+
netClass:
1465+
collectionPolicy: Collect
1466+
collect: {}
1467+
expectedError: 'spec.nodeExporterConfig.collectors.netClass.collect: Invalid value'
1097 1468
onUpdate:
1098 1469
- name: Should reject updating TelemeterClientConfig to empty object
1099 1470
initial: |

0 commit comments

Comments
 (0)