Skip to content

Commit 7785259

Browse files
betterclever and claude committed
fix(worker): fix GCS FUSE k8s runner shell syntax, sidecar detection, and Helm config
- k8s-runner: fix VOLUME_CAPTURE_SCRIPT join from '; ' to '\n' — busybox ash rejects 'do;' as a syntax error, causing all K8s jobs to fail with 'sh: syntax error: unexpected ";"'
- k8s-runner: check initContainerStatuses as well as containerStatuses for GCS FUSE sidecar detection (GKE ≥1.28 native sidecar injection)
- test-gcs-volume: use printf instead of echo to produce valid JSON output; remove unused OUTPUT_DELIMITER constant
- helm/app-secret: add SECRET_STORE_MASTER_KEY to both system and workers namespace secrets (required by new env validation schema)
- helm/worker-deployment: expose SECRET_STORE_MASTER_KEY from secret as env var
- helm/gke-managed: add secretStoreMasterKey dev value; add GCS FUSE k8s config (gcsBucket, jobServiceAccount, jobRunnerGcpSa, workerGcpSa); update worker image tag to tested build 49d5de9-wk-fix2-20260218003437
- infra/dev/main.tf: add GCS FUSE CSI addon; volumes bucket with 7-day lifecycle; worker and job-runner GCP SAs with Workload Identity bindings and bucket IAM

Integration test validated: worker uploads input.txt to GCS, K8s alpine job reads it via GCS FUSE CSI mount at /inputs, writes JSON output, worker parses result. All tests pass.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Signed-off-by: betterclever <paliwal.pranjal83@gmail.com>
1 parent 49d5de9 commit 7785259

6 files changed

Lines changed: 182 additions & 3 deletions

File tree

deploy/helm/shipsec/templates/app-secret.local.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ stringData:
1313
MINIO_ROOT_PASSWORD: {{ .Values.secrets.minioRootPassword | quote }}
1414
MINIO_ACCESS_KEY: {{ .Values.secrets.minioRootUser | quote }}
1515
MINIO_SECRET_KEY: {{ .Values.secrets.minioRootPassword | quote }}
16+
SECRET_STORE_MASTER_KEY: {{ .Values.secrets.secretStoreMasterKey | quote }}
1617
---
1718
apiVersion: v1
1819
kind: Secret
@@ -28,4 +29,5 @@ stringData:
2829
MINIO_ROOT_PASSWORD: {{ .Values.secrets.minioRootPassword | quote }}
2930
MINIO_ACCESS_KEY: {{ .Values.secrets.minioRootUser | quote }}
3031
MINIO_SECRET_KEY: {{ .Values.secrets.minioRootPassword | quote }}
32+
SECRET_STORE_MASTER_KEY: {{ .Values.secrets.secretStoreMasterKey | quote }}
3133
{{- end }}

deploy/helm/shipsec/templates/worker-deployment.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@ spec:
4242
secretKeyRef:
4343
name: {{ .Values.secrets.name }}
4444
key: MINIO_SECRET_KEY
45+
- name: SECRET_STORE_MASTER_KEY
46+
valueFrom:
47+
secretKeyRef:
48+
name: {{ .Values.secrets.name }}
49+
key: SECRET_STORE_MASTER_KEY
4550
{{- if eq .Values.execution.mode "k8s" }}
4651
- name: EXECUTION_MODE
4752
value: "k8s"

deploy/helm/shipsec/values/gke-managed.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ secrets:
1414
databaseUrl: 'postgresql://shipsec:shipsec-dev-2026@10.25.225.3:5432/shipsec'
1515
minioRootUser: minioadmin
1616
minioRootPassword: minioadmin
17+
secretStoreMasterKey: '0123456789abcdef0123456789abcdef'
1718

1819
backend:
1920
image:
@@ -44,7 +45,7 @@ backend:
4445
worker:
4546
image:
4647
repository: us-central1-docker.pkg.dev/shipsec/shipsec-studio/worker
47-
tag: f1b15727-wk-amd64-v3
48+
tag: 49d5de9a-wk-fix2-20260218003437
4849
env:
4950
NODE_ENV: production
5051
SHIPSEC_ENV: local

infra/gcp/envs/dev/main.tf

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,27 @@ resource "google_storage_bucket" "volumes" {
274274
}
275275
}
276276

277+
# GCP SA for the worker pod (Workload Identity → GCS SDK access)
278+
resource "google_service_account" "worker" {
279+
project = var.project_id
280+
account_id = "shipsec-worker"
281+
display_name = "ShipSec Worker"
282+
}
283+
284+
# Workload Identity: shipsec-workers/shipsec-worker KSA → shipsec-worker GCP SA
285+
resource "google_service_account_iam_member" "worker_wi" {
286+
service_account_id = google_service_account.worker.name
287+
role = "roles/iam.workloadIdentityUser"
288+
member = "serviceAccount:${var.project_id}.svc.id.goog[shipsec-workers/shipsec-worker]"
289+
}
290+
291+
# Worker SA → volumes bucket access (uploads inputs, reads outputs via SDK)
292+
resource "google_storage_bucket_iam_member" "worker_volumes" {
293+
bucket = google_storage_bucket.volumes.name
294+
role = "roles/storage.objectUser"
295+
member = "serviceAccount:${google_service_account.worker.email}"
296+
}
297+
277298
# GCP SA for job pods (mounted via GCS FUSE CSI)
278299
resource "google_service_account" "job_runner" {
279300
project = var.project_id
@@ -349,3 +370,7 @@ output "gcs_volumes_bucket" {
349370
output "job_runner_sa_email" {
350371
value = google_service_account.job_runner.email
351372
}
373+
374+
output "worker_sa_email" {
375+
value = google_service_account.worker.email
376+
}
worker/src/testing/test-gcs-volume.ts

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
#!/usr/bin/env bun
2+
/**
3+
* End-to-end integration test for GCS FUSE volume sharing.
4+
*
5+
* Validates the full flow:
6+
* 1. IsolatedGcsVolume.initialize() uploads files to GCS
7+
* 2. K8s job mounts them via GCS FUSE CSI at /inputs
8+
* 3. Job reads the file, writes output to /shipsec-output/result.json
9+
* 4. Worker reads the JSON output from pod logs
10+
* 5. volume.cleanup() removes GCS objects
11+
*
12+
* Run inside the worker pod:
13+
* kubectl exec -n shipsec-workers <pod> -- bun run /app/worker/src/testing/test-gcs-volume.ts
14+
*/
15+
16+
import { IsolatedGcsVolume } from '../utils/gcs-volume';
17+
import { runComponentInK8sJob } from '../utils/k8s-runner';
18+
import type { ExecutionContext } from '@shipsec/component-sdk';
19+
import type { DockerRunnerConfig } from '@shipsec/component-sdk';
20+
21+
const PASS = '\x1b[32m✓\x1b[0m';
22+
const FAIL = '\x1b[31m✗\x1b[0m';
23+
24+
/**
 * Build a minimal ExecutionContext stub for running a component from this script.
 *
 * Logger and progress callbacks print to the console. Services this test does not
 * exercise (secrets, storage, artifacts, trace, collectors) are left undefined.
 *
 * The run id is generated exactly once so that `runId` and `metadata.runId`
 * always agree; evaluating `Date.now()` twice could yield two different ids.
 *
 * @returns a partial context cast to ExecutionContext
 */
function makeContext(): ExecutionContext {
  const runId = `test-gcs-${Date.now()}`;
  const componentRef = 'test.gcs.volume';

  // The stub is deliberately incomplete, hence the cast on the whole literal.
  return {
    runId,
    componentRef,
    logger: {
      info: (msg: string) => console.log(` [info] ${msg}`),
      warn: (msg: string) => console.warn(` [warn] ${msg}`),
      error: (msg: string) => console.error(` [error] ${msg}`),
      debug: (msg: string) => console.log(` [debug] ${msg}`),
    },
    emitProgress: (msg: string) => console.log(` [progress] ${msg}`),
    secrets: undefined,
    storage: undefined,
    artifacts: undefined,
    trace: undefined,
    logCollector: undefined,
    terminalCollector: undefined,
    metadata: { runId, componentRef },
    http: { fetch: fetch as any, toCurl: () => '' },
  } as any;
}
45+
46+
/**
 * Test 1: upload a file through IsolatedGcsVolume, run an alpine K8s job that
 * reads it from the volume mounted at /inputs, and compare the content the job
 * reports back with what was uploaded.
 *
 * The volume is cleaned up in `finally`, so failures are signalled by throwing
 * rather than calling `process.exit()`: exiting inside the `try` would terminate
 * the process before the cleanup ran and leave the uploaded objects behind.
 *
 * @throws Error when the job result does not match the uploaded content; errors
 *   from the job runner or from cleanup propagate unchanged.
 */
async function testVolumeWriteRead(): Promise<void> {
  console.log('\n── Test 1: GCS volume write → K8s job read ──');

  const volume = new IsolatedGcsVolume('testtenant', `run${Date.now()}`);
  const testContent = `hello-from-gcs-${Date.now()}`;

  // 1. Upload file to GCS
  const prefix = await volume.initialize({ 'input.txt': testContent });
  console.log(` ${PASS} Uploaded input.txt to GCS prefix: ${prefix}`);

  const ctx = makeContext();

  // 2. Runner: alpine reads /inputs/input.txt and writes JSON output.
  //    printf (not echo) keeps the emitted JSON free of a trailing newline/escapes.
  const runner: DockerRunnerConfig = {
    kind: 'docker',
    image: 'alpine:3.20',
    entrypoint: 'sh',
    command: [
      '-c',
      `content=$(cat /inputs/input.txt); printf '{"content":"%s"}' "$content" > /shipsec-output/result.json`,
    ],
    timeoutSeconds: 60,
    volumes: [volume.getVolumeConfig('/inputs', true)],
  };

  try {
    const result = await runComponentInK8sJob<unknown, { content: string }>(runner, {}, ctx);
    console.log(` ${PASS} K8s job completed, result:`, result);

    if (result?.content !== testContent) {
      // Thrown (not process.exit) so the finally block below still runs.
      throw new Error(`Content mismatch: expected "${testContent}", got "${result?.content}"`);
    }
    console.log(` ${PASS} Content matches! "${result.content}"`);
  } finally {
    await volume.cleanup();
    console.log(` ${PASS} GCS volume cleaned up`);
  }
}
88+
89+
/**
 * Test 2: verify that `volume.cleanup()` removes the objects that
 * `volume.initialize()` uploaded.
 *
 * Lists the bucket under the volume's name before and after cleanup. The bucket
 * name and the listing prefix are validated explicitly instead of being forced
 * with `!`: an undefined prefix would make `getFiles` list the entire bucket and
 * turn both checks into false results.
 *
 * @throws Error when GCS_VOLUME_BUCKET is unset, the volume reports no name,
 *   no object exists before cleanup, or objects remain after cleanup.
 */
async function testVolumeCleanup(): Promise<void> {
  console.log('\n── Test 2: GCS volume cleanup removes objects ──');

  const bucketName = process.env.GCS_VOLUME_BUCKET;
  if (!bucketName) {
    throw new Error('GCS_VOLUME_BUCKET not set');
  }

  const { Storage } = await import('@google-cloud/storage');
  const storage = new Storage();
  const bucket = storage.bucket(bucketName);

  const volume = new IsolatedGcsVolume('testcleanup', `run${Date.now()}`);
  await volume.initialize({ 'deleteme.txt': 'temporary' });

  const prefix = volume.getVolumeName();
  if (!prefix) {
    throw new Error('Volume has no name after initialize(); cannot scope the GCS listing');
  }

  // Verify file exists
  const [before] = await bucket.getFiles({ prefix });
  if (before.length === 0) {
    throw new Error('File not found in GCS before cleanup');
  }
  console.log(` ${PASS} File exists in GCS (${before.length} object(s))`);

  await volume.cleanup();

  // Verify file deleted
  const [after] = await bucket.getFiles({ prefix });
  if (after.length !== 0) {
    throw new Error(`${after.length} objects still remain after cleanup`);
  }
  console.log(` ${PASS} GCS objects cleaned up successfully`);
}
119+
120+
/**
 * Script entry point: prints the environment variables the tests depend on,
 * aborts with exit code 1 when GCS_VOLUME_BUCKET is missing, then runs the
 * cleanup test followed by the write/read test.
 */
async function main() {
  console.log('🧪 GCS FUSE Volume Integration Tests');
  for (const key of ['EXECUTION_MODE', 'GCS_VOLUME_BUCKET', 'K8S_JOB_NAMESPACE']) {
    console.log(` ${key}=${process.env[key]}`);
  }

  const bucketConfigured = Boolean(process.env.GCS_VOLUME_BUCKET);
  if (!bucketConfigured) {
    console.error(`${FAIL} GCS_VOLUME_BUCKET not set`);
    process.exit(1);
  }

  // Run order matters only for readability of the log: cleanup first, then the K8s job.
  await testVolumeCleanup();
  await testVolumeWriteRead();

  console.log('\n\x1b[32m✓ All tests passed\x1b[0m\n');
}
136+
137+
// Run the suite; any rejection is reported and mapped to a non-zero exit code.
main().then(undefined, (failure) => {
  console.error(`\n${FAIL} Test failed:`, failure);
  process.exit(1);
});

worker/src/utils/k8s-runner.ts

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ const VOLUME_CAPTURE_SCRIPT = [
8484
' echo "___FILE_END___"',
8585
' done',
8686
'done',
87-
].join('; ');
87+
].join('\n');
8888

8989
/**
9090
* Build the command wrapper that emits the output file to stdout.
@@ -711,7 +711,13 @@ async function waitForGcsFuseFlush(
711711

712712
while (Date.now() < deadline) {
713713
const pod = await core.readNamespacedPod({ name: podName, namespace });
714-
const sidecar = pod.status?.containerStatuses?.find((c) => c.name === 'gke-gcsfuse-sidecar');
714+
// GCS FUSE sidecar may appear in initContainerStatuses (K8s ≥1.28 native sidecar)
715+
// or containerStatuses (older injection approach, regular container)
716+
const allStatuses = [
717+
...(pod.status?.containerStatuses ?? []),
718+
...(pod.status?.initContainerStatuses ?? []),
719+
];
720+
const sidecar = allStatuses.find((c) => c.name === 'gke-gcsfuse-sidecar');
715721

716722
if (!sidecar) {
717723
// No sidecar found — GCS FUSE may not have been injected, skip wait

0 commit comments

Comments
 (0)