Skip to content

Commit 959c3b1

Browse files
bettercleverclaude
andcommitted
feat(worker): add GCS FUSE volume support for K8s runner
Replace emptyDir volumes with GCS FUSE-backed volumes for K8s job pods, enabling persistent shared storage between the worker and job containers. - Add IsolatedGcsVolume class for GCS-backed volume management - Update K8s runner with gcsfuse sidecar volumes, pod annotations, and flush logic - Update isolated-volume factory to route to GCS when configured - Add GCS file listing in prowler-scan component - Add @google-cloud/storage dependency - Terraform: enable GCS FUSE addon, create bucket, configure IAM - Helm: add job runner KSA, env vars, and GCS config values Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> Signed-off-by: betterclever <paliwal.pranjal83@gmail.com>
1 parent b7c19eb commit 959c3b1

11 files changed

Lines changed: 513 additions & 17 deletions

File tree

bun.lock

Lines changed: 95 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

deploy/helm/shipsec/templates/worker-deployment.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,14 @@ spec:
5353
- name: K8S_IMAGE_PULL_SECRET
5454
value: {{ .Values.execution.k8s.imagePullSecret | quote }}
5555
{{- end }}
56+
{{- if .Values.execution.k8s.gcsBucket }}
57+
- name: GCS_VOLUME_BUCKET
58+
value: {{ .Values.execution.k8s.gcsBucket | quote }}
59+
{{- end }}
60+
{{- if .Values.execution.k8s.jobServiceAccount }}
61+
- name: K8S_JOB_SERVICE_ACCOUNT
62+
value: {{ .Values.execution.k8s.jobServiceAccount | quote }}
63+
{{- end }}
5664
{{- else if .Values.execution.workerDockerHost }}
5765
- name: DOCKER_HOST
5866
value: {{ .Values.execution.workerDockerHost | quote }}

deploy/helm/shipsec/templates/worker-rbac.yaml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ metadata:
88
labels:
99
{{- include "shipsec.labels" . | nindent 4 }}
1010
app.kubernetes.io/component: worker
11+
{{- if .Values.execution.k8s.workerGcpSa }}
12+
annotations:
13+
iam.gke.io/gcp-service-account: {{ .Values.execution.k8s.workerGcpSa | quote }}
14+
{{- end }}
1115
---
1216
# Role in the workloads namespace — worker creates Jobs and ConfigMaps here
1317
apiVersion: rbac.authorization.k8s.io/v1
@@ -44,4 +48,17 @@ subjects:
4448
- kind: ServiceAccount
4549
name: shipsec-worker
4650
namespace: {{ .Values.global.namespaces.workers }}
51+
{{- if .Values.execution.k8s.jobRunnerGcpSa }}
52+
---
53+
# ServiceAccount for job pods (GCS FUSE CSI access via Workload Identity)
54+
apiVersion: v1
55+
kind: ServiceAccount
56+
metadata:
57+
name: shipsec-job-runner
58+
namespace: {{ .Values.global.namespaces.workloads }}
59+
labels:
60+
{{- include "shipsec.labels" . | nindent 4 }}
61+
annotations:
62+
iam.gke.io/gcp-service-account: {{ .Values.execution.k8s.jobRunnerGcpSa | quote }}
63+
{{- end }}
4764
{{- end }}

deploy/helm/shipsec/values/gke-managed.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,3 +88,7 @@ execution:
8888
jobNamespace: shipsec-workloads
8989
imagePullPolicy: IfNotPresent
9090
imagePullSecret: ghcr-creds
91+
gcsBucket: shipsec-volumes-shipsec-dev
92+
jobServiceAccount: shipsec-job-runner
93+
jobRunnerGcpSa: shipsec-job-runner@shipsec.iam.gserviceaccount.com
94+
workerGcpSa: shipsec-worker@shipsec.iam.gserviceaccount.com

infra/gcp/envs/dev/main.tf

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,12 @@ resource "google_container_cluster" "gke" {
7878
workload_pool = "${var.project_id}.svc.id.goog"
7979
}
8080

81+
addons_config {
82+
gcs_fuse_csi_driver_config {
83+
enabled = true
84+
}
85+
}
86+
8187
depends_on = [google_project_service.enabled]
8288
}
8389

@@ -101,6 +107,78 @@ resource "google_container_node_pool" "default_pool" {
101107
}
102108
}
103109

110+
# --- GCS FUSE volume support ---
111+
112+
# GCS bucket for job volumes
113+
resource "google_storage_bucket" "volumes" {
114+
project = var.project_id
115+
name = "${var.project_id}-volumes-${var.cluster_name}"
116+
location = var.region
117+
uniform_bucket_level_access = true
118+
force_destroy = true
119+
120+
lifecycle_rule {
121+
condition {
122+
age = 7
123+
}
124+
action {
125+
type = "Delete"
126+
}
127+
}
128+
}
129+
130+
# GCP SA for job pods (mounted via GCS FUSE CSI)
131+
resource "google_service_account" "job_runner" {
132+
project = var.project_id
133+
account_id = "shipsec-job-runner"
134+
display_name = "ShipSec K8s Job Runner"
135+
}
136+
137+
# Job runner SA → bucket access
138+
resource "google_storage_bucket_iam_member" "job_runner_storage" {
139+
bucket = google_storage_bucket.volumes.name
140+
role = "roles/storage.objectUser"
141+
member = "serviceAccount:${google_service_account.job_runner.email}"
142+
}
143+
144+
# Workload Identity: K8s SA → GCP SA (for job pods in shipsec-workloads)
145+
resource "google_service_account_iam_member" "job_runner_wi" {
146+
service_account_id = google_service_account.job_runner.name
147+
role = "roles/iam.workloadIdentityUser"
148+
member = "serviceAccount:${var.project_id}.svc.id.goog[shipsec-workloads/shipsec-job-runner]"
149+
}
150+
151+
# Worker SA also needs GCS access (to upload inputs / read outputs via SDK)
152+
resource "google_service_account" "worker" {
153+
project = var.project_id
154+
account_id = "shipsec-worker"
155+
display_name = "ShipSec Worker"
156+
}
157+
158+
resource "google_storage_bucket_iam_member" "worker_storage" {
159+
bucket = google_storage_bucket.volumes.name
160+
role = "roles/storage.objectUser"
161+
member = "serviceAccount:${google_service_account.worker.email}"
162+
}
163+
164+
resource "google_service_account_iam_member" "worker_wi" {
165+
service_account_id = google_service_account.worker.name
166+
role = "roles/iam.workloadIdentityUser"
167+
member = "serviceAccount:${var.project_id}.svc.id.goog[shipsec-workers/shipsec-worker]"
168+
}
169+
170+
output "gcs_volumes_bucket" {
171+
value = google_storage_bucket.volumes.name
172+
}
173+
174+
output "job_runner_sa_email" {
175+
value = google_service_account.job_runner.email
176+
}
177+
178+
output "worker_sa_email" {
179+
value = google_service_account.worker.email
180+
}
181+
104182
output "artifact_registry_repo" {
105183
value = "${var.region}-docker.pkg.dev/${var.project_id}/${google_artifact_registry_repository.docker.repository_id}"
106184
}

worker/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
"@ai-sdk/mcp": "^1.0.13",
2424
"@ai-sdk/openai": "^3.0.18",
2525
"@aws-sdk/client-s3": "^3.975.0",
26+
"@google-cloud/storage": "^7.14.0",
2627
"@googleapis/admin": "^29.0.0",
2728
"@grpc/grpc-js": "^1.14.3",
2829
"@kubernetes/client-node": "^1.4.0",

worker/src/components/security/prowler-scan.ts

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -300,8 +300,22 @@ async function listVolumeFiles(volume: ReturnType<typeof createIsolatedVolume>):
300300
const volumeName = volume.getVolumeName();
301301
if (!volumeName) return [];
302302

303-
// In K8s mode, volumes are ConfigMap-backed — list keys via K8s API
303+
// In K8s mode, volumes are ConfigMap-backed or GCS-backed
304304
if (process.env.EXECUTION_MODE === 'k8s') {
305+
// GCS FUSE volumes: list objects in the GCS prefix via SDK
306+
if (process.env.GCS_VOLUME_BUCKET) {
307+
try {
308+
const { Storage } = await import('@google-cloud/storage');
309+
const storage = new Storage();
310+
const bucket = storage.bucket(process.env.GCS_VOLUME_BUCKET);
311+
const [files] = await bucket.getFiles({ prefix: `${volumeName}/` });
312+
return files.map((f) => f.name.replace(`${volumeName}/`, ''));
313+
} catch {
314+
return [];
315+
}
316+
}
317+
318+
// ConfigMap-backed volumes: list keys via K8s API
305319
try {
306320
const k8s = await import('@kubernetes/client-node');
307321
const kc = new k8s.KubeConfig();

worker/src/utils/gcs-volume.ts

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
/**
2+
* IsolatedGcsVolume — GCS FUSE CSI volume replacement for IsolatedK8sVolume.
3+
*
4+
* Uses a GCS bucket mounted via the GCS FUSE CSI driver instead of ConfigMaps.
5+
* Same interface as IsolatedK8sVolume / IsolatedContainerVolume so components
6+
* can swap transparently via the createIsolatedVolume() factory.
7+
*
8+
* Advantages over ConfigMap-backed volumes:
9+
* - No 1 MiB size limit (handles large outputs like Prowler)
10+
* - Native read-write (no log-based writeback hack)
11+
* - ReadWriteMany (parallel pods can share data)
12+
* - Worker reads output directly from GCS via SDK
13+
*/
14+
import { Storage } from '@google-cloud/storage';
15+
import { ValidationError, ConfigurationError, ContainerError } from '@shipsec/component-sdk';
16+
17+
let _storage: Storage | null = null;
18+
19+
function getStorage(): Storage {
20+
if (!_storage) {
21+
// Auto-discovers Workload Identity credentials in GKE
22+
_storage = new Storage();
23+
}
24+
return _storage;
25+
}
26+
27+
function getBucketName(): string {
28+
const bucket = process.env.GCS_VOLUME_BUCKET;
29+
if (!bucket) {
30+
throw new ConfigurationError('GCS_VOLUME_BUCKET environment variable is not set');
31+
}
32+
return bucket;
33+
}
34+
35+
function sanitizeName(raw: string): string {
36+
return raw
37+
.toLowerCase()
38+
.replace(/[^a-z0-9-]/g, '-')
39+
.replace(/-+/g, '-')
40+
.replace(/^-|-$/g, '')
41+
.slice(0, 53);
42+
}
43+
44+
export class IsolatedGcsVolume {
45+
private prefix?: string;
46+
private isInitialized = false;
47+
private bucketName: string;
48+
49+
constructor(
50+
private tenantId: string,
51+
private runId: string,
52+
) {
53+
if (!/^[a-zA-Z0-9_-]+$/.test(tenantId)) {
54+
throw new ValidationError(
55+
'Invalid tenant ID: must contain only alphanumeric characters, hyphens, and underscores',
56+
{
57+
fieldErrors: {
58+
tenantId: ['must contain only alphanumeric characters, hyphens, and underscores'],
59+
},
60+
},
61+
);
62+
}
63+
if (!/^[a-zA-Z0-9_-]+$/.test(runId)) {
64+
throw new ValidationError(
65+
'Invalid run ID: must contain only alphanumeric characters, hyphens, and underscores',
66+
{
67+
fieldErrors: {
68+
runId: ['must contain only alphanumeric characters, hyphens, and underscores'],
69+
},
70+
},
71+
);
72+
}
73+
this.bucketName = getBucketName();
74+
}
75+
76+
/**
77+
* Upload files to GCS under a unique prefix and return the prefix.
78+
* GCS key structure: {tenantId}/{runId}/{timestamp}/{filename}
79+
*/
80+
async initialize(files: Record<string, string | Buffer>): Promise<string> {
81+
if (this.isInitialized) {
82+
throw new ConfigurationError('Volume already initialized', {
83+
details: { prefix: this.prefix, tenantId: this.tenantId, runId: this.runId },
84+
});
85+
}
86+
87+
const timestamp = Date.now();
88+
const tenantShort = sanitizeName(this.tenantId);
89+
const runShort = sanitizeName(this.runId);
90+
this.prefix = `${tenantShort}/${runShort}/${timestamp}`;
91+
92+
try {
93+
const storage = getStorage();
94+
const bucket = storage.bucket(this.bucketName);
95+
96+
const uploads = Object.entries(files).map(async ([filename, content]) => {
97+
this.validateFilename(filename);
98+
const key = `${this.prefix}/${filename}`;
99+
const file = bucket.file(key);
100+
const data = typeof content === 'string' ? Buffer.from(content, 'utf-8') : content;
101+
await file.save(data);
102+
});
103+
104+
await Promise.all(uploads);
105+
106+
this.isInitialized = true;
107+
return this.prefix;
108+
} catch (error) {
109+
if (this.prefix) {
110+
await this.cleanup().catch(() => {});
111+
}
112+
throw new ContainerError(
113+
`Failed to initialize GCS volume: ${error instanceof Error ? error.message : String(error)}`,
114+
{
115+
cause: error instanceof Error ? error : undefined,
116+
details: { tenantId: this.tenantId, runId: this.runId },
117+
},
118+
);
119+
}
120+
}
121+
122+
private validateFilename(filename: string): void {
123+
if (filename.includes('..') || filename.startsWith('/')) {
124+
throw new ValidationError(`Invalid filename (path traversal): ${filename}`, {
125+
fieldErrors: { filename: ['path traversal not allowed'] },
126+
});
127+
}
128+
const safePattern = /^[a-zA-Z0-9._/-]+$/;
129+
if (!safePattern.test(filename)) {
130+
throw new ValidationError(`Invalid filename (contains unsafe characters): ${filename}`, {
131+
fieldErrors: { filename: ['contains unsafe characters'] },
132+
});
133+
}
134+
}
135+
136+
/**
137+
* Download files from GCS by name.
138+
*/
139+
async readFiles(filenames: string[]): Promise<Record<string, string>> {
140+
if (!this.prefix) {
141+
throw new ConfigurationError('Volume not initialized');
142+
}
143+
144+
const storage = getStorage();
145+
const bucket = storage.bucket(this.bucketName);
146+
const results: Record<string, string> = {};
147+
148+
for (const filename of filenames) {
149+
try {
150+
const key = `${this.prefix}/${filename}`;
151+
const file = bucket.file(key);
152+
const [contents] = await file.download();
153+
results[filename] = contents.toString('utf-8');
154+
} catch (error) {
155+
console.warn(
156+
`Could not read file ${filename} from GCS: ${error instanceof Error ? error.message : String(error)}`,
157+
);
158+
}
159+
}
160+
161+
return results;
162+
}
163+
164+
/**
165+
* Returns volume config for the runner.
166+
* The K8s runner recognizes the "gcsfuse:" prefix and creates a CSI volume.
167+
* Format: "gcsfuse:{bucketName}:{prefix}"
168+
*/
169+
getVolumeConfig(containerPath = '/inputs', readOnly = true) {
170+
if (!this.prefix) {
171+
throw new ConfigurationError('Volume not initialized');
172+
}
173+
return {
174+
source: `gcsfuse:${this.bucketName}:${this.prefix}`,
175+
target: containerPath,
176+
readOnly,
177+
};
178+
}
179+
180+
/**
181+
* Returns a bind mount string (for interface compatibility).
182+
*/
183+
getBindMount(containerPath = '/inputs', readOnly = true): string {
184+
if (!this.prefix) {
185+
throw new ConfigurationError('Volume not initialized');
186+
}
187+
const mode = readOnly ? 'ro' : 'rw';
188+
return `gcsfuse:${this.bucketName}:${this.prefix}:${containerPath}:${mode}`;
189+
}
190+
191+
/**
192+
* Delete all objects under the GCS prefix.
193+
*/
194+
async cleanup(): Promise<void> {
195+
if (!this.prefix) return;
196+
197+
try {
198+
const storage = getStorage();
199+
const bucket = storage.bucket(this.bucketName);
200+
await bucket.deleteFiles({ prefix: `${this.prefix}/` });
201+
} catch (error) {
202+
console.error(
203+
`Failed to cleanup GCS volume ${this.prefix}: ${error instanceof Error ? error.message : String(error)}`,
204+
);
205+
} finally {
206+
this.isInitialized = false;
207+
this.prefix = undefined;
208+
}
209+
}
210+
211+
getVolumeName(): string | undefined {
212+
return this.prefix;
213+
}
214+
}

worker/src/utils/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ export {
88
createIsolatedVolume,
99
} from './isolated-volume';
1010
export { IsolatedK8sVolume } from './k8s-volume';
11+
export { IsolatedGcsVolume } from './gcs-volume';

0 commit comments

Comments
 (0)