|
| 1 | +/** |
| 2 | + * IsolatedGcsVolume — GCS FUSE CSI volume replacement for IsolatedK8sVolume. |
| 3 | + * |
| 4 | + * Uses a GCS bucket mounted via the GCS FUSE CSI driver instead of ConfigMaps. |
| 5 | + * Same interface as IsolatedK8sVolume / IsolatedContainerVolume so components |
| 6 | + * can swap transparently via the createIsolatedVolume() factory. |
| 7 | + * |
| 8 | + * Advantages over ConfigMap-backed volumes: |
| 9 | + * - No 1 MiB size limit (handles large outputs like Prowler) |
| 10 | + * - Native read-write (no log-based writeback hack) |
| 11 | + * - ReadWriteMany (parallel pods can share data) |
| 12 | + * - Worker reads output directly from GCS via SDK |
| 13 | + */ |
| 14 | +import { Storage } from '@google-cloud/storage'; |
| 15 | +import { ValidationError, ConfigurationError, ContainerError } from '@shipsec/component-sdk'; |
| 16 | + |
/** Module-wide Storage client; stays null until getStorage() is first called. */
let _storage: Storage | null = null;

/**
 * Return the shared Storage client, constructing it on the first call.
 *
 * The client is built with no explicit options, so credentials come from the
 * environment (per the original author: Workload Identity is auto-discovered
 * in GKE).
 *
 * @returns The same Storage instance on every call.
 */
function getStorage(): Storage {
  _storage ??= new Storage();
  return _storage;
}
| 26 | + |
/**
 * Look up the bucket that backs GCS volumes.
 *
 * @returns The value of the `GCS_VOLUME_BUCKET` environment variable.
 * @throws ConfigurationError when the variable is unset or empty.
 */
function getBucketName(): string {
  const { GCS_VOLUME_BUCKET: configuredBucket } = process.env;
  if (configuredBucket) {
    return configuredBucket;
  }
  throw new ConfigurationError('GCS_VOLUME_BUCKET environment variable is not set');
}
| 34 | + |
/**
 * Normalize an identifier into a lowercase, hyphen-separated path segment.
 *
 * Steps: lowercase, replace every run of characters outside `[a-z0-9-]` with a
 * single hyphen, collapse repeated hyphens, drop a leading hyphen, truncate to
 * 53 characters, then drop a trailing hyphen.
 *
 * The trailing hyphen is removed *after* truncation so that a cut landing on a
 * separator cannot leave the result ending in `-`.
 *
 * The mapping is lossy: inputs that differ only in case, in `_` versus `-`, or
 * beyond the 53rd character produce the same output. An input with no
 * alphanumeric characters produces the empty string.
 *
 * @param raw Identifier to normalize.
 * @returns The normalized segment, at most 53 characters long.
 */
function sanitizeName(raw: string): string {
  return raw
    .toLowerCase()
    .replace(/[^a-z0-9-]+/g, '-')
    .replace(/-+/g, '-')
    .replace(/^-/, '')
    .slice(0, 53)
    .replace(/-$/, '');
}
| 43 | + |
/**
 * A per-run volume stored as objects under a prefix in a GCS bucket.
 *
 * Lifecycle: construct, `initialize()` to upload input files, hand
 * `getVolumeConfig()` / `getBindMount()` to the runner, optionally
 * `readFiles()` afterwards, then `cleanup()` to delete everything under the
 * prefix. The bucket name is resolved once, in the constructor.
 */
export class IsolatedGcsVolume {
  /** Characters accepted in tenant and run IDs. */
  private static readonly ID_PATTERN = /^[a-zA-Z0-9_-]+$/;
  /** Characters accepted in uploaded filenames (`/` allows sub-directories). */
  private static readonly SAFE_FILENAME_PATTERN = /^[a-zA-Z0-9._/-]+$/;

  /** Object-name prefix of this volume; set by initialize(), cleared by cleanup(). */
  private prefix?: string;
  private isInitialized = false;
  private readonly bucketName: string;

  /**
   * @param tenantId Tenant identifier; letters, digits, `_` and `-` only.
   * @param runId Run identifier; same character rules as `tenantId`.
   * @throws ValidationError when either ID contains other characters or has no
   *   alphanumeric character at all.
   * @throws ConfigurationError when the bucket name cannot be resolved.
   */
  constructor(
    private readonly tenantId: string,
    private readonly runId: string,
  ) {
    IsolatedGcsVolume.assertValidId('tenantId', 'tenant ID', tenantId);
    IsolatedGcsVolume.assertValidId('runId', 'run ID', runId);
    this.bucketName = getBucketName();
  }

  /** Throw a ValidationError unless `value` is usable as a prefix segment. */
  private static assertValidId(
    field: 'tenantId' | 'runId',
    label: string,
    value: string,
  ): void {
    if (!IsolatedGcsVolume.ID_PATTERN.test(value)) {
      const rule = 'must contain only alphanumeric characters, hyphens, and underscores';
      throw new ValidationError(`Invalid ${label}: ${rule}`, {
        fieldErrors: { [field]: [rule] },
      });
    }
    // An ID made only of `_`/`-` sanitizes to an empty string, which would put
    // an empty segment into the object prefix.
    if (!/[a-zA-Z0-9]/.test(value)) {
      const rule = 'must contain at least one alphanumeric character';
      throw new ValidationError(`Invalid ${label}: ${rule}`, {
        fieldErrors: { [field]: [rule] },
      });
    }
  }

  /** Render an unknown thrown value as a message string. */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /** Return the current prefix or throw if the volume has none. */
  private requirePrefix(): string {
    if (!this.prefix) {
      throw new ConfigurationError('Volume not initialized');
    }
    return this.prefix;
  }

  /**
   * Upload files to GCS under a fresh prefix and return that prefix.
   *
   * Object key structure:
   * `{sanitized tenantId}/{sanitized runId}/{timestamp}/{filename}`, where the
   * timestamp is `Date.now()` at the time of the call.
   *
   * Every filename is validated before the first upload starts, and all
   * uploads are allowed to settle before a failure is reported, so the
   * cleanup that follows a failure does not race uploads still in flight.
   *
   * @param files Map of relative filename to content; strings are encoded as UTF-8.
   * @returns The object-name prefix the files were written under.
   * @throws ConfigurationError when the volume is already initialized.
   * @throws ContainerError when a filename is invalid or any upload fails; the
   *   prefix is cleaned up first and the original error is attached as `cause`.
   */
  async initialize(files: Record<string, string | Buffer>): Promise<string> {
    if (this.isInitialized) {
      throw new ConfigurationError('Volume already initialized', {
        details: { prefix: this.prefix, tenantId: this.tenantId, runId: this.runId },
      });
    }

    const prefix = `${sanitizeName(this.tenantId)}/${sanitizeName(this.runId)}/${Date.now()}`;
    this.prefix = prefix;

    try {
      const entries = Object.entries(files);

      // Reject bad names up front so nothing is written for an invalid request.
      for (const [filename] of entries) {
        this.validateFilename(filename);
      }

      const bucket = getStorage().bucket(this.bucketName);
      const outcomes = await Promise.allSettled(
        entries.map(async ([filename, content]) => {
          const data = typeof content === 'string' ? Buffer.from(content, 'utf-8') : content;
          await bucket.file(`${prefix}/${filename}`).save(data);
        }),
      );

      const failure = outcomes.find(
        (outcome): outcome is PromiseRejectedResult => outcome.status === 'rejected',
      );
      if (failure) {
        throw failure.reason;
      }

      this.isInitialized = true;
      return prefix;
    } catch (error) {
      // cleanup() logs and swallows its own failures, so it cannot mask `error`.
      await this.cleanup();
      throw new ContainerError(
        `Failed to initialize GCS volume: ${IsolatedGcsVolume.describeError(error)}`,
        {
          cause: error instanceof Error ? error : undefined,
          details: { tenantId: this.tenantId, runId: this.runId },
        },
      );
    }
  }

  /**
   * Check that a filename is a safe relative path.
   *
   * @throws ValidationError when the name contains `..`, starts with `/`,
   *   contains characters outside `[a-zA-Z0-9._/-]`, or has an empty or `.`
   *   path segment (e.g. `a//b`, `dir/`, `./a`).
   */
  private validateFilename(filename: string): void {
    if (filename.includes('..') || filename.startsWith('/')) {
      throw new ValidationError(`Invalid filename (path traversal): ${filename}`, {
        fieldErrors: { filename: ['path traversal not allowed'] },
      });
    }
    if (!IsolatedGcsVolume.SAFE_FILENAME_PATTERN.test(filename)) {
      throw new ValidationError(`Invalid filename (contains unsafe characters): ${filename}`, {
        fieldErrors: { filename: ['contains unsafe characters'] },
      });
    }
    // Empty and "." segments are legal in object names but cannot be
    // represented as directory entries when the prefix is mounted.
    if (filename.split('/').some((segment) => segment === '' || segment === '.')) {
      throw new ValidationError(`Invalid filename (empty or "." path segment): ${filename}`, {
        fieldErrors: { filename: ['empty or "." path segments not allowed'] },
      });
    }
  }

  /**
   * Download files from GCS by name and decode them as UTF-8.
   *
   * Files are fetched one at a time. A file that cannot be downloaded is
   * logged with `console.warn` and left out of the result rather than failing
   * the whole call.
   *
   * @param filenames Names relative to this volume's prefix.
   * @returns Map of filename to content for every file that was read.
   * @throws ConfigurationError when the volume has no prefix.
   */
  async readFiles(filenames: string[]): Promise<Record<string, string>> {
    const prefix = this.requirePrefix();
    const bucket = getStorage().bucket(this.bucketName);
    const results: Record<string, string> = {};

    for (const filename of filenames) {
      try {
        const [contents] = await bucket.file(`${prefix}/${filename}`).download();
        results[filename] = contents.toString('utf-8');
      } catch (error) {
        console.warn(
          `Could not read file ${filename} from GCS: ${IsolatedGcsVolume.describeError(error)}`,
        );
      }
    }

    return results;
  }

  /**
   * Returns volume config for the runner.
   * The K8s runner recognizes the "gcsfuse:" prefix and creates a CSI volume.
   * Source format: "gcsfuse:{bucketName}:{prefix}"
   *
   * @param containerPath Mount point inside the container.
   * @param readOnly Whether the mount is read-only.
   * @throws ConfigurationError when the volume has no prefix.
   */
  getVolumeConfig(
    containerPath = '/inputs',
    readOnly = true,
  ): { source: string; target: string; readOnly: boolean } {
    const prefix = this.requirePrefix();
    return {
      source: `gcsfuse:${this.bucketName}:${prefix}`,
      target: containerPath,
      readOnly,
    };
  }

  /**
   * Returns a bind mount string (for interface compatibility).
   * Format: "gcsfuse:{bucketName}:{prefix}:{containerPath}:{ro|rw}"
   *
   * @param containerPath Mount point inside the container.
   * @param readOnly Selects the `ro` (true) or `rw` (false) mode suffix.
   * @throws ConfigurationError when the volume has no prefix.
   */
  getBindMount(containerPath = '/inputs', readOnly = true): string {
    const prefix = this.requirePrefix();
    const mode = readOnly ? 'ro' : 'rw';
    return `gcsfuse:${this.bucketName}:${prefix}:${containerPath}:${mode}`;
  }

  /**
   * Delete all objects under the GCS prefix.
   *
   * Does nothing when there is no prefix. A deletion failure is logged with
   * `console.error` and not rethrown; in every case the volume is reset to
   * the uninitialized state afterwards.
   */
  async cleanup(): Promise<void> {
    const prefix = this.prefix;
    if (!prefix) return;

    try {
      await getStorage()
        .bucket(this.bucketName)
        .deleteFiles({ prefix: `${prefix}/` });
    } catch (error) {
      console.error(
        `Failed to cleanup GCS volume ${prefix}: ${IsolatedGcsVolume.describeError(error)}`,
      );
    } finally {
      this.isInitialized = false;
      this.prefix = undefined;
    }
  }

  /** Returns the current object-name prefix, or undefined when there is none. */
  getVolumeName(): string | undefined {
    return this.prefix;
  }
}
0 commit comments