storage-module-cache.ts
import { Logger } from '@pgpmjs/logger';
import { LRUCache } from 'lru-cache';
import type { StorageModuleConfig, BucketConfig } from './types';

const log = new Logger('graphile-presigned-url:cache');

// --- Defaults ---
const DEFAULT_UPLOAD_URL_EXPIRY_SECONDS = 900; // 15 minutes
const DEFAULT_DOWNLOAD_URL_EXPIRY_SECONDS = 3600; // 1 hour
const DEFAULT_MAX_FILE_SIZE = 200 * 1024 * 1024; // 200MB
const DEFAULT_MAX_FILENAME_LENGTH = 1024;
const DEFAULT_CACHE_TTL_SECONDS = process.env.NODE_ENV === 'development' ? 300 : 3600;
const FIVE_MINUTES_MS = 1000 * 60 * 5;
const ONE_HOUR_MS = 1000 * 60 * 60;
/**
 * LRU cache for per-database StorageModuleConfig.
 *
 * Each PostGraphile instance serves a single database, but the presigned URL
 * plugin needs to know the generated table names (buckets, files,
 * upload_requests) and their schemas. This cache avoids re-querying metaschema
 * on every request.
 *
 * Pattern: same as graphile-cache's LRU with TTL-based eviction.
 */
const storageModuleCache = new LRUCache<string, StorageModuleConfig>({
  max: 50,
  ttl: process.env.NODE_ENV === 'development' ? FIVE_MINUTES_MS : ONE_HOUR_MS,
  updateAgeOnGet: true,
});
/**
 * SQL query to resolve storage module config for a database.
 *
 * Joins storage_module → table → schema to get fully-qualified table names.
 */
const STORAGE_MODULE_QUERY = `
  SELECT
    sm.id,
    bs.schema_name AS buckets_schema,
    bt.name AS buckets_table,
    fs.schema_name AS files_schema,
    ft.name AS files_table,
    urs.schema_name AS upload_requests_schema,
    urt.name AS upload_requests_table,
    sm.endpoint,
    sm.public_url_prefix,
    sm.provider,
    sm.allowed_origins,
    sm.upload_url_expiry_seconds,
    sm.download_url_expiry_seconds,
    sm.default_max_file_size,
    sm.max_filename_length,
    sm.cache_ttl_seconds
  FROM metaschema_modules_public.storage_module sm
  JOIN metaschema_public.table bt ON bt.id = sm.buckets_table_id
  JOIN metaschema_public.schema bs ON bs.id = bt.schema_id
  JOIN metaschema_public.table ft ON ft.id = sm.files_table_id
  JOIN metaschema_public.schema fs ON fs.id = ft.schema_id
  JOIN metaschema_public.table urt ON urt.id = sm.upload_requests_table_id
  JOIN metaschema_public.schema urs ON urs.id = urt.schema_id
  WHERE sm.database_id = $1
  LIMIT 1
`;
interface StorageModuleRow {
  id: string;
  buckets_schema: string;
  buckets_table: string;
  files_schema: string;
  files_table: string;
  upload_requests_schema: string;
  upload_requests_table: string;
  endpoint: string | null;
  public_url_prefix: string | null;
  provider: string | null;
  allowed_origins: string[] | null;
  upload_url_expiry_seconds: number | null;
  download_url_expiry_seconds: number | null;
  default_max_file_size: number | null;
  max_filename_length: number | null;
  cache_ttl_seconds: number | null;
}
/**
 * Resolve the storage module config for a database, using the LRU cache.
 *
 * @param pgClient - A pg client from the Graphile context (withPgClient or pgClient)
 * @param databaseId - The metaschema database UUID
 * @returns StorageModuleConfig or null if no storage module is provisioned
 */
export async function getStorageModuleConfig(
  pgClient: { query: (opts: { text: string; values?: unknown[] }) => Promise<{ rows: unknown[] }> },
  databaseId: string,
): Promise<StorageModuleConfig | null> {
  const cacheKey = `storage:${databaseId}`;
  const cached = storageModuleCache.get(cacheKey);
  if (cached) {
    return cached;
  }

  log.debug(`Cache miss for database ${databaseId}, querying metaschema...`);
  const result = await pgClient.query({ text: STORAGE_MODULE_QUERY, values: [databaseId] });
  if (result.rows.length === 0) {
    log.warn(`No storage module found for database ${databaseId}`);
    return null;
  }

  const row = result.rows[0] as StorageModuleRow;
  const cacheTtlSeconds = row.cache_ttl_seconds ?? DEFAULT_CACHE_TTL_SECONDS;

  const config: StorageModuleConfig = {
    id: row.id,
    bucketsQualifiedName: `"${row.buckets_schema}"."${row.buckets_table}"`,
    filesQualifiedName: `"${row.files_schema}"."${row.files_table}"`,
    uploadRequestsQualifiedName: `"${row.upload_requests_schema}"."${row.upload_requests_table}"`,
    schemaName: row.buckets_schema,
    bucketsTableName: row.buckets_table,
    filesTableName: row.files_table,
    uploadRequestsTableName: row.upload_requests_table,
    endpoint: row.endpoint,
    publicUrlPrefix: row.public_url_prefix,
    provider: row.provider,
    allowedOrigins: row.allowed_origins,
    uploadUrlExpirySeconds: row.upload_url_expiry_seconds ?? DEFAULT_UPLOAD_URL_EXPIRY_SECONDS,
    downloadUrlExpirySeconds: row.download_url_expiry_seconds ?? DEFAULT_DOWNLOAD_URL_EXPIRY_SECONDS,
    defaultMaxFileSize: row.default_max_file_size ?? DEFAULT_MAX_FILE_SIZE,
    maxFilenameLength: row.max_filename_length ?? DEFAULT_MAX_FILENAME_LENGTH,
    cacheTtlSeconds,
  };

  storageModuleCache.set(cacheKey, config);
  log.debug(`Cached storage config for database ${databaseId}: ${config.bucketsQualifiedName}`);
  return config;
}
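
/*
 * Usage sketch (illustrative only): how a presigned-URL resolver might call
 * getStorageModuleConfig. The `context.pgClient` / `context.databaseId` names
 * are assumptions for the example, not part of this module's API.
 *
 *   const storageConfig = await getStorageModuleConfig(context.pgClient, context.databaseId);
 *   if (!storageConfig) {
 *     throw new Error('No storage module provisioned for this database');
 *   }
 *   // Qualified names are ready to interpolate into follow-up SQL, e.g.
 *   // `SELECT ... FROM ${storageConfig.filesQualifiedName} WHERE ...`
 */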
// --- Bucket metadata cache ---

/**
 * LRU cache for per-database bucket metadata.
 *
 * Buckets are essentially static config — created once and rarely changed.
 * Caching avoids a DB query on every requestUploadUrl call. The bucket
 * lookup in the plugin runs under RLS, but since AuthzEntityMembership
 * grants all org members access to all org buckets, and the cached data
 * is just config (mime types, size limits), bypassing RLS on cache hits
 * is safe. The important RLS is on the files table (INSERT/UPDATE),
 * which is never cached.
 *
 * Keys: `bucket:${databaseId}:${bucketKey}`
 * TTL: same as storage module cache (5min dev / 1hr prod)
 */
const bucketCache = new LRUCache<string, BucketConfig>({
  max: 500, // many buckets across many databases
  ttl: process.env.NODE_ENV === 'development' ? FIVE_MINUTES_MS : ONE_HOUR_MS,
  updateAgeOnGet: true,
});
/**
 * Resolve bucket metadata for a given database + bucket key, using the LRU cache.
 *
 * On cache miss, queries the bucket table (RLS-enforced via pgSettings on
 * the pgClient). On cache hit, returns the cached metadata directly.
 *
 * @param pgClient - A pg client from the Graphile context
 * @param storageConfig - The resolved StorageModuleConfig for this database
 * @param databaseId - The metaschema database UUID (used as cache key prefix)
 * @param bucketKey - The bucket key (e.g., "public", "private")
 * @returns BucketConfig or null if the bucket doesn't exist / isn't accessible
 */
export async function getBucketConfig(
  pgClient: { query: (opts: { text: string; values?: unknown[] }) => Promise<{ rows: unknown[] }> },
  storageConfig: StorageModuleConfig,
  databaseId: string,
  bucketKey: string,
): Promise<BucketConfig | null> {
  const cacheKey = `bucket:${databaseId}:${bucketKey}`;
  const cached = bucketCache.get(cacheKey);
  if (cached) {
    return cached;
  }

  log.debug(`Bucket cache miss for ${databaseId}:${bucketKey}, querying DB...`);
  const result = await pgClient.query({
    text: `SELECT id, key, type, is_public, owner_id, allowed_mime_types, max_file_size
           FROM ${storageConfig.bucketsQualifiedName}
           WHERE key = $1
           LIMIT 1`,
    values: [bucketKey],
  });
  if (result.rows.length === 0) {
    return null;
  }

  const row = result.rows[0] as {
    id: string;
    key: string;
    type: string;
    is_public: boolean;
    owner_id: string;
    allowed_mime_types: string[] | null;
    max_file_size: number | null;
  };

  const config: BucketConfig = {
    id: row.id,
    key: row.key,
    type: row.type as BucketConfig['type'],
    is_public: row.is_public,
    owner_id: row.owner_id,
    allowed_mime_types: row.allowed_mime_types,
    max_file_size: row.max_file_size,
  };

  bucketCache.set(cacheKey, config);
  log.debug(`Cached bucket config for ${databaseId}:${bucketKey} (id=${config.id})`);
  return config;
}
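
/*
 * Usage sketch (illustrative only): validating an upload request against the
 * cached bucket metadata. `mimeType` and `fileSize` are assumed inputs from
 * the mutation arguments, not defined in this module.
 *
 *   const bucket = await getBucketConfig(pgClient, storageConfig, databaseId, 'public');
 *   if (!bucket) throw new Error('Unknown or inaccessible bucket');
 *   if (bucket.allowed_mime_types && !bucket.allowed_mime_types.includes(mimeType)) {
 *     throw new Error(`MIME type not allowed: ${mimeType}`);
 *   }
 *   const maxSize = bucket.max_file_size ?? storageConfig.defaultMaxFileSize;
 *   if (fileSize > maxSize) throw new Error('File too large');
 */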
// --- S3 bucket existence cache ---

/**
 * In-memory set of S3 bucket names that are known to exist.
 *
 * Used by the lazy provisioning logic in the presigned URL plugin:
 * before generating a presigned PUT URL, the plugin checks this set.
 * If the bucket name is absent, it calls `ensureBucketProvisioned`
 * to create the S3 bucket, then adds the name here. Subsequent
 * requests for the same bucket skip the provisioning entirely.
 *
 * No TTL needed — S3 buckets are never deleted during normal operation.
 * The set resets on server restart, which is fine because the
 * provisioner's createBucket is idempotent (handles "already exists").
 */
const provisionedBuckets = new Set<string>();

/**
 * Check whether an S3 bucket has already been provisioned (cached).
 */
export function isS3BucketProvisioned(s3BucketName: string): boolean {
  return provisionedBuckets.has(s3BucketName);
}

/**
 * Mark an S3 bucket as provisioned in the in-memory cache.
 */
export function markS3BucketProvisioned(s3BucketName: string): void {
  provisionedBuckets.add(s3BucketName);
  log.debug(`Marked S3 bucket "${s3BucketName}" as provisioned`);
}
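
/*
 * Usage sketch (illustrative only): the lazy-provisioning flow described
 * above. `ensureBucketProvisioned` lives in the plugin/provisioner, not in
 * this module; its signature here is an assumption.
 *
 *   if (!isS3BucketProvisioned(s3BucketName)) {
 *     await ensureBucketProvisioned(s3BucketName); // idempotent createBucket
 *     markS3BucketProvisioned(s3BucketName);
 *   }
 *   // ...then generate the presigned PUT URL as usual.
 */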
/**
 * Clear the storage module cache, the bucket cache, and the in-memory
 * S3 provisioned-bucket set. Useful for testing or schema changes.
 */
export function clearStorageModuleCache(): void {
  storageModuleCache.clear();
  bucketCache.clear();
  provisionedBuckets.clear();
}
/**
 * Clear cached bucket entries for a specific database.
 * Useful when bucket config changes are detected.
 */
export function clearBucketCache(databaseId?: string): void {
  if (!databaseId) {
    bucketCache.clear();
    return;
  }
  // Evict all entries for this database
  const prefix = `bucket:${databaseId}:`;
  for (const key of bucketCache.keys()) {
    if (key.startsWith(prefix)) {
      bucketCache.delete(key);
    }
  }
}
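
/*
 * Usage sketch (illustrative only): resetting caches between integration
 * tests, or after editing a database's bucket rows. The afterEach hook is
 * an assumption about the caller's test framework.
 *
 *   afterEach(() => {
 *     clearStorageModuleCache(); // drops storage configs, buckets, and the S3 set
 *   });
 *
 *   // Or, more surgically, after a bucket row changes:
 *   clearBucketCache(databaseId);
 */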