Skip to content

Commit 65c8760

Browse files
committed
Fix CSV loader to handle out-of-order and subset columns
- Parse the CSV header to extract column names and their order.
- Use `COPY table (col1, col2, ...) FROM STDIN WITH CSV HEADER` to specify the column order explicitly.
- Handle a BOM, surrounding whitespace, and quoted column names.
- Validate column names against SQL identifier rules.
- Fix posts-subset-header.csv to have correct test data values.

This allows CSV files to have columns in any order and to include only a subset of the table's columns, as long as the omitted columns are nullable or have defaults.

Co-Authored-By: Dan Lynch <pyramation@gmail.com>
1 parent 7ecbdc7 commit 65c8760

2 files changed

Lines changed: 63 additions & 3 deletions

File tree

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
id,content,user_id
2-
1,'sdf',1
3-
2,'sdf',2
2+
1,Hello world!,1
3+
2,Graphile is cool!,2

packages/pgsql-test/src/seed/csv.ts

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { pipeline } from 'node:stream/promises';
2+
import { createInterface } from 'node:readline';
23

34
import { Logger } from '@launchql/logger';
45
import { createReadStream, createWriteStream,existsSync } from 'fs';
@@ -28,9 +29,68 @@ export function csv(tables: CsvSeedMap): SeedAdapter {
2829
};
2930
}
3031

32+
/**
 * Read the first line of a CSV file and return its column names.
 *
 * Only the header line is consumed. A leading UTF-8 byte-order mark is
 * removed, each comma-separated cell is trimmed, one pair of surrounding
 * single or double quotes is stripped, and the name is lower-cased.
 *
 * @param filePath - Path of the CSV file whose header should be parsed.
 * @returns The normalized column names, in the order they appear in the header.
 * @throws Error if the file has no header line, if a column name does not
 *   match `/^[a-z_][a-z0-9_]*$/`, or if the same column name appears twice.
 */
async function parseCsvHeader(filePath: string): Promise<string[]> {
  const fileStream = createReadStream(filePath);
  const rl = createInterface({
    input: fileStream,
    crlfDelay: Infinity
  });

  let headerLine: string | null = null;

  try {
    for await (const line of rl) {
      headerLine = line;
      break; // Only read the first line
    }
  } finally {
    // Release the file handle even when iteration fails part-way through.
    rl.close();
    fileStream.destroy();
  }

  if (headerLine === null) {
    throw new Error('CSV file is empty or has no header');
  }

  // Strip the BOM before the emptiness check so a BOM-only first line is
  // reported as a missing header rather than as an invalid column name.
  if (headerLine.charCodeAt(0) === 0xFEFF) {
    headerLine = headerLine.slice(1);
  }

  if (headerLine.trim() === '') {
    throw new Error('CSV file is empty or has no header');
  }

  const columns = headerLine.split(',').map(col => {
    let cleaned = col.trim();
    // Require at least two characters so a lone quote is not treated as a pair.
    const isQuoted =
      cleaned.length >= 2 &&
      ((cleaned.startsWith('"') && cleaned.endsWith('"')) ||
        (cleaned.startsWith("'") && cleaned.endsWith("'")));
    if (isQuoted) {
      cleaned = cleaned.slice(1, -1);
    }
    return cleaned.toLowerCase();
  });

  const validIdentifier = /^[a-z_][a-z0-9_]*$/;
  const seen = new Set<string>();
  for (const col of columns) {
    if (!validIdentifier.test(col)) {
      throw new Error(`Invalid column name in CSV header: "${col}". Column names must be valid SQL identifiers.`);
    }
    // Names are compared after lower-casing, so "ID" and "id" collide here.
    if (seen.has(col)) {
      throw new Error(`Duplicate column name in CSV header: "${col}".`);
    }
    seen.add(col);
  }

  return columns;
}
82+
3183
export async function copyCsvIntoTable(pg: PgTestClient, table: string, filePath: string): Promise<void> {
3284
const client: Client = pg.client;
33-
const stream = client.query(copyFrom(`COPY ${table} FROM STDIN WITH CSV HEADER`));
85+
86+
const columns = await parseCsvHeader(filePath);
87+
88+
const columnList = columns.join(', ');
89+
const copyCommand = `COPY ${table} (${columnList}) FROM STDIN WITH CSV HEADER`;
90+
91+
log.info(`Using columns: ${columnList}`);
92+
93+
const stream = client.query(copyFrom(copyCommand));
3494
const source = createReadStream(filePath);
3595

3696
try {

0 commit comments

Comments
 (0)