Skip to content

Commit ec13f90

Browse files
committed
Add optional fields and streaming CSV parser with comprehensive tests
- Add optional fields (email, bio, title, published) to test table schemas
- Add csv-parse dependency for robust CSV header parsing
- Replace naive CSV header parsing with streaming csv-parse implementation
  - Handles BOM, CRLF/LF line endings
  - Properly parses quoted commas and escaped quotes
  - Validates column names against SQL identifier rules
  - Normalizes headers to snake_case lowercase
  - Reads only first record (header) for efficiency with large files
- Create comprehensive test cases:
  - posts-with-optional.csv: CSV with optional fields populated
  - posts-quoted-commas.csv: CSV with quoted commas in fields
  - posts-escaped-quotes.csv: CSV with escaped quotes in fields
  - postgres-test.csv-optional-fields.test.ts: Test optional fields
  - postgres-test.csv-edge-cases.test.ts: Test quoted commas and escaped quotes
- All 5 CSV tests passing (csv, csv-subset-header, csv-optional-fields, csv-edge-cases)

Co-Authored-By: Dan Lynch <pyramation@gmail.com>
1 parent 65c8760 commit ec13f90

9 files changed

Lines changed: 213 additions & 46 deletions
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
process.env.LOG_SCOPE = 'pgsql-test';
2+
3+
import path from 'path';
4+
5+
import { seed } from '../src';
6+
import { getConnections } from '../src/connect';
7+
import { PgTestClient } from '../src/test-client';
8+
9+
/** Resolves a CSV fixture file name against the sibling `csv` directory. */
function csv(file: string): string {
  return path.resolve(__dirname, '../csv', file);
}

describe('CSV edge cases', () => {
  /**
   * Calls getConnections with two seed adapters, in this order: one that runs
   * `ddl` through the seed context's `pg` client, and one that maps
   * `custom.posts` to the named CSV fixture.
   */
  const connectWithPosts = (ddl: string, fixture: string) =>
    getConnections({}, [
      seed.fn(async (ctx) => {
        await ctx.pg.query(ddl);
      }),

      seed.csv({
        'custom.posts': csv(fixture)
      })
    ]);

  describe('quoted commas', () => {
    let pg: PgTestClient;
    let teardown: () => Promise<void>;

    beforeAll(async () => {
      const connections = await connectWithPosts(
        `
          CREATE SCHEMA custom;
          CREATE TABLE custom.posts (
            id SERIAL PRIMARY KEY,
            user_id INT NOT NULL,
            content TEXT NOT NULL,
            title TEXT
          );
        `,
        'posts-quoted-commas.csv'
      );
      pg = connections.pg;
      teardown = connections.teardown;
    });

    afterAll(async () => {
      await teardown();
    });

    it('handles quoted commas in CSV fields', async () => {
      // Commas inside quoted fields must land in a single column value.
      const { rows } = await pg.query(`
        SELECT id, user_id, content, title
        FROM custom.posts
        ORDER BY id
      `);

      expect(rows).toEqual([
        { id: 1, user_id: 1, content: 'Hello, world!', title: 'First Post, Ever' },
        { id: 2, user_id: 2, content: 'Graphile is cool!', title: 'GraphQL, PostGraphile' }
      ]);
    });
  });

  describe('escaped quotes', () => {
    let pg: PgTestClient;
    let teardown: () => Promise<void>;

    beforeAll(async () => {
      const connections = await connectWithPosts(
        `
          CREATE SCHEMA custom;
          CREATE TABLE custom.posts (
            id SERIAL PRIMARY KEY,
            user_id INT NOT NULL,
            content TEXT NOT NULL
          );
        `,
        'posts-escaped-quotes.csv'
      );
      pg = connections.pg;
      teardown = connections.teardown;
    });

    afterAll(async () => {
      await teardown();
    });

    it('handles escaped quotes in CSV fields', async () => {
      // Doubled quotes ("") in the fixture are expected to load as one literal quote.
      const { rows } = await pg.query(`
        SELECT id, user_id, content
        FROM custom.posts
        ORDER BY id
      `);

      expect(rows).toEqual([
        { id: 1, user_id: 1, content: 'He said "hello"' },
        { id: 2, user_id: 2, content: 'She replied "hi"' }
      ]);
    });
  });
});
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
process.env.LOG_SCOPE = 'pgsql-test';
2+
3+
import path from 'path';
4+
5+
import { seed } from '../src';
6+
import { getConnections } from '../src/connect';
7+
import { PgTestClient } from '../src/test-client';
8+
9+
/** Resolves a CSV fixture file name against the sibling `csv` directory. */
function csv(file: string): string {
  return path.resolve(__dirname, '../csv', file);
}

let pg: PgTestClient;
let teardown: () => Promise<void>;

beforeAll(async () => {
  // Seed adapters are built in the same order they are handed to getConnections.
  const createPostsTable = seed.fn(async (ctx) => {
    await ctx.pg.query(`
      CREATE SCHEMA custom;
      CREATE TABLE custom.posts (
        id SERIAL PRIMARY KEY,
        user_id INT NOT NULL,
        content TEXT NOT NULL,
        title TEXT,
        published BOOLEAN
      );
    `);
  });

  const loadPosts = seed.csv({
    'custom.posts': csv('posts-with-optional.csv')
  });

  // The fixture supplies explicit ids, so move the serial sequence up to MAX(id).
  const syncIdSequence = seed.fn(async (ctx) => {
    await ctx.pg.query(`SELECT setval(pg_get_serial_sequence('custom.posts', 'id'), (SELECT MAX(id) FROM custom.posts));`);
  });

  const connections = await getConnections({}, [createPostsTable, loadPosts, syncIdSequence]);
  pg = connections.pg;
  teardown = connections.teardown;
});

afterAll(async () => {
  await teardown();
});

it('csv with optional fields', async () => {
  const { rows } = await pg.query(`
    SELECT id, user_id, content, title, published
    FROM custom.posts
    ORDER BY id
  `);

  expect(rows).toEqual([
    { id: 1, user_id: 1, content: 'Hello world!', title: 'My First Post', published: true },
    { id: 2, user_id: 2, content: 'Graphile is cool!', title: 'GraphQL Rocks', published: false }
  ]);
});

packages/pgsql-test/__tests__/postgres-test.csv.test.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,17 @@ beforeAll(async () => {
2121
CREATE SCHEMA custom;
2222
CREATE TABLE custom.users (
2323
id SERIAL PRIMARY KEY,
24-
name TEXT NOT NULL
24+
name TEXT NOT NULL,
25+
email TEXT,
26+
bio TEXT
2527
);
2628
2729
CREATE TABLE custom.posts (
2830
id SERIAL PRIMARY KEY,
2931
user_id INT REFERENCES custom.users(id),
30-
content TEXT NOT NULL
32+
content TEXT NOT NULL,
33+
title TEXT,
34+
published BOOLEAN
3135
);
3236
`);
3337
}),
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
id,user_id,content
2+
1,1,"He said ""hello"""
3+
2,2,"She replied ""hi"""
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
id,user_id,content,title
2+
1,1,"Hello, world!","First Post, Ever"
3+
2,2,"Graphile is cool!","GraphQL, PostGraphile"
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
id,user_id,content,title,published
2+
1,1,Hello world!,My First Post,true
3+
2,2,Graphile is cool!,GraphQL Rocks,false

packages/pgsql-test/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
"@launchql/env": "^2.4.3",
6565
"@launchql/server-utils": "^2.4.3",
6666
"@launchql/types": "^2.6.2",
67+
"csv-parse": "^6.1.0",
6768
"pg": "^8.16.0",
6869
"pg-cache": "^1.3.4",
6970
"pg-copy-streams": "^6.0.6",

packages/pgsql-test/src/seed/csv.ts

Lines changed: 44 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { pipeline } from 'node:stream/promises';
2-
import { createInterface } from 'node:readline';
32

43
import { Logger } from '@launchql/logger';
4+
import { parse } from 'csv-parse';
55
import { createReadStream, createWriteStream,existsSync } from 'fs';
66
import { Client } from 'pg';
77
import { from as copyFrom, to as copyTo } from 'pg-copy-streams';
@@ -11,6 +11,8 @@ import { SeedAdapter, SeedContext } from './types';
1111

1212
const log = new Logger('csv');
1313

14+
const VALID_IDENTIFIER = /^[a-z_][a-z0-9_]*$/;
15+
1416
interface CsvSeedMap {
1517
[tableName: string]: string;
1618
}
@@ -29,55 +31,53 @@ export function csv(tables: CsvSeedMap): SeedAdapter {
2931
};
3032
}
3133

32-
/**
33-
* Parse and validate CSV header columns
34-
*/
3534
/**
 * Normalize and validate the raw header cells of a CSV file.
 *
 * Each cell is trimmed, runs of whitespace become `_`, and the result is
 * lower-cased; it must then match VALID_IDENTIFIER.
 *
 * @throws Error if any normalized name is not a valid identifier, or if the
 *   header contains no columns.
 */
function normalizeHeaderColumns(row: string[]): string[] {
  const cols = row.map((c) => {
    const cleaned = c.trim().replace(/\s+/g, '_').toLowerCase();
    if (!VALID_IDENTIFIER.test(cleaned)) {
      throw new Error(
        `Invalid column "${c}" → "${cleaned}". Must match /^[a-z_][a-z0-9_]*$/.`
      );
    }
    return cleaned;
  });

  if (cols.length === 0) {
    throw new Error('CSV header has no columns');
  }

  return cols;
}

/**
 * Parse and validate the header row of a CSV file.
 *
 * The file is streamed through csv-parse with `to_line: 1`, so parsing stops
 * after the first line instead of reading the whole file.
 *
 * @param filePath - Path of the CSV file to inspect.
 * @returns The normalized column names, in file order.
 * @throws Error if the file yields no header record (empty file or blank
 *   first line), if a column name is invalid, or if reading/parsing fails.
 */
async function parseCsvHeader(filePath: string): Promise<string[]> {
  const file = createReadStream(filePath);
  const parser = parse({
    bom: true,
    to_line: 1,
    relax_column_count: true,
    skip_empty_lines: true,
    trim: true,
  });

  return new Promise<string[]>((resolve, reject) => {
    // Several events can race (record, end, error); only the first one settles.
    let settled = false;

    const release = (): void => {
      file.unpipe(parser);
      parser.destroy();
      file.destroy();
    };

    const fail = (err: unknown): void => {
      if (settled) return;
      settled = true;
      release();
      reject(err);
    };

    const succeed = (cols: string[]): void => {
      if (settled) return;
      settled = true;
      release();
      resolve(cols);
    };

    parser.on('readable', () => {
      const row = parser.read() as string[] | null;
      if (!row) return;
      try {
        succeed(normalizeHeaderColumns(row));
      } catch (e) {
        fail(e);
      }
    });

    // Without this, a file that produces no record would leave the promise
    // pending forever: 'readable' only ever yields null and nothing rejects.
    parser.on('end', () => {
      fail(new Error('CSV file is empty or has no header'));
    });

    parser.on('error', fail);
    file.on('error', fail);

    file.pipe(parser);
  });
}
8282

8383
export async function copyCsvIntoTable(pg: PgTestClient, table: string, filePath: string): Promise<void> {

yarn.lock

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4232,6 +4232,11 @@ csstype@^3.0.2:
42324232
resolved "https://registry.yarnpkg.com/csstype/-/csstype-3.1.3.tgz#d80ff294d114fb0e6ac500fbf85b60137d7eff81"
42334233
integrity sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==
42344234

4235+
csv-parse@^6.1.0:
4236+
version "6.1.0"
4237+
resolved "https://registry.yarnpkg.com/csv-parse/-/csv-parse-6.1.0.tgz#c642ec5b7fc57c1f477a07d179beb5ff0dfd5ed0"
4238+
integrity sha512-CEE+jwpgLn+MmtCpVcPtiCZpVtB6Z2OKPTr34pycYYoL7sxdOkXDdQ4lRiw6ioC0q6BLqhc6cKweCVvral8yhw==
4239+
42354240
csv-parser@^2.3.3:
42364241
version "2.3.5"
42374242
resolved "https://registry.yarnpkg.com/csv-parser/-/csv-parser-2.3.5.tgz#6b3bf0907684914ff2c5abfbadab111a69eae5db"

0 commit comments

Comments
 (0)