Skip to content

Commit 7db7880

Browse files
committed
feat: Support fetching distributed table metadata with cluster()
1 parent a36b350 commit 7db7880

5 files changed

Lines changed: 320 additions & 91 deletions

File tree

.changeset/tidy-phones-brake.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@hyperdx/common-utils": patch
3+
---
4+
5+
feat: Support fetching distributed table metadata with cluster()

packages/common-utils/src/__tests__/metadata.test.ts

Lines changed: 169 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,62 @@ describe('Metadata', () => {
192192
expect(result!.partition_key).toEqual('column1');
193193
});
194194

195+
it('should query via cluster() for Distributed table underlying metadata', async () => {
196+
const distributedMetadata = {
197+
database: 'test_db',
198+
name: 'dist_table',
199+
engine: 'Distributed',
200+
engine_full:
201+
"Distributed('my_cluster', 'test_db', 'local_table', rand())",
202+
partition_key: '',
203+
sorting_key: '',
204+
primary_key: '',
205+
sampling_key: '',
206+
create_table_query: 'CREATE TABLE test_db.dist_table ...',
207+
};
208+
209+
const localMetadata = {
210+
database: 'test_db',
211+
name: 'local_table',
212+
engine: 'MergeTree',
213+
engine_full: 'MergeTree() ORDER BY id',
214+
partition_key: 'toYYYYMM(timestamp)',
215+
sorting_key: 'id, timestamp',
216+
primary_key: 'id',
217+
sampling_key: '',
218+
create_table_query: 'CREATE TABLE test_db.local_table ...',
219+
};
220+
221+
let callCount = 0;
222+
(mockClickhouseClient.query as jest.Mock).mockImplementation(() => {
223+
callCount++;
224+
return Promise.resolve({
225+
json: jest.fn().mockResolvedValue({
226+
data: [callCount === 1 ? distributedMetadata : localMetadata],
227+
}),
228+
});
229+
});
230+
231+
const result = await metadata.getTableMetadata({
232+
databaseName: 'test_db',
233+
tableName: 'dist_table',
234+
connectionId: 'test_connection',
235+
});
236+
237+
// Two queries: one for the distributed table, one via cluster() for the local table
238+
expect(callCount).toBe(2);
239+
expect(result!.engine).toBe('MergeTree');
240+
expect(result!.sorting_key).toBe('id, timestamp');
241+
expect(result!.create_local_table_query).toBe(
242+
'CREATE TABLE test_db.local_table ...',
243+
);
244+
// The second query should use cluster() - verify it references system.tables via cluster
245+
const secondQuery = (mockClickhouseClient.query as jest.Mock).mock
246+
.calls[1][0].query;
247+
expect(secondQuery).toContain('cluster(');
248+
expect(secondQuery).toContain('system.tables');
249+
});
250+
195251
it('should use the cache when retrieving table metadata', async () => {
196252
// Setup the mock implementation
197253
mockCache.getOrFetch.mockReset();
@@ -206,7 +262,7 @@ describe('Metadata', () => {
206262

207263
// Setup the cache to return the mock data
208264
mockCache.getOrFetch.mockImplementation((key, queryFn) => {
209-
if (key === 'test_connection.test_db.test_table.metadata') {
265+
if (key === 'test_connection.test_db.test_table.undefined.metadata') {
210266
return Promise.resolve(mockTableMetadata);
211267
}
212268
return queryFn();
@@ -220,7 +276,7 @@ describe('Metadata', () => {
220276

221277
// Verify the cache was called with the right key
222278
expect(mockCache.getOrFetch).toHaveBeenCalledWith(
223-
'test_connection.test_db.test_table.metadata',
279+
'test_connection.test_db.test_table.undefined.metadata',
224280
expect.any(Function),
225281
);
226282

@@ -232,6 +288,117 @@ describe('Metadata', () => {
232288
});
233289
});
234290

291+
describe('getSkipIndices', () => {
292+
beforeEach(() => {
293+
mockCache.getOrFetch.mockImplementation((key, queryFn) => queryFn());
294+
});
295+
296+
it('should query via cluster() for Distributed table skip indices', async () => {
297+
const distributedMetadata = {
298+
database: 'test_db',
299+
name: 'dist_table',
300+
engine: 'Distributed',
301+
engine_full:
302+
"Distributed('my_cluster', 'test_db', 'local_table', rand())",
303+
create_table_query: 'CREATE TABLE test_db.dist_table ...',
304+
};
305+
306+
const skipIndicesData = [
307+
{
308+
name: 'idx_body',
309+
type: 'tokenbf_v1',
310+
typeFull: "tokenbf_v1(tokenizer='splitByNonAlpha')",
311+
expression: 'tokens(lower(Body))',
312+
granularity: '1',
313+
},
314+
];
315+
316+
let callCount = 0;
317+
(mockClickhouseClient.query as jest.Mock).mockImplementation(() => {
318+
callCount++;
319+
return Promise.resolve({
320+
json: jest.fn().mockResolvedValue({
321+
data: callCount === 1 ? [distributedMetadata] : skipIndicesData,
322+
}),
323+
});
324+
});
325+
326+
const result = await metadata.getSkipIndices({
327+
databaseName: 'test_db',
328+
tableName: 'dist_table',
329+
connectionId: 'test_connection',
330+
});
331+
332+
// Two queries: one for table metadata, one via cluster() for skip indices
333+
expect(callCount).toBe(2);
334+
expect(result).toEqual([
335+
{
336+
name: 'idx_body',
337+
type: 'tokenbf_v1',
338+
typeFull: "tokenbf_v1(tokenizer='splitByNonAlpha')",
339+
expression: 'tokens(lower(Body))',
340+
granularity: 1,
341+
},
342+
]);
343+
// The second query should use cluster() for system.data_skipping_indices
344+
const secondQuery = (mockClickhouseClient.query as jest.Mock).mock
345+
.calls[1][0].query;
346+
expect(secondQuery).toContain('cluster(');
347+
expect(secondQuery).toContain('system.data_skipping_indices');
348+
});
349+
350+
it('should query local system.data_skipping_indices for non-Distributed tables', async () => {
351+
const mergeTreeMetadata = {
352+
database: 'test_db',
353+
name: 'local_table',
354+
engine: 'MergeTree',
355+
engine_full: 'MergeTree() ORDER BY id',
356+
};
357+
358+
const skipIndicesData = [
359+
{
360+
name: 'idx_body',
361+
type: 'tokenbf_v1',
362+
typeFull: "tokenbf_v1(tokenizer='splitByNonAlpha')",
363+
expression: 'tokens(lower(Body))',
364+
granularity: '1',
365+
},
366+
];
367+
368+
let callCount = 0;
369+
(mockClickhouseClient.query as jest.Mock).mockImplementation(() => {
370+
callCount++;
371+
return Promise.resolve({
372+
json: jest.fn().mockResolvedValue({
373+
data: callCount === 1 ? [mergeTreeMetadata] : skipIndicesData,
374+
}),
375+
});
376+
});
377+
378+
const result = await metadata.getSkipIndices({
379+
databaseName: 'test_db',
380+
tableName: 'local_table',
381+
connectionId: 'test_connection',
382+
});
383+
384+
expect(callCount).toBe(2);
385+
expect(result).toEqual([
386+
{
387+
name: 'idx_body',
388+
type: 'tokenbf_v1',
389+
typeFull: "tokenbf_v1(tokenizer='splitByNonAlpha')",
390+
expression: 'tokens(lower(Body))',
391+
granularity: 1,
392+
},
393+
]);
394+
// Should NOT use cluster() for non-Distributed tables
395+
const secondQuery = (mockClickhouseClient.query as jest.Mock).mock
396+
.calls[1][0].query;
397+
expect(secondQuery).not.toContain('cluster(');
398+
expect(secondQuery).toContain('system.data_skipping_indices');
399+
});
400+
});
401+
235402
describe('getKeyValues', () => {
236403
const mockChartConfig: BuilderChartConfigWithDateRange = {
237404
from: {

packages/common-utils/src/__tests__/utils.test.ts

Lines changed: 39 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ import {
1717
findJsonExpressions,
1818
formatDate,
1919
getAlignedDateRange,
20+
getDistributedTableArgs,
2021
getFirstOrderingItem,
21-
getLocalTableFromDistributedTable,
2222
isFirstOrderByAscending,
2323
isJsonExpression,
2424
isTimestampExpressionInFirstOrderBy,
@@ -30,7 +30,6 @@ import {
3030
replaceJsonExpressions,
3131
splitAndTrimCSV,
3232
splitAndTrimWithBracket,
33-
TextIndexTokenizer,
3433
} from '../core/utils';
3534

3635
describe('utils', () => {
@@ -1996,61 +1995,85 @@ describe('utils', () => {
19961995
({ engine_full: engineFull }) as any;
19971996

19981997
it('parses a simple Distributed engine_full', () => {
1999-
const result = getLocalTableFromDistributedTable(
1998+
const result = getDistributedTableArgs(
20001999
makeMetadata("Distributed('default', 'mydb', 'local_table', rand())"),
20012000
);
2002-
expect(result).toEqual({ database: 'mydb', table: 'local_table' });
2001+
expect(result).toEqual({
2002+
cluster: 'default',
2003+
database: 'mydb',
2004+
table: 'local_table',
2005+
});
20032006
});
20042007

20052008
it('parses without a sharding key', () => {
2006-
const result = getLocalTableFromDistributedTable(
2009+
const result = getDistributedTableArgs(
20072010
makeMetadata("Distributed('cluster', 'db', 'tbl')"),
20082011
);
2009-
expect(result).toEqual({ database: 'db', table: 'tbl' });
2012+
expect(result).toEqual({
2013+
cluster: 'cluster',
2014+
database: 'db',
2015+
table: 'tbl',
2016+
});
20102017
});
20112018

20122019
it('handles double-quoted identifiers', () => {
2013-
const result = getLocalTableFromDistributedTable(
2020+
const result = getDistributedTableArgs(
20142021
makeMetadata('Distributed("cluster", "my_database", "my_table")'),
20152022
);
2016-
expect(result).toEqual({ database: 'my_database', table: 'my_table' });
2023+
expect(result).toEqual({
2024+
cluster: 'cluster',
2025+
database: 'my_database',
2026+
table: 'my_table',
2027+
});
20172028
});
20182029

20192030
it('handles backtick-quoted identifiers', () => {
2020-
const result = getLocalTableFromDistributedTable(
2031+
const result = getDistributedTableArgs(
20212032
makeMetadata("Distributed('cluster', `mydb`, `local_tbl`, rand())"),
20222033
);
2023-
expect(result).toEqual({ database: 'mydb', table: 'local_tbl' });
2034+
expect(result).toEqual({
2035+
cluster: 'cluster',
2036+
database: 'mydb',
2037+
table: 'local_tbl',
2038+
});
20242039
});
20252040

20262041
it('handles unquoted identifiers', () => {
2027-
const result = getLocalTableFromDistributedTable(
2042+
const result = getDistributedTableArgs(
20282043
makeMetadata('Distributed(cluster, mydb, local_tbl, rand())'),
20292044
);
2030-
expect(result).toEqual({ database: 'mydb', table: 'local_tbl' });
2045+
expect(result).toEqual({
2046+
cluster: 'cluster',
2047+
database: 'mydb',
2048+
table: 'local_tbl',
2049+
});
20312050
});
20322051

20332052
it('returns undefined when engine_full has fewer than 3 args', () => {
2034-
const result = getLocalTableFromDistributedTable(
2053+
const result = getDistributedTableArgs(
20352054
makeMetadata("Distributed('cluster', 'db')"),
20362055
);
20372056
expect(result).toBeUndefined();
20382057
});
20392058

20402059
it('returns undefined when engine_full does not match Distributed pattern', () => {
2041-
const result = getLocalTableFromDistributedTable(
2060+
const result = getDistributedTableArgs(
20422061
makeMetadata('MergeTree() ORDER BY id'),
20432062
);
20442063
expect(result).toBeUndefined();
20452064
});
20462065

20472066
it('handles a complex sharding expression with nested parentheses', () => {
2048-
const result = getLocalTableFromDistributedTable(
2067+
const result = getDistributedTableArgs(
20492068
makeMetadata(
20502069
"Distributed('cluster', 'db', 'tbl', sipHash64(UserID, EventDate))",
20512070
),
20522071
);
2053-
expect(result).toEqual({ database: 'db', table: 'tbl' });
2072+
expect(result).toEqual({
2073+
cluster: 'cluster',
2074+
database: 'db',
2075+
table: 'tbl',
2076+
});
20542077
});
20552078
});
20562079
});

0 commit comments

Comments
 (0)