Skip to content

Commit f9db15b

Browse files
Export analytics queries to Seafowl in addition to tables
Export queries to tables `monthly_user_stats` and `monthly_issue_stats` in the same schema/namespace as the tables. We also export the tables, or at least the few that we explicitly asked to import.
1 parent 8a3d36c commit f9db15b

8 files changed

Lines changed: 338 additions & 65 deletions

File tree

examples/nextjs-import-airbyte-github-export-seafowl/components/ImportExportStepper/ExportLoadingBars.tsx

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,17 @@ export const ExportLoadingBars = () => {
77

88
return (
99
<div className={styles.exportLoadingBars}>
10-
{Array.from(exportedTablesLoading).map(({ tableName, taskId }) => (
11-
<ExportTableLoadingBar
12-
key={taskId}
13-
tableName={tableName}
14-
taskId={taskId}
15-
/>
16-
))}
10+
{Array.from(exportedTablesLoading).map(
11+
({ destinationSchema, destinationTable, sourceQuery, taskId }) => (
12+
<ExportTableLoadingBar
13+
key={taskId}
14+
destinationSchema={destinationSchema}
15+
destinationTable={destinationTable}
16+
sourceQuery={sourceQuery}
17+
taskId={taskId}
18+
/>
19+
)
20+
)}
1721
</div>
1822
);
1923
};

examples/nextjs-import-airbyte-github-export-seafowl/components/ImportExportStepper/ExportPanel.tsx

Lines changed: 61 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,48 +4,92 @@ import { useStepper } from "./StepperContext";
44
import styles from "./ExportPanel.module.css";
55
import { ExportLoadingBars } from "./ExportLoadingBars";
66

7-
import { relevantGitHubTableNames } from "../../lib/config/github-tables";
7+
import { relevantGitHubTableNamesForImport } from "../../lib/config/github-tables";
8+
import { makeQueriesToExport } from "../../lib/config/queries-to-export";
9+
import type {
10+
ExportQueryInput,
11+
ExportTableInput,
12+
StartExportToSeafowlRequestShape,
13+
StartExportToSeafowlResponseData,
14+
} from "../../pages/api/start-export-to-seafowl";
15+
import { useMemo, useCallback } from "react";
816

917
export const ExportPanel = () => {
1018
const [
1119
{ stepperState, exportError, splitgraphRepository, splitgraphNamespace },
1220
dispatch,
1321
] = useStepper();
1422

15-
const handleStartExport = async () => {
23+
const queriesToExport = useMemo<ExportQueryInput[]>(
24+
() =>
25+
makeQueriesToExport({
26+
splitgraphSourceRepository: splitgraphRepository,
27+
splitgraphSourceNamespace: splitgraphNamespace,
28+
seafowlDestinationSchema: `${splitgraphNamespace}/${splitgraphRepository}`,
29+
}),
30+
[splitgraphRepository, splitgraphNamespace]
31+
);
32+
33+
const tablesToExport = useMemo<ExportTableInput[]>(
34+
() =>
35+
relevantGitHubTableNamesForImport.map((tableName) => ({
36+
namespace: splitgraphNamespace,
37+
repository: splitgraphRepository,
38+
table: tableName,
39+
})),
40+
[
41+
splitgraphNamespace,
42+
splitgraphRepository,
43+
relevantGitHubTableNamesForImport,
44+
]
45+
);
46+
47+
const handleStartExport = useCallback(async () => {
1648
try {
1749
const response = await fetch("/api/start-export-to-seafowl", {
1850
method: "POST",
1951
body: JSON.stringify({
20-
tables: relevantGitHubTableNames.map((tableName) => ({
21-
namespace: splitgraphNamespace,
22-
repository: splitgraphRepository,
23-
table: tableName,
24-
})),
25-
}),
52+
tables: tablesToExport,
53+
queries: queriesToExport,
54+
} as StartExportToSeafowlRequestShape),
2655
headers: {
2756
"Content-Type": "application/json",
2857
},
2958
});
30-
const data = await response.json();
59+
const data = (await response.json()) as StartExportToSeafowlResponseData;
60+
61+
if ("error" in data && data["error"]) {
62+
throw new Error(data["error"]);
63+
}
3164

32-
if (!data.tables || !data.tables.length) {
65+
if (!("tables" in data) || !("queries" in data)) {
3366
throw new Error("Response missing tables");
3467
}
3568

3669
dispatch({
3770
type: "start_export",
38-
tables: data.tables.map(
39-
({ tableName, taskId }: { tableName: string; taskId: string }) => ({
40-
taskId,
41-
tableName,
42-
})
43-
),
71+
tables: [
72+
...data["queries"].map(
73+
({ sourceQuery, taskId, destinationSchema, destinationTable }) => ({
74+
taskId,
75+
destinationTable,
76+
destinationSchema,
77+
sourceQuery,
78+
})
79+
),
80+
...data["tables"].map(
81+
({ destinationTable, destinationSchema, taskId }) => ({
82+
taskId,
83+
destinationTable,
84+
destinationSchema,
85+
})
86+
),
87+
],
4488
});
4589
} catch (error) {
4690
dispatch({ type: "export_error", error: error.message });
4791
}
48-
};
92+
}, [queriesToExport, tablesToExport, dispatch]);
4993

5094
return (
5195
<div className={styles.exportPanel}>

examples/nextjs-import-airbyte-github-export-seafowl/components/ImportExportStepper/ExportTableLoadingBar.tsx

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,28 @@ import { useStepper } from "./StepperContext";
33
import styles from "./ExportTableLoadingBar.module.css";
44

55
interface ExportTableLoadingBarProps {
6-
tableName: string;
6+
destinationTable: string;
7+
destinationSchema: string;
8+
sourceQuery?: string;
79
taskId: string;
810
}
911

1012
export const ExportTableLoadingBar = ({
11-
tableName,
13+
destinationTable,
14+
destinationSchema,
15+
sourceQuery,
1216
taskId,
1317
}: React.PropsWithoutRef<ExportTableLoadingBarProps>) => {
1418
const [{ stepperState, exportedTablesLoading }, dispatch] = useStepper();
1519

1620
useEffect(() => {
17-
if (!taskId || !tableName) {
18-
console.log("Don't check export until we have taskId and tableName");
21+
if (!taskId || !destinationTable) {
22+
console.log(
23+
"Don't check export until we have taskId and destinationTable"
24+
);
1925
console.table({
2026
taskId,
21-
tableName,
27+
destinationTable,
2228
});
2329
return;
2430
}
@@ -44,7 +50,12 @@ export const ExportTableLoadingBar = ({
4450
if (data.completed) {
4551
dispatch({
4652
type: "export_table_task_complete",
47-
completedTable: { tableName, taskId },
53+
completedTable: {
54+
destinationTable,
55+
taskId,
56+
destinationSchema,
57+
sourceQuery,
58+
},
4859
});
4960
} else if (data.error) {
5061
if (!data.completed) {
@@ -56,14 +67,14 @@ export const ExportTableLoadingBar = ({
5667
} catch (error) {
5768
dispatch({
5869
type: "export_error",
59-
error: `Error exporting ${tableName}: ${error.message}`,
70+
error: `Error exporting ${destinationTable}: ${error.message}`,
6071
});
6172
}
6273
};
6374

6475
const interval = setInterval(pollExportTask, 3000);
6576
return () => clearInterval(interval);
66-
}, [stepperState, tableName, taskId, dispatch]);
77+
}, [stepperState, destinationTable, taskId, dispatch]);
6778

6879
const isLoading = !!Array.from(exportedTablesLoading).find(
6980
(t) => t.taskId === taskId
@@ -73,10 +84,10 @@ export const ExportTableLoadingBar = ({
7384
<div className={styles.exportTableLoadingBar}>
7485
<div className={styles.loadingBar}>
7586
{isLoading
76-
? `Loading ${tableName}...`
77-
: `Successfully exported ${tableName}`}
87+
? `Loading ${destinationTable}...`
88+
: `Successfully exported ${destinationTable}`}
7889
</div>
79-
<div className={styles.tableName}>{tableName}</div>
90+
<div className={styles.destinationTable}>{destinationTable}</div>
8091
</div>
8192
);
8293
};

examples/nextjs-import-airbyte-github-export-seafowl/components/ImportExportStepper/stepper-states.ts

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,12 @@ import { ParsedUrlQuery } from "querystring";
33
import { useEffect, useReducer } from "react";
44
export type GitHubRepository = { namespace: string; repository: string };
55

6-
type ExportTable = { tableName: string; taskId: string };
6+
type ExportTable = {
7+
destinationSchema: string;
8+
destinationTable: string;
9+
taskId: string;
10+
sourceQuery?: string;
11+
};
712

813
export type StepperState = {
914
stepperState:
@@ -210,8 +215,18 @@ const stepperReducer = (
210215
const exportedTablesLoading = new Set<ExportTable>();
211216
const exportedTablesCompleted = new Set<ExportTable>();
212217

213-
for (const { tableName, taskId } of tables) {
214-
exportedTablesLoading.add({ tableName, taskId });
218+
for (const {
219+
destinationTable,
220+
destinationSchema,
221+
sourceQuery,
222+
taskId,
223+
} of tables) {
224+
exportedTablesLoading.add({
225+
destinationTable,
226+
destinationSchema,
227+
sourceQuery,
228+
taskId,
229+
});
215230
}
216231

217232
return {

examples/nextjs-import-airbyte-github-export-seafowl/lib/config/github-tables.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Note that Airbyte will still import tables that depend on these tables due
88
* to foreign keys, and will also import Airbyte metadata tables.
99
*/
10-
export const relevantGitHubTableNames = `commits
10+
export const relevantGitHubTableNamesForImport = `commits
1111
comments
1212
pull_requests
1313
pull_request_stats
@@ -17,12 +17,12 @@ issue_reactions`
1717

1818
/**
1919
* List of "downstream" GitHub table names that will be imported by default by
20-
* the `airbyte-github` connector, given the list of `relevantGitHubTableNames`,
20+
* the `airbyte-github` connector, given the list of `relevantGitHubTableNamesForImport`,
2121
* because they're either an Airbyte meta table or a table that depends on
2222
* one of the "relevant" tables.
2323
*
2424
* This is manually curated and might not be totally accurate. It's up to date
25-
* given the following list of `relevantGitHubTableNames`:
25+
* given the following list of `relevantGitHubTableNamesForImport`:
2626
*
2727
* ```
2828
* commits
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
/**
 * Return a list of queries to export from Splitgraph to Seafowl, given the
 * source repository (where the GitHub data was imported into), and the
 * destination schema (where the data will be exported to at Seafowl).
 *
 * Two queries are returned, both targeting `seafowlDestinationSchema`:
 * - `monthly_user_stats`: per-month, per-username aggregates combined from the
 *   `commits`, `comments`, `pull_requests` and `pull_request_stats` tables.
 * - `monthly_issue_stats`: per-issue, per-month reaction counts computed from
 *   the `issue_reactions` table.
 *
 * NOTE(review): the `comments` CTE classifies rows by matching `issue_url`
 * against '/pull/' and '/issue/'. Confirm these patterns against the actual
 * URL shape stored in that column (GitHub URLs commonly use '/issues/').
 *
 * @param splitgraphSourceRepository - Repository part of the source image.
 * @param splitgraphSourceNamespace - Namespace part of the source image.
 * @param seafowlDestinationSchema - Schema name assigned to every returned
 *   query's `destinationSchema`.
 * @param splitgraphSourceImageHashOrTag - Image hash or tag to read from;
 *   defaults to `"latest"`.
 * @returns The queries, each with its destination schema and table.
 */
export const makeQueriesToExport = ({
  splitgraphSourceRepository,
  splitgraphSourceNamespace,
  seafowlDestinationSchema,
  splitgraphSourceImageHashOrTag = "latest",
}: {
  splitgraphSourceNamespace: string;
  splitgraphSourceRepository: string;
  seafowlDestinationSchema: string;
  splitgraphSourceImageHashOrTag?: string;
}): {
  sourceQuery: string;
  destinationSchema: string;
  destinationTable: string;
}[] => {
  // The source image is referenced as one double-quoted SQL identifier of the
  // form "namespace/repository:tag". Embedded double quotes are doubled so a
  // stray `"` in any part cannot terminate the identifier early.
  const sourceImage = `${splitgraphSourceNamespace}/${splitgraphSourceRepository}:${splitgraphSourceImageHashOrTag}`;
  const sourceSchema = `"${sourceImage.replace(/"/g, '""')}"`;

  return [
    {
      destinationSchema: seafowlDestinationSchema,
      destinationTable: "monthly_user_stats",
      sourceQuery: `
WITH

commits AS (
    SELECT
        date_trunc('month', created_at) AS created_at_month,
        author->>'login' AS username,
        count(*) as no_commits
    FROM ${sourceSchema}.commits
    GROUP BY 1, 2
),

comments AS (
    SELECT
        date_trunc('month', created_at) AS created_at_month,
        "user"->>'login' AS username,
        count(*) filter (where exists(select regexp_matches(issue_url, '.*/pull/.*'))) as no_pull_request_comments,
        count(*) filter (where exists(select regexp_matches(issue_url, '.*/issue/.*'))) as no_issue_comments,
        sum(length(body)) as total_comment_length
    FROM ${sourceSchema}.comments
    GROUP BY 1, 2
),

pull_requests AS (
    WITH pull_request_creator AS (
        SELECT id, "user"->>'login' AS username
        FROM ${sourceSchema}.pull_requests
    )

    SELECT
        date_trunc('month', updated_at) AS created_at_month,
        username,
        count(*) filter (where merged = true) AS merged_pull_requests,
        count(*) AS total_pull_requests,
        sum(additions::integer) filter (where merged = true) AS lines_added,
        sum(deletions::integer) filter (where merged = true) AS lines_deleted
    FROM ${sourceSchema}.pull_request_stats
    INNER JOIN pull_request_creator USING (id)
    GROUP BY 1, 2
),

all_months_users AS (
    SELECT DISTINCT created_at_month, username FROM commits
    UNION SELECT DISTINCT created_at_month, username FROM comments
    UNION SELECT DISTINCT created_at_month, username FROM pull_requests
),

user_stats AS (
    SELECT
        amu.created_at_month,
        amu.username,
        COALESCE(cmt.no_commits, 0) AS no_commits,
        COALESCE(cmnt.no_pull_request_comments, 0) AS no_pull_request_comments,
        COALESCE(cmnt.no_issue_comments, 0) AS no_issue_comments,
        COALESCE(cmnt.total_comment_length, 0) AS total_comment_length,
        COALESCE(pr.merged_pull_requests, 0) AS merged_pull_requests,
        COALESCE(pr.total_pull_requests, 0) AS total_pull_requests,
        COALESCE(pr.lines_added, 0) AS lines_added,
        COALESCE(pr.lines_deleted, 0) AS lines_deleted

    FROM all_months_users amu
        LEFT JOIN commits cmt ON amu.created_at_month = cmt.created_at_month AND amu.username = cmt.username
        LEFT JOIN comments cmnt ON amu.created_at_month = cmnt.created_at_month AND amu.username = cmnt.username
        LEFT JOIN pull_requests pr ON amu.created_at_month = pr.created_at_month AND amu.username = pr.username

    ORDER BY created_at_month ASC, username ASC
)

SELECT * FROM user_stats;
`,
    },
    {
      destinationSchema: seafowlDestinationSchema,
      destinationTable: "monthly_issue_stats",
      sourceQuery: `
SELECT
    issue_number,
    date_trunc('month', created_at::TIMESTAMP) as created_at_month,
    COUNT(*) AS total_reacts,
    COUNT(*) FILTER (WHERE content = '+1') AS no_plus_one,
    COUNT(*) FILTER (WHERE content = '-1') AS no_minus_one,
    COUNT(*) FILTER (WHERE content = 'laugh') AS no_laugh,
    COUNT(*) FILTER (WHERE content = 'confused') AS no_confused,
    COUNT(*) FILTER (WHERE content = 'heart') AS no_heart,
    COUNT(*) FILTER (WHERE content = 'hooray') AS no_hooray,
    COUNT(*) FILTER (WHERE content = 'rocket') AS no_rocket,
    COUNT(*) FILTER (WHERE content = 'eyes') AS no_eyes
FROM
    ${sourceSchema}."issue_reactions"
GROUP BY 1, 2 ORDER BY 2, 3 DESC;
`,
    },
  ];
};

0 commit comments

Comments
 (0)