Skip to content

Commit 6c42ff9

Browse files
Hardcode list of "relevant" table names for ingestion from GitHub
The `airbyte-github` plugin imports 163 tables into Splitgraph by default, but we only need a few of them for the analytics queries we want to make in the demo app. So, hardcode the list of those "relevant" tables. For reference, also hardcode the list of all 163 tables, and the list of the 43 tables that actually get imported when only the relevant tables are requested (the extra tables are imported because they either depend on a relevant table via a foreign key relationship, or they're an Airbyte meta table). For the 43 tables, see this recent import of `splitgraph/seafowl`: https://www.splitgraph.com/miles/github-import-splitgraph-seafowl/20230526-224723/-/tables (this import took 3 minutes and 40 seconds to load into Splitgraph).
1 parent 38f922e commit 6c42ff9

3 files changed

Lines changed: 256 additions & 14 deletions

File tree

examples/nextjs-import-airbyte-github-export-seafowl/components/ImportExportStepper/ExportPanel.tsx

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,7 @@ import { useStepper } from "./StepperContext";
44
import styles from "./ExportPanel.module.css";
55
import { ExportLoadingBars } from "./ExportLoadingBars";
66

7-
// TODO: don't hardcode this? or at least hardcode all of them and make it official
8-
const importedTableNames = [
9-
"stargazers",
10-
// NOTE: If we only specify stargazers, then stargazers_user is still included since it's a dependent table
11-
"stargazers_user",
12-
];
7+
import { relevantGitHubTableNames } from "../../lib/config";
138

149
export const ExportPanel = () => {
1510
const [
@@ -22,7 +17,7 @@ export const ExportPanel = () => {
2217
const response = await fetch("/api/start-export-to-seafowl", {
2318
method: "POST",
2419
body: JSON.stringify({
25-
tables: importedTableNames.map((tableName) => ({
20+
tables: relevantGitHubTableNames.map((tableName) => ({
2621
namespace: splitgraphNamespace,
2722
repository: splitgraphRepository,
2823
table: tableName,
Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
/**
2+
* List of GitHub table names that we want to import with the Airbyte connector
3+
* into Splitgraph. By default, there are 163 tables available. But we only want
4+
* some of them, and by selecting them explicitly, the import will be much faster,
5+
* especially for large repositories.
6+
*
7+
* Note that Airbyte will still import tables that depend on these tables due
8+
* to foreign keys, and will also import Airbyte metadata tables.
9+
*/
10+
export const relevantGitHubTableNames = `commits
11+
comments
12+
pull_requests
13+
pull_request_stats
14+
issue_reactions`
15+
.split("\n")
16+
.filter((t) => !!t);
17+
18+
/**
19+
* List of "downstream" GitHub table names that will be imported by default by
20+
* the `airbyte-github` connector, given the list of `relevantGitHubTableNames`,
21+
* because they're either an Airbyte meta table or a table that depends on
22+
* one of the "relevant" tables.
23+
*
24+
* This is manually curated and might not be totally accurate. It's up to date
25+
* given the following list of `relevantGitHubTableNames`:
26+
*
27+
* ```
28+
* commits
29+
* comments
30+
* pull_requests
31+
* pull_request_stats
32+
* issue_reactions
33+
* ```
34+
*/
35+
export const expectedImportedTableNames = `_airbyte_raw_comments
36+
_airbyte_raw_commits
37+
_airbyte_raw_issue_reactions
38+
_airbyte_raw_pull_request_stats
39+
_airbyte_raw_pull_requests
40+
_sg_ingestion_state
41+
comments
42+
comments_user
43+
commits
44+
commits_author
45+
commits_commit
46+
commits_commit_author
47+
commits_commit_committer
48+
commits_commit_tree
49+
commits_commit_verification
50+
commits_committer
51+
commits_parents
52+
issue_reactions
53+
issue_reactions_user
54+
pull_request_stats
55+
pull_request_stats_merged_by
56+
pull_requests
57+
pull_requests__links
58+
pull_requests__links_comments
59+
pull_requests__links_commits
60+
pull_requests__links_html
61+
pull_requests__links_issue
62+
pull_requests__links_review_comment
63+
pull_requests__links_review_comments
64+
pull_requests__links_self
65+
pull_requests__links_statuses
66+
pull_requests_assignee
67+
pull_requests_assignees
68+
pull_requests_auto_merge
69+
pull_requests_auto_merge_enabled_by
70+
pull_requests_base
71+
pull_requests_head
72+
pull_requests_labels
73+
pull_requests_milestone
74+
pull_requests_milestone_creator
75+
pull_requests_requested_reviewers
76+
pull_requests_requested_teams
77+
pull_requests_user
78+
`;
79+
80+
/**
81+
* This is the list of all tables imported by Airbyte by default when no tables
82+
* are explicitly provided to the plugin.
83+
*
84+
* This is not consumed anywhere, but is useful as a reference, and if you'd
85+
* like to extend or modify the code, you can choose tables from here to include.
86+
*/
87+
export const allGitHubTableNames = `_airbyte_raw_assignees
88+
_airbyte_raw_branches
89+
_airbyte_raw_collaborators
90+
_airbyte_raw_comments
91+
_airbyte_raw_commit_comment_reactions
92+
_airbyte_raw_commit_comments
93+
_airbyte_raw_commits
94+
_airbyte_raw_deployments
95+
_airbyte_raw_events
96+
_airbyte_raw_issue_comment_reactions
97+
_airbyte_raw_issue_events
98+
_airbyte_raw_issue_labels
99+
_airbyte_raw_issue_milestones
100+
_airbyte_raw_issue_reactions
101+
_airbyte_raw_issues
102+
_airbyte_raw_organizations
103+
_airbyte_raw_project_cards
104+
_airbyte_raw_project_columns
105+
_airbyte_raw_projects
106+
_airbyte_raw_pull_request_comment_reactions
107+
_airbyte_raw_pull_request_commits
108+
_airbyte_raw_pull_request_stats
109+
_airbyte_raw_pull_requests
110+
_airbyte_raw_releases
111+
_airbyte_raw_repositories
112+
_airbyte_raw_review_comments
113+
_airbyte_raw_reviews
114+
_airbyte_raw_stargazers
115+
_airbyte_raw_tags
116+
_airbyte_raw_team_members
117+
_airbyte_raw_team_memberships
118+
_airbyte_raw_teams
119+
_airbyte_raw_users
120+
_airbyte_raw_workflow_jobs
121+
_airbyte_raw_workflow_runs
122+
_airbyte_raw_workflows
123+
_sg_ingestion_state
124+
assignees
125+
branches
126+
branches_commit
127+
branches_protection
128+
branches_protection_required_status_checks
129+
collaborators
130+
collaborators_permissions
131+
comments
132+
comments_user
133+
commit_comment_reactions
134+
commit_comment_reactions_user
135+
commit_comments
136+
commit_comments_user
137+
commits
138+
commits_author
139+
commits_commit
140+
commits_commit_author
141+
commits_commit_committer
142+
commits_commit_tree
143+
commits_commit_verification
144+
commits_committer
145+
commits_parents
146+
deployments
147+
deployments_creator
148+
events
149+
events_actor
150+
events_org
151+
events_repo
152+
issue_comment_reactions
153+
issue_comment_reactions_user
154+
issue_events
155+
issue_events_actor
156+
issue_events_issue
157+
issue_events_issue_user
158+
issue_labels
159+
issue_milestones
160+
issue_milestones_creator
161+
issue_reactions
162+
issue_reactions_user
163+
issues
164+
issues_assignee
165+
issues_assignees
166+
issues_labels
167+
issues_milestone
168+
issues_milestone_creator
169+
issues_pull_request
170+
issues_user
171+
organizations
172+
organizations_plan
173+
project_cards
174+
project_cards_creator
175+
project_columns
176+
projects
177+
projects_creator
178+
pull_request_comment_reactions
179+
pull_request_comment_reactions_user
180+
pull_request_commits
181+
pull_request_commits_author
182+
pull_request_commits_commit
183+
pull_request_commits_commit_author
184+
pull_request_commits_commit_committer
185+
pull_request_commits_commit_tree
186+
pull_request_commits_commit_verification
187+
pull_request_commits_committer
188+
pull_request_commits_parents
189+
pull_request_stats
190+
pull_request_stats_merged_by
191+
pull_requests
192+
pull_requests__links
193+
pull_requests__links_comments
194+
pull_requests__links_commits
195+
pull_requests__links_html
196+
pull_requests__links_issue
197+
pull_requests__links_review_comment
198+
pull_requests__links_review_comments
199+
pull_requests__links_self
200+
pull_requests__links_statuses
201+
pull_requests_assignee
202+
pull_requests_assignees
203+
pull_requests_auto_merge
204+
pull_requests_auto_merge_enabled_by
205+
pull_requests_base
206+
pull_requests_head
207+
pull_requests_labels
208+
pull_requests_milestone
209+
pull_requests_milestone_creator
210+
pull_requests_requested_reviewers
211+
pull_requests_requested_teams
212+
pull_requests_user
213+
releases
214+
releases_assets
215+
releases_author
216+
repositories
217+
repositories_license
218+
repositories_owner
219+
repositories_permissions
220+
review_comments
221+
review_comments__links
222+
review_comments__links_html
223+
review_comments__links_pull_request
224+
review_comments__links_self
225+
review_comments_user
226+
reviews
227+
reviews__links
228+
reviews__links_html
229+
reviews__links_pull_request
230+
reviews_user
231+
stargazers
232+
stargazers_user
233+
tags
234+
tags_commit
235+
team_members
236+
team_memberships
237+
teams
238+
users
239+
workflow_jobs
240+
workflow_jobs_steps
241+
workflow_runs
242+
workflow_runs_head_commit
243+
workflow_runs_head_commit_author
244+
workflow_runs_head_commit_committer
245+
workflow_runs_head_repository
246+
workflow_runs_head_repository_owner
247+
workflow_runs_repository
248+
workflow_runs_repository_owner
249+
workflows`;

examples/nextjs-import-airbyte-github-export-seafowl/pages/api/start-import-from-github.ts

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import {
33
makeAuthenticatedSplitgraphDb,
44
claimsFromJWT,
55
} from "../../lib/backend/splitgraph-db";
6+
import { relevantGitHubTableNames } from "../../lib/config";
67

78
const GITHUB_PAT_SECRET = process.env.GITHUB_PAT_SECRET;
89

@@ -100,14 +101,11 @@ const startImport = async ({
100101
namespace: splitgraphNamespace,
101102
repository: splitgraphDestinationRepository,
102103
tables: [
103-
{
104-
name: "stargazers",
105-
options: {
106-
airbyte_cursor_field: ["starred_at"],
107-
airbyte_primary_key_field: [],
108-
},
104+
...relevantGitHubTableNames.map((t) => ({
105+
name: t,
106+
options: {},
109107
schema: [],
110-
},
108+
})),
111109
],
112110
},
113111
{ defer: true }

0 commit comments

Comments
 (0)