Skip to content

Commit b253e10

Browse files
Add backend config and API routes for starting, awaiting import task
1 parent cff4b4e commit b253e10

9 files changed

Lines changed: 355 additions & 1 deletion

File tree

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# IMPORTANT: Put your own values in `.env.local` (a git-ignored file) when running this locally
2+
# Configure them in Vercel settings when running in production
3+
# This file is mostly to show which variables exist, since it's the only one checked into the repo.
4+
# SEE: https://nextjs.org/docs/app/building-your-application/configuring/environment-variables
5+
6+
# Create your own API key and secret: https://www.splitgraph.com/connect
7+
SPLITGRAPH_API_KEY="********************************"
8+
SPLITGRAPH_API_SECRET="********************************"
9+
10+
# Create a GitHub token that can query the repositories you want to connect
11+
# For example, a token with read-only access to public repos is sufficient
12+
# CREATE ONE HERE: https://github.com/settings/personal-access-tokens/new
13+
GITHUB_PAT_SECRET="github_pat_**********************_***********************************************************"
14+
15+
# OPTIONAL: Set this environment variable to a proxy address to capture requests from API routes
16+
# e.g. To intercept requests to Splitgraph API sent from madatdata libraries in API routes
17+
# You can also set this by running: yarn dev-mitm (see package.json)
18+
# MITMPROXY_ADDRESS="http://localhost:7979"
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,4 @@
11
.next
2+
.env.local
3+
.env.*.local
4+
!.env.test.local
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/**
 * Typings for the environment variables read by this app's server-side code.
 *
 * Declared as an ambient (`declare`) namespace: top-level declarations in a
 * `.d.ts` file must carry `declare` or `export`, and an ambient declaration
 * is what merges into the global `NodeJS.ProcessEnv` interface.
 */
declare namespace NodeJS {
  interface ProcessEnv {
    /**
     * The API key of an existing Splitgraph account.
     *
     * This should be defined in `.env.local` (a git-ignored file) or in Vercel settings.
     *
     * Get credentials: https://www.splitgraph.com/connect
     */
    SPLITGRAPH_API_KEY: string;

    /**
     * The API secret of an existing Splitgraph account.
     *
     * This should be defined in `.env.local` (a git-ignored file) or in Vercel settings.
     *
     * Get credentials: https://www.splitgraph.com/connect
     */
    SPLITGRAPH_API_SECRET: string;

    /**
     * A GitHub personal access token that can be used for importing repositories.
     * It will be passed to the Airbyte connector that runs on Splitgraph servers
     * and ingests data from GitHub into Splitgraph.
     *
     * This should be defined in `.env.local` (a git-ignored file) or in Vercel settings.
     *
     * Create one here: https://github.com/settings/personal-access-tokens/new
     */
    GITHUB_PAT_SECRET: string;

    /**
     * Optional environment variable containing the address of a proxy instance
     * through which to forward requests from API routes. See next.config.js
     * for where it is set up.
     *
     * This is useful for debugging and development.
     */
    MITMPROXY_ADDRESS?: string;
  }
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import { makeSplitgraphDb } from "@madatdata/core";
2+
3+
// TODO: fix plugin exports
4+
import { makeDefaultPluginList } from "@madatdata/db-splitgraph";
5+
import { defaultSplitgraphHost } from "@madatdata/core";
6+
7+
// Splitgraph credentials, read from the environment once when this module loads.
const { SPLITGRAPH_API_KEY, SPLITGRAPH_API_SECRET } = process.env;

// Refuse to load the module at all when either credential is absent or empty.
if (!(SPLITGRAPH_API_KEY && SPLITGRAPH_API_SECRET)) {
  throw new Error(
    "Environment variable SPLITGRAPH_API_KEY or SPLITGRAPH_API_SECRET is not set. See env-vars.d.ts for instructions."
  );
}

/** Credential shape accepted by `makeSplitgraphDb`, derived from its first parameter. */
type SplitgraphAuthenticatedCredential = Parameters<
  typeof makeSplitgraphDb
>[0]["authenticatedCredential"];

// Non-anonymous credential shared by every client this module creates.
const authenticatedCredential: SplitgraphAuthenticatedCredential = {
  apiKey: SPLITGRAPH_API_KEY,
  apiSecret: SPLITGRAPH_API_SECRET,
  anonymous: false,
};
24+
25+
// TODO: The access token can expire and silently fail?
26+
27+
/**
 * Create a Splitgraph DB client configured with the module-level credential.
 *
 * The same credential is also handed to the default plugin list, which is
 * pointed at the default Splitgraph GraphQL endpoint. `makeSplitgraphDb` is
 * invoked on every call; nothing is cached here.
 */
export const makeAuthenticatedSplitgraphDb = () => {
  const plugins = makeDefaultPluginList({
    graphqlEndpoint: defaultSplitgraphHost.baseUrls.gql,
    authenticatedCredential,
  });

  return makeSplitgraphDb({ authenticatedCredential, plugins });
};
35+
36+
// TODO: export this utility function from the library
37+
/**
 * Decode one base64 / base64url encoded segment into a UTF-8 string.
 *
 * JWT segments use the URL-safe alphabet (`-`, `_`) without padding, which
 * `atob` does not accept, so the input is normalised to padded standard
 * base64 before decoding. Uses `Buffer` when it exists, otherwise falls back
 * to `atob` and decodes the resulting bytes as UTF-8.
 */
const fromBase64 = (input: string) => {
  const standardAlphabet = input.replace(/-/g, "+").replace(/_/g, "/");
  const padded = standardAlphabet.padEnd(
    Math.ceil(standardAlphabet.length / 4) * 4,
    "="
  );

  if (globalThis.Buffer) {
    return Buffer.from(padded, "base64").toString("utf8");
  }

  // atob yields one char per byte; re-decode those bytes as UTF-8 so that
  // non-ASCII claim values are not mangled.
  const binary = atob(padded);
  return new TextDecoder().decode(
    Uint8Array.from(binary, (ch) => ch.charCodeAt(0))
  );
};

/**
 * Extract the claims (payload) object from a JWT without verifying it.
 *
 * Only the payload segment is decoded; the header and the signature are
 * ignored (the signature is not JSON and need not be decodable).
 *
 * @param jwt - Compact-serialised token (`header.payload.signature`).
 * @returns The parsed claims, or `{}` when `jwt` is missing/empty or does not
 *   have at least three dot-separated segments. Always returning an object
 *   keeps `const { username } = claimsFromJWT(...)` safe for callers.
 * @throws SyntaxError when the payload segment is not valid JSON.
 */
// TODO: export this utility function from the library
export const claimsFromJWT = (jwt?: string) => {
  if (!jwt) {
    return {};
  }

  const segments = jwt.split(".");
  const encodedClaims = segments[1];
  if (segments.length < 3 || !encodedClaims) {
    return {};
  }

  // A payload of literal `null` would break destructuring callers too.
  return JSON.parse(fromBase64(encodedClaims)) ?? {};
};
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
const { ProxyAgent, setGlobalDispatcher } = require("undici");
2+
3+
// When MITMPROXY_ADDRESS is set (e.g. by running `yarn dev-mitm`), install a
// global undici ProxyAgent pointing at that address. Does nothing otherwise.
// NOTE(FIXME): not all madatdata requests get sent through here for some reason
const setupProxy = () => {
  const proxyAddress = process.env.MITMPROXY_ADDRESS;
  if (!proxyAddress) {
    return;
  }

  console.log("MITM SETUP:", proxyAddress);

  // Respect an explicitly configured GLOBAL_AGENT_HTTP_PROXY; only fill it in when empty.
  if (!process.env.GLOBAL_AGENT_HTTP_PROXY) {
    process.env["GLOBAL_AGENT_HTTP_PROXY"] = proxyAddress;
  }

  // NOTE(review): presumably needed so the intercepting proxy's certificate is
  // accepted; this applies to the whole process, so keep it to local debugging.
  process.env["NODE_TLS_REJECT_UNAUTHORIZED"] = "0";

  setGlobalDispatcher(
    new ProxyAgent({
      uri: proxyAddress,
      connect: {
        rejectUnauthorized: false,
        requestCert: false,
      },
    })
  );
};
30+
31+
setupProxy();
32+
33+
module.exports = {};

examples/nextjs-import-airbyte-github-export-seafowl/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
"private": true,
33
"scripts": {
44
"dev": "yarn next",
5+
"dev-mitm": "MITMPROXY_ADDRESS=http://localhost:7979 yarn next",
56
"build": "yarn next build",
67
"start": "yarn next start"
78
},
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
import type { NextApiRequest, NextApiResponse } from "next";
2+
import { makeAuthenticatedSplitgraphDb } from "../../lib-backend/splitgraph-db";
3+
import type { DeferredSplitgraphImportTask } from "@madatdata/db-splitgraph/plugins/importers/splitgraph-base-import-plugin";
4+
5+
/** Body sent when the task could be polled: its completion flag and job status. */
type ImportPollStatus = {
  completed: boolean;
  jobStatus: DeferredSplitgraphImportTask["response"]["jobStatus"];
};

/** Body sent when the request was invalid or polling failed. */
type ImportPollFailure = { error: string; completed: false };

/** Everything this route may respond with. */
type ResponseData = ImportPollStatus | ImportPollFailure;
11+
12+
/**
 * API route: report the current status of a previously started import task.
 *
 * Expects a JSON body containing `taskId`, `splitgraphNamespace` and
 * `splitgraphRepository`. Responds with:
 * - 400 and `{ error, completed: false }` when any of those keys is missing
 *   (including when the request has no body at all) or when polling fails;
 * - 200 and `{ completed, jobStatus }` otherwise.
 *
 * To manually send a request, example:

```bash
curl -i \
  -H "Content-Type: application/json" http://localhost:3000/api/await-import-from-github \
  -d '{ "taskId": "xxxx", "splitgraphNamespace": "xxx", "splitgraphRepository": "yyy" }'
```
 */
export default async function handler(
  req: NextApiRequest,
  res: NextApiResponse<ResponseData>
) {
  // A request without a (parsed) body must produce a 400, not a TypeError.
  const body = req.body ?? {};

  const missing = [
    "taskId",
    "splitgraphNamespace",
    "splitgraphRepository",
  ].filter((expKey) => !body[expKey]);

  if (missing.length > 0) {
    res.status(400).json({
      error: `Missing required keys: ${missing.join(", ")}`,
      completed: false,
    });
    return;
  }

  const { taskId, splitgraphNamespace, splitgraphRepository } = body;

  try {
    const maybeCompletedTask = await pollImport({
      splitgraphTaskId: taskId,
      splitgraphDestinationNamespace: splitgraphNamespace,
      splitgraphDestinationRepository: splitgraphRepository,
    });

    // Surface task-level errors through the same path as thrown ones.
    if (maybeCompletedTask.error) {
      throw new Error(JSON.stringify(maybeCompletedTask.error));
    }

    res.status(200).json({
      completed: maybeCompletedTask.completed,
      jobStatus: maybeCompletedTask.jobStatus,
    });
  } catch (err) {
    // Anything can be thrown, so only read `.message` from real Error objects.
    res.status(400).json({
      error: err instanceof Error ? err.message : String(err),
      completed: false,
    });
  }
}
61+
62+
/**
 * Poll Splitgraph once for the state of a deferred import task.
 *
 * Creates an authenticated client, fetches an access token, then polls the
 * task identified by `splitgraphTaskId` in the given namespace/repository.
 *
 * @returns `completed` (false when the task reports nothing), the task's
 *   `jobStatus`, and `error` (normalised to `undefined` when absent).
 */
const pollImport = async ({
  splitgraphTaskId,
  splitgraphDestinationNamespace,
  splitgraphDestinationRepository,
}: {
  splitgraphDestinationNamespace: string;
  splitgraphDestinationRepository: string;
  splitgraphTaskId: string;
}) => {
  const db = makeAuthenticatedSplitgraphDb();

  // NOTE: We must call this, or else requests will fail silently
  await db.fetchAccessToken();

  const polledTask = (await db.pollDeferredTask("csv", {
    taskId: splitgraphTaskId,
    namespace: splitgraphDestinationNamespace,
    repository: splitgraphDestinationRepository,
  })) as DeferredSplitgraphImportTask;

  // NOTE: We do not include the jobLog, in case it could leak the GitHub PAT
  // (remember we're using our PAT on behalf of the users of this app)
  const completed = polledTask?.completed ?? false;
  const jobStatus = polledTask?.response.jobStatus;
  const error = polledTask?.error ?? undefined;

  return { completed, jobStatus, error };
};
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
import type { NextApiRequest, NextApiResponse } from "next";
2+
import {
3+
makeAuthenticatedSplitgraphDb,
4+
claimsFromJWT,
5+
} from "../../lib-backend/splitgraph-db";
6+
7+
// GitHub personal access token, read from the environment once at module load.
// It is sent as `personal_access_token` in the "airbyte-github" import request.
const GITHUB_PAT_SECRET = process.env.GITHUB_PAT_SECRET;
8+
9+
/** Namespace and repository on Splitgraph that the import writes into. */
type ImportDestination = {
  splitgraphNamespace: string;
  splitgraphRepository: string;
};

/** Body sent when the import task was started. */
type ImportStarted = {
  destination: ImportDestination;
  taskId: string;
};

/** Body sent when the request was rejected or starting the import failed. */
type ImportRejected = { error: string };

/** Everything this route may respond with. */
type ResponseData = ImportStarted | ImportRejected;
18+
19+
/**
 * API route: start importing a GitHub repository into Splitgraph.
 *
 * Expects a JSON body with `githubSourceRepository` (required) and optionally
 * `splitgraphDestinationRepository` and `githubStartDate`. The request is
 * validated before any Splitgraph call is made. Responds with:
 * - 400 and `{ error }` when the body is invalid or starting the import fails;
 * - 200 and `{ destination, taskId }` once the deferred task has been created.
 *
 * To manually send a request, example:

```bash
curl -i \
  -H "Content-Type: application/json" http://localhost:3000/api/start-import-from-github \
  -d '{ "githubSourceRepository": "splitgraph/seafowl", "splitgraphDestinationRepository": "import-via-nextjs" }'
```
 */
export default async function handler(
  req: NextApiRequest,
  res: NextApiResponse<ResponseData>
) {
  // A request without a (parsed) body must produce a 400, not a TypeError.
  const body = req.body ?? {};
  const { githubSourceRepository } = body;

  if (!githubSourceRepository) {
    res.status(400).json({ error: "githubSourceRepository is required" });
    return;
  }

  // The value is used with string methods below and sent upstream as-is.
  if (typeof githubSourceRepository !== "string") {
    res.status(400).json({ error: "githubSourceRepository must be a string" });
    return;
  }

  const splitgraphDestinationRepository =
    body.splitgraphDestinationRepository ??
    `github-import-${githubSourceRepository.replace(/\//g, "-")}`;

  try {
    // Token fetching lives inside the try so that an authentication failure is
    // reported as a JSON error instead of an unhandled rejection.
    const db = makeAuthenticatedSplitgraphDb();
    const { username } = claimsFromJWT((await db.fetchAccessToken())?.token);

    if (!username) {
      throw new Error(
        "Could not determine the Splitgraph username from the access token"
      );
    }

    const taskId = await startImport({
      db,
      githubSourceRepository,
      splitgraphDestinationRepository,
      githubStartDate: body.githubStartDate,
    });

    res.status(200).json({
      destination: {
        splitgraphNamespace: username,
        splitgraphRepository: splitgraphDestinationRepository,
      },
      taskId,
    });
  } catch (err) {
    // Anything can be thrown, so only read `.message` from real Error objects.
    res.status(400).json({
      error: err instanceof Error ? err.message : String(err),
    });
  }
}
66+
67+
/**
 * Start a deferred "airbyte-github" import of the `stargazers` table and
 * return the id of the created task.
 *
 * The destination namespace is the `username` claim of the access token
 * fetched through `db`; the destination repository is given by the caller.
 *
 * @returns The `taskId` reported by `db.importData`.
 * @throws Error when `GITHUB_PAT_SECRET` is not configured, so a missing token
 *   is reported up front instead of being sent upstream as `undefined`.
 */
const startImport = async ({
  db,
  githubSourceRepository,
  splitgraphDestinationRepository,
  githubStartDate,
}: {
  db: ReturnType<typeof makeAuthenticatedSplitgraphDb>;
  githubSourceRepository: string;
  splitgraphDestinationRepository: string;
  /**
   * Optional start date for ingestion, must be in format like: 2021-06-01T00:00:00Z
   * Defaults to 2020-01-01T00:00:00Z
   * */
  githubStartDate?: string;
}) => {
  if (!GITHUB_PAT_SECRET) {
    throw new Error(
      "Environment variable GITHUB_PAT_SECRET is not set." +
        " See env-vars.d.ts for instructions."
    );
  }

  const { username: splitgraphNamespace } = claimsFromJWT(
    (await db.fetchAccessToken()).token
  );

  const { taskId } = await db.importData(
    "airbyte-github",
    {
      credentials: {
        credentials: {
          personal_access_token: GITHUB_PAT_SECRET,
        },
      },
      params: {
        repository: githubSourceRepository,
        start_date: githubStartDate ?? "2020-01-01T00:00:00Z",
      },
    },
    {
      namespace: splitgraphNamespace,
      repository: splitgraphDestinationRepository,
      tables: [
        {
          name: "stargazers",
          options: {
            airbyte_cursor_field: ["starred_at"],
            airbyte_primary_key_field: [],
          },
          schema: [],
        },
      ],
    },
    // Return as soon as the task exists rather than waiting for it to finish.
    { defer: true }
  );

  return taskId;
};

examples/nextjs-import-airbyte-github-export-seafowl/tsconfig.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,6 @@
1515
"isolatedModules": true,
1616
"jsx": "preserve"
1717
},
18-
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx"],
18+
"include": ["next-env.d.ts", "env-vars.d.ts", "**/*.ts", "**/*.tsx"],
1919
"exclude": ["node_modules"]
2020
}

0 commit comments

Comments
 (0)