Skip to content

Commit 2c4b9fd

Browse files
committed
feat: move trace storage to S3
1 parent 5bdd0ce commit 2c4b9fd

7 files changed

Lines changed: 1328 additions & 138 deletions

File tree

packages/api/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,11 @@
1414
"@aws-sdk/client-cloudwatch-logs": "^3.658.1",
1515
"@aws-sdk/client-dynamodb": "^3.637.0",
1616
"@aws-sdk/client-lambda": "^3.637.0",
17+
"@aws-sdk/client-s3": "^3.777.0",
1718
"@aws-sdk/lib-dynamodb": "^3.637.0",
1819
"@hono/node-server": "^1.12.2",
1920
"@includable/serverless-middleware": "^2.2.0",
21+
"@thi.ng/ksuid": "^3.2.83",
2022
"bcryptjs": "^2.4.3",
2123
"date-fns": "^3.6.0",
2224
"dynamodb-toolbox": "^1.3.8",

packages/api/serverless.yml

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ plugins:
88

99
params:
1010
default:
11-
DATA_RETENTION_DAYS: ${env:DATA_RETENTION_DAYS, 30}
11+
DATA_RETENTION_DAYS: ${env:DATA_RETENTION_DAYS, 14}
1212
CUSTOM_DOMAIN: ${env:CUSTOM_DOMAIN, ""}
1313
HAS_CUSTOM_DOMAIN: ${strToBool(${env:HAS_CUSTOM_DOMAIN, "false"})}
1414
TRACER_TOKEN: ${env:TRACER_TOKEN, ""}
@@ -22,6 +22,7 @@ custom:
2222
endpointType: regional
2323
apiType: http
2424
logRetentionInDays: 14
25+
bucketName: ${self:service}-storage-${sls:stage} # TODO: make this a unique name
2526

2627
package:
2728
individually: true
@@ -52,21 +53,10 @@ provider:
5253
TRACER_TOKEN: ${param:TRACER_TOKEN}
5354
LAMBDA_LAYER_ARN: !Ref TracerLambdaLayer
5455
AUTO_TRACE_EXCLUDE: 1
56+
STORAGE_BUCKET_NAME: ${self:custom.bucketName}
5557
httpApi:
5658
payload: "2.0"
5759
cors: true
58-
# authorizers:
59-
# WebAuthorizer:
60-
# identitySource: $request.header.Authorization
61-
# issuerUrl:
62-
# Fn::Join:
63-
# - ""
64-
# - - "https://cognito-idp."
65-
# - "${opt:region, self:provider.region}"
66-
# - ".amazonaws.com/"
67-
# - Ref: CognitoUserPool
68-
# audience:
69-
# Ref: CognitoUserPoolClient
7060
lambdaHashingVersion: 20201221
7161
iam:
7262
role:
@@ -99,6 +89,15 @@ provider:
9989
- apigateway:GET
10090
Resource:
10191
- "arn:aws:apigateway:${aws:region}::/apis"
92+
- Effect: Allow
93+
Action:
94+
- s3:GetObject
95+
- s3:PutObject
96+
- s3:DeleteObject
97+
- s3:ListBucket
98+
Resource:
99+
- arn:aws:s3:::${self:custom.bucketName}
100+
- arn:aws:s3:::${self:custom.bucketName}/*
102101

103102
layers:
104103
tracer:
@@ -109,12 +108,14 @@ layers:
109108
- nodejs16.x
110109
- nodejs18.x
111110
- nodejs20.x
111+
- nodejs22.x
112112

113113
functions:
114114
main:
115115
handler: src/index.handler
116116
timeout: 900
117-
memorySize: 2048
117+
memorySize: 1024
118+
reservedConcurrency: 1
118119
url: true
119120
events:
120121
- httpApi:
@@ -127,3 +128,4 @@ functions:
127128

128129
resources:
129130
- ${file(./src/infrastructure/database/table.yml)}
131+
- ${file(./src/infrastructure/bucket.yml)}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
Resources:
2+
StorageBucket:
3+
Type: AWS::S3::Bucket
4+
Properties:
5+
BucketName: ${self:custom.bucketName}
6+
LifecycleConfiguration:
7+
Rules:
8+
- Id: ExpireObjectsRule
9+
Status: Enabled
10+
ExpirationInDays: ${param:DATA_RETENTION_DAYS}

packages/api/src/lib/invocations.js

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { put } from "./database";
1+
import { store } from "./storage";
22

33
const getStatusCodeFromSpan = (span) => {
44
if (!span.return_value) return null;
@@ -36,16 +36,11 @@ const getResultSummaryFromSpan = (span) => {
3636
return mainStatus;
3737
};
3838

39-
export const saveInvocation = async (span) => {
40-
return put(
41-
{
42-
...span,
43-
statusCode: getStatusCodeFromSpan(span),
44-
resultSummary: getResultSummaryFromSpan(span),
45-
pk: `function#${span.region}#${span.name}`,
46-
sk: `invocation#${span.started}#${span.id}`,
47-
type: "invocation",
48-
},
49-
true,
50-
);
39+
export const saveInvocation = async (span, allSpans) => {
40+
return store(["invocations", span.region, span.name, span.started, span.id], {
41+
...span,
42+
statusCode: getStatusCodeFromSpan(span),
43+
resultSummary: getResultSummaryFromSpan(span),
44+
spans: allSpans,
45+
});
5146
};

packages/api/src/lib/storage.js

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import { PutObjectCommand, S3Client } from "@aws-sdk/client-s3";
2+
3+
const s3 = new S3Client();
4+
5+
export const store = async (key, data) => {
6+
const datePrefix = new Date().toISOString().split("T")[0];
7+
const bucketName = process.env.STORAGE_BUCKET_NAME;
8+
9+
if (Array.isArray(key)) key = key.join("/");
10+
11+
const params = {
12+
Bucket: bucketName,
13+
Key: `${datePrefix}/${key}`,
14+
Body: JSON.stringify(data),
15+
ContentType: "application/json",
16+
};
17+
18+
return s3.send(new PutObjectCommand(params));
19+
};

packages/api/src/routes/collector/index.js

Lines changed: 84 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -1,139 +1,113 @@
11
import { Hono } from "hono";
2+
import { defULID } from "@thi.ng/ksuid";
23

3-
import { getExpiryTime, put, update } from "../../lib/database";
44
import { saveHourlyStat } from "../../lib/stats";
55
import { getErrorKey } from "../../lib/errors";
6-
import { groupSpans } from "../../lib/spans";
76
import { saveInvocation } from "../../lib/invocations";
7+
import { store } from "../../lib/storage";
88

99
const app = new Hono();
1010

11+
const id = defULID();
12+
const getId = () => {
13+
return id.next();
14+
};
15+
16+
// Holds transactions for which we don't know the invocation ID yet
17+
// This is used to avoid sending the transaction to the database before we know the invocation ID
18+
const transactionCache = {};
19+
1120
app.post("/", async (c) => {
1221
const body = await c.req.json();
1322

14-
const groupedItems = {};
15-
1623
for (const span of body) {
1724
if (process.env.TRACER_TOKEN && span.token !== process.env.TRACER_TOKEN) {
1825
console.log(`Invalid token: ${span.token}`);
1926
continue;
2027
}
2128

22-
const pk = `transaction#${span.transactionId || span.transaction_id}`;
23-
if (!groupedItems[pk]) groupedItems[pk] = [];
29+
const groupKey = span.transactionId || span.transaction_id;
30+
if (!transactionCache[groupKey]) transactionCache[groupKey] = [];
2431

25-
if (span.type === "log") {
26-
// save log span
27-
groupedItems[pk].push({
28-
...span,
29-
transactionId: span.transactionId || span.transaction_id,
30-
type: "log",
31-
});
32-
continue;
33-
}
32+
transactionCache[groupKey].push(span);
33+
}
34+
35+
// Check transactions cache to see if there's any transactions we can flush
36+
for (const [transactionId, spans] of Object.entries(transactionCache)) {
37+
const invocationEndedSpan = spans.find(
38+
(span) =>
39+
span.type === "function" && span.ended && !span.id.includes("_started"),
40+
);
41+
42+
if (!invocationEndedSpan) {
43+
console.log(
44+
`No invocation ended span found for transaction ${spans[0].transactionId}`,
45+
);
46+
47+
// TODO: if we are close to running out of time, we should flush the transaction cache anyway
3448

35-
// ignore started spans and enrichments
36-
if (span.id.endsWith("_started") || span.type === "enrichment") {
3749
continue;
50+
} else {
51+
console.log(
52+
"Flushing transaction cache for",
53+
invocationEndedSpan.transactionId,
54+
);
3855
}
3956

40-
// save transaction span
41-
groupedItems[pk].push({
42-
...span,
43-
type: "span",
44-
spanType: span.type,
45-
});
46-
47-
if (
48-
span.type === "function" &&
49-
span.ended &&
50-
!span.id.includes("_started")
51-
) {
52-
// save function invocation details
53-
await saveInvocation(span);
54-
55-
// save error
56-
if (span.error) {
57-
const errorKey = getErrorKey(span.error);
58-
await update({
59-
Key: {
60-
pk: `function#${span.region}#${span.name}`,
61-
sk: `error#${errorKey}`,
62-
},
63-
UpdateExpression: `SET #error = :error, #lastInvocation = :lastInvocation, #lastSeen = :lastSeen, #expires = :expires, #type = :type, #name = :name, #region = :region`,
64-
ExpressionAttributeValues: {
65-
":error": span.error,
66-
":lastInvocation": `${span.started}/${span.id}`,
67-
":lastSeen": new Date(span.ended).toISOString(),
68-
":expires": getExpiryTime(),
69-
":type": "error",
70-
":name": span.name,
71-
":region": span.region,
72-
},
73-
ExpressionAttributeNames: {
74-
"#error": "error",
75-
"#lastInvocation": "lastInvocation",
76-
"#lastSeen": "lastSeen",
77-
"#expires": "_expires",
78-
"#type": "type",
79-
"#name": "name",
80-
"#region": "region",
81-
},
82-
});
83-
await saveHourlyStat(span.region, span.name + ".error." + errorKey, 1);
84-
}
85-
86-
// save function meta data
87-
try {
88-
await update({
89-
Key: {
90-
pk: `function#${span.region}#${span.name}`,
91-
sk: `function#${span.region}`,
92-
},
93-
UpdateExpression: `SET lastInvocation = :lastInvocation, memoryAllocated = :memoryAllocated, #timeout = :timeout, traceStatus = :traceStatus, #expires = :expires`,
94-
ExpressionAttributeValues: {
95-
":lastInvocation": span.started,
96-
":memoryAllocated": span.memoryAllocated,
97-
":timeout": span.maxFinishTime - span.started,
98-
":traceStatus": "enabled",
99-
":expires": getExpiryTime(),
100-
},
101-
ExpressionAttributeNames: {
102-
"#timeout": "timeout",
103-
"#expires": "_expires",
104-
},
105-
});
106-
} catch (e) {
107-
console.log(e);
108-
}
109-
110-
// save stats
111-
const duration = span.ended - span.started;
112-
await saveHourlyStat(span.region, span.name + ".invocations", 1);
113-
await saveHourlyStat(span.region, span.name + ".duration", duration);
114-
await saveHourlyStat("global", "invocations", 1);
115-
if (span.error) {
116-
await saveHourlyStat(span.region, span.name + ".errors", 1);
117-
await saveHourlyStat("global", "errors", 1);
118-
}
57+
// save function invocation details
58+
await saveInvocation(invocationEndedSpan, spans);
59+
60+
// const duration = invocationEndedSpan.ended - invocationEndedSpan.started;
61+
await saveHourlyStat(
62+
invocationEndedSpan.region,
63+
invocationEndedSpan.name + ".invocations",
64+
1,
65+
);
66+
// await saveHourlyStat(
67+
// invocationEndedSpan.region,
68+
// invocationEndedSpan.name + ".duration",
69+
// duration,
70+
// );
71+
await saveHourlyStat("global", "invocations", 1);
72+
if (invocationEndedSpan.error) {
73+
await saveHourlyStat(
74+
invocationEndedSpan.region,
75+
invocationEndedSpan.name + ".errors",
76+
1,
77+
);
78+
await saveHourlyStat("global", "errors", 1);
11979
}
120-
}
12180

122-
const itemsToSave = [];
123-
for (let [pk, items] of Object.entries(groupedItems)) {
124-
items = groupSpans(items);
125-
while (items.length) {
126-
const chunk = items.splice(0, 100);
127-
itemsToSave.push({
128-
pk,
129-
sk: `spans#${chunk[0].started || chunk[0].sending_time}#${chunk[0].id}`,
130-
type: "spans",
131-
spans: chunk,
132-
});
81+
// save error
82+
if (invocationEndedSpan.error) {
83+
const errorKey = getErrorKey(invocationEndedSpan.error);
84+
await store(
85+
[
86+
"errors",
87+
invocationEndedSpan.region,
88+
invocationEndedSpan.name,
89+
errorKey,
90+
getId(),
91+
],
92+
{
93+
error: invocationEndedSpan.error,
94+
lastInvocation: `${invocationEndedSpan.started}/${invocationEndedSpan.id}`,
95+
lastSeen: new Date(invocationEndedSpan.ended).toISOString(),
96+
type: "error",
97+
name: invocationEndedSpan.name,
98+
region: invocationEndedSpan.region,
99+
},
100+
);
101+
await saveHourlyStat(
102+
invocationEndedSpan.region,
103+
invocationEndedSpan.name + ".error." + errorKey,
104+
1,
105+
);
133106
}
134-
}
135107

136-
await Promise.all(itemsToSave.map((item) => put(item), true));
108+
// Delete the transaction from the cache
109+
delete transactionCache[transactionId];
110+
}
137111

138112
return c.json({ success: true });
139113
});

0 commit comments

Comments
 (0)