forked from mbonig/website
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathindex.ts
More file actions
153 lines (134 loc) · 4.63 KB
/
index.ts
File metadata and controls
153 lines (134 loc) · 4.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import { Context, Handler } from "aws-lambda";
import { Browser, Page, PuppeteerLaunchOptions } from "puppeteer";
import { PuppeteerExtra } from "puppeteer-extra";
import { S3Client, PutObjectCommand } from "@aws-sdk/client-s3";
/**
 * Invocation payload describing the link to capture.
 */
interface Link {
  /** Identifier used as the prefix of every S3 object key written for this link. */
  id: string;
  /** Address the browser page navigates to before capturing. */
  url: string;
  /** Accompanying comment; the handler in this file does not read it. */
  comment: string;
}
/**
 * S3 client shared by the handler. It lives at module scope so it is
 * constructed once per module load rather than on every handler call.
 * The region is taken from the AWS_REGION environment variable.
 */
const s3 = new S3Client({
  region: process.env.AWS_REGION,
});
/** Maximum time to wait for `browser.close()` before giving up, in milliseconds. */
const BROWSER_CLOSE_TIMEOUT_MS = 10000;

/** Fixed pause after navigation before the page is captured, in milliseconds. */
const PAGE_SETTLE_DELAY_MS = 5000;

/** Scratch directory whose contents are removed at the end of every invocation. */
const TMP_DIR = "/tmp";

/** Uploads a single object to S3 using the module-level client. */
async function putObject(
  bucket: string,
  key: string,
  body: string | Uint8Array,
  contentType: string,
): Promise<void> {
  await s3.send(
    new PutObjectCommand({
      Bucket: bucket,
      Key: key,
      Body: body,
      ContentType: contentType,
    }),
  );
}

/**
 * Downloads the page's Open Graph image (if it declares one) and stores it in S3.
 *
 * @throws If the image URL cannot be resolved, the download fails, or the
 *   server answers with a non-2xx status.
 */
async function saveOpenGraphImage(page: Page, bucket: string, id: string): Promise<void> {
  const ogImageUrl = await page.evaluate(() => {
    const metaTag = document.querySelector('meta[property="og:image"]');
    return metaTag ? metaTag.getAttribute('content') : null;
  });
  if (!ogImageUrl) {
    return;
  }

  // og:image may be relative or protocol-relative; fetch() only accepts
  // absolute URLs, so resolve against the URL the page ended up on.
  const absoluteUrl = new URL(ogImageUrl, page.url()).toString();
  const ogImageResponse = await fetch(absoluteUrl);
  if (!ogImageResponse.ok) {
    // Do not store an error page under the image key.
    throw new Error(`Open Graph image request failed with status ${ogImageResponse.status}`);
  }
  const ogImageBuffer = Buffer.from(await ogImageResponse.arrayBuffer());

  // The key keeps its historical ".png" suffix so existing consumers still
  // find it; the stored Content-Type reflects what the server actually sent.
  const contentType = ogImageResponse.headers.get('content-type') ?? 'image/png';
  await putObject(bucket, `${id}_og_image.png`, ogImageBuffer, contentType);
  console.log('Open Graph image saved to S3');
}

/**
 * Closes the browser, rejecting if it has not closed within `timeoutMs`.
 * The timer is always cleared so it does not linger after the close settles.
 */
async function closeBrowserWithTimeout(browser: Browser, timeoutMs: number): Promise<void> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  try {
    await Promise.race([
      browser.close(),
      new Promise<never>((_, reject) => {
        timer = setTimeout(() => reject(new Error("Browser close timeout")), timeoutMs);
      }),
    ]);
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Removes everything inside `dir` while leaving `dir` itself in place.
 * Removing the directory itself (as the code previously did) can fail when
 * it is a mount point or is not owned by the process, which also skipped
 * the follow-up mkdir. A failure on one entry is logged and does not stop
 * the remaining entries from being removed.
 */
async function emptyDirectory(dir: string): Promise<void> {
  const fs = require('fs').promises;
  const path = require('path');
  // Guarantees the directory exists afterwards; a no-op when it already does.
  await fs.mkdir(dir, { recursive: true });
  const entries: string[] = await fs.readdir(dir);
  for (const entry of entries) {
    try {
      await fs.rm(path.join(dir, entry), { recursive: true, force: true });
    } catch (cleanupError) {
      console.log("Error cleaning up temporary files:", cleanupError);
    }
  }
}

/**
 * Lambda entry point: loads `event.url` in Chromium and archives the result in S3.
 *
 * Objects written to the bucket named by the BUCKET_NAME environment variable:
 * - `<id>.html`          rendered page HTML
 * - `<id>_viewport.png`  screenshot of the 1920x1080 viewport
 * - `<id>_fullpage.png`  full-page screenshot
 * - `<id>_og_image.png`  Open Graph image, when the page declares one and it
 *                        can be downloaded (best-effort; a failure is logged
 *                        and does not fail the capture)
 *
 * @param event   Link to capture; `id` and `url` must be non-empty strings.
 * @param context Lambda context. A truthy `functionName` selects the headless
 *   launch options with the sandbox-related flags; otherwise a headed browser
 *   is launched.
 * @returns `{ statusCode: 200, body }` on success, or `{ statusCode: 500, body }`
 *   carrying the error message on failure. Errors are reported through the
 *   return value; the handler does not reject.
 */
export const handler: Handler = async (
  event: Link,
  context: Context,
): Promise<any> => {
  let browser: Browser | null = null;
  try {
    console.log("event:", event);

    // Fail fast on bad configuration or input, before paying for a browser
    // launch and before keys such as "undefined.html" can be written.
    const bucket = process.env.BUCKET_NAME;
    if (!bucket) {
      throw new Error("BUCKET_NAME environment variable is not set");
    }
    if (
      typeof event?.id !== "string" || event.id === "" ||
      typeof event?.url !== "string" || event.url === ""
    ) {
      throw new Error("event must contain non-empty 'id' and 'url' strings");
    }

    // Loaded lazily inside the handler, as in the original implementation.
    const puppeteer: PuppeteerExtra = require("puppeteer-extra");
    const stealthPlugin = require("puppeteer-extra-plugin-stealth");
    puppeteer.use(stealthPlugin());
    const chromium = require("@sparticuz/chromium");
    const browserPath = await chromium.executablePath();
    console.log({path: browserPath})

    const launchOptions: PuppeteerLaunchOptions = context.functionName
      ? {
          headless: true,
          executablePath: browserPath,
          args: [
            "--no-sandbox",
            "--disable-setuid-sandbox",
            "--disable-dev-shm-usage",
            "--disable-gpu",
            "--single-process",
            "--incognito",
            "--disable-client-side-phishing-detection",
            "--disable-software-rasterizer",
          ],
        }
      : {
          headless: false,
          executablePath: browserPath,
        };
    console.log('launch', {launchOptions})
    browser = await puppeteer.launch(launchOptions);
    console.log('launched')

    const page: Page = await browser.newPage();
    console.log('page')
    // Set viewport to a common desktop resolution
    await page.setViewport({ width: 1920, height: 1080 });
    console.log('viewport set')
    await page.goto(event.url);
    console.log('page loaded')
    await new Promise((resolve) => setTimeout(resolve, PAGE_SETTLE_DELAY_MS));
    console.log('page content')

    // Take viewport screenshot
    const viewportScreenshot = await page.screenshot({ encoding: 'binary' });
    console.log('viewport screenshot taken')
    // Take full page screenshot
    const fullPageScreenshot = await page.screenshot({ encoding: 'binary', fullPage: true });
    console.log('full page screenshot taken')
    const content = await page.content();

    // The Open Graph image is optional: a broken or unreachable image must
    // not prevent the HTML and screenshots from being archived.
    try {
      await saveOpenGraphImage(page, bucket, event.id);
    } catch (ogError) {
      console.log("Error saving Open Graph image:", ogError);
    }

    // The three required uploads are independent, so send them in parallel.
    await Promise.all([
      putObject(bucket, `${event.id}.html`, content, 'text/html'),
      putObject(bucket, `${event.id}_viewport.png`, viewportScreenshot, 'image/png'),
      putObject(bucket, `${event.id}_fullpage.png`, fullPageScreenshot, 'image/png'),
    ]);

    return {
      statusCode: 200,
      body: JSON.stringify({ message: "HTML and screenshots saved to S3", id: event.id }),
    };
  } catch (e: unknown) {
    console.log("Error in Lambda Handler:", e);
    const message = e instanceof Error ? e.message : String(e);
    return {
      statusCode: 500,
      body: JSON.stringify({ error: message }),
    };
  } finally {
    if (browser) {
      try {
        await closeBrowserWithTimeout(browser, BROWSER_CLOSE_TIMEOUT_MS);
      } catch (closeError) {
        console.log("Error closing browser:", closeError);
      }
    }
    // Clean up temporary files.
    // NOTE(review): this also runs when context.functionName is unset (local
    // runs), where it empties the machine's /tmp — confirm that is intended.
    try {
      await emptyDirectory(TMP_DIR);
    } catch (cleanupError) {
      console.log("Error cleaning up temporary files:", cleanupError);
    }
  }
};