-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy pathcluster.js
More file actions
107 lines (93 loc) · 3.29 KB
/
cluster.js
File metadata and controls
107 lines (93 loc) · 3.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
const { Cluster } = require('puppeteer-cluster');
const express = require('express');
// Running totals reported by the periodic stats logger.
let servedRequests = 0;
let errorCount = 0;

// HTTP front end: an Express app that parses JSON request bodies.
const port = 3000;
const app = express();
app.use(express.json());
/**
 * Print the current request and error totals, one console line per counter.
 */
const logServerStats = () => {
  const counters = [
    ['Served Requests', servedRequests],
    ['Error Count', errorCount],
  ];
  for (const [label, total] of counters) {
    console.log(`${label}: ${total}`);
  }
};
// Emit the stats summary once per minute; the timer is never cleared.
const STATS_LOG_INTERVAL_MS = 60 * 1000;
setInterval(logServerStats, STATS_LOG_INTERVAL_MS);
// Chrome launch options; passed to the cluster as its `puppeteerOptions`.
const launchOptions = {
  headless: "new",
  args: [
    '--no-sandbox',
    '--disable-gpu',
    '--disable-dev-shm-usage',
    '--disable-setuid-sandbox',
    '--no-first-run',
    '--no-zygote',
    '--deterministic-fetch',
    '--disable-features=IsolateOrigins',
    '--disable-site-isolation-trials',
    // '--single-process',
  ],
};

// Optional override of the browser binary, taken from the environment.
if (process.env.CHROME_EXECUTABLE_PATH) {
  launchOptions.executablePath = process.env.CHROME_EXECUTABLE_PATH;
}

/**
 * Turn the raw MAX_CONCURRENCY setting into a usable worker limit.
 *
 * @param {string|undefined} rawValue - Value read from the environment.
 * @param {number} fallback - Limit to use when the value is missing or invalid.
 * @returns {number} A positive integer: the parsed value, or `fallback`.
 */
function parseMaxConcurrency(rawValue, fallback) {
  if (!rawValue) {
    return fallback;
  }
  const parsed = Number.parseInt(rawValue, 10);
  // parseInt yields NaN for non-numeric text; NaN, zero and negative numbers are
  // not usable as a worker limit, so fall back instead of passing them on.
  if (!Number.isInteger(parsed) || parsed < 1) {
    console.warn(
      `Ignoring invalid MAX_CONCURRENCY value '${rawValue}'; using ${fallback} instead.`
    );
    return fallback;
  }
  return parsed;
}

// Maximum number of concurrent cluster workers (default 2, override via MAX_CONCURRENCY).
const max_concurrency = parseMaxConcurrency(process.env.MAX_CONCURRENCY, 2);
(async () => {
// Launch the browser cluster: CONCURRENCY_CONTEXT mode, capped at `max_concurrency`
// workers, with every browser started using `launchOptions`.
const clusterOptions = {
  puppeteerOptions: launchOptions,
  maxConcurrency: max_concurrency,
  concurrency: Cluster.CONCURRENCY_CONTEXT,
};
const cluster = await Cluster.launch(clusterOptions);
// Define the cluster task: load one URL in the supplied page and return the result.
//
// Job data: { url, headers }, where `headers` is an optional object of extra HTTP
// request headers (name -> value).
// Resolves to { page, status, headers }: the serialized page content, the status code
// of the main response, and that response's headers.
// Throws when navigation fails or exceeds the 60 s timeout, or when it yields no
// main response.
cluster.task(async ({ page, data: { url, headers } }) => {
  const startTime = Date.now();

  // setExtraHTTPHeaders replaces the whole extra-header set on each call, so all
  // headers go in a single call; setting them one at a time would leave only the
  // last header in effect.
  if (headers && Object.keys(headers).length > 0) {
    await page.setExtraHTTPHeaders(headers);
  }

  const response = await page.goto(url, { timeout: 60000 });
  // page.goto can resolve to null (e.g. about:blank or a same-URL hash navigation);
  // fail with a clear message instead of a TypeError on `response.status()`.
  if (!response) {
    throw new Error(`Navigation to '${url}' returned no response`);
  }

  const statusCode = response.status();
  const finalUrl = page.url();
  const pageBody = await page.content();
  const loadTime = Date.now() - startTime;

  // Show the final URL as well when the page ended up somewhere else.
  const urlString = finalUrl !== url ? `'${url}' -> '${finalUrl}'` : `'${url}'`;
  console.log(`[DEBUG] Fetched ${urlString} status: ${statusCode} (${loadTime / 1000}s)`);

  servedRequests++;
  return { page: pageBody, status: statusCode, headers: response.headers() };
});
// POST /render — JSON body: { url, headers }.
// Replies 400 when `url` is missing, 200 with the cluster task result on success,
// and 500 with an error message when the task fails.
app.post('/render', async (req, res) => {
  const job = { url: req.body.url, headers: req.body.headers };

  if (!job.url) {
    return res.status(400).json({ error: 'URL parameter is required.' });
  }

  try {
    const rendered = await cluster.execute(job);
    res.status(200).json(rendered);
  } catch (failure) {
    errorCount += 1;
    console.debug(`[DEBUG] Could not get '${job.url}' Error: ${failure}`);
    res.status(500).json({ error: `An error occurred while processing the URL.${failure}` });
  }
});
// Start the Express server; keep the handle so it can be closed during shutdown.
const server = app.listen(port, () => {
  console.log(`Server is running on port ${port}`);
});

// Graceful shutdown on SIGINT (Ctrl+C) and SIGTERM (the usual "stop" signal from
// process managers): stop accepting new connections, wait for the cluster to go
// idle, close it, then exit.
let shuttingDown = false;
const shutdown = async (signal) => {
  // A repeated signal must not run the cluster teardown a second time.
  if (shuttingDown) {
    return;
  }
  shuttingDown = true;

  server.close();
  try {
    await cluster.idle();
    await cluster.close();
  } catch (err) {
    console.error(`Error while shutting down on ${signal}: ${err}`);
    process.exitCode = 1;
  } finally {
    process.exit();
  }
};
process.on('SIGINT', shutdown);
process.on('SIGTERM', shutdown);
})();