/**
 * Turing ES Documentation — PDF Generator
 *
 * Uses a single Puppeteer browser instance to:
 *   1. Render the branded cover (HTML → PDF)
 *   2. Crawl every documentation page following "next" links, printing each to PDF
 *   3. Merge everything into a single PDF with pdf-lib
 *
 * Prerequisites:
 *   - Docusaurus serve running on localhost:3000 (or the URL given in PDF_BASE_URL)
 *   - npm dependencies installed (puppeteer, pdf-lib)
 */
1414
15- import { execFileSync } from 'node:child_process' ;
16- import { readFileSync , writeFileSync , unlinkSync } from 'node:fs' ;
15+ import { readFileSync , writeFileSync } from 'node:fs' ;
1716import { fileURLToPath } from 'node:url' ;
1817import { dirname , join } from 'node:path' ;
1918import { PDFDocument } from 'pdf-lib' ;
@@ -22,133 +21,149 @@ const __filename = fileURLToPath(import.meta.url);
2221const __dirname = dirname ( __filename ) ;
2322const ROOT = join ( __dirname , '..' ) ;
2423
25- const COVER_HTML = join ( __dirname , 'pdf-cover.html' ) ;
26- const STYLE_CSS = join ( __dirname , 'pdf-style.css' ) ;
27- const COVER_PDF = join ( ROOT , '.cover-tmp.pdf' ) ;
28- const DOCS_PDF = join ( ROOT , '.docs-tmp.pdf' ) ;
29- const OUTPUT_PDF = join ( ROOT , 'turing-es-2026.1-documentation.pdf' ) ;
30- const BASE_URL = process . env . PDF_BASE_URL || 'http://localhost:3000' ;
31- const ENTRY_PATH = '/turing/getting-started/intro' ;
24+ const COVER_HTML = join ( __dirname , 'pdf-cover.html' ) ;
25+ const STYLE_CSS = join ( __dirname , 'pdf-style.css' ) ;
26+ const OUTPUT_PDF = join ( ROOT , 'turing-es-2026.1-documentation.pdf' ) ;
27+ const BASE_URL = process . env . PDF_BASE_URL || 'http://localhost:3000' ;
28+ const ENTRY_PATH = '/turing/getting-started/intro' ;
29+
30+ const HEADER_HTML = [
31+ '<div style="width:100%;padding:0 15mm;font-family:system-ui,sans-serif;' ,
32+ 'font-size:8px;display:flex;justify-content:space-between;align-items:center;' ,
33+ 'border-bottom:0.5px solid #FED7AA;padding-bottom:3px;margin-bottom:4px;">' ,
34+ '<span style="color:#C2410C;font-weight:700;letter-spacing:0.08em;">TURING ES</span>' ,
35+ '<span style="color:#94a3b8;">Documentation</span>' ,
36+ '</div>' ,
37+ ] . join ( '' ) ;
38+
39+ const FOOTER_HTML = [
40+ '<div style="width:100%;padding:0 15mm;font-family:system-ui,sans-serif;' ,
41+ 'font-size:7px;display:flex;justify-content:space-between;align-items:center;' ,
42+ 'border-top:0.5px solid #e2e8f0;padding-top:3px;margin-top:4px;">' ,
43+ '<span style="color:#94a3b8;">viglet.com</span>' ,
44+ '<span style="color:#64748b;">' ,
45+ '<span class="pageNumber"></span> / <span class="totalPages"></span>' ,
46+ '</span></div>' ,
47+ ] . join ( '' ) ;
48+
49+ let browser ;
50+
51+ async function launchBrowser ( ) {
52+ const puppeteer = await import ( 'puppeteer' ) ;
53+ browser = await puppeteer . launch ( {
54+ headless : 'new' ,
55+ args : [ '--no-sandbox' , '--disable-setuid-sandbox' , '--disable-dev-shm-usage' ] ,
56+ } ) ;
57+ }
3258
3359/* ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
34- Stage 1 β Cover (Puppeteer )
60+ Stage 1 β Cover (HTML β PDF )
3561 ββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
3662async function generateCover ( ) {
3763 console . log ( ' [1/3] Rendering cover pages β¦' ) ;
3864
39- // Dynamic import so the script still parses even if puppeteer
40- // is only available through the mermaid-cli transitive dep.
41- const puppeteer = await import ( 'puppeteer' ) ;
42- const browser = await puppeteer . launch ( {
43- headless : 'new' ,
44- args : [ '--no-sandbox' , '--disable-setuid-sandbox' ] ,
45- } ) ;
46-
4765 const page = await browser . newPage ( ) ;
4866 const html = readFileSync ( COVER_HTML , 'utf-8' ) ;
4967
5068 await page . setContent ( html , { waitUntil : 'networkidle0' } ) ;
51-
52- // Wait for Google Fonts to load
5369 await page . evaluateHandle ( 'document.fonts.ready' ) ;
5470
55- await page . pdf ( {
56- path : COVER_PDF ,
71+ const pdfBuffer = await page . pdf ( {
5772 format : 'A4' ,
5873 printBackground : true ,
5974 preferCSSPageSize : false ,
6075 } ) ;
6176
62- await browser . close ( ) ;
77+ await page . close ( ) ;
6378 console . log ( ' Cover rendered β' ) ;
79+ return pdfBuffer ;
6480}
6581
6682/* ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
67- Stage 2 β Documentation pages (mr-pdf)
83+ Stage 2 β Crawl doc pages & print each to PDF
6884 ββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
69- function generateDocs ( ) {
85+ async function generateDocs ( ) {
7086 console . log ( ' [2/3] Generating documentation pages β¦' ) ;
7187
7288 const cssContent = readFileSync ( STYLE_CSS , 'utf-8' ) ;
73-
74- const headerHtml = [
75- '<div style="width:100%;padding:0 15mm;font-family:system-ui,sans-serif;' ,
76- 'font-size:8px;display:flex;justify-content:space-between;align-items:center;' ,
77- 'border-bottom:0.5px solid #FED7AA;padding-bottom:3px;margin-bottom:4px;">' ,
78- '<span style="color:#C2410C;font-weight:700;letter-spacing:0.08em;">TURING ES</span>' ,
79- '<span style="color:#94a3b8;">Documentation</span>' ,
80- '</div>' ,
81- ] . join ( '' ) ;
82-
83- const footerHtml = [
84- '<div style="width:100%;padding:0 15mm;font-family:system-ui,sans-serif;' ,
85- 'font-size:7px;display:flex;justify-content:space-between;align-items:center;' ,
86- 'border-top:0.5px solid #e2e8f0;padding-top:3px;margin-top:4px;">' ,
87- '<span style="color:#94a3b8;">viglet.com</span>' ,
88- '<span style="color:#64748b;">' ,
89- '<span class="pageNumber"></span> / <span class="totalPages"></span>' ,
90- '</span></div>' ,
91- ] . join ( '' ) ;
92-
93- const args = [
94- 'mr-pdf' ,
95- '--initialDocURLs' , `${ BASE_URL } ${ ENTRY_PATH } ` ,
96- '--contentSelector' , 'article' ,
97- '--paginationSelector' , 'a.pagination-nav__link--next' ,
98- '--pdfFormat' , 'A4' ,
99- '--pdfMargin' , '25,15,20,15' ,
100- '--cssStyle' , cssContent ,
101- '--headerTemplate' , headerHtml ,
102- '--footerTemplate' , footerHtml ,
103- '--disableTOC' ,
104- '--outputPDFFilename' , DOCS_PDF ,
105- ] ;
106-
107- execFileSync ( 'npx' , args , {
108- cwd : ROOT ,
109- stdio : 'inherit' ,
110- timeout : 300_000 , // 5 min max
111- } ) ;
112-
113- console . log ( ' Documentation pages generated β' ) ;
89+ const page = await browser . newPage ( ) ;
90+ const pagePDFs = [ ] ;
91+ let url = `${ BASE_URL } ${ ENTRY_PATH } ` ;
92+ const visited = new Set ( ) ;
93+
94+ while ( url ) {
95+ // Normalise to avoid revisiting with trailing slashes etc.
96+ const normalized = url . replace ( / \/ + $ / , '' ) ;
97+ if ( visited . has ( normalized ) ) break ;
98+ visited . add ( normalized ) ;
99+
100+ const pageNum = visited . size ;
101+ process . stdout . write ( ` [${ pageNum } ] ${ normalized . replace ( BASE_URL , '' ) } β¦ ` ) ;
102+
103+ await page . goto ( url , { waitUntil : 'networkidle0' , timeout : 30_000 } ) ;
104+
105+ // Inject our PDF stylesheet
106+ await page . addStyleTag ( { content : cssContent } ) ;
107+
108+ // Small delay for styles to apply
109+ await new Promise ( ( r ) => setTimeout ( r , 300 ) ) ;
110+
111+ // Print this page to PDF
112+ const pdfBuf = await page . pdf ( {
113+ format : 'A4' ,
114+ printBackground : true ,
115+ displayHeaderFooter : true ,
116+ headerTemplate : HEADER_HTML ,
117+ footerTemplate : FOOTER_HTML ,
118+ margin : { top : '25mm' , bottom : '20mm' , left : '15mm' , right : '15mm' } ,
119+ } ) ;
120+
121+ pagePDFs . push ( pdfBuf ) ;
122+ console . log ( 'β' ) ;
123+
124+ // Find the "next" pagination link
125+ url = await page . evaluate ( ( ) => {
126+ const next = document . querySelector ( 'a.pagination-nav__link--next' ) ;
127+ return next ? next . href : null ;
128+ } ) ;
129+ }
130+
131+ await page . close ( ) ;
132+ console . log ( ` ${ pagePDFs . length } pages generated β` ) ;
133+ return pagePDFs ;
114134}
115135
116136/* ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
117- Stage 3 β Merge (pdf-lib)
137+ Stage 3 β Merge all PDFs (pdf-lib)
118138 ββββββββββββββββββββββββββββββββββββββββββββββββββββββ */
119- async function mergePDFs ( ) {
120- console . log ( ' [3/3] Merging cover + documentation β¦' ) ;
121-
122- const coverBytes = readFileSync ( COVER_PDF ) ;
123- const docsBytes = readFileSync ( DOCS_PDF ) ;
139+ async function mergePDFs ( coverBuf , docBuffers ) {
140+ console . log ( ' [3/3] Merging PDFs β¦' ) ;
124141
125- const merged = await PDFDocument . create ( ) ;
142+ const merged = await PDFDocument . create ( ) ;
126143
127- // Copy cover pages (cover + inner title)
128- const coverDoc = await PDFDocument . load ( coverBytes ) ;
144+ // Cover pages
145+ const coverDoc = await PDFDocument . load ( coverBuf ) ;
129146 const coverPages = await merged . copyPages ( coverDoc , coverDoc . getPageIndices ( ) ) ;
130147 coverPages . forEach ( ( p ) => merged . addPage ( p ) ) ;
131148
132- // Copy documentation pages
133- const docsDoc = await PDFDocument . load ( docsBytes ) ;
134- const docsPages = await merged . copyPages ( docsDoc , docsDoc . getPageIndices ( ) ) ;
135- docsPages . forEach ( ( p ) => merged . addPage ( p ) ) ;
149+ // Documentation pages
150+ for ( const buf of docBuffers ) {
151+ const doc = await PDFDocument . load ( buf ) ;
152+ const pages = await merged . copyPages ( doc , doc . getPageIndices ( ) ) ;
153+ pages . forEach ( ( p ) => merged . addPage ( p ) ) ;
154+ }
136155
137156 // Metadata
138157 merged . setTitle ( 'Turing ES Documentation' ) ;
139158 merged . setSubject ( 'Enterprise Search Platform β v2026.1' ) ;
140159 merged . setAuthor ( 'Viglet' ) ;
141160 merged . setCreator ( 'Viglet PDF Generator' ) ;
142- merged . setProducer ( 'pdf-lib + Puppeteer + mr-pdf ' ) ;
161+ merged . setProducer ( 'pdf-lib + Puppeteer' ) ;
143162 merged . setCreationDate ( new Date ( ) ) ;
144163
145164 const mergedBytes = await merged . save ( ) ;
146165 writeFileSync ( OUTPUT_PDF , mergedBytes ) ;
147166
148- // Clean up temp files
149- try { unlinkSync ( COVER_PDF ) ; } catch { /* ignore */ }
150- try { unlinkSync ( DOCS_PDF ) ; } catch { /* ignore */ }
151-
152167 const sizeMB = ( mergedBytes . length / 1_048_576 ) . toFixed ( 2 ) ;
153168 console . log ( ` Merged PDF saved (${ sizeMB } MB) β` ) ;
154169}
@@ -163,17 +178,22 @@ async function main() {
163178 console . log ( ' ββββββββββββββββββββββββββββββββββββββββββββ' ) ;
164179 console . log ( ) ;
165180
166- await generateCover ( ) ;
167- generateDocs ( ) ;
168- await mergePDFs ( ) ;
181+ await launchBrowser ( ) ;
182+
183+ const coverBuf = await generateCover ( ) ;
184+ const docBufs = await generateDocs ( ) ;
185+ await mergePDFs ( coverBuf , docBufs ) ;
186+
187+ await browser . close ( ) ;
169188
170189 console . log ( ) ;
171190 console . log ( ` β
${ OUTPUT_PDF } ` ) ;
172191 console . log ( ) ;
173192}
174193
175- main ( ) . catch ( ( err ) => {
194+ main ( ) . catch ( async ( err ) => {
176195 console . error ( '\n β PDF generation failed:\n' ) ;
177196 console . error ( err ) ;
197+ if ( browser ) await browser . close ( ) . catch ( ( ) => { } ) ;
178198 process . exit ( 1 ) ;
179199} ) ;
0 commit comments