66 * 1. Render the branded cover (HTML → PDF)
77 * 2. Crawl every documentation page following "next" links, printing each to PDF
88 * 3. Merge everything into a single PDF with pdf-lib
9+ * 4. Rewrite internal links as in-PDF GoTo hyperlinks
910 *
1011 * Prerequisites:
1112 * - Docusaurus serve running on localhost:3000
1516import { readFileSync , writeFileSync } from 'node:fs' ;
1617import { fileURLToPath } from 'node:url' ;
1718import { dirname , join } from 'node:path' ;
18- import { PDFDocument } from 'pdf-lib' ;
19+ import { PDFDocument , PDFName , PDFDict , PDFString , PDFHexString , PDFArray } from 'pdf-lib' ;
1920
2021const __filename = fileURLToPath ( import . meta. url ) ;
2122const __dirname = dirname ( __filename ) ;
2223const ROOT = join ( __dirname , '..' ) ;
2324
2425const COVER_HTML = join ( __dirname , 'pdf-cover.html' ) ;
2526const STYLE_CSS = join ( __dirname , 'pdf-style.css' ) ;
27+ const FAVICON = join ( ROOT , 'static' , 'img' , 'favicon.png' ) ;
2628const OUTPUT_PDF = join ( ROOT , 'turing-es-2026.1-documentation.pdf' ) ;
2729const BASE_URL = process . env . PDF_BASE_URL || 'http://localhost:3000' ;
30+ const PROD_URL = process . env . PDF_PROD_URL || 'https://docs.viglet.com' ;
2831const ENTRY_PATH = '/turing/getting-started/intro' ;
2932
3033const HEADER_HTML = [
@@ -60,10 +63,15 @@ async function launchBrowser() {
6063 Stage 1 — Cover (HTML → PDF)
6164 ────────────────────────────────────────────────────── */
6265async function generateCover ( ) {
63- console . log ( ' [1/3 ] Rendering cover pages …' ) ;
66+ console . log ( ' [1/4 ] Rendering cover pages …' ) ;
6467
6568 const page = await browser . newPage ( ) ;
66- const html = readFileSync ( COVER_HTML , 'utf-8' ) ;
69+
70+ // Embed favicon as base64 data URI in the cover HTML
71+ const faviconB64 = readFileSync ( FAVICON ) . toString ( 'base64' ) ;
72+ const faviconDataUri = `data:image/png;base64,${ faviconB64 } ` ;
73+ const html = readFileSync ( COVER_HTML , 'utf-8' )
74+ . replaceAll ( 'FAVICON_DATA_URI' , faviconDataUri ) ;
6775
6876 await page . setContent ( html , { waitUntil : 'networkidle0' } ) ;
6977 await page . evaluateHandle ( 'document.fonts.ready' ) ;
@@ -81,33 +89,43 @@ async function generateCover() {
8189
8290/* ──────────────────────────────────────────────────────
8391 Stage 2 — Crawl doc pages & print each to PDF
92+ Returns { buffers: Buffer[], pageMap: Map<string, number> }
93+ pageMap maps URL path → starting page index in merged PDF
8494 ────────────────────────────────────────────────────── */
85- async function generateDocs ( ) {
86- console . log ( ' [2/3 ] Generating documentation pages …' ) ;
95+ async function generateDocs ( coverPageCount ) {
96+ console . log ( ' [2/4 ] Generating documentation pages …' ) ;
8797
8898 const cssContent = readFileSync ( STYLE_CSS , 'utf-8' ) ;
8999 const page = await browser . newPage ( ) ;
90- const pagePDFs = [ ] ;
100+ const buffers = [ ] ;
101+ const pageMap = new Map ( ) ; // path → page index in final PDF
91102 let url = `${ BASE_URL } ${ ENTRY_PATH } ` ;
92103 const visited = new Set ( ) ;
104+ let cumulativePages = coverPageCount ;
93105
94106 while ( url ) {
95- // Normalise to avoid revisiting with trailing slashes etc.
96107 const normalized = url . replace ( / \/ + $ / , '' ) ;
97108 if ( visited . has ( normalized ) ) break ;
98109 visited . add ( normalized ) ;
99110
100- const pageNum = visited . size ;
101- process . stdout . write ( ` [${ pageNum } ] ${ normalized . replace ( BASE_URL , '' ) } … ` ) ;
111+ const docNum = visited . size ;
112+ const path = normalized . replace ( BASE_URL , '' ) ;
113+ process . stdout . write ( ` [${ docNum } ] ${ path } … ` ) ;
102114
103115 await page . goto ( url , { waitUntil : 'networkidle0' , timeout : 30_000 } ) ;
104116
117+ // Capture next URL BEFORE modifying the DOM
118+ const nextUrl = await page . evaluate ( ( ) => {
119+ const next = document . querySelector ( 'a.pagination-nav__link--next' ) ;
120+ return next ?. href ?? null ;
121+ } ) ;
122+
105123 // Inject our PDF stylesheet
106124 await page . addStyleTag ( { content : cssContent } ) ;
107125
108- // Remove chrome elements from the DOM entirely
126+ // Remove chrome elements from the DOM
109127 await page . evaluate ( ( ) => {
110- const selectors = [
128+ const remove = [
111129 '.navbar' , '.nav-root' , 'nav.navbar' ,
112130 'footer' , '.footer' ,
113131 '.pagination-nav' ,
@@ -120,11 +138,14 @@ async function generateDocs() {
120138 '.theme-doc-version-badge' ,
121139 '.col--3' ,
122140 ] ;
123- for ( const sel of selectors ) {
141+ for ( const sel of remove ) {
124142 document . querySelectorAll ( sel ) . forEach ( ( el ) => el . remove ( ) ) ;
125143 }
126144 } ) ;
127145
146+ // Record page mapping BEFORE printing
147+ pageMap . set ( path , cumulativePages ) ;
148+
128149 // Print this page to PDF
129150 const pdfBuf = await page . pdf ( {
130151 format : 'A4' ,
@@ -135,26 +156,27 @@ async function generateDocs() {
135156 margin : { top : '25mm' , bottom : '20mm' , left : '15mm' , right : '15mm' } ,
136157 } ) ;
137158
138- pagePDFs . push ( pdfBuf ) ;
139- console . log ( '✓' ) ;
159+ // Count how many PDF pages this doc produced
160+ const tmpDoc = await PDFDocument . load ( pdfBuf ) ;
161+ const pageCount = tmpDoc . getPageCount ( ) ;
162+ cumulativePages += pageCount ;
140163
141- // Find the "next" pagination link
142- url = await page . evaluate ( ( ) => {
143- const next = document . querySelector ( 'a.pagination-nav__link--next' ) ;
144- return next ? next . href : null ;
145- } ) ;
164+ buffers . push ( pdfBuf ) ;
165+ console . log ( `✓ (${ pageCount } p)` ) ;
166+
167+ url = nextUrl ;
146168 }
147169
148170 await page . close ( ) ;
149- console . log ( ` ${ pagePDFs . length } pages generated ✓` ) ;
150- return pagePDFs ;
171+ console . log ( ` ${ buffers . length } sections generated ✓` ) ;
172+ return { buffers , pageMap } ;
151173}
152174
153175/* ──────────────────────────────────────────────────────
154176 Stage 3 — Merge all PDFs (pdf-lib)
155177 ────────────────────────────────────────────────────── */
156178async function mergePDFs ( coverBuf , docBuffers ) {
157- console . log ( ' [3/3 ] Merging PDFs …' ) ;
179+ console . log ( ' [3/4 ] Merging PDFs …' ) ;
158180
159181 const merged = await PDFDocument . create ( ) ;
160182
@@ -178,11 +200,91 @@ async function mergePDFs(coverBuf, docBuffers) {
178200 merged . setProducer ( 'pdf-lib + Puppeteer' ) ;
179201 merged . setCreationDate ( new Date ( ) ) ;
180202
181- const mergedBytes = await merged . save ( ) ;
182- writeFileSync ( OUTPUT_PDF , mergedBytes ) ;
203+ return merged ;
204+ }
183205
184- const sizeMB = ( mergedBytes . length / 1_048_576 ) . toFixed ( 2 ) ;
185- console . log ( ` Merged PDF saved (${ sizeMB } MB) ✓` ) ;
206+ /* ──────────────────────────────────────────────────────
207+ Stage 4 — Rewrite internal links as in-PDF GoTo
208+ ────────────────────────────────────────────────────── */
209+
/**
 * Build the lookup table used to resolve link targets:
 * URL path → page index in the merged PDF.
 *
 * Every path is stored with trailing slashes stripped. A path ending in
 * `/intro` is additionally registered under its parent path (for example
 * `/a/b/intro` also answers for `/a/b`). Such derived aliases never replace a
 * page that was crawled at that exact path: real pages always win, whatever
 * the iteration order of `pageMap`.
 *
 * @param {Map<string, number>} pageMap - URL path → starting page index.
 * @returns {Map<string, number>} Normalised path (or alias) → page index.
 */
function buildPathLookup(pageMap) {
  const lookup = new Map();
  const introAliases = new Map();

  for (const [path, pageIdx] of pageMap) {
    const clean = path.replace(/\/+$/, '');
    lookup.set(clean, pageIdx);
    if (clean.endsWith('/intro')) {
      introAliases.set(clean.slice(0, -'/intro'.length), pageIdx);
    }
  }

  // Aliases are applied last so they can only fill gaps, never shadow a
  // path that has its own page.
  for (const [alias, pageIdx] of introAliases) {
    if (!lookup.has(alias)) lookup.set(alias, pageIdx);
  }

  return lookup;
}
222+
/**
 * Read the URI action attached to a PDF annotation.
 *
 * The action dictionary (`/A`), its type (`/S`) and its `/URI` value are each
 * resolved through `context.lookup`, so entries stored as indirect references
 * are handled the same way as inline values.
 *
 * @param {PDFDict} annot - Annotation dictionary to inspect.
 * @param {object} context - The document's PDF context, used to dereference
 *   indirect objects.
 * @returns {{ uri: string, aDict: PDFDict } | null} The decoded URI together
 *   with the action dictionary that holds it, or `null` when the annotation
 *   has no action, the action is not of type `/URI`, or the URI is not a
 *   string object.
 */
function extractAnnotUri(annot, context) {
  const actionEntry = annot.get(PDFName.of('A'));
  if (!actionEntry) return null;

  const aDict = context.lookup(actionEntry);
  if (!(aDict instanceof PDFDict)) return null;

  const typeEntry = aDict.get(PDFName.of('S'));
  const actionType = typeEntry ? context.lookup(typeEntry) : undefined;
  if (!actionType || actionType.toString() !== '/URI') return null;

  const uriEntry = aDict.get(PDFName.of('URI'));
  const uriObj = uriEntry ? context.lookup(uriEntry) : undefined;
  if (uriObj instanceof PDFString || uriObj instanceof PDFHexString) {
    return { uri: uriObj.decodeText(), aDict };
  }
  return null;
}
240+
/**
 * Resolve a link URI to an internal documentation path.
 *
 * The URI is matched by prefix against each origin in order; the first match
 * is removed, then the query string and fragment are dropped and trailing
 * slashes are stripped.
 *
 * @param {string} uri - Absolute URI taken from a link annotation.
 * @param {string[]} [origins=[BASE_URL, PROD_URL]] - URL prefixes that count
 *   as "this documentation site".
 * @returns {string | null} The path (e.g. `/turing/getting-started/intro`),
 *   or `null` when the URI matches no origin or has nothing after it. A URI
 *   that is only the origin plus slashes, a query or a fragment yields `''`.
 */
function resolveInternalPath(uri, origins = [BASE_URL, PROD_URL]) {
  const origin = origins.find((candidate) => uri.startsWith(candidate));
  if (origin === undefined) return null;

  const path = uri.slice(origin.length);
  if (!path) return null;

  // Neither the query string nor the fragment is part of the lookup key.
  const cut = path.search(/[?#]/);
  const bare = cut >= 0 ? path.slice(0, cut) : path;
  return bare.replace(/\/+$/, '');
}
250+
/**
 * Stage 4: turn links that point at crawled documentation pages into
 * in-document navigation.
 *
 * Walks the annotations of every page in `merged`. When an annotation carries
 * a URI action whose path is found in the lookup built from `pageMap`, the
 * action dictionary is changed in place: `/S` becomes `/GoTo`, `/URI` is
 * removed and `/D` is set to `[targetPageRef, /Fit]`. Everything else is left
 * untouched. The number of rewritten links is logged at the end.
 *
 * @param {PDFDocument} merged - The merged document; modified in place.
 * @param {Map<string, number>} pageMap - URL path → starting page index.
 * @returns {Promise<void>}
 */
async function rewriteInternalLinks(merged, pageMap) {
  console.log(' [4/4] Rewriting internal links …');

  const { context } = merged;
  const pathToPage = buildPathLookup(pageMap);
  const annotsKey = PDFName.of('Annots');

  // Rewrites one annotation if it targets a known page; reports whether it did.
  const retarget = (annotEntry) => {
    const annot = context.lookup(annotEntry);
    if (!(annot instanceof PDFDict)) return false;

    const found = extractAnnotUri(annot, context);
    if (!found) return false;

    const docPath = resolveInternalPath(found.uri);
    if (!docPath) return false;

    const targetIdx = pathToPage.get(docPath);
    if (targetIdx === undefined) return false;

    // Rewrite URI action → GoTo action
    const { aDict } = found;
    const targetRef = merged.getPage(targetIdx).ref;
    aDict.set(PDFName.of('S'), PDFName.of('GoTo'));
    aDict.delete(PDFName.of('URI'));
    aDict.set(PDFName.of('D'), context.obj([targetRef, PDFName.of('Fit')]));
    return true;
  };

  let rewritten = 0;
  const pageTotal = merged.getPageCount();
  for (let pageIdx = 0; pageIdx < pageTotal; pageIdx += 1) {
    const annotsEntry = merged.getPage(pageIdx).node.get(annotsKey);
    if (!annotsEntry) continue;

    const annots = context.lookup(annotsEntry);
    if (!(annots instanceof PDFArray)) continue;

    for (let k = 0; k < annots.size(); k += 1) {
      if (retarget(annots.get(k))) rewritten += 1;
    }
  }

  console.log(` ${rewritten} links rewritten as in-PDF navigation ✓`);
}
187289
188290/* ──────────────────────────────────────────────────────
@@ -197,14 +299,29 @@ async function main() {
197299
198300 await launchBrowser ( ) ;
199301
200- const coverBuf = await generateCover ( ) ;
201- const docBufs = await generateDocs ( ) ;
202- await mergePDFs ( coverBuf , docBufs ) ;
302+ // Stage 1 — Cover
303+ const coverBuf = await generateCover ( ) ;
304+ const coverDoc = await PDFDocument . load ( coverBuf ) ;
305+ const coverPageCount = coverDoc . getPageCount ( ) ;
306+
307+ // Stage 2 — Crawl & print docs
308+ const { buffers, pageMap } = await generateDocs ( coverPageCount ) ;
309+
310+ // Stage 3 — Merge
311+ const merged = await mergePDFs ( coverBuf , buffers ) ;
312+
313+ // Stage 4 — Rewrite internal links
314+ await rewriteInternalLinks ( merged , pageMap ) ;
315+
316+ // Save final PDF
317+ const mergedBytes = await merged . save ( ) ;
318+ writeFileSync ( OUTPUT_PDF , mergedBytes ) ;
203319
204320 await browser . close ( ) ;
205321
322+ const sizeMB = ( mergedBytes . length / 1_048_576 ) . toFixed ( 2 ) ;
206323 console . log ( ) ;
207- console . log ( ` ✅ ${ OUTPUT_PDF } ` ) ;
324+ console . log ( ` ✅ ${ OUTPUT_PDF } ( ${ sizeMB } MB) ` ) ;
208325 console . log ( ) ;
209326}
210327
0 commit comments