Skip to content

Commit 4d572f1

Browse files
committed
Rewrite internal PDF links and embed favicon
Embed the site favicon into the generated cover and update the PDF generation flow to rewrite internal documentation links as in-PDF GoTo actions. Changes: inject the favicon as a data URI into pdf-cover.html, adjust cover styles/branding, add PROD_URL, track per-document page offsets while crawling, have mergePDFs return the merged PDFDocument (saving now happens in main, after link rewriting), and rewrite URI annotations to GoTo destinations using pdf-lib internals (buildPathLookup, extractAnnotUri, resolveInternalPath, rewriteInternalLinks). Also update stage logging/counts and remove an unused sample image (static/img/sample-app.png).
1 parent a1f8ec6 commit 4d572f1

3 files changed

Lines changed: 157 additions & 57 deletions

File tree

scripts/gen-pdf.mjs

Lines changed: 147 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
* 1. Render the branded cover (HTML → PDF)
77
* 2. Crawl every documentation page following "next" links, printing each to PDF
88
* 3. Merge everything into a single PDF with pdf-lib
9+
* 4. Rewrite internal links as in-PDF GoTo hyperlinks
910
*
1011
* Prerequisites:
1112
* - Docusaurus serve running on localhost:3000
@@ -15,16 +16,18 @@
1516
import { readFileSync, writeFileSync } from 'node:fs';
1617
import { fileURLToPath } from 'node:url';
1718
import { dirname, join } from 'node:path';
18-
import { PDFDocument } from 'pdf-lib';
19+
import { PDFDocument, PDFName, PDFDict, PDFString, PDFHexString, PDFArray } from 'pdf-lib';
1920

2021
const __filename = fileURLToPath(import.meta.url);
2122
const __dirname = dirname(__filename);
2223
const ROOT = join(__dirname, '..');
2324

2425
const COVER_HTML = join(__dirname, 'pdf-cover.html');
2526
const STYLE_CSS = join(__dirname, 'pdf-style.css');
27+
const FAVICON = join(ROOT, 'static', 'img', 'favicon.png');
2628
const OUTPUT_PDF = join(ROOT, 'turing-es-2026.1-documentation.pdf');
2729
const BASE_URL = process.env.PDF_BASE_URL || 'http://localhost:3000';
30+
const PROD_URL = process.env.PDF_PROD_URL || 'https://docs.viglet.com';
2831
const ENTRY_PATH = '/turing/getting-started/intro';
2932

3033
const HEADER_HTML = [
@@ -60,10 +63,15 @@ async function launchBrowser() {
6063
Stage 1 — Cover (HTML → PDF)
6164
────────────────────────────────────────────────────── */
6265
async function generateCover() {
63-
console.log(' [1/3] Rendering cover pages …');
66+
console.log(' [1/4] Rendering cover pages …');
6467

6568
const page = await browser.newPage();
66-
const html = readFileSync(COVER_HTML, 'utf-8');
69+
70+
// Embed favicon as base64 data URI in the cover HTML
71+
const faviconB64 = readFileSync(FAVICON).toString('base64');
72+
const faviconDataUri = `data:image/png;base64,${faviconB64}`;
73+
const html = readFileSync(COVER_HTML, 'utf-8')
74+
.replaceAll('FAVICON_DATA_URI', faviconDataUri);
6775

6876
await page.setContent(html, { waitUntil: 'networkidle0' });
6977
await page.evaluateHandle('document.fonts.ready');
@@ -81,33 +89,43 @@ async function generateCover() {
8189

8290
/* ──────────────────────────────────────────────────────
8391
Stage 2 — Crawl doc pages & print each to PDF
92+
Returns { buffers: Buffer[], pageMap: Map<string, number> }
93+
pageMap maps URL path → starting page index in merged PDF
8494
────────────────────────────────────────────────────── */
85-
async function generateDocs() {
86-
console.log(' [2/3] Generating documentation pages …');
95+
async function generateDocs(coverPageCount) {
96+
console.log(' [2/4] Generating documentation pages …');
8797

8898
const cssContent = readFileSync(STYLE_CSS, 'utf-8');
8999
const page = await browser.newPage();
90-
const pagePDFs = [];
100+
const buffers = [];
101+
const pageMap = new Map(); // path → page index in final PDF
91102
let url = `${BASE_URL}${ENTRY_PATH}`;
92103
const visited = new Set();
104+
let cumulativePages = coverPageCount;
93105

94106
while (url) {
95-
// Normalise to avoid revisiting with trailing slashes etc.
96107
const normalized = url.replace(/\/+$/, '');
97108
if (visited.has(normalized)) break;
98109
visited.add(normalized);
99110

100-
const pageNum = visited.size;
101-
process.stdout.write(` [${pageNum}] ${normalized.replace(BASE_URL, '')} … `);
111+
const docNum = visited.size;
112+
const path = normalized.replace(BASE_URL, '');
113+
process.stdout.write(` [${docNum}] ${path} … `);
102114

103115
await page.goto(url, { waitUntil: 'networkidle0', timeout: 30_000 });
104116

117+
// Capture next URL BEFORE modifying the DOM
118+
const nextUrl = await page.evaluate(() => {
119+
const next = document.querySelector('a.pagination-nav__link--next');
120+
return next?.href ?? null;
121+
});
122+
105123
// Inject our PDF stylesheet
106124
await page.addStyleTag({ content: cssContent });
107125

108-
// Remove chrome elements from the DOM entirely
126+
// Remove chrome elements from the DOM
109127
await page.evaluate(() => {
110-
const selectors = [
128+
const remove = [
111129
'.navbar', '.nav-root', 'nav.navbar',
112130
'footer', '.footer',
113131
'.pagination-nav',
@@ -120,11 +138,14 @@ async function generateDocs() {
120138
'.theme-doc-version-badge',
121139
'.col--3',
122140
];
123-
for (const sel of selectors) {
141+
for (const sel of remove) {
124142
document.querySelectorAll(sel).forEach((el) => el.remove());
125143
}
126144
});
127145

146+
// Record page mapping BEFORE printing
147+
pageMap.set(path, cumulativePages);
148+
128149
// Print this page to PDF
129150
const pdfBuf = await page.pdf({
130151
format: 'A4',
@@ -135,26 +156,27 @@ async function generateDocs() {
135156
margin: { top: '25mm', bottom: '20mm', left: '15mm', right: '15mm' },
136157
});
137158

138-
pagePDFs.push(pdfBuf);
139-
console.log('✓');
159+
// Count how many PDF pages this doc produced
160+
const tmpDoc = await PDFDocument.load(pdfBuf);
161+
const pageCount = tmpDoc.getPageCount();
162+
cumulativePages += pageCount;
140163

141-
// Find the "next" pagination link
142-
url = await page.evaluate(() => {
143-
const next = document.querySelector('a.pagination-nav__link--next');
144-
return next ? next.href : null;
145-
});
164+
buffers.push(pdfBuf);
165+
console.log(`✓ (${pageCount}p)`);
166+
167+
url = nextUrl;
146168
}
147169

148170
await page.close();
149-
console.log(` ${pagePDFs.length} pages generated ✓`);
150-
return pagePDFs;
171+
console.log(` ${buffers.length} sections generated ✓`);
172+
return { buffers, pageMap };
151173
}
152174

153175
/* ──────────────────────────────────────────────────────
154176
Stage 3 — Merge all PDFs (pdf-lib)
155177
────────────────────────────────────────────────────── */
156178
async function mergePDFs(coverBuf, docBuffers) {
157-
console.log(' [3/3] Merging PDFs …');
179+
console.log(' [3/4] Merging PDFs …');
158180

159181
const merged = await PDFDocument.create();
160182

@@ -178,11 +200,91 @@ async function mergePDFs(coverBuf, docBuffers) {
178200
merged.setProducer('pdf-lib + Puppeteer');
179201
merged.setCreationDate(new Date());
180202

181-
const mergedBytes = await merged.save();
182-
writeFileSync(OUTPUT_PDF, mergedBytes);
203+
return merged;
204+
}
183205

184-
const sizeMB = (mergedBytes.length / 1_048_576).toFixed(2);
185-
console.log(` Merged PDF saved (${sizeMB} MB) ✓`);
206+
/* ──────────────────────────────────────────────────────
207+
Stage 4 — Rewrite internal links as in-PDF GoTo
208+
────────────────────────────────────────────────────── */
209+
210+
/**
 * Build the lookup table used to resolve link targets:
 * URL path → page index in the merged PDF.
 *
 * Every key of `pageMap` is registered with its trailing slashes stripped.
 * A path ending in `/intro` is additionally registered under its parent
 * path, so a link to the section root lands on the section's intro page.
 *
 * Crawled paths always take priority over derived aliases: an alias is only
 * added when no crawled page already owns that path, whatever the crawl
 * order was. (Writing aliases unconditionally would let `/docs/intro`
 * hijack links that point at a real `/docs` page.)
 *
 * @param {Map<string, number>} pageMap - crawled URL path → page index.
 * @returns {Map<string, number>} normalized path (plus `/intro` aliases) → page index.
 */
function buildPathLookup(pageMap) {
  const lookup = new Map();
  const aliases = new Map();

  for (const [path, pageIdx] of pageMap) {
    const clean = path.replace(/\/+$/, '');
    lookup.set(clean, pageIdx);
    if (clean.endsWith('/intro')) {
      aliases.set(clean.slice(0, -'/intro'.length), pageIdx);
    }
  }

  // Aliases only fill gaps; they never replace a page that was really crawled.
  for (const [aliasPath, pageIdx] of aliases) {
    if (!lookup.has(aliasPath)) {
      lookup.set(aliasPath, pageIdx);
    }
  }

  return lookup;
}
222+
223+
/**
 * Read the target of a URI link annotation.
 *
 * Resolves the annotation's `/A` entry through `context.lookup`, checks that
 * the resulting action dictionary has `/S` equal to `/URI`, and decodes its
 * `/URI` entry when that entry is a PDFString or PDFHexString.
 *
 * @param {PDFDict} annot - annotation dictionary to inspect.
 * @param {object} context - pdf-lib context used to resolve the `/A` entry.
 * @returns {{ uri: string, aDict: PDFDict } | null} the decoded URI together
 *   with the action dictionary it came from, or null when the annotation does
 *   not carry a string-valued URI action.
 */
function extractAnnotUri(annot, context) {
  const actionEntry = annot.get(PDFName.of('A'));
  if (!actionEntry) {
    return null;
  }

  const aDict = context.lookup(actionEntry);
  if (!(aDict instanceof PDFDict)) {
    return null;
  }

  // Only URI actions are of interest; any other action subtype is ignored.
  const actionType = aDict.get(PDFName.of('S'));
  const isUriAction = Boolean(actionType) && actionType.toString() === '/URI';
  if (!isUriAction) {
    return null;
  }

  const uriValue = aDict.get(PDFName.of('URI'));
  const isTextValue = uriValue instanceof PDFString || uriValue instanceof PDFHexString;
  return isTextValue ? { uri: uriValue.decodeText(), aDict } : null;
}
240+
241+
/**
 * Resolve a link URI to an internal documentation path.
 *
 * The URI is matched against the module-level BASE_URL first and PROD_URL
 * second. A base only matches when the URI continues with `/`, `?` or `#`
 * right after it, so look-alike hosts or ports that merely share the prefix
 * (`https://docs.viglet.com.evil.example`, `http://localhost:30001`) are not
 * mistaken for internal links. Trailing slashes on a configured base are
 * ignored.
 *
 * The returned path has its query string, fragment and trailing slashes
 * removed so it can be used directly as a key of the path lookup table.
 *
 * @param {string} uri - absolute URI taken from a link annotation.
 * @returns {string | null} the normalized path (an empty string for the site
 *   root), or null when the URI is not under BASE_URL or PROD_URL, or is
 *   exactly equal to one of them.
 */
function resolveInternalPath(uri) {
  for (const base of [BASE_URL, PROD_URL]) {
    const prefix = base.replace(/\/+$/, '');
    if (!uri.startsWith(prefix)) continue;

    const rest = uri.slice(prefix.length);
    // Nothing after the base, or the "base" is only a prefix of a longer
    // host/port: this is not a link into the documentation tree.
    if (rest === '' || !'/?#'.includes(rest[0])) continue;

    const [pathPart] = rest.split(/[?#]/, 1);
    return pathPart.replace(/\/+$/, '');
  }
  return null;
}
250+
251+
/**
 * Stage 4: turn links that point at crawled documentation pages into
 * in-PDF navigation.
 *
 * Walks the `/Annots` array of every page in `merged`. For each annotation
 * whose URI action resolves (via resolveInternalPath and the table from
 * buildPathLookup) to a known page, the action dictionary is mutated in
 * place: `/S` becomes `/GoTo`, `/URI` is removed, and `/D` is set to
 * `[targetPageRef /Fit]`. Annotations that do not resolve are left untouched.
 * Logs the number of rewritten links when done.
 *
 * @param {PDFDocument} merged - merged document; modified in place.
 * @param {Map<string, number>} pageMap - URL path → page index in `merged`.
 * @returns {Promise<void>}
 */
async function rewriteInternalLinks(merged, pageMap) {
  console.log(' [4/4] Rewriting internal links …');

  const { context } = merged;
  const pathToPage = buildPathLookup(pageMap);
  let rewritten = 0;

  for (let pageIdx = 0; pageIdx < merged.getPageCount(); pageIdx += 1) {
    const annotsEntry = merged.getPage(pageIdx).node.get(PDFName.of('Annots'));
    const annots = annotsEntry ? context.lookup(annotsEntry) : null;
    if (!(annots instanceof PDFArray)) {
      continue;
    }

    for (let annotIdx = 0; annotIdx < annots.size(); annotIdx += 1) {
      const annot = context.lookup(annots.get(annotIdx));
      const link = annot instanceof PDFDict ? extractAnnotUri(annot, context) : null;
      if (!link) {
        continue;
      }

      // External links and links to pages that were not crawled stay as-is.
      const pathOnly = resolveInternalPath(link.uri);
      const targetIdx = pathOnly ? pathToPage.get(pathOnly) : undefined;
      if (targetIdx === undefined) {
        continue;
      }

      // Swap the URI action for a GoTo action aimed at the target page.
      const { aDict } = link;
      const targetRef = merged.getPage(targetIdx).ref;
      aDict.set(PDFName.of('S'), PDFName.of('GoTo'));
      aDict.delete(PDFName.of('URI'));
      aDict.set(PDFName.of('D'), context.obj([targetRef, PDFName.of('Fit')]));
      rewritten += 1;
    }
  }

  console.log(` ${rewritten} links rewritten as in-PDF navigation ✓`);
}
187289

188290
/* ──────────────────────────────────────────────────────
@@ -197,14 +299,29 @@ async function main() {
197299

198300
await launchBrowser();
199301

200-
const coverBuf = await generateCover();
201-
const docBufs = await generateDocs();
202-
await mergePDFs(coverBuf, docBufs);
302+
// Stage 1 — Cover
303+
const coverBuf = await generateCover();
304+
const coverDoc = await PDFDocument.load(coverBuf);
305+
const coverPageCount = coverDoc.getPageCount();
306+
307+
// Stage 2 — Crawl & print docs
308+
const { buffers, pageMap } = await generateDocs(coverPageCount);
309+
310+
// Stage 3 — Merge
311+
const merged = await mergePDFs(coverBuf, buffers);
312+
313+
// Stage 4 — Rewrite internal links
314+
await rewriteInternalLinks(merged, pageMap);
315+
316+
// Save final PDF
317+
const mergedBytes = await merged.save();
318+
writeFileSync(OUTPUT_PDF, mergedBytes);
203319

204320
await browser.close();
205321

322+
const sizeMB = (mergedBytes.length / 1_048_576).toFixed(2);
206323
console.log();
207-
console.log(` ✅ ${OUTPUT_PDF}`);
324+
console.log(` ✅ ${OUTPUT_PDF} (${sizeMB} MB)`);
208325
console.log();
209326
}
210327

scripts/pdf-cover.html

Lines changed: 10 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -202,22 +202,16 @@
202202
gap: 4mm;
203203
}
204204

205-
.product-dot {
206-
width: 10px;
207-
height: 10px;
208-
border-radius: 2px;
209-
display: inline-block;
205+
.brand-logo {
206+
width: 24px;
207+
height: 24px;
210208
}
211-
.dot-turing { background: #4169E1; }
212-
.dot-shio { background: #FF6347; }
213-
.dot-dumont { background: #006400; }
214209

215210
.brand-name {
216211
font-size: 11pt;
217212
font-weight: 700;
218-
color: #64748b;
213+
color: #94a3b8;
219214
letter-spacing: 0.08em;
220-
margin-left: 2mm;
221215
}
222216
.brand-url {
223217
font-size: 9pt;
@@ -421,14 +415,9 @@
421415
align-items: center;
422416
gap: 3mm;
423417
}
424-
.inner-brand-squares {
425-
display: flex;
426-
gap: 1.5px;
427-
}
428-
.inner-brand-sq {
429-
width: 8px;
430-
height: 8px;
431-
border-radius: 1.5px;
418+
.inner-brand-logo {
419+
width: 28px;
420+
height: 28px;
432421
}
433422
.inner-brand-text {
434423
font-size: 12pt;
@@ -657,10 +646,8 @@
657646

658647
<!-- Bottom branding -->
659648
<div class="bottom-brand">
660-
<span class="product-dot dot-dumont"></span>
661-
<span class="product-dot dot-shio"></span>
662-
<span class="product-dot dot-turing"></span>
663-
<span class="brand-name">VIGLET</span>
649+
<img class="brand-logo" src="FAVICON_DATA_URI" alt="Viglet" />
650+
<span class="brand-name">viglet</span>
664651
<span class="brand-url">viglet.com</span>
665652
</div>
666653
</div>
@@ -738,11 +725,7 @@
738725
<!-- Bottom -->
739726
<div class="inner-bottom">
740727
<div class="inner-brand">
741-
<div class="inner-brand-squares">
742-
<div class="inner-brand-sq" style="background:#006400;"></div>
743-
<div class="inner-brand-sq" style="background:#FF6347;"></div>
744-
<div class="inner-brand-sq" style="background:#4169E1;"></div>
745-
</div>
728+
<img class="inner-brand-logo" src="FAVICON_DATA_URI" alt="Viglet" />
746729
<span class="inner-brand-text">viglet</span>
747730
</div>
748731
<span class="inner-url">docs.viglet.com</span>

static/img/sample-app.png

-190 KB
Binary file not shown.

0 commit comments

Comments
 (0)