-
Notifications
You must be signed in to change notification settings - Fork 18
Expand file tree
/
Copy pathbuild_pdf_book.py
More file actions
96 lines (78 loc) · 2.65 KB
/
build_pdf_book.py
File metadata and controls
96 lines (78 loc) · 2.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
"""
This script is used to build the pdf book from DeepChem Tutorials.
Requirements:
- pdfunite
- pdfkit
- mdpdf
Example Usage:
- Run the script "fetch_tutorials.py" // It will fetch all the tutorials.
- Run the script "build_pdf_book.py"
- It may report errors, mostly due to the type of graphics used in some tutorials
which do not compile properly; remove those tutorials from the website-render-order
or fix them, and run this script again.
NOTE:
- NO FILES OR DIRECTORIES HAVE TO BE CREATED MANUALLY. The script will create the required directories and files.
- Run scripts in the Top-Level folder.
"""
import os
import subprocess
from typing import List

import pandas as pd
import pdfkit

from utils import numeric_sorter
# Directory whose entries are listed below and later read as CSV files.
INFO_PATH = "./website-render-order/"
# Directory the HTML inputs are read from during conversion.
DATA_PATH = "./html-notebooks/"
# Directory the generated per-tutorial PDFs are written to.
PDF_PATH = "./storage/"

# Lexicographically sorted directory listing, then re-ordered by the project
# helper. NOTE(review): the functions below rebuild each file name with
# "-".join(entry), so numeric_sorter presumably returns the names split on
# "-" -- confirm against utils.numeric_sorter.
files = sorted(os.listdir(INFO_PATH))
files_list = numeric_sorter(files)
def html_to_pdf():
    """
    Converts HTML files to PDF files.

    For every entry of `files_list`, the corresponding CSV file in
    `INFO_PATH` is read and each value of its "File Name" column is
    converted from an HTML file in `DATA_PATH` to a PDF file in `PDF_PATH`.

    Conversion is best-effort: a file that fails to convert (for example
    because of an unknown kind of graphic) is reported on stdout and skipped,
    so a single failing tutorial does not abort the whole run. No exception
    derived from `Exception` propagates out of the per-file conversion.
    """
    for i in files_list:
        # NOTE(review): each entry is presumably the "-"-separated parts of a
        # CSV file name; joining them restores the name -- confirm against
        # utils.numeric_sorter.
        chapter = pd.read_csv(INFO_PATH + "-".join(i))
        for j in chapter["File Name"]:
            print(i, j)
            try:
                # j[:-5] drops the last five characters and keeps the dot in
                # front of them (assumes names end in "ipynb" -- TODO confirm),
                # so appending "html"/"pdf" swaps the extension.
                pdfkit.from_file(DATA_PATH + j[:-5] + "html",
                                 PDF_PATH + j[:-5] + "pdf")
            except Exception as err:
                # Catch Exception rather than using a bare except so that
                # Ctrl-C / SystemExit still stop the script, and report the
                # failure instead of hiding it.
                print("Failed to convert", j, "-", err)
def merge_pdf():
    """Merges the compiled PDFs into ``merged.pdf`` in the working directory.

    The PDF paths are collected in the order given by `files_list` and, within
    each CSV file, in the order of its "File Name" column. They are then
    passed to the external ``pdfunite`` program.

    Raises
    ------
    FileNotFoundError
      If the ``pdfunite`` executable cannot be found.
    """
    pdf_paths = []
    for i in files_list:
        # NOTE(review): each entry is presumably the "-"-separated parts of a
        # CSV file name -- confirm against utils.numeric_sorter.
        chapter = pd.read_csv(INFO_PATH + "-".join(i))
        for j in chapter["File Name"]:
            print(i, j)
            # Same name mangling as the conversion step: replace the last
            # five characters of the name with "pdf".
            pdf_paths.append(PDF_PATH + j[:-5] + "pdf")
    # Pass an argument list (no shell) so that file names containing spaces
    # or shell metacharacters reach pdfunite unchanged.
    result = subprocess.run(["pdfunite"] + pdf_paths + ["merged.pdf"],
                            check=False)
    if result.returncode != 0:
        print("pdfunite exited with status", result.returncode)
def merge_pdf_pages(a: List[str]):
    """Merges the PDFs into ``storage/merged.pdf``.

    Usage include adding title page, ending, etc.

    Parameters
    ----------
    a: List[str]
      List of addresses of pdf to merge. In correct order. Each address is
      passed to ``pdfunite`` as a single argument, so it may contain spaces.

    Raises
    ------
    FileNotFoundError
      If the ``pdfunite`` executable cannot be found.
    """
    # Argument list instead of a concatenated shell string: no word splitting
    # or shell interpretation of the given paths.
    command = ["pdfunite"] + list(a) + ["storage/merged.pdf"]
    result = subprocess.run(command, check=False)
    if result.returncode != 0:
        print("pdfunite exited with status", result.returncode)
def compile_information_pages():
    """Converts the title, contents and acknowledgement pages from
    HTML to PDF, so that they can be merged with the content
    PDF using the `merge_pdf_pages` function.
    """
    # (source HTML, destination PDF) pairs, converted in this order.
    pages = (
        ('title.html', 'storage/title.pdf'),
        ('contents.html', 'storage/contents.pdf'),
        ('acknowledgement.html', 'storage/acknowledgement.pdf'),
    )
    for source_html, target_pdf in pages:
        pdfkit.from_file(source_html, target_pdf)
if __name__ == "__main__":
    # Create the output directory without shelling out to `mkdir`;
    # exist_ok=True makes re-running the script a no-op here instead of an
    # error message.
    os.makedirs(PDF_PATH, exist_ok=True)
    html_to_pdf()
    merge_pdf()
    compile_information_pages()
    # merge_pdf() writes the combined tutorials to "merged.pdf" in the working
    # directory; nothing in this script creates "storage/full_pdf.pdf", so the
    # final merge must read the file merge_pdf() actually produced.
    merge_pdf_pages([
        'storage/title.pdf',
        'storage/acknowledgement.pdf',
        'storage/contents.pdf',
        'merged.pdf',
    ])