Skip to content

Commit 0341a97

Browse files
committed
Update
1 parent 1bf2d66 commit 0341a97

2 files changed

Lines changed: 156 additions & 10 deletions

File tree

file/ptools/main.py

Lines changed: 111 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -*- coding: utf-8 -*-
22

3-
import markdown
3+
import markdown_it
44
import platform
55
import importlib.metadata
66
from rich.logging import RichHandler
@@ -9,6 +9,8 @@
99
from rich.text import Text
1010
import logging
1111
import os
12+
from lxml import html as html2
13+
from lxml import etree
1214

1315
__version__: str = "Beta 0.1"
1416

@@ -25,41 +27,140 @@ def main() -> int:
2527
logging.basicConfig(
2628
level=logging.DEBUG, format="%(message)s", handlers=[RichHandler()]
2729
)
30+
2831
logging.info("Starting main process.")
2932
logging.debug(f"Platform: {platform.platform()}")
3033
logging.debug(f"Python version: {platform.python_version()}")
31-
logging.debug(f"markdown module version: {markdown.__version__}")
34+
logging.debug(f"markdown-it module version: {markdown_it.__version__}")
3235
logging.debug(f"rich module version: {importlib.metadata.version('rich')}")
3336
logging.debug(f"PTools module version: {__version__}")
34-
input_paths: set[str] = set(console.input(
35-
"Input your markdown file [bold]path[/bold] " '("|" to split): '
36-
).split("|"))
37+
38+
input_paths: set[str] = set(
39+
console.input(
40+
"Input your markdown file [bold]path[/bold] " '("|" to split): '
41+
).split("|")
42+
)
3743
logging.debug(f"Input paths: {input_paths}")
3844
# Does the file exist? Is it a file?
3945
vinput_paths: list[str] = []
4046
for path in input_paths:
4147
if not os.path.exists(path):
42-
logging.warning(f"File not found: {path}")
48+
logging.warning(f'File not found: "{path}"')
4349
elif not os.path.isfile(path):
4450
logging.warning(f"Path is not a file: {path}")
4551
elif not (path.endswith(".md") or path.endswith(".markdown")):
46-
logging.warning(f"Path is not a markdown file: {path}")
52+
logging.warning(f'Path is not a markdown file: "{path}"')
4753
else:
4854
vinput_paths.append(path)
4955
logging.debug(f"Valid paths: {vinput_paths}")
5056
if not vinput_paths:
5157
logging.error("No valid input files.")
5258
return 1
5359
del input_paths
60+
5461
output_dir: str = console.input(
55-
"Input your output directory [bold]path[/bold]: "
62+
"Input your output directory [bold]path[/bold]: " # ignore
5663
)
5764
if not os.path.exists(output_dir):
58-
logging.error(f"Output directory not found: {output_dir}")
65+
logging.error(f'Output directory not found: "{output_dir}"')
5966
return 1
6067
elif not os.path.isdir(output_dir):
61-
logging.error(f"Output path is not a directory: {output_dir}")
68+
logging.error(f'Output path is not a directory: "{output_dir}"')
6269
return 1
70+
logging.debug(f'Output directory: "{output_dir}"')
71+
72+
template: str = console.input(
73+
"Input your HTML template file [bold]path[/bold] "
74+
"(optional, press Enter to skip): "
75+
)
76+
if not os.path.exists(template):
77+
logging.error(f'Template file not found: "{template}"')
78+
template = ""
79+
elif not os.path.isfile(template):
80+
logging.error(f'Template path is not a file: "{template}"')
81+
template = ""
82+
elif not (template.endswith(".html") or template.endswith(".htm")):
83+
logging.error(f'Template path is not a HTML file: "{template}"')
84+
template = ""
85+
if template:
86+
logging.debug(f'Template file: "{template}"')
87+
with open(template, "r", encoding="utf-8") as f:
88+
template_content: str = f.read()
89+
90+
logging.info("Strarting markdown to HTML conversion.")
91+
md = markdown_it.MarkdownIt("gfm-like", {"typographer": True})
92+
md.enable(["replacements", "smartquotes"])
93+
for path in vinput_paths:
94+
with open(path, "r", encoding="utf-8") as f:
95+
content: str = f.read()
96+
html: str = md.render(content)
97+
output_path: str = os.path.join(
98+
output_dir, os.path.basename(path).replace(".md", ".html")
99+
)
100+
if template:
101+
title = html2.fromstring(html).xpath(".//h1")
102+
title = title[0].text_content() if title else "Untitled"
103+
template_content = template_content.replace("%%title%%", title)
104+
html = template_content.replace("%%content%%", html)
105+
with open(output_path, "w", encoding="utf-8") as f:
106+
f.write(html)
107+
# pretty print
108+
pretty_input: str = console.input(
109+
"Is it necessary to format the output HTML file?(Y/N): "
110+
)
111+
if pretty_input.lower() in ["y", "yes"]:
112+
with open(output_path, "r", encoding="utf-8") as f:
113+
raw_html = f.read()
114+
115+
def pretty_print_html(html_str: str) -> str:
    """Pretty-print an HTML string, keeping the DOCTYPE when one is present.

    The input is first parsed as a complete document; the serialized root
    element is returned, prefixed by the document's DOCTYPE (if any) on its
    own line. If that attempt raises, the input is re-parsed as a list of
    fragments and each fragment is serialized in turn. If the fragment
    attempt raises as well, the input is returned unchanged.

    Args:
        html_str: HTML markup to format.

    Returns:
        The formatted markup, or ``html_str`` itself when both parsing
        attempts fail.
    """
    from io import StringIO

    def _serialize(node) -> str:
        # One place for the serialization options shared by both attempts.
        return etree.tostring(
            node,
            encoding="unicode",
            pretty_print=True,
            method="html",
        )

    # First attempt: treat the input as a whole document.
    try:
        # Blank text nodes are kept as parsed (remove_blank_text=False).
        html_parser = etree.HTMLParser(remove_blank_text=False)
        document = etree.parse(StringIO(html_str), html_parser)
        doctype = document.docinfo.doctype or ""
        markup = _serialize(document.getroot())
        # Only the root element is serialized, so the DOCTYPE read from
        # docinfo is re-attached by hand.
        return doctype + "\n" + markup if doctype else markup
    except Exception as document_error:
        # Whole-document parsing failed (e.g. a bare fragment); fall through
        # to fragment mode below.
        logging.warning(f"完整文档解析失败,尝试片段模式: {document_error}")

    # Second attempt: treat the input as a sequence of fragments.
    try:
        pieces = html2.fragments_fromstring(html_str)
        # Plain-string items are passed through untouched; everything else
        # is serialized.
        return "".join(
            piece if isinstance(piece, str) else _serialize(piece)
            for piece in pieces
        )
    except Exception as fragment_error:
        # Last resort: hand back the caller's markup unmodified.
        logging.error(f"片段解析也失败,返回原始内容: {fragment_error}")
        return html_str
158+
159+
pretty_html = pretty_print_html(raw_html)
160+
with open(output_path, "w", encoding="utf-8") as f:
161+
f.write(pretty_html)
162+
console.print(pretty_html)
163+
logging.info(f'Converted "{path}" to "{output_path}". OK!')
63164
logging.info("Finished main process.")
64165
return 0
65166

template.html

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
<!-- -*- coding: utf-8 -*- -->
2+
<!-- 2026.2.12 -->
3+
<!DOCTYPE html>
4+
<html lang="zh-CN">
5+
<head>
6+
<meta charset="UTF-8">
7+
<meta http-equiv="X-UA-Compatible" content="IE=edge">
8+
<meta name="renderer" content="webkit">
9+
<meta name="force-rendering" content="webkit">
10+
<meta name="viewport" content="width=device-width, initial-scale=1">
11+
<meta name="author" content="红蓝灯">
12+
<meta name="keywords" content="红蓝灯, RBL">
13+
<meta http-equiv="Content-Language" content="zh-CN">
14+
<meta name="robots" content="all">
15+
<meta name="description" content="这里是红蓝灯的秘密基地,欢迎你的到来!">
16+
<title>%%title%% - 红蓝灯的秘密基地</title>
17+
<script src="include/prism.js"></script>
18+
<script src="include/script.js"></script>
19+
<link href="include/prism.css" rel="stylesheet">
20+
<link href="include/style.css" rel="stylesheet">
21+
<link href="include/logo.ico" rel="icon">
22+
</head>
23+
<body class="line-numbers">
24+
<nav id="topnav">
25+
<div class="nav-inner">
26+
<a href="index.html">首页</a>
27+
<a href="download.html">下载</a>
28+
<a href="articles.html">文章</a>
29+
<a href="about.html">关于</a>
30+
<a href="https://github.com/CN-RBL" target="_blank">我的GitHub</a>
31+
<a href="https://space.bilibili.com/3546389388724715" target="_blank">我的B站</a>
32+
</div>
33+
</nav>
34+
<div id="nav-spacer" aria-hidden="true"></div>
35+
<div id="main">
36+
%%content%%
37+
</div>
38+
<div id="copyright">
39+
<p class="chinese">© 2026 红蓝灯 保留所有权利才怪</p>
40+
<p class="english">© 2026 RBL All rights reserved? Hardly.</p>
41+
<p class="english" style="text-align: center;">Best view at 1528x740</p>
42+
<p class="english" style="text-align: center;">Powered by PTools</p>
43+
</div>
44+
</body>
45+
</html>

0 commit comments

Comments
 (0)