Update

CN-RBL · CN-RBL · commit 0341a97d47e6 · 2026-02-14T23:03:53.000+08:00
diff --git a/file/ptools/main.py b/file/ptools/main.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-import markdown
+import markdown_it
 import platform
 import importlib.metadata
 from rich.logging import RichHandler
@@ -9,6 +9,8 @@
 from rich.text import Text
 import logging
 import os
+from lxml import html as html2
+from lxml import etree
 
 __version__: str = "Beta 0.1"
 
@@ -25,41 +27,140 @@ def main() -> int:
     logging.basicConfig(
         level=logging.DEBUG, format="%(message)s", handlers=[RichHandler()]
     )
+
     logging.info("Starting main process.")
     logging.debug(f"Platform: {platform.platform()}")
     logging.debug(f"Python version: {platform.python_version()}")
-    logging.debug(f"markdown module version: {markdown.__version__}")
+    logging.debug(f"markdown-it module version: {markdown_it.__version__}")
     logging.debug(f"rich module version: {importlib.metadata.version('rich')}")
     logging.debug(f"PTools module version: {__version__}")
-    input_paths: set[str] = set(console.input(
-        "Input your markdown file [bold]path[/bold] " '("|" to split): '
-    ).split("|"))
+
+    input_paths: set[str] = set(
+        console.input(
+            "Input your markdown file [bold]path[/bold] " '("|" to split): '
+        ).split("|")
+    )
     logging.debug(f"Input paths: {input_paths}")
     # Does the file exist? Is it a file?
     vinput_paths: list[str] = []
     for path in input_paths:
         if not os.path.exists(path):
-            logging.warning(f"File not found: {path}")
+            logging.warning(f'File not found: "{path}"')
         elif not os.path.isfile(path):
             logging.warning(f"Path is not a file: {path}")
         elif not (path.endswith(".md") or path.endswith(".markdown")):
-            logging.warning(f"Path is not a markdown file: {path}")
+            logging.warning(f'Path is not a markdown file: "{path}"')
         else:
             vinput_paths.append(path)
     logging.debug(f"Valid paths: {vinput_paths}")
     if not vinput_paths:
         logging.error("No valid input files.")
         return 1
     del input_paths
+
     output_dir: str = console.input(
-        "Input your output directory [bold]path[/bold]: "
+        "Input your output directory [bold]path[/bold]: "  # ignore
     )
     if not os.path.exists(output_dir):
-        logging.error(f"Output directory not found: {output_dir}")
+        logging.error(f'Output directory not found: "{output_dir}"')
         return 1
     elif not os.path.isdir(output_dir):
-        logging.error(f"Output path is not a directory: {output_dir}")
+        logging.error(f'Output path is not a directory: "{output_dir}"')
         return 1
+    logging.debug(f'Output directory: "{output_dir}"')
+
+    template: str = console.input(
+        "Input your HTML template file [bold]path[/bold] "
+        "(optional, press Enter to skip): "
+    )
+    if not os.path.exists(template):
+        logging.error(f'Template file not found: "{template}"')
+        template = ""
+    elif not os.path.isfile(template):
+        logging.error(f'Template path is not a file: "{template}"')
+        template = ""
+    elif not (template.endswith(".html") or template.endswith(".htm")):
+        logging.error(f'Template path is not a HTML file: "{template}"')
+        template = ""
+    if template:
+        logging.debug(f'Template file: "{template}"')
+        with open(template, "r", encoding="utf-8") as f:
+            template_content: str = f.read()
+
+    logging.info("Strarting markdown to HTML conversion.")
+    md = markdown_it.MarkdownIt("gfm-like", {"typographer": True})
+    md.enable(["replacements", "smartquotes"])
+    for path in vinput_paths:
+        with open(path, "r", encoding="utf-8") as f:
+            content: str = f.read()
+        html: str = md.render(content)
+        output_path: str = os.path.join(
+            output_dir, os.path.basename(path).replace(".md", ".html")
+        )
+        if template:
+            title = html2.fromstring(html).xpath(".//h1")
+            title = title[0].text_content() if title else "Untitled"
+            template_content = template_content.replace("%%title%%", title)
+            html = template_content.replace("%%content%%", html)
+        with open(output_path, "w", encoding="utf-8") as f:
+            f.write(html)
+                # pretty print
+        pretty_input: str = console.input(
+            "Is it necessary to format the output HTML file?(Y/N): "
+        )
+        if pretty_input.lower() in ["y", "yes"]:
+            with open(output_path, "r", encoding="utf-8") as f:
+                raw_html = f.read()
+
+            def pretty_print_html(html_str: str) -> str:
+                """Pretty-print an HTML string, keeping the document structure and DOCTYPE; returns the input unchanged if all parsing fails."""
+                from io import StringIO
+                try:
+                    # Parse as a full document with etree.HTMLParser; the DOCTYPE is read from docinfo below
+                    parser = etree.HTMLParser(remove_blank_text=False)  # keep blank text so it can be formatted
+                    tree = etree.parse(StringIO(html_str), parser)
+                    doctype = tree.docinfo.doctype if tree.docinfo.doctype else ''
+                    root = tree.getroot()
+                    # Serialize the root element with pretty_print enabled
+                    formatted_root = etree.tostring(
+                        root,
+                        encoding='unicode',
+                        pretty_print=True,
+                        method='html'
+                    )
+                    # If a DOCTYPE exists, prepend it (the root serialization above does not include it)
+                    if doctype:
+                        return doctype + '\n' + formatted_root
+                    else:
+                        return formatted_root
+                except Exception as e:
+                    # If parsing as a full document fails (e.g. a bare fragment), fall back to fragment handling
+                    logging.warning(f"完整文档解析失败，尝试片段模式: {e}")
+                    try:
+                        fragments = html2.fragments_fromstring(html_str)
+                        pretty_parts = []
+                        for frag in fragments:
+                            if isinstance(frag, str):
+                                pretty_parts.append(frag)
+                            else:
+                                pretty_parts.append(
+                                    etree.tostring(
+                                        frag,
+                                        encoding='unicode',
+                                        pretty_print=True,
+                                        method='html'
+                                    )
+                                )
+                        return ''.join(pretty_parts)
+                    except Exception as e2:
+                        logging.error(f"片段解析也失败，返回原始内容: {e2}")
+                        return html_str
+
+            pretty_html = pretty_print_html(raw_html)
+            with open(output_path, "w", encoding="utf-8") as f:
+                f.write(pretty_html)
+            console.print(pretty_html)
+        logging.info(f'Converted "{path}" to "{output_path}". OK!')
     logging.info("Finished main process.")
     return 0
 
diff --git a/template.html b/template.html
@@ -0,0 +1,45 @@
+<!-- -*- coding: utf-8 -*- -->
+<!-- 2026.2.12 -->
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+    <meta charset="UTF-8">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="renderer" content="webkit">
+    <meta name="force-rendering" content="webkit">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <meta name="author" content="红蓝灯">
+    <meta name="keywords" content="红蓝灯, RBL">
+    <meta http-equiv="Content-Language" content="zh-CN">
+    <meta name="robots" content="all">
+    <meta name="description" content="这里是红蓝灯的秘密基地，欢迎你的到来！">
+    <title>%%title%% - 红蓝灯的秘密基地</title>
+    <script src="include/prism.js"></script>
+    <script src="include/script.js"></script>
+    <link href="include/prism.css" rel="stylesheet">
+    <link href="include/style.css" rel="stylesheet">
+    <link href="include/logo.ico" rel="icon">
+</head>
+<body class="line-numbers">
+    <nav id="topnav">
+        <div class="nav-inner">
+            <a href="index.html">首页</a>
+            <a href="download.html">下载</a>
+            <a href="articles.html">文章</a>
+            <a href="about.html">关于</a>
+            <a href="https://github.com/CN-RBL" target="_blank">我的GitHub</a>
+            <a href="https://space.bilibili.com/3546389388724715" target="_blank">我的B站</a>
+        </div>
+    </nav>
+    <div id="nav-spacer" aria-hidden="true"></div>
+    <div id="main">
+        %%content%%
+    </div>
+    <div id="copyright">
+        <p class="chinese">© 2026 红蓝灯 保留所有权利才怪</p>
+        <p class="english">© 2026 RBL All rights reserved? Hardly.</p>
+        <p class="english" style="text-align: center;">Best viewed at 1528x740</p>
+        <p class="english" style="text-align: center;">Powered by PTools</p>
+    </div>
+</body>
+</html>