strictdoc-project
diff --git a/‎.github/workflows/ci-mac.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/ci-mac.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/ci_fuzz_linux.yml‎
Lines changed: 62 additions & 0 deletions b/‎.github/workflows/ci_fuzz_linux.yml‎
Lines changed: 62 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 4 additions & 0 deletions b/‎.gitignore‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎html2pdf4doc/html2pdf4doc.py‎
Lines changed: 1 addition & 0 deletions b/‎html2pdf4doc/html2pdf4doc.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎html2pdf4doc/html2pdf4doc_fuzzer.py‎
Lines changed: 138 additions & 0 deletions b/‎html2pdf4doc/html2pdf4doc_fuzzer.py‎
Lines changed: 138 additions & 0 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 1 addition & 0 deletions b/‎pyproject.toml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎requirements.development.txt‎
Lines changed: 6 additions & 0 deletions b/‎requirements.development.txt‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎tasks.py‎
Lines changed: 11 additions & 0 deletions b/‎tasks.py‎
Lines changed: 11 additions & 0 deletions
@@ -11,7 +11,7 @@ jobs:
     strategy:
       matrix:
         python-version: [
-          "3.8", "3.12"
+          "3.9", "3.13"
         ]
 
     steps:
 
@@ -0,0 +1,62 @@
+# Runs the HTML2PDF4Doc fuzz task (`invoke test-fuzz`) on every pull request
+# and uploads the contents of output/ as a build artifact.
+name: "HTML2PDF4Doc Fuzz Testing on Linux"
+
+on:
+  pull_request:
+    branches: [ "**" ]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        python-version: [
+          "3.12"
+        ]
+
+    steps:
+    - uses: actions/checkout@v3
+
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v1
+      with:
+        python-version: ${{ matrix.python-version }}
+
+    - name: Upgrade pip
+      run: |
+        python -m pip install --upgrade pip
+
+    - name: Install Python packages
+      run: |
+        pip install -r requirements.development.txt
+
+    - name: Clone html2pdf4doc.js
+      run: |
+        invoke bootstrap
+      env:
+        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+    - name: Install html2pdf4doc dependencies.
+      run: |
+        python developer/pip_install_html2pdf4doc_deps.py
+
+    - name: Run Lint tasks
+      run: |
+        invoke lint
+
+    - name: Build HTML2PDF4Doc.js
+      run: |
+        invoke build
+
+    - name: Run tests
+      run: |
+        invoke test-fuzz
+
+    - name: Upload broken PDFs as artifact
+      # Always upload, even if the job fails. always() already covers the
+      # failure case, so a separate failure() check is not needed.
+      if: always()
+      uses: actions/upload-artifact@v4
+      with:
+        name: broken-pdfs
+        path: output/
+        retention-days: 30
@@ -9,3 +9,7 @@ tests/integration/.lit_test_times.txt
 tests/integration/**/Output/
 output/
 
+__pycache__/
+
+# Fuzz testing files.
+**.mut.**
@@ -23,6 +23,7 @@
 
 __version__ = "0.0.22"
 
+PATH_TO_HTML2PDF4DOC_PY = __file__
 PATH_TO_HTML2PDF4DOC_JS = os.path.join(
     os.path.dirname(os.path.join(__file__)),
     "html2pdf4doc_js",
 
@@ -0,0 +1,138 @@
+import argparse
+import contextlib
+import datetime
+import os.path
+import random
+import shutil
+import sys
+from pathlib import Path
+from subprocess import CalledProcessError, CompletedProcess, TimeoutExpired, run
+from time import time
+from typing import Iterator, List
+
+from faker import Faker
+from lxml import etree, html
+
+from html2pdf4doc import PATH_TO_HTML2PDF4DOC_PY
+
+
+@contextlib.contextmanager
+def measure_performance(title: str) -> Iterator[None]:
+    time_start = time()
+    yield
+    time_end = time()
+
+    time_diff = time_end - time_start
+    padded_name = f"{title} ".ljust(60, ".")
+    padded_time = f" {time_diff:0.2f}".rjust(6, ".")
+    print(f"{padded_name}{padded_time}s", flush=True)  # noqa: T201
+
+
+def mutate_and_print(path_to_input_file: str) -> bool:
+    assert os.path.isfile(path_to_input_file), path_to_input_file
+
+    text = open(path_to_input_file, encoding="utf-8").read()
+
+    # Parse HTML into DOM
+    tree = html.fromstring(text)
+
+    # Pick a random element
+    elems = tree.xpath("//p | //td")
+    if elems:
+        for _i in range(10):
+            node = random.choice(elems)
+
+            print("Mutating node:", node.tag)  # noqa: T201
+
+            n_sentences = random.randint(1, 100)
+
+            fake = Faker()
+            extra_text = fake.text(max_nb_chars=10 * n_sentences)
+
+            node.text = extra_text
+
+    # Serialize back to HTML
+    mutated_html = etree.tostring(
+        tree, pretty_print=False, method="html", encoding="unicode"
+    )
+
+    # Save next to input file
+    path_to_mut_html = path_to_input_file + ".mut.html"
+    path_to_mut_pdf = path_to_input_file + ".mut.html.pdf"
+    with open(path_to_mut_html, "w", encoding="utf-8") as f:
+        f.write(mutated_html)
+
+    print("Wrote mutated file:", path_to_mut_html)  # noqa: T201
+
+    paths_to_print = [(path_to_mut_html, path_to_mut_pdf)]
+
+    cmd: List[str] = [
+        sys.executable,
+        PATH_TO_HTML2PDF4DOC_PY,
+        "print",
+        "--strict",
+    ]
+
+    for path_to_print_ in paths_to_print:
+        cmd.append(path_to_print_[0])
+        cmd.append(path_to_print_[1])
+
+    with measure_performance(
+        "PDFPrintDriver: printing HTML to PDF using HTML2PDF and Chrome Driver"
+    ):
+        try:
+            _: CompletedProcess[bytes] = run(
+                cmd,
+                capture_output=False,
+                check=True,
+            )
+        except CalledProcessError as called_process_error_:
+            print(called_process_error_)  # noqa: T201
+
+            Path("output/").mkdir(exist_ok=True)
+
+            mut_html_filename = Path(path_to_mut_html).stem
+            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+            path_to_mut_html_out = os.path.join(
+                "output", f"{mut_html_filename}.{timestamp}.html"
+            )
+            shutil.copy(path_to_mut_html, path_to_mut_html_out)
+
+            path_to_mut_pdf_out = os.path.join(
+                "output", f"{mut_html_filename}.{timestamp}.pdf"
+            )
+            shutil.copy(path_to_mut_pdf, path_to_mut_pdf_out)
+
+            print(  # noqa: T201
+                f"Saved failed mutated HTML as:\n"
+                f"HTML: {path_to_mut_html_out}\n"
+                f"PDF: {path_to_mut_pdf_out}"
+            )
+            return False
+        except TimeoutExpired:
+            raise TimeoutError from None
+    return True
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("input_file", type=str, help="TODO")
+    args = parser.parse_args()
+
+    path_to_input_file = args.input_file
+
+    success_count, failure_count = 0, 0
+    for i in range(1, 100):
+        print(  # noqa: T201
+            f"--- Printing cycle #{i} — So far: 🟢{success_count} / 🔴{failure_count} ---"
+        )
+        success = mutate_and_print(path_to_input_file)
+        if success:
+            success_count += 1
+        else:
+            failure_count += 1
+
+
+if __name__ == "__main__":
+    main()
@@ -70,6 +70,7 @@ development = [
 
 [project.scripts]
 html2pdf4doc = "html2pdf4doc.html2pdf4doc:main"
+html2pdf4doc_fuzzer = "html2pdf4doc.html2pdf4doc_fuzzer:main"
 
 [project.urls]
 Changelog = "https://github.com/mettta/html2pdf_python/releases/"
 
@@ -16,3 +16,9 @@ ruff>=0.9
 #
 lit
 filecheck==0.0.24
+
+#
+# Fuzz tests
+#
+faker>=37.8.0
+lxml>=5.3.0
@@ -192,6 +192,17 @@ def test_integration(
     run_invoke(context, itest_command)
 
 
+@task(aliases=["tf"])
+def test_fuzz(context):
+    run_invoke(
+        context,
+        """
+            python html2pdf4doc/html2pdf4doc_fuzzer.py
+                tests/fuzz/01_strictdoc_guide_202510/strictdoc/docs/strictdoc_01_user_guide-PDF.html
+        """,
+    )
+
+
 @task(aliases=["t"])
 def test(context):
     test_integration(context)
Original file line number	Diff line number	Diff line change
`@@ -11,7 +11,7 @@ jobs:`
`11`	`11`	`strategy:`
`12`	`12`	`matrix:`
`13`	`13`	`python-version: [`
`14`		`- "3.8", "3.12"`
	`14`	`+ "3.9", "3.13"`
`15`	`15`	`]`
`16`	`16`
`17`	`17`	`steps:`