Skip to content

Commit bc0e976

Browse files
author
Ricardo Decal
committed
feat: add strip_pdf_metadata.py tool
1 parent ae334f9 commit bc0e976

3 files changed

Lines changed: 90 additions & 0 deletions

File tree

README.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,21 @@ Options:
4949
--help Show this message and exit.
5050
```
5151

52+
### [strip_pdf_metadata.py](python/strip_pdf_metadata.py)
53+
54+
Output of `uv run https://tools.ricardodecal.com/python/strip_pdf_metadata.py --help`:
55+
56+
```text
57+
Usage: strip_pdf_metadata.py [OPTIONS] INPUT_FILE [OUTPUT_FILE]
58+
59+
Strip metadata from a PDF file.
60+
61+
If OUTPUT_FILE is not provided, writes to 'stripped_<INPUT_FILE>'.
62+
63+
Options:
64+
--help Show this message and exit.
65+
```
66+
5267
### [yt_transcript.py](python/yt_transcript.py)
5368

5469
Output of `uv run https://tools.ricardodecal.com/python/yt_transcript.py --help`:

index.html

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,17 @@ <h3><a href="python/convert_arrow_to_parquet_streaming.py">convert_arrow_to_parq
202202
dir
203203
--help Show this message and exit.
204204
</code></pre>
205+
<h3><a href="python/strip_pdf_metadata.py">strip_pdf_metadata.py</a></h3>
206+
<p>Output of <code>uv run https://tools.ricardodecal.com/python/strip_pdf_metadata.py --help</code>:</p>
207+
<pre><code class="language-text">Usage: strip_pdf_metadata.py [OPTIONS] INPUT_FILE [OUTPUT_FILE]
208+
209+
Strip metadata from a PDF file.
210+
211+
If OUTPUT_FILE is not provided, writes to 'stripped_&lt;INPUT_FILE&gt;'.
212+
213+
Options:
214+
--help Show this message and exit.
215+
</code></pre>
205216
<h3><a href="python/yt_transcript.py">yt_transcript.py</a></h3>
206217
<p>Output of <code>uv run https://tools.ricardodecal.com/python/yt_transcript.py --help</code>:</p>
207218
<pre><code class="language-text">Usage: yt_transcript.py [OPTIONS] URL [OUTPUT_FILE]

python/strip_pdf_metadata.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#!/usr/bin/env python3
2+
# /// script
3+
# requires-python = ">=3.12"
4+
# dependencies = [
5+
# "click",
6+
# "pikepdf",
7+
# ]
8+
# ///
9+
"""
10+
Strip metadata from a single PDF file.
11+
"""
12+
13+
import sys
14+
from pathlib import Path
15+
16+
import click
17+
import pikepdf
18+
19+
20+
def strip_metadata(src: Path, dst: Path) -> None:
    """Strip metadata from a PDF file.

    Opens ``src`` with pikepdf, deletes the ``/Metadata`` entry from the
    ``/Root`` dictionary and the ``/Info`` dictionary from the trailer, then
    saves the result to ``dst``.

    Args:
        src: Path of the PDF to read.
        dst: Path the stripped PDF is written to.

    Raises:
        SystemExit: With status 1, after printing the error to stderr, if
            opening, editing, or saving the PDF fails for any reason.
    """
    try:
        with pikepdf.open(src) as pdf:
            # ``.get`` returns None when the key is absent, so test for None
            # explicitly instead of relying on the truthiness of the object:
            # a dictionary that is present but empty must still be handled.
            root = pdf.trailer.get("/Root")
            if root is not None and "/Metadata" in root:
                del root["/Metadata"]

            info = pdf.trailer.get("/Info")
            if info is not None:
                # Clear every entry, then remove the trailer's reference to
                # the dictionary itself.
                for key in list(info.keys()):
                    del info[key]
                del pdf.trailer["/Info"]

            pdf.save(dst)
        click.echo(f"Successfully stripped metadata: {src} -> {dst}")
    except Exception as e:
        # Top-level CLI boundary: report the failure and exit non-zero
        # rather than showing a traceback.
        click.echo(f"Error stripping metadata from '{src}': {e}", err=True)
        sys.exit(1)
39+
40+
41+
@click.command()
@click.argument(
    "input_file",
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
)
@click.argument(
    "output_file",
    required=False,
    type=click.Path(writable=True, dir_okay=False, path_type=Path),
)
def main(input_file: Path, output_file: Path | None) -> None:
    """
    Strip metadata from a PDF file.

    If OUTPUT_FILE is not provided, writes to 'stripped_<INPUT_FILE>'.
    """
    # NOTE: the docstring above is what click prints for --help, so it is
    # user-facing text rather than internal documentation.
    #
    # With no explicit destination, write next to the input file using the
    # same file name prefixed with "stripped_".
    destination = (
        output_file
        if output_file is not None
        else input_file.with_name(f"stripped_{input_file.name}")
    )
    strip_metadata(input_file, destination)
61+
62+
63+
if __name__ == "__main__":
64+
main()

0 commit comments

Comments
 (0)