Skip to content

Commit 229d7a2

Browse files
committed
Add --header option to add a document header
This allows you to add a header to the top of the plain text file. This is useful for stating that the file is autogenerated.
1 parent 3ed1dfe commit 229d7a2

5 files changed

Lines changed: 55 additions & 3 deletions

File tree

CHANGELOG.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@ Change log
44
Unreleased
55
----------
66

7-
- ``--prefix`` argument allows you to customize the suffix of the plain text mirror.
7+
New configuration options from the command line:
8+
9+
- ``--suffix`` option allows you to customize the suffix of the plain text mirror.
10+
- ``--header`` option allows you to add header content to the plain text file.
811

912
0.1.0 (2021-03-18)
1013
------------------

README.rst

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,5 +119,20 @@ However, you can customize the suffix of the file name by setting a ``--suffix``
119119
hooks:
120120
- id: docxplain
121121
args:
122-
- "prefix"
122+
- "--suffix"
123123
- ".extracted.txt"
124+
125+
Plain text file header
126+
----------------------
127+
128+
You can add a header to the plain text file's content by setting the ``--header`` command-line option.
129+
This is useful for explaining that the file is autogenerated::
130+
131+
repos:
132+
- repo: https://github.com/jsickcodes/pre-commit-docx-plain
133+
rev: 0.2.0
134+
hooks:
135+
- id: docxplain
136+
args:
137+
- "--header"
138+
- "THIS FILE IS AUTOGENERATED"

src/docxplain/cli.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,5 +23,10 @@ def create_parser() -> argparse.ArgumentParser:
2323
parser.add_argument(
2424
"--suffix", default=".txt", help="File suffix for plain text file."
2525
)
26+
parser.add_argument(
27+
"--header",
28+
default=None,
29+
help="Content to add to the top of the plain text file.",
30+
)
2631

2732
return parser

src/docxplain/converter.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,16 @@
22

33
import hashlib
44
from pathlib import Path
5+
from typing import Optional
56

67
import pypandoc
78

89
__all__ = ["convert_file", "get_hash"]
910

1011

11-
def convert_file(filename: str, suffix: str = ".txt") -> bool:
12+
def convert_file(
13+
filename: str, suffix: str = ".txt", header: Optional[str] = None
14+
) -> bool:
1215
"""Convert the docx file to plaintext.
1316
1417
Parameters
@@ -19,6 +22,8 @@ def convert_file(filename: str, suffix: str = ".txt") -> bool:
1922
Suffix for the output plain text file, including ``"."`` prefix.
2023
Default is ``".txt"``, but a suffix like ``".extracted.txt"``
2124
could be useful.
25+
header : `str`, optional
26+
Content that is added to the top of the plain text file.
2227
2328
Returns
2429
-------
@@ -38,13 +43,23 @@ def convert_file(filename: str, suffix: str = ".txt") -> bool:
3843

3944
pypandoc.convert_file(str(docx_path), "plain", outputfile=str(plain_path))
4045

46+
if header:
47+
insert_header(plain_path, header)
48+
4149
if exists:
4250
final_hash = get_hash(plain_path)
4351
return final_hash != initial_hash
4452
else:
4553
return True
4654

4755

56+
def insert_header(path: Path, header: str) -> None:
57+
"""Add a header to the beginning of a plain text file."""
58+
content = path.read_text()
59+
content = "\n\n".join((header, content))
60+
path.write_text(content)
61+
62+
4863
def get_hash(path: Path) -> str:
4964
"""Get the SHA256 hash digest of a file."""
5065
m = hashlib.sha256()

tests/converter_test.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,17 @@ def test_suffix(tmp_path: Path) -> None:
4242
assert convert_file(str(docxpath), suffix=".extracted.txt") is True
4343
plain_path = work_dir.joinpath("test_doc.extracted.txt")
4444
assert plain_path.is_file()
45+
46+
47+
def test_header(tmp_path: Path) -> None:
48+
"""Test the case of a customized plain text file header."""
49+
repo_data = Path(__file__).parent.joinpath("data/new")
50+
work_dir = tmp_path / "header"
51+
shutil.copytree(repo_data, work_dir)
52+
docxpath = work_dir.joinpath("test_doc.docx")
53+
header = "This file is autogenerated."
54+
assert convert_file(str(docxpath), header=header) is True
55+
plain_path = docxpath.with_suffix(".txt")
56+
assert plain_path.is_file()
57+
content = plain_path.read_text().splitlines()
58+
assert content[0] == "This file is autogenerated."

0 commit comments

Comments
 (0)