Skip to content

Commit 3ed1dfe

Browse files
authored
Merge pull request #2 from jsickcodes/file-suffix
Allow for a custom text file suffix
2 parents 2dc7f72 + 682c02b commit 3ed1dfe

5 files changed

Lines changed: 51 additions & 3 deletions

File tree

CHANGELOG.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,9 @@ Change log
44
Unreleased
55
----------
66

7+
- ``--suffix`` argument allows you to customize the suffix of the plain text mirror.
8+
9+
0.1.0 (2021-03-18)
10+
------------------
11+
712
Initial release.

README.rst

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,3 +101,23 @@ Note that this workflow can only run with private repositories.
101101
The ``GITHUB_TOKEN`` secret is not available to public forks.
102102

103103
When using this workflow, contributors need to either pull down the plain text file update to their local branch, or be prepared to use a forced push (``git push --force``) because their branch is "behind" the GitHub origin.
104+
105+
Configuration
106+
=============
107+
108+
This pre-commit hook works out of the box, but does allow for some customization.
109+
110+
Plain text filename suffix
111+
--------------------------
112+
113+
By default, if the Word file is named ``document.docx``, the plain text mirror file is named ``document.txt``.
114+
However, you can customize the suffix of the file name by setting a ``--suffix`` command-line option::
115+
116+
repos:
117+
- repo: https://github.com/jsickcodes/pre-commit-docx-plain
118+
rev: 0.2.0
119+
hooks:
120+
- id: docxplain
121+
args:
122+
- "--suffix"
123+
- ".extracted.txt"

src/docxplain/cli.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ def main() -> None:
1010
"""Command-line entrypoint."""
1111
parser = create_parser()
1212
args = parser.parse_args()
13-
changed = convert_file(args.source)
13+
changed = convert_file(args.source, suffix=args.suffix)
1414
if changed:
1515
sys.exit(1)
1616
else:
@@ -20,5 +20,8 @@ def main() -> None:
2020
def create_parser() -> argparse.ArgumentParser:
2121
parser = argparse.ArgumentParser(description="Convert docx to plain text.")
2222
parser.add_argument("source")
23+
parser.add_argument(
24+
"--suffix", default=".txt", help="File suffix for plain text file."
25+
)
2326

2427
return parser

src/docxplain/converter.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,18 @@
88
__all__ = ["convert_file", "get_hash"]
99

1010

11-
def convert_file(filename: str) -> bool:
11+
def convert_file(filename: str, suffix: str = ".txt") -> bool:
1212
"""Convert the docx file to plaintext.
1313
14+
Parameters
15+
----------
16+
filename : `str`
17+
Path of the docx file.
18+
suffix : `str`
19+
Suffix for the output plain text file, including ``"."`` prefix.
20+
Default is ``".txt"``, but a suffix like ``".extracted.txt"``
21+
could be useful.
22+
1423
Returns
1524
-------
1625
changed : bool
@@ -20,7 +29,7 @@ def convert_file(filename: str) -> bool:
2029
if not docx_path.is_file():
2130
raise RuntimeError(f"Source file {docx_path} does not exist.")
2231

23-
plain_path = docx_path.with_suffix(".txt")
32+
plain_path = docx_path.with_suffix(suffix)
2433
if plain_path.is_file():
2534
exists = True
2635
initial_hash = get_hash(plain_path)

tests/converter_test.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,14 @@ def test_new(tmp_path: Path) -> None:
3131
shutil.copytree(repo_data, work_dir)
3232
docxpath = work_dir.joinpath("test_doc.docx")
3333
assert convert_file(str(docxpath)) is True
34+
35+
36+
def test_suffix(tmp_path: Path) -> None:
37+
"""Test the case of a custom plain text file suffix."""
38+
repo_data = Path(__file__).parent.joinpath("data/new")
39+
work_dir = tmp_path / "suffix"
40+
shutil.copytree(repo_data, work_dir)
41+
docxpath = work_dir.joinpath("test_doc.docx")
42+
assert convert_file(str(docxpath), suffix=".extracted.txt") is True
43+
plain_path = work_dir.joinpath("test_doc.extracted.txt")
44+
assert plain_path.is_file()

0 commit comments

Comments
 (0)