Skip to content

Commit de029ee

Browse files
Merge pull request #1 from andrestobelem/tar-epub-fixer
Enhance EPUB processing by implementing TAR file extraction functiona…
2 parents 9f3e06e + 77c6b27 commit de029ee

1 file changed

Lines changed: 72 additions & 0 deletions

File tree

epub_fixer.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import os
2+
import tarfile
3+
import zipfile
4+
import sys
5+
6+
def fix_epub_tar_to_zip(input_path, output_path):
    """
    Convert an EPUB that was erroneously packed as TAR into an EPUB ZIP.

    The directory that contains the 'mimetype' file is treated as the EPUB
    root and stripped from every entry name. The 'mimetype' entry is written
    first and uncompressed, as the EPUB container format requires; every
    other regular file is written deflated, in TAR order.

    Args:
        input_path: Path to the TAR-packed EPUB to read.
        output_path: Path of the ZIP-based EPUB to create (overwritten).

    Returns:
        True if the ZIP was written; False if the input is not a TAR
        archive, contains no 'mimetype' file, or input and output are the
        same file. I/O errors from tarfile/zipfile are not caught here.
    """
    if not tarfile.is_tarfile(input_path):
        print(f"Error: The file '{input_path}' is not a valid TAR file.")
        return False

    # Opening the output in "w" mode truncates it; if it is the very file we
    # are reading from, the source archive would be destroyed mid-conversion.
    if os.path.exists(output_path) and os.path.samefile(input_path, output_path):
        print("Error: input and output paths refer to the same file.")
        return False

    print(f"Opening TAR: {input_path}")

    with tarfile.open(input_path, "r:*") as tar:
        members = tar.getmembers()

        # 1. Identify the root by locating the 'mimetype' file.
        #    Only regular files qualify: its bytes must be copied to the ZIP.
        mimetype_member = next(
            (
                m for m in members
                if m.isfile() and os.path.basename(m.name) == 'mimetype'
            ),
            None,
        )

        if not mimetype_member:
            print("Error: 'mimetype' file not found. Is this a valid EPUB?")
            return False

        root = os.path.dirname(mimetype_member.name)
        if root and not root.endswith('/'):
            root += '/'

        if root:
            print(f"Detected root directory: {root} (it will be stripped)")
        else:
            print("The file is already at the root level.")

        # 2. Create the new ZIP file (correct EPUB)
        print(f"Creating ZIP: {output_path}")
        with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as zip_file:
            # The container format requires 'mimetype' to be the first entry
            # and stored without compression, so write it before the rest.
            with tar.extractfile(mimetype_member) as source:
                zip_file.writestr(
                    "mimetype", source.read(), compress_type=zipfile.ZIP_STORED
                )
            print("  + mimetype")

            for member in members:
                # 'mimetype' was already written above; skip non-files.
                if member is mimetype_member or not member.isfile():
                    continue

                source = tar.extractfile(member)
                if source is None:
                    continue

                # Normalize path: strip the root prefix if it exists
                if member.name.startswith(root):
                    arcname = member.name[len(root):]
                else:
                    arcname = member.name

                with source:
                    zip_file.writestr(arcname, source.read())
                print(f"  + {arcname}")

    print(f"\nSuccess! Fixed file saved at: {output_path}")
    return True
53+
54+
if __name__ == "__main__":
    # Command-line entry point: <input_file> is required, [output_file] is
    # optional. Exits with status 1 on bad usage or a failed conversion.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python epub_fixer.py <input_file.epub> [output_file.epub]")
        sys.exit(1)

    input_file = cli_args[0]

    if len(cli_args) >= 2:
        output_file = cli_args[1]
    else:
        # No explicit output: insert '_fixed' between the stem and extension.
        stem, suffix = os.path.splitext(input_file)
        output_file = stem + "_fixed" + suffix

    if not fix_epub_tar_to_zip(input_file, output_file):
        sys.exit(1)

    print("\nProcess finished.")
72+

0 commit comments

Comments
 (0)