Skip to content

Commit b6f9914

Browse files
authored
Merge pull request #73 from davidbetz/MODERNIZE_AND_CONTAINERIZE
Add Dockerfile, splitByBook, and fix invalid JSON
2 parents 9adc6ae + 5deb91c commit b6f9914

5 files changed

Lines changed: 60 additions & 13 deletions

File tree

.dockerignore

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
.git/
2+
HomeFiles/
3+
MAPM/
4+
OSHB Graphics/
5+
WlcWordList/
6+
oxlos-import/
7+
parsing/
8+
read/
9+
structure/
10+
.gitignore

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,7 @@
1616

1717
# Autogenerated files from structure/write_wlc_chapters.py
1818
/structure/OshbVerse/chapters
19+
20+
json/
21+
remapped.json
22+
hebrew.json

Dockerfile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
FROM python:3
2+
3+
WORKDIR /var/app
4+
5+
COPY morphhbXML-to-JSON.py /var/app
6+
COPY wlc /var/app/wlc
7+
8+
ENTRYPOINT ["python", "morphhbXML-to-JSON.py"]

morphhbXML-to-JSON.py

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import sys
77
import getopt
88
import json
9+
import os
910
from xml.etree import ElementTree as ET
1011

1112
bookNameData = {}
@@ -55,6 +56,7 @@
5556
stripHFromMorph = False
5657
prefixLemmasWithH = False
5758
remapVerses = False
59+
splitByBook = False
5860

5961

6062
def getBookData(filename):
@@ -134,6 +136,7 @@ def getCommandOptions(argv):
134136
global stripHFromMorph
135137
global prefixLemmasWithH
136138
global remapVerses
139+
global splitByBook
137140

138141
try:
139142
opts, args = getopt.getopt(argv, "h:",
@@ -142,14 +145,15 @@ def getCommandOptions(argv):
142145
"removeLemmaTypes",
143146
"stripHFromMorph",
144147
"prefixLemmasWithH",
145-
"remapVerses"
148+
"remapVerses",
149+
"splitByBook"
146150
])
147151
except getopt.GetoptError:
148-
print('python3 morphhb.py --stripPointing --removeLemmaTypes --stripHFromMorph --prefixLemmasWithH --remapVerses')
152+
print('python3 morphhb.py --stripPointing --removeLemmaTypes --stripHFromMorph --prefixLemmasWithH --remapVerses --splitByBook')
149153
sys.exit(2)
150154
for opt, arg in opts:
151155
if opt == '-h':
152-
print('python3 morphhb.py --stripPointing --removeLemmaTypes --stripHFromMorph --prefixLemmasWithH --remapVerses')
156+
print('python3 morphhb.py --stripPointing --removeLemmaTypes --stripHFromMorph --prefixLemmasWithH --remapVerses --splitByBook')
153157
sys.exit()
154158
elif opt in ("--stripPointing"):
155159
print('stripPointing')
@@ -166,6 +170,9 @@ def getCommandOptions(argv):
166170
elif opt in ("--remapVerses"):
167171
print('remapVerses')
168172
remapVerses = True
173+
elif opt in ("--splitByBook"):
174+
print('splitByBook')
175+
splitByBook = True
169176

170177

171178
def main():
@@ -259,17 +266,25 @@ def main():
259266
remapped['Psalms'][12][4][6:16] = []
260267
remapped['Psalms'][12][5][0: 6] = []
261268

262-
jsonStr = str(hebrew)
263269
if remapVerses:
264-
jsonStr = str(remapped)
265-
266-
jsonStr = re.sub(r'(?<=\},)', '\n', jsonStr)
267-
if stripPointing:
268-
jsonStr = stripPointingFunc(jsonStr)
269-
270-
print("var morphhb={};".format(jsonStr))
271-
print('')
272-
print('module.exports=morphhb;')
270+
final = remapped
271+
else:
272+
final = hebrew
273+
274+
name = 'remapped' if remapVerses else 'hebrew'
275+
276+
if splitByBook:
277+
output_dir = os.path.join('./json', name)
278+
if not os.path.exists(output_dir):
279+
os.makedirs(output_dir)
280+
281+
for book in final:
282+
target_file = os.path.join(output_dir, book.replace(" ", "").lower())
283+
with open(target_file + '.json', 'w', encoding='utf8') as f:
284+
json.dump(final[book], f, ensure_ascii=False)
285+
else:
286+
with open(name + '.json', 'w', encoding='utf8') as f:
287+
json.dump(final, f, ensure_ascii=False)
273288

274289

275290
if __name__ == "__main__":

readme.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,3 +80,13 @@ The perl script which generates this is called morphhbXML-to-JSON.pl. It has sev
8080

8181
You can run this script like so:
8282
`perl morphhbXML-to-JSON.pl --stripPointing --removeLemmaTypes --prefixLemmasWithH --remapVerses`
83+
84+
## Updated
85+
86+
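The Python script has been updated to reflect contemporary data practices and to promote cross-platform cooperation: instead of printing a JavaScript module to standard output, it now writes valid JSON to `hebrew.json` (or `remapped.json` when `--remapVerses` is given).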
87+
88+
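Use `--splitByBook` to create a JSON file per book; the files are written to `json/hebrew/` (or `json/remapped/` with `--remapVerses`), each named after the book in lowercase with spaces removed.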
89+
90+
This is usable directly or via Docker:
91+
92+
docker build . -t local/morphhb && docker run -it -v `pwd`:/var/app local/morphhb

0 commit comments

Comments
 (0)