@@ -55,7 +55,7 @@ <h2 class="section-title" id="header-functions">Functions</h2>
5555</ summary >
5656< pre > < code class ="python "> def exportJSON() -> None: # pylint: disable=C0103
5757 """Export to JSON."""
58- logger.debug("exporting registry ad JSON")
58+ logger.debug("exporting registry as JSON")
5959 data = registry_data.registry()
6060 json_obj = []
6161 id_ = {
@@ -74,9 +74,144 @@ <h2 class="section-title" id="header-functions">Functions</h2>
7474</ details >
7575< div class ="desc "> < p > Export to JSON.</ p > </ div >
7676</ dd >
77+ < dt id ="src.jsonid.export.export_pronom "> < code class ="name flex ">
78+ < span > def < span class ="ident "> export_pronom</ span > </ span > (< span > ) ‑> None</ span >
79+ </ code > </ dt >
80+ < dd >
81+ < details class ="source ">
82+ < summary >
83+ < span > Expand source code</ span >
84+ </ summary >
85+ < pre > < code class ="python "> def export_pronom() -> None:
86+ """Export a PRONOM compatible set of signatures.
87+
88+ Export is done in two phases. A set of proposed "Baseline" JSON
89+ signatures to catch many JSON instances.
90+
91+ Second the JSONID registry is exported.
92+
93+ Every export has a priority over the other so that there should
94+ be no multiple identification results.
95+ """
96+
97+ # pylint: disable=R0914; too-many local variables.
98+
99+ logger.debug("exporting registry as PRONOM")
100+
101+ reg_data = registry_data.registry()
102+ formats = []
103+
104+ encodings = ("UTF-8", "UTF-16", "UTF-16BE", "UTF-32LE")
105+ priorities = []
106+
107+ increment_id = 0
108+
109+ for encoding in encodings:
110+ all_baseline = pronom.create_baseline_json_sequences(encoding)
111+ for baseline in all_baseline:
112+ increment_id += 1
113+ fmt = pronom.Format(
114+ id=increment_id,
115+ name=f"JSON (Baseline - fmt/817) ({encoding})",
116+ version="",
117+ puid="jsonid:0000",
118+ mime="application/json",
119+ classification="structured text",
120+ external_signatures=[
121+ pronom.ExternalSignature(
122+ id=increment_id,
123+ signature="json",
124+ type=pronom.EXT,
125+ )
126+ ],
127+ internal_signatures=[baseline],
128+ priorities=priorities,
129+ )
130+ priorities.append(f"{increment_id}")
131+ formats.append(fmt)
132+
133+ for encoding in encodings:
134+ for entry in reg_data:
135+ increment_id += 1
136+ json_puid = f"{entry.json()['identifier']};{encoding}"
137+ name_ = f"{entry.json()['name'][0]['@en']} ({encoding})"
138+ markers = entry.json()["markers"]
139+ try:
140+ mime = entry.json()["mime"][0]
141+ except IndexError:
142+ mime = ""
143+ try:
144+ sequences = pronom.process_markers(
145+ copy.deepcopy(markers),
146+ increment_id,
147+ encoding=encoding,
148+ )
149+ except pronom.UnprocessableEntity as err:
150+ logger.error(
151+ "%s %s: cannot handle: %s",
152+ json_puid,
153+ name_,
154+ err,
155+ )
156+ for marker in markers:
157+ logger.debug("--- START ---")
158+ logger.debug("marker: %s", marker)
159+ logger.debug("--- END ---")
160+ continue
161+ fmt = pronom.Format(
162+ id=increment_id,
163+ name=name_,
164+ version="",
165+ puid=json_puid,
166+ mime=mime,
167+ classification="structured text",
168+ external_signatures=[
169+ pronom.ExternalSignature(
170+ id=increment_id,
171+ signature="json",
172+ type=pronom.EXT,
173+ )
174+ ],
175+ internal_signatures=sequences,
176+ priorities=copy.deepcopy(list(set(priorities))),
177+ )
178+ priorities.append(f"{increment_id}")
179+ formats.append(fmt)
180+
181+ pronom.process_formats_and_save(formats, PRONOM_FILENAME)</ code > </ pre >
182+ </ details >
183+ < div class ="desc "> < p > Export a PRONOM compatible set of signatures.</ p >
184+ < p > Export is done in two phases. A set of proposed "Baseline" JSON
185+ signatures to catch many JSON instances.</ p >
186+ < p > Second the JSONID registry is exported.</ p >
187+ < p > Every export has a priority over the other so that there should
188+ be no multiple identification results.</ p > </ div >
189+ </ dd >
77190</ dl >
78191</ section >
79192< section >
193+ < h2 class ="section-title " id ="header-classes "> Classes</ h2 >
194+ < dl >
195+ < dt id ="src.jsonid.export.PRONOMException "> < code class ="flex name class ">
196+ < span > class < span class ="ident "> PRONOMException</ span > </ span >
197+ < span > (</ span > < span > *args, **kwargs)</ span >
198+ </ code > </ dt >
199+ < dd >
200+ < details class ="source ">
201+ < summary >
202+ < span > Expand source code</ span >
203+ </ summary >
204+ < pre > < code class ="python "> class PRONOMException(Exception):
205+ """Exception class if we can't create a PRONOM signature as expected."""</ code > </ pre >
206+ </ details >
207+ < div class ="desc "> < p > Exception class if we can't create a PRONOM signature as expected.</ p > </ div >
208+ < h3 > Ancestors</ h3 >
209+ < ul class ="hlist ">
210+ < li > builtins.Exception</ li >
211+ < li > builtins.BaseException</ li >
212+ </ ul >
213+ </ dd >
214+ </ dl >
80215</ section >
81216</ article >
82217< nav id ="sidebar ">
@@ -92,6 +227,14 @@ <h2 class="section-title" id="header-functions">Functions</h2>
92227< li > < h3 > < a href ="#header-functions "> Functions</ a > </ h3 >
93228< ul class ="">
94229< li > < code > < a title ="src.jsonid.export.exportJSON " href ="#src.jsonid.export.exportJSON "> exportJSON</ a > </ code > </ li >
230+ < li > < code > < a title ="src.jsonid.export.export_pronom " href ="#src.jsonid.export.export_pronom "> export_pronom</ a > </ code > </ li >
231+ </ ul >
232+ </ li >
233+ < li > < h3 > < a href ="#header-classes "> Classes</ a > </ h3 >
234+ < ul >
235+ < li >
236+ < h4 > < code > < a title ="src.jsonid.export.PRONOMException " href ="#src.jsonid.export.PRONOMException "> PRONOMException</ a > </ code > </ h4 >
237+ </ li >
95238</ ul >
96239</ li >
97240</ ul >
0 commit comments