@@ -213,7 +213,7 @@ def write_export_file(
     resource_string = json.dumps(data, cls=encoders.JSONEncoder)
     logger.info('Done serializing attributes.')

-    batch_size = 100
+    batch_size = 1000
     is_collection = resource_type == 'collection'

     concepts_qs = Concept.objects.none()
@@ -236,97 +236,88 @@ def write_export_file(
     if version.is_head:
         filters['is_latest_version'] = True

+    resource_name = resource_type.title()
+
     with open('export.json', 'w') as out:
         out.write(f'{resource_string[:-1]}, "concepts": [')

-        resource_name = resource_type.title()
-
-        if concepts_qs.exists():
-            logger.info(f'{resource_name} has concepts. Getting them in batches of {batch_size:d}...')
         concept_serializer_class = get_class('core.concepts.serializers.ConceptVersionExportSerializer')
+        written_concepts = False
         start = 0
-        end = batch_size
-        batch_queryset = concepts_qs.order_by('-concept_id')[start:end]
-
-        while batch_queryset.exists():
-            logger.info(f'Serializing concepts {start + 1:d} - {end:d}...')
+        while True:
+            batch_ids = list(
+                concepts_qs.order_by('-concept_id')[start:start + batch_size].values_list('concept_id', flat=True)
+            )
+            if not batch_ids:
+                break
+            logger.info(f'Serializing concepts {start + 1:d} - {start + len(batch_ids):d}...')
             queryset = Concept.objects.filter(
-                id__in=batch_queryset.values_list('concept_id')).filter(**filters).order_by('-id')
-            if queryset.exists():
-                if start > 0:
-                    with open('export.json', 'a') as out:
-                        out.write(', ')
-                concept_versions = queryset.prefetch_related('names', 'descriptions')
+                id__in=batch_ids).filter(**filters).prefetch_related('names', 'descriptions').order_by('-id')
+            concept_versions = list(queryset)
+            if concept_versions:
+                if written_concepts:
+                    out.write(', ')
                 data = concept_serializer_class(concept_versions, many=True).data
                 concept_string = json.dumps(data, cls=encoders.JSONEncoder)
-                concept_string = concept_string[1:-1]
-
-                with open('export.json', 'a') as out:
-                    out.write(concept_string)
-
+                out.write(concept_string[1:-1])
+                written_concepts = True
             start += batch_size
-            end += batch_size
-            batch_queryset = concepts_qs.order_by('-concept_id')[start:end]

-        logger.info('Done serializing concepts.')
+        if written_concepts:
+            logger.info('Done serializing concepts.')

-    if is_collection:
-        references_qs = version.references
-        total_references = references_qs.count()
+        if is_collection:
+            references_qs = version.references
+            total_references = references_qs.count()

-        with open('export.json', 'a') as out:
             out.write('], "references": [')
-            if total_references:
-                logger.info(
-                    f'{resource_name} has {total_references:d} references. Getting them in batches of {batch_size:d}...'
-                )
-                reference_serializer_class = get_class('core.collections.serializers.CollectionReferenceDetailSerializer')
-                for start in range(0, total_references, batch_size):
-                    end = min(start + batch_size, total_references)
-                    logger.info(f'Serializing references {start + 1:d} - {end:d}...')
-                    references = references_qs.order_by('-id').filter()[start:end]
-                    reference_serializer = reference_serializer_class(references, many=True)
-                    reference_string = json.dumps(reference_serializer.data, cls=encoders.JSONEncoder)
-                    reference_string = reference_string[1:-1]
-                    with open('export.json', 'a') as out:
-                        out.write(reference_string)
-                    if end != total_references:
+            if total_references:
+                logger.info(
+                    f'{resource_name} has {total_references:d} references. '
+                    f'Getting them in batches of {batch_size:d}...'
+                )
+                reference_serializer_class = get_class(
+                    'core.collections.serializers.CollectionReferenceDetailSerializer')
+                for ref_start in range(0, total_references, batch_size):
+                    ref_end = min(ref_start + batch_size, total_references)
+                    logger.info(f'Serializing references {ref_start + 1:d} - {ref_end:d}...')
+                    references = references_qs.order_by('-id').filter()[ref_start:ref_end]
+                    reference_serializer = reference_serializer_class(references, many=True)
+                    reference_string = json.dumps(reference_serializer.data, cls=encoders.JSONEncoder)
+                    out.write(reference_string[1:-1])
+                    if ref_end != total_references:
                         out.write(', ')
-            logger.info('Done serializing references.')
+                logger.info('Done serializing references.')

-    with open('export.json', 'a') as out:
         out.write('], "mappings": [')

-        if mappings_qs.exists():
-            logger.info(f'{resource_name} has mappings. Getting them in batches of {batch_size:d}...')
         mapping_serializer_class = get_class('core.mappings.serializers.MappingDetailSerializer')
+        written_mappings = False
         start = 0
-        end = batch_size
-        batch_queryset = mappings_qs.order_by('-mapping_id')[start:end]
-
-        while batch_queryset.exists():
-            logger.info(f'Serializing mappings {start + 1:d} - {start + batch_size:d}...')
+        while True:
+            batch_ids = list(
+                mappings_qs.order_by('-mapping_id')[start:start + batch_size].values_list('mapping_id', flat=True)
+            )
+            if not batch_ids:
+                break
+            logger.info(f'Serializing mappings {start + 1:d} - {start + len(batch_ids):d}...')
             queryset = Mapping.objects.filter(
-                id__in=batch_queryset.values_list('mapping_id')).filter(**filters).order_by('-id')
-            if queryset.exists():
-                if start > 0:
-                    with open('export.json', 'a') as out:
-                        out.write(', ')
-
-                data = mapping_serializer_class(queryset, many=True).data
+                id__in=batch_ids).filter(**filters).prefetch_related(
+                'from_concept', 'to_concept', 'from_source', 'to_source').order_by('-id')
+            mapping_versions = list(queryset)
+            if mapping_versions:
+                if written_mappings:
+                    out.write(', ')
+                data = mapping_serializer_class(mapping_versions, many=True).data
                 mapping_string = json.dumps(data, cls=encoders.JSONEncoder)
-                mapping_string = mapping_string[1:-1]
-                with open('export.json', 'a') as out:
-                    out.write(mapping_string)
-
+                out.write(mapping_string[1:-1])
+                written_mappings = True
             start += batch_size
-            end += batch_size
-            batch_queryset = mappings_qs.order_by('-mapping_id')[start:end]

-        logger.info('Done serializing mappings.')
+        if written_mappings:
+            logger.info('Done serializing mappings.')

-    end_time = str(round((time.time() - start_time) + 2, 2))
-    with open('export.json', 'a') as out:
+        end_time = str(round((time.time() - start_time) + 2, 2))
         out.write('], "export_time": ' + json.dumps(f"{end_time} secs", cls=encoders.JSONEncoder) + '}')

     version.update_extras('__export_time', end_time)
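
For readers following the change: below is a minimal, self-contained sketch of the streaming pattern this commit settles on — one file handle held open for the whole export, fixed-size batches pulled in a loop, and a written-anything-yet flag deciding comma placement so that batches emptied out by post-filtering can never produce a leading or doubled comma. Plain lists stand in for the Django querysets, and the helper names (`write_json_array`, `fetch_batches`) are illustrative only, not part of this codebase.

```python
import io
import json


def write_json_array(out, batches, serialize=json.dumps):
    """Stream items to `out` as one JSON array, batch by batch.

    Mirrors the diff above: only emit a separating comma once something
    has already been written (the role of written_concepts /
    written_mappings), so empty batches cannot corrupt the JSON.
    """
    out.write('[')
    written = False
    for batch in batches:
        if not batch:
            continue  # a batch filtered down to nothing writes nothing
        if written:
            out.write(', ')
        # json.dumps(...)[1:-1] strips the surrounding brackets so the
        # batch's elements splice into the array already being streamed.
        out.write(serialize(batch)[1:-1])
        written = True
    out.write(']')


def fetch_batches(items, batch_size):
    """Offset-based batching over a stable ordering, standing in for
    queryset[start:start + batch_size] with a break on the empty slice."""
    start = 0
    while True:
        batch = items[start:start + batch_size]
        if not batch:
            break
        yield batch
        start += batch_size


if __name__ == '__main__':
    buf = io.StringIO()
    write_json_array(buf, fetch_batches(list(range(10, 0, -1)), 3))
    print(buf.getvalue())       # [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
    json.loads(buf.getvalue())  # round-trips as valid JSON
```

The `[1:-1]` slice works because `json.dumps` on a list always yields a bracket-wrapped array, so stripping the brackets lets each batch splice into the array already being written — the same trick the export code applies to `concept_string`, `reference_string`, and `mapping_string`.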