@@ -422,22 +422,19 @@ def _get_import_input_files(self, import_input, absolute_import_dir):
422422 for pattern in patterns :
423423 if pattern :
424424 files = glob .glob (os .path .join (absolute_import_dir ,
425- pattern ))
425+ pattern ),
426+ recursive = True )
426427 if not files and not glob .has_magic (pattern ):
427428 errors .append (
428429 f'No matching files for { file_type } :{ pattern } ' )
429430 else :
430431 input_files .extend (sorted (files ))
431- import_prefix = ''
432- if input_files :
433- import_prefix = os .path .splitext (os .path .basename (
434- input_files [0 ]))[0 ]
435432 if errors :
436433 logging .fatal (
437434 f'Missing import files in { absolute_import_dir } : { errors } ' )
438435 raise RuntimeError (
439436 'Import job failed due to missing user script output files.' )
440- return input_files , import_prefix
437+ return input_files
441438
442439 @log_function_call
443440 def _invoke_import_tool (self , absolute_import_dir : str ,
@@ -453,17 +450,12 @@ def _invoke_import_tool(self, absolute_import_dir: str,
453450 import_stage = ImportStage .GENMCF
454451 import_name = import_spec ['import_name' ]
455452 import_inputs = import_spec .get ('import_inputs' , [])
456- import_prefix_list = []
457453 input_index = - 1
458454 for import_input in import_inputs :
459455 input_index += 1
460- input_files , import_prefix = self ._get_import_input_files (
461- import_input , absolute_import_dir )
462- import_prefix_list .append (import_prefix )
463- if not import_prefix :
464- logging .error (
465- 'Skipping genmcf due to missing import input spec.' )
466- continue
456+ input_files = self ._get_import_input_files (import_input ,
457+ absolute_import_dir )
458+ import_prefix = f'input{ input_index } '
467459 output_path = os .path .join (absolute_import_dir , import_name ,
468460 version , import_prefix , 'genmcf' )
469461
@@ -521,7 +513,6 @@ def _invoke_import_tool(self, absolute_import_dir: str,
521513 import_name , import_stage , ImportStatus .SUCCESS ,
522514 import_summary .import_stats .get ('genmcf_execution_time' , 0 ),
523515 import_summary .import_stats .get ('mcf_data_size' , 0 ))
524- return import_prefix_list
525516
526517 def _get_validation_config_file (self , repo_dir : str ,
527518 absolute_import_dir : str , import_spec : dict ,
@@ -559,7 +550,7 @@ def _get_validation_config_file(self, repo_dir: str,
559550 @log_function_call
560551 def _invoke_import_validation (self , repo_dir : str , relative_import_dir : str ,
561552 absolute_import_dir : str , import_spec : dict ,
562- version : str , import_prefix_list : list ,
553+ version : str ,
563554 import_summary : ImportStatusSummary ) -> bool :
564555 """
565556 Performs validations on import data.
@@ -577,12 +568,11 @@ def _invoke_import_validation(self, repo_dir: str, relative_import_dir: str,
577568 differ_job_name = 'differ'
578569
579570 # Trigger validations for each tmcf/csv under import_inputs.
571+ import_inputs = import_spec .get ('import_inputs' , [])
580572 input_index = - 1
581- for import_prefix in import_prefix_list :
573+ for import_input in import_inputs :
582574 input_index += 1
583- if not import_prefix :
584- logging .error ('Skipping validation due to missing import spec.' )
585- continue
575+ import_prefix = f'input{ input_index } '
586576
587577 genmcf_output_path = os .path .join (absolute_import_dir , import_name ,
588578 version , import_prefix , 'genmcf' )
@@ -593,17 +583,20 @@ def _invoke_import_validation(self, repo_dir: str, relative_import_dir: str,
593583 current_data_path = os .path .join (genmcf_output_path , '*.mcf' )
594584 previous_data_path = latest_version + f'/{ import_prefix } /genmcf/*.mcf'
595585 # TODO: remove fallback logic once all imports move to new path.
596- if latest_version and not file_util .file_get_matching (
597- previous_data_path ):
586+ if not file_util .file_get_matching (previous_data_path ):
587+ input_files = self ._get_import_input_files (
588+ import_input , absolute_import_dir )
589+ import_prefix = os .path .splitext (
590+ os .path .basename (input_files [0 ]))[0 ]
591+ previous_data_path = latest_version + f'/{ import_prefix } /genmcf/*.mcf'
592+ if not file_util .file_get_matching (previous_data_path ):
598593 previous_data_path = latest_version + f'/{ import_prefix } /validation/*.mcf'
599594 # END
600595 summary_stats = os .path .join (genmcf_output_path ,
601596 'summary_report.csv' )
602597 report_json = os .path .join (genmcf_output_path , 'report.json' )
603598 validation_output_file = os .path .join (validation_output_path ,
604599 'validation_output.csv' )
605- differ_output = os .path .join (validation_output_path ,
606- 'obs_diff_summary.csv' )
607600
608601 # Invoke differ and validation scripts.
609602 differ_output_file = ''
@@ -620,7 +613,7 @@ def _invoke_import_validation(self, repo_dir: str, relative_import_dir: str,
620613 job_name = differ_job_name ,
621614 file_format = 'mcf' ,
622615 runner_mode = 'local' )
623- differ .run_differ ()
616+ differ_summary = differ .run_differ ()
624617 log_metric (
625618 AUTO_IMPORT_JOB_STAGE , "INFO" ,
626619 f"Import: { import_name } , differ for { import_prefix } { latest_version } vs { version } " ,
@@ -633,6 +626,11 @@ def _invoke_import_validation(self, repo_dir: str, relative_import_dir: str,
633626 "current_version" : version
634627 })
635628 differ_output_file = validation_output_path
629+ if differ_summary .get ('obs_diff_size' ,
630+ '0' ) == 0 and differ_summary .get (
631+ 'schema_diff_size' , '0' ) == 0 :
632+ import_summary .status = ImportStatus .SKIP
633+ logging .info ("Marking import as SKIP due to empty diff." )
636634 else :
637635 differ_output_file = ''
638636 logging .error (
@@ -693,6 +691,13 @@ def _invoke_import_validation(self, repo_dir: str, relative_import_dir: str,
693691 import_summary .import_stats .get ('validation_execution_time' , 0 ),
694692 import_summary .import_stats .get ('validation_data_size' ,
695693 0 ), validation_message )
694+ if self .config .ignore_validation_status or validation_status :
695+ import_summary .status = ImportStatus .STAGING
696+ else :
697+ logging .error (
698+ "Marking import as VALIDATION due to validation failure." )
699+ import_summary .status = ImportStatus .VALIDATION
700+
696701 return validation_status
697702
698703 def _get_validation_message (
@@ -853,11 +858,13 @@ def _import_one_helper(
853858 repo_dir , 'import-automation' , 'executor' ,
854859 self .config .requirements_filename )
855860 timer = Timer ()
856- interpreter_path , process = _create_venv (
857- (central_requirements_path , requirements_path ),
858- tmpdir ,
859- timeout = self .config .venv_create_timeout ,
860- )
861+ interpreter_path = sys .executable
862+ process = subprocess .CompletedProcess (args = [], returncode = 0 )
863+ # interpreter_path, process = _create_venv(
864+ # [requirements_path],
865+ # tmpdir,
866+ # timeout=self.config.venv_create_timeout,
867+ # )
861868
862869 _log_process (process = process ,
863870 import_name = import_name ,
@@ -878,7 +885,7 @@ def _import_one_helper(
878885
879886 if self .config .invoke_import_tool :
880887 logging .info ("Invoking import tool genmcf" )
881- import_prefix_list = self ._invoke_import_tool (
888+ self ._invoke_import_tool (
882889 absolute_import_dir = absolute_import_dir ,
883890 relative_import_dir = relative_import_dir ,
884891 version = version ,
@@ -894,7 +901,6 @@ def _import_one_helper(
894901 absolute_import_dir = absolute_import_dir ,
895902 import_spec = import_spec ,
896903 version = version ,
897- import_prefix_list = import_prefix_list ,
898904 import_summary = import_summary )
899905 logging .info (
900906 f'Validations for version { version } completed with status: { validation_status } '
@@ -910,13 +916,6 @@ def _import_one_helper(
910916 import_summary .import_stats .get ('validation_data_size' , 0 ))
911917 logging .info (import_summary )
912918
913- if self .config .ignore_validation_status or validation_status :
914- import_summary .status = ImportStatus .STAGING
915- else :
916- logging .error (
917- "Staging latest version update due to validation failure." )
918- import_summary .status = ImportStatus .VALIDATION
919-
920919 self ._update_latest_version (version , output_dir , import_spec ,
921920 import_summary )
922921
@@ -970,7 +969,9 @@ def _upload_import_inputs(
970969 import_inputs = import_spec .get ('import_inputs' , [])
971970 errors = []
972971 data_size = 0
972+ input_index = - 1
973973 for import_input in import_inputs :
974+ input_index += 1
974975 for input_type in self .config .import_input_types :
975976 path = import_input .get (input_type )
976977 if not path :
@@ -984,13 +985,13 @@ def _upload_import_inputs(
984985 if import_files :
985986 for file in import_files :
986987 if file :
987- dest = f'{ output_dir } /{ version } /{ os .path .basename (file )} '
988+ dest = f'{ output_dir } /{ version } /input { input_index } / { os .path .basename (file )} '
988989 data_size += os .path .getsize (file )
989990 self ._upload_file_helper (
990991 src = file ,
991992 dest = dest ,
992993 )
993- uploaded_dest = f'{ output_dir } /{ version } /{ os .path .basename (path )} '
994+ uploaded_dest = f'{ output_dir } /{ version } /input { input_index } / { os .path .basename (path )} '
994995 setattr (uploaded , input_type , uploaded_dest )
995996 elif not glob .has_magic (path ):
996997 errors .append (
0 commit comments