|
1 | 1 | import os |
2 | 2 | import pandas as pd |
| 3 | +from multiprocessing import Pool, cpu_count |
| 4 | +from functools import partial |
3 | 5 |
|
4 | 6 | from pat2vec.util.filter_methods import filter_dataframe_by_fuzzy_terms |
5 | 7 | from pat2vec.util.filter_methods import ( |
|
9 | 11 | filter_dataframe_by_fuzzy_terms, |
10 | 12 | ) |
11 | 13 |
|
12 | | -import pandas as pd |
13 | | -import os |
14 | | - |
15 | 14 |
|
16 | 15 | def verify_split_data_concatenated(original_df, client_idcode_column, save_folder): |
17 | 16 | """ |
@@ -79,12 +78,6 @@ def verify_split_data_individual(original_df, client_idcode_column, save_folder) |
79 | 78 | print("Verification successful: All CSVs match the original DataFrame.") |
80 | 79 |
|
81 | 80 |
|
82 | | -import pandas as pd |
83 | | -import os |
84 | | -from multiprocessing import Pool, cpu_count |
85 | | -from functools import partial |
86 | | - |
87 | | - |
88 | 81 | def save_group(client_idcode_group, save_folder): |
89 | 82 | """Helper function to save a single group to CSV.""" |
90 | 83 | client_idcode, group = client_idcode_group |
@@ -129,7 +122,7 @@ def split_and_save_csv(df, client_idcode_column, save_folder, num_processes=None |
129 | 122 | # split_and_save_csv(df, 'client_idcode', 'client_data', num_processes=4) |
130 | 123 |
|
131 | 124 |
|
132 | | -def get_pat_batch_bloods( |
| 125 | +def get_merged_pat_batch_bloods( |
133 | 126 | client_idcode_list, |
134 | 127 | search_term, |
135 | 128 | config_obj=None, |
@@ -177,7 +170,10 @@ def get_pat_batch_bloods( |
177 | 170 | bloods_time_field = config_obj.bloods_time_field |
178 | 171 |
|
179 | 172 | # Define the directory for merged batches; the proj_name-based path is superseded by config_obj.pre_merged_input_batches_path
180 | | - input_directory = os.path.join(config_obj.proj_name, "merged_input_batches") |
| 173 | + input_directory = os.path.join(config_obj.proj_name, "merged_input_pat_batches") |
| 174 | + |
| 175 | + input_directory = config_obj.pre_merged_input_batches_path |
| 176 | + |
181 | 177 | os.makedirs(input_directory, exist_ok=True) # Ensure the directory exists |
182 | 178 |
|
183 | 179 | # Define the path for the merged batches output |
@@ -236,7 +232,8 @@ def get_pat_batch_bloods( |
236 | 232 | # Save the merged DataFrame to the dynamically constructed directory |
237 | 233 | if store_pat_batch_observations or overwrite_stored_pat_observations: |
238 | 234 | batch_target.to_csv(merged_batches_path, index=False) |
239 | | - print(f"Merged batches saved to {merged_batches_path}") |
| 235 | + if config_obj.verbosity >= 1: |
| 236 | + print(f"Merged batches saved to {merged_batches_path}") |
240 | 237 |
|
241 | 238 | return batch_target |
242 | 239 |
|
|
0 commit comments