|
15 | 15 | import time |
16 | 16 | import random |
17 | 17 | import math |
| 18 | +from RucioClient import RucioClient |
18 | 19 | from McMClient import McMClient |
19 | 20 | from JIRAClient import JIRAClient |
20 | 21 | from htmlor import htmlor |
@@ -1012,10 +1013,43 @@ def upward( ns ): |
1012 | 1013 |
|
1013 | 1014 | time_point("checked custodiality", sub_lap=True) |
1014 | 1015 |
|
1015 | | - ## presence in phedex |
| 1016 | + ## presence in phedex and/or rucio |
1016 | 1017 | phedex_presence ={} |
| 1018 | + rucioClient = RucioClient() |
1017 | 1019 | for output in wfi.request['OutputDatasets']: |
1018 | | - phedex_presence[output] = phedexClient.getFileCountDataset(url, output ) |
| 1020 | + _,dsn,process_string,tier = output.split('/') |
| 1021 | + if tier in set(UC.get('tiers_to_rucio_relval')) | set(UC.get('tiers_to_rucio_nonrelval')): |
| 1022 | + # - creates lists of tuples of the type: ('blockName', numFiles) |
| 1023 | + # for all blockNames per Dataset known to both Phedex and Rucio |
| 1024 | + # - creates the union of the two sets in order to avoid any duplicates |
| 1025 | + # (files present in both systems) |
| 1026 | + # - sums the number of files for the union set |
| 1027 | + # - assigns the value to 'phedex_presence' even though the full sum |
| 1028 | + # of the files is present in both systems - this way we avoid |
| 1029 | + # changing the code for the rest of the consistency checks |
| 1030 | + phedex_filecount_pb = phedexClient.getFileCountPerBlock(url, output) |
| 1031 | + rucio_filecount_pb = rucioClient.getFileCountPerBlock(output) |
| 1032 | + all_filecount_pb = set(phedex_filecount_pb) | set(rucio_filecount_pb) |
| 1033 | + all_blocks = set(map(lambda x: x[0], phedex_filecount_pb)) | set(map(lambda x: x[0], rucio_filecount_pb)) |
| 1034 | + |
| 1035 | + # below we will miscount in case the same blocks are present in both |
| 1036 | + # Rucio and Phedex but with a different number of files in the two |
| 1037 | + # systems - they will enter the sum twice, because the two tuples |
| 1038 | + # will be considered as two different blocks from the two subsets |
| 1039 | + # hence the following check: |
| 1040 | + if len(all_blocks) == len(all_filecount_pb): |
| 1041 | + phedex_presence[output] = sum(map(lambda x: x[1], all_filecount_pb)) |
| 1042 | + else: |
| 1043 | + # TODO: check whether we need to raise a higher level of alarm here. |
| 1044 | + msg = "There are inconsistences of number of files per block" |
| 1045 | + msg += "between Phedex and Rucio for dataset: {}".format(output) |
| 1046 | + wfi.sendLog('checkor', msg) |
| 1047 | + phedex_presence[output] = 0 |
| 1048 | + # we do not announce this output until the discrepancy from above is resolved |
| 1049 | + del(all_filecount_pb) |
| 1050 | + del(all_blocks) |
| 1051 | + else: |
| 1052 | + phedex_presence[output] = phedexClient.getFileCountDataset(url, output) |
1019 | 1053 |
|
1020 | 1054 | one_output_not_in_phedex = any([Nfiles==0 for Nfiles in phedex_presence.values()]) |
1021 | 1055 | if one_output_not_in_phedex and 'announce' in assistance_tags: |
@@ -1055,7 +1089,16 @@ def upward( ns ): |
1055 | 1089 | assistance_tags.add('filemismatch') |
1056 | 1090 | #print this for show and tell if no recovery on-going |
1057 | 1091 | for out in dbs_presence: |
1058 | | - _,_,missing_phedex,missing_dbs = getDatasetFiles(url, out) |
| 1092 | + dbs_filenames,phedex_filenames,missing_phedex,missing_dbs = getDatasetFiles(url, out) |
| 1093 | + |
| 1094 | + # Corrections to the lists of files present in Phedex for the data Tiers managed by Rucio |
| 1095 | + _,dsn,process_string,tier = output.split('/') |
| 1096 | + if tier in set(UC.get('tiers_to_rucio_relval')) | set(UC.get('tiers_to_rucio_nonrelval')): |
| 1097 | + # Here recalculating the filenames as a union of the phedex_files | rucio_files |
| 1098 | + all_filenames = set(phedex_filenames) | set(rucioClient.getFileNamesDataset(out)) |
| 1099 | + missing_phedex = list(set(dbs_filenames) - all_filenames) |
| 1100 | + missing_dbs = list(all_filenames - set(dbs_filenames)) |
| 1101 | + |
1059 | 1102 | if missing_phedex: |
1060 | 1103 | wfi.sendLog('checkor',"These %d files are missing in phedex, or extra in dbs, showing %s only\n%s"%(len(missing_phedex),show_N_only, |
1061 | 1104 | "\n".join( missing_phedex[:show_N_only] ))) |
|
0 commit comments