|
1 | 1 | import logging |
| 2 | +import json |
2 | 3 | import os |
3 | 4 |
|
4 | 5 | from django.conf import settings |
| 6 | +from django.core.serializers.json import DjangoJSONEncoder |
5 | 7 | from django.core.mail import send_mail |
6 | 8 | from django.contrib.auth import get_user_model |
7 | 9 | from django.utils.translation import gettext as _ |
|
20 | 22 | ) |
21 | 23 |
|
22 | 24 |
|
| 25 | +LOGFILE_STAT_RESULT_CTIME_INDEX = 9 |
| 26 | + |
23 | 27 | User = get_user_model() |
24 | 28 |
|
25 | 29 |
|
@@ -83,37 +87,60 @@ def _add_log_file(user, collection, root, name, visible_dates): |
83 | 87 |
|
84 | 88 |
|
85 | 89 | @celery_app.task(bind=True, name=_('Validate log files'), timelimit=-1) |
86 | | -def task_validate_log_files(self, collections=[], user_id=None, username=None): |
| 90 | +def task_validate_log_files(self, collections=None, from_date=None, until_date=None, days_to_go_back=None, user_id=None, username=None, ignore_date=False): |
87 | 91 | """ |
88 | 92 | Task to validate log files in the database. |
89 | 93 |
|
90 | 94 | Parameters: |
91 | 95 | collections (list, optional): List of collection acronyms. Defaults to []. |
| 96 | + from_date (str, optional): The start date for log discovery in YYYY-MM-DD format. Defaults to None. |
| 97 | + until_date (str, optional): The end date for log discovery in YYYY-MM-DD format. Defaults to None. |
| 98 | + days_to_go_back (int, optional): The number of days to go back from today for log discovery. Defaults to None. |
92 | 99 | user_id (int, optional): The ID of the user initiating the task. Defaults to None. |
93 | 100 | username (str, optional): The username of the user initiating the task. Defaults to None. |
| 101 | + ignore_date (bool, optional): If True, ignore the date of the log file. Defaults to False. |
94 | 102 | """ |
95 | 103 | user = _get_user(self.request, username=username, user_id=user_id) |
96 | 104 |
|
97 | | - for col in collections or Collection.acron3_list(): |
98 | | - for log_file in models.LogFile.objects.filter(status=choices.LOG_FILE_STATUS_CREATED, collection__acron3=col): |
99 | | - logging.info(f'Validating log file {log_file.path} for collection {log_file.collection.acron3}.') |
| 105 | + logging.info(f'Validating log files for collections: {collections}.') |
100 | 106 |
|
101 | | - buffer_size, sample_size = _fetch_validation_parameters(col) |
102 | | - |
103 | | - val_results = utils.validate_file(path=log_file.path, buffer_size=buffer_size, sample_size=sample_size) |
104 | | - if val_results.get('is_valid', {}).get('all', False): |
105 | | - models.LogFileDate.create_or_update( |
106 | | - user=user, |
107 | | - log_file=log_file, |
108 | | - date=val_results.get('probably_date', ''), |
109 | | - ) |
110 | | - log_file.status = choices.LOG_FILE_STATUS_QUEUED |
| 107 | + visible_dates = _get_visible_dates(from_date, until_date, days_to_go_back) |
111 | 108 |
|
112 | | - else: |
113 | | - log_file.status = choices.LOG_FILE_STATUS_INVALIDATED |
| 109 | + if not ignore_date: |
| 110 | + logging.info(f'Interval: {visible_dates[0]} to {visible_dates[-1]}.') |
114 | 111 |
|
115 | | - logging.info(f'Log file {log_file.path} ({log_file.collection.acron3}) has status {log_file.status}.') |
116 | | - log_file.save() |
| 112 | + for col in collections or Collection.acron3_list(): |
| 113 | + for log_file in models.LogFile.objects.filter(status=choices.LOG_FILE_STATUS_CREATED, collection__acron3=col): |
| 114 | + file_ctime = date_utils.get_date_obj_from_timestamp(log_file.stat_result[LOGFILE_STAT_RESULT_CTIME_INDEX]) |
| 115 | + if file_ctime in visible_dates or ignore_date: |
| 116 | + logging.info(f'Validating log file {log_file.path} for collection {log_file.collection.acron3}.') |
| 117 | + |
| 118 | + buffer_size, sample_size = _fetch_validation_parameters(col) |
| 119 | + |
| 120 | + val_result = utils.validate_file(path=log_file.path, buffer_size=buffer_size, sample_size=sample_size) |
| 121 | + if 'datetimes' in val_result.get('content', {}).get('summary', {}): |
| 122 | + del val_result['content']['summary']['datetimes'] |
| 123 | + |
| 124 | + try: |
| 125 | + log_file.validation['result'] = json.dumps(val_result, cls=DjangoJSONEncoder) if val_result else '{}' |
| 126 | + log_file.validation['parameters'] = {'buffer_size': buffer_size, 'sample_size': sample_size} |
| 127 | + except (TypeError, ValueError) as e: |
| 128 | + logging.error(f'Error serializing validation result: {e}') |
| 129 | + log_file.validation = {} |
| 130 | + |
| 131 | + if val_result.get('is_valid', {}).get('all', False): |
| 132 | + models.LogFileDate.create_or_update( |
| 133 | + user=user, |
| 134 | + log_file=log_file, |
| 135 | + date=val_result.get('probably_date', ''), |
| 136 | + ) |
| 137 | + log_file.status = choices.LOG_FILE_STATUS_QUEUED |
| 138 | + |
| 139 | + else: |
| 140 | + log_file.status = choices.LOG_FILE_STATUS_INVALIDATED |
| 141 | + |
| 142 | + logging.info(f'Log file {log_file.path} ({log_file.collection.acron3}) has status {log_file.status}.') |
| 143 | + log_file.save() |
117 | 144 |
|
118 | 145 |
|
119 | 146 | def _fetch_validation_parameters(collection, default_buffer_size=0.1, default_sample_size=2048): |
@@ -170,30 +197,51 @@ def _check_missing_logs_for_date(user, collection, date): |
170 | 197 |
|
171 | 198 |
|
172 | 199 | @celery_app.task(bind=True, name=_('Generate log files count report')) |
173 | | -def task_log_files_count_status_report(self, collection, user_id=None, username=None): |
174 | | - col = models.Collection.objects.get(acron3=collection) |
175 | | - subject = _(f'Log Files Report for {col.main_name}') |
176 | | - |
177 | | - message = _(f'Dear collection {col.main_name},\n\nThis message is to inform you of the results of the Usage Log Validation service. Here are the results:\n\n') |
178 | | - |
179 | | - missing = models.CollectionLogFileDateCount.objects.filter(collection__acron3=collection, status=choices.COLLECTION_LOG_FILE_DATE_COUNT_MISSING_FILES) |
180 | | - extra = models.CollectionLogFileDateCount.objects.filter(collection__acron3=collection, status=choices.COLLECTION_LOG_FILE_DATE_COUNT_EXTRA_FILES) |
181 | | - ok = models.CollectionLogFileDateCount.objects.filter(collection__acron3=collection, status=choices.COLLECTION_LOG_FILE_DATE_COUNT_OK) |
182 | | - |
183 | | - if missing.count() > 0: |
184 | | - message += _(f'There are {missing.count()} missing log files.\n') |
185 | | - if extra.count() > 0: |
186 | | - message += _(f'There are {extra.count()} extra log files.\n') |
187 | | - if ok.count() > 0: |
188 | | - message += _(f'There are {ok.count()} dates with correct log files.\n') |
189 | | - |
190 | | - if missing.count() > 0 or extra.count() > 0: |
191 | | - message += _(f'\nPlease check the script that shares the logs.\n') |
| 200 | +def task_log_files_count_status_report(self, collections=None, from_date=None, until_date=None, user_id=None, username=None): |
| 201 | + from_date, until_date = date_utils.get_date_range_str(from_date, until_date) |
| 202 | + possible_dates_n = len(date_utils.get_date_objs_from_date_range(from_date, until_date)) |
| 203 | + |
| 204 | + from_date_obj = date_utils.get_date_obj(from_date) |
| 205 | + until_date_obj = date_utils.get_date_obj(until_date) |
| 206 | + |
| 207 | + for collection in collections or Collection.acron3_list(): |
| 208 | + col = models.Collection.objects.get(acron3=collection) |
| 209 | + subject = _(f'Usage Log Validation Results ({from_date} to {until_date})') |
| 210 | + message = _(f'This message provides the results of the Usage Log Validation for the period {from_date} to {until_date}:\n\n') |
192 | 211 |
|
193 | | - message += _(f'\nYou can view the complete report results at {settings.WAGTAILADMIN_BASE_URL}/admin/snippets/log_manager/collectionlogfiledatecount/?collection={col.pk}>.') |
| 212 | + missing = models.CollectionLogFileDateCount.objects.filter( |
| 213 | + collection__acron3=collection, |
| 214 | + status=choices.COLLECTION_LOG_FILE_DATE_COUNT_MISSING_FILES, |
| 215 | + date__gte=from_date_obj, |
| 216 | + date__lte=until_date_obj, |
| 217 | + ) |
| 218 | + extra = models.CollectionLogFileDateCount.objects.filter( |
| 219 | + collection__acron3=collection, |
| 220 | + status=choices.COLLECTION_LOG_FILE_DATE_COUNT_EXTRA_FILES, |
| 221 | + date__gte=from_date_obj, |
| 222 | + date__lte=until_date_obj, |
| 223 | + ) |
| 224 | + ok = models.CollectionLogFileDateCount.objects.filter( |
| 225 | + collection__acron3=collection, |
| 226 | + status=choices.COLLECTION_LOG_FILE_DATE_COUNT_OK, |
| 227 | + date__gte=from_date_obj, |
| 228 | + date__lte=until_date_obj, |
| 229 | + ) |
| 230 | + |
| 231 | + if missing.count() > 0: |
| 232 | + message += _(f'- There are {missing.count()} missing log files.\n') |
| 233 | + if extra.count() > 0: |
| 234 | + message += _(f'- There are {extra.count()} extra log files.\n') |
| 235 | + if ok.count() > 0: |
| 236 | + message += _(f'- There are {ok.count()} dates with correct log files.\n') |
| 237 | + |
| 238 | + if missing.count() > 0 or extra.count() > 0: |
| 239 | + message += _(f'\nPlease review the script responsible for sharing the log files.\n') |
| 240 | + |
| 241 | + message += _(f'\nYou can view the full report at {settings.WAGTAILADMIN_BASE_URL}/admin/snippets/log_manager/collectionlogfiledatecount/?collection={col.pk}.') |
194 | 242 |
|
195 | | - logging.info(f'Sending email to collection {col.main_name}. Subject: {subject}. Message: {message}') |
196 | | - _send_message(subject, message, collection) |
| 243 | + logging.info(f'Sending email to collection {col.main_name}. Subject: {subject}. Message: {message}') |
| 244 | + _send_message(subject, message, collection) |
197 | 245 |
|
198 | 246 |
|
199 | 247 | def _send_message(subject, message, collection): |
|
0 commit comments