Skip to content

Commit d57887b

Browse files
Merge pull request #93 from pitangainnovare/better-tasks
Melhora tasks de validação e relatório de logs
2 parents 98f098a + 7b35e49 commit d57887b

8 files changed

Lines changed: 121 additions & 43 deletions

File tree

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.7.6
1+
1.8.0

config/settings/production.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@
107107
# Admin URL regex.
108108
DJANGO_ADMIN_URL = env("DJANGO_ADMIN_URL")
109109
# https://docs.wagtail.org/en/stable/reference/settings.html#wagtailadmin-base-url
110-
WAGTAILADMIN_BASE_URL = env("WAGTAIL_ADMIN_URL")
110+
WAGTAILADMIN_BASE_URL = env("WAGTAILADMIN_BASE_URL")
111111

112112
# Anymail
113113
# ------------------------------------------------------------------------------

core/utils/date_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def get_date_obj(date_str: str, format: str = "%Y-%m-%d") -> datetime.date:
2929
try:
3030
return datetime.strptime(date_str, format).date()
3131
except (ValueError, TypeError):
32-
logging.error("Invalid date format. Use YYYY-MM-DD.")
32+
...
3333

3434

3535
def get_date_range_str(from_date_str: str = None, until_date_str: str = None, days_to_go_back: int = None) -> tuple[str, str]:

log_manager/choices.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,15 @@
66
LOG_FILE_STATUS_PARSING = 'PAR'
77
LOG_FILE_STATUS_PROCESSED = 'PRO'
88
LOG_FILE_STATUS_INVALIDATED = 'INV'
9+
LOG_FILE_STATUS_IGNORED = 'IGN'
910

1011
LOG_FILE_STATUS = [
1112
(LOG_FILE_STATUS_CREATED, _("Created")),
1213
(LOG_FILE_STATUS_QUEUED, _("Queued")),
1314
(LOG_FILE_STATUS_PARSING, _("Parsing")),
1415
(LOG_FILE_STATUS_PROCESSED, _("Processed")),
1516
(LOG_FILE_STATUS_INVALIDATED, _("Invalidated")),
17+
(LOG_FILE_STATUS_IGNORED, _("Ignored")),
1618
]
1719

1820

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Generated by Django 5.0.7 on 2025-05-24 15:54
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    """Add the ``validation`` JSON column to the ``logfile`` model."""

    # Must run after the previous log_manager migration.
    dependencies = [
        ("log_manager", "0003_collectionlogfiledatecount_is_usage_metric_computed"),
    ]

    operations = [
        migrations.AddField(
            model_name="logfile",
            name="validation",
            # Optional field: it may be left blank or NULL, and new rows
            # default to an empty dict.
            field=models.JSONField(
                verbose_name="Validation",
                default=dict,
                null=True,
                blank=True,
            ),
        ),
    ]

log_manager/models.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,13 @@ class LogFile(CommonControlField):
204204
null=False,
205205
)
206206

207+
validation = models.JSONField(
208+
_("Validation"),
209+
null=True,
210+
blank=True,
211+
default=dict,
212+
)
213+
207214
collection = models.ForeignKey(
208215
Collection,
209216
verbose_name=_("Collection"),
@@ -217,6 +224,7 @@ class LogFile(CommonControlField):
217224
FieldPanel('path'),
218225
FieldPanel('stat_result'),
219226
FieldPanel('status'),
227+
FieldPanel('validation'),
220228
AutocompletePanel('collection'),
221229
]
222230

log_manager/tasks.py

Lines changed: 88 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
import logging
2+
import json
23
import os
34

45
from django.conf import settings
6+
from django.core.serializers.json import DjangoJSONEncoder
57
from django.core.mail import send_mail
68
from django.contrib.auth import get_user_model
79
from django.utils.translation import gettext as _
@@ -20,6 +22,8 @@
2022
)
2123

2224

25+
LOGFILE_STAT_RESULT_CTIME_INDEX = 9
26+
2327
User = get_user_model()
2428

2529

@@ -83,37 +87,60 @@ def _add_log_file(user, collection, root, name, visible_dates):
8387

8488

8589
@celery_app.task(bind=True, name=_('Validate log files'), timelimit=-1)
def task_validate_log_files(self, collections=[], from_date=None, until_date=None, days_to_go_back=None, user_id=None, username=None, ignore_date=False):
    """
    Task to validate log files in the database.

    Every LogFile with status CREATED that belongs to one of the requested
    collections is validated. The validation outcome and the parameters used
    are stored on ``log_file.validation``; the file is then marked QUEUED
    (valid) or INVALIDATED (not valid) and saved.

    Parameters:
        collections (list, optional): List of collection acronyms. Defaults to [],
            in which case ``Collection.acron3_list()`` is used. The default list is never mutated.
        from_date (str, optional): The start date for log discovery in YYYY-MM-DD format. Defaults to None.
        until_date (str, optional): The end date for log discovery in YYYY-MM-DD format. Defaults to None.
        days_to_go_back (int, optional): The number of days to go back from today for log discovery. Defaults to None.
        user_id (int, optional): The ID of the user initiating the task. Defaults to None.
        username (str, optional): The username of the user initiating the task. Defaults to None.
        ignore_date (bool, optional): If True, ignore the date of the log file. Defaults to False.
    """
    user = _get_user(self.request, username=username, user_id=user_id)

    logging.info(f'Validating log files for collections: {collections}.')

    visible_dates = _get_visible_dates(from_date, until_date, days_to_go_back)

    if not ignore_date:
        logging.info(f'Interval: {visible_dates[0]} to {visible_dates[-1]}.')

    for col in collections or Collection.acron3_list():
        for log_file in models.LogFile.objects.filter(status=choices.LOG_FILE_STATUS_CREATED, collection__acron3=col):
            # The creation time only matters when the date filter is active,
            # so stat_result is not touched at all when ignore_date is set.
            if not ignore_date:
                file_ctime = date_utils.get_date_obj_from_timestamp(log_file.stat_result[LOGFILE_STAT_RESULT_CTIME_INDEX])
                if file_ctime not in visible_dates:
                    continue

            logging.info(f'Validating log file {log_file.path} for collection {log_file.collection.acron3}.')

            buffer_size, sample_size = _fetch_validation_parameters(col)

            val_result = utils.validate_file(path=log_file.path, buffer_size=buffer_size, sample_size=sample_size)
            # The 'datetimes' entry of the summary is dropped before the result is stored.
            if 'datetimes' in val_result.get('content', {}).get('summary', {}):
                del val_result['content']['summary']['datetimes']

            # json.dumps reports an unserializable or circular payload with
            # TypeError / ValueError; it never raises json.JSONDecodeError
            # (that one belongs to decoding), so those are the types to catch.
            try:
                serialized_result = json.dumps(val_result, cls=DjangoJSONEncoder) if val_result else {}
            except (TypeError, ValueError) as e:
                logging.error(f'Error serializing validation result: {e}')
                log_file.validation = {}
            else:
                # The model field is nullable: start from an empty dict when
                # nothing has been stored yet instead of subscripting None.
                validation = log_file.validation or {}
                validation['result'] = serialized_result
                validation['parameters'] = {'buffer_size': buffer_size, 'sample_size': sample_size}
                log_file.validation = validation

            if val_result.get('is_valid', {}).get('all', False):
                models.LogFileDate.create_or_update(
                    user=user,
                    log_file=log_file,
                    date=val_result.get('probably_date', ''),
                )
                log_file.status = choices.LOG_FILE_STATUS_QUEUED
            else:
                log_file.status = choices.LOG_FILE_STATUS_INVALIDATED

            logging.info(f'Log file {log_file.path} ({log_file.collection.acron3}) has status {log_file.status}.')
            log_file.save()
117144

118145

119146
def _fetch_validation_parameters(collection, default_buffer_size=0.1, default_sample_size=2048):
@@ -170,30 +197,51 @@ def _check_missing_logs_for_date(user, collection, date):
170197

171198

172199
def _count_log_file_dates(collection, status, from_date_obj, until_date_obj):
    """Return how many CollectionLogFileDateCount rows of `collection` have `status` within the inclusive date range."""
    return models.CollectionLogFileDateCount.objects.filter(
        collection__acron3=collection,
        status=status,
        date__gte=from_date_obj,
        date__lte=until_date_obj,
    ).count()


@celery_app.task(bind=True, name=_('Generate log files count report'))
def task_log_files_count_status_report(self, collections=[], from_date=None, until_date=None, user_id=None, username=None):
    """
    Task to e-mail, per collection, a summary of the log file date counts.

    For each collection the number of dates with missing files, extra files
    and correct files inside the period is counted, a message is assembled
    and handed to ``_send_message``.

    Parameters:
        collections (list, optional): List of collection acronyms. Defaults to [],
            in which case ``Collection.acron3_list()`` is used. The default list is never mutated.
        from_date (str, optional): Start of the period; normalized by ``date_utils.get_date_range_str``. Defaults to None.
        until_date (str, optional): End of the period; normalized by ``date_utils.get_date_range_str``. Defaults to None.
        user_id (int, optional): Accepted for interface compatibility; not used by this task. Defaults to None.
        username (str, optional): Accepted for interface compatibility; not used by this task. Defaults to None.
    """
    from_date, until_date = date_utils.get_date_range_str(from_date, until_date)

    from_date_obj = date_utils.get_date_obj(from_date)
    until_date_obj = date_utils.get_date_obj(until_date)

    for collection in collections or Collection.acron3_list():
        col = models.Collection.objects.get(acron3=collection)

        # gettext must receive the constant template: an f-string would be
        # interpolated before the catalog lookup and never match a msgid.
        subject = _('Usage Log Validation Results ({from_date} to {until_date})').format(
            from_date=from_date,
            until_date=until_date,
        )
        parts = [
            _('This message provides the results of the Usage Log Validation for the period {from_date} to {until_date}:\n\n').format(
                from_date=from_date,
                until_date=until_date,
            )
        ]

        # One COUNT query per status; the numbers are reused below.
        missing_n = _count_log_file_dates(collection, choices.COLLECTION_LOG_FILE_DATE_COUNT_MISSING_FILES, from_date_obj, until_date_obj)
        extra_n = _count_log_file_dates(collection, choices.COLLECTION_LOG_FILE_DATE_COUNT_EXTRA_FILES, from_date_obj, until_date_obj)
        ok_n = _count_log_file_dates(collection, choices.COLLECTION_LOG_FILE_DATE_COUNT_OK, from_date_obj, until_date_obj)

        if missing_n > 0:
            parts.append(_('- There are {count} missing log files.\n').format(count=missing_n))
        if extra_n > 0:
            parts.append(_('- There are {count} extra log files.\n').format(count=extra_n))
        if ok_n > 0:
            parts.append(_('- There are {count} dates with correct log files.\n').format(count=ok_n))

        if missing_n > 0 or extra_n > 0:
            parts.append(_('\nPlease review the script responsible for sharing the log files.\n'))

        # The stray '>' that used to trail the URL is removed so the link stays valid.
        parts.append(
            _('\nYou can view the full report at {base_url}/admin/snippets/log_manager/collectionlogfiledatecount/?collection={collection_pk}.').format(
                base_url=settings.WAGTAILADMIN_BASE_URL,
                collection_pk=col.pk,
            )
        )

        message = ''.join(parts)

        logging.info(f'Sending email to collection {col.main_name}. Subject: {subject}. Message: {message}')
        _send_message(subject, message, collection)
197245

198246

199247
def _send_message(subject, message, collection):

log_manager/wagtail_hooks.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ class LogFileSnippetViewSet(SnippetViewSet):
6161
"stat_result",
6262
"collection",
6363
"status",
64+
"validation",
6465
"hash"
6566
)
6667
list_filter = ("status", "collection")

0 commit comments

Comments
 (0)