Skip to content

Commit fcc5529

Browse files
Aprimora o carregamento de artigos a partir do pid provider e passa a registrar erros no registro do pid provider (#1422)
* refactor: antecipa validação de journal e issue em load_article

  Alterada a lógica de carregamento para que a existência de 'journal' e 'issue' seja verificada antes da chamada de Article.create_or_update.

  Principais mudanças:
  - Evita a criação ou atualização parcial de objetos Article quando as FKs obrigatórias estão ausentes.
  - Remove chamadas redundantes a article.save() dentro dos blocos de erro, centralizando a persistência após a atribuição bem-sucedida.
  - Melhora a clareza das mensagens de exceção incluindo o 'sps_pkg_name'.

* chore(pid_provider): add UNMATCHED status for journal/issue validation
* feat(pid_provider): implement XMLEvent model and add_event helper
* refactor(article): simplify load_article to require pp_xml and use XMLEvent
1 parent d99608f commit fcc5529

4 files changed

Lines changed: 199 additions & 86 deletions

File tree

article/sources/xmlsps.py

Lines changed: 50 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
from packtools.sps.models.kwd_group import ArticleKeywords
2222
from packtools.sps.models.v2.article_toc_sections import ArticleTocSections
2323
from packtools.sps.models.v2.related_articles import RelatedArticles
24-
from packtools.sps.pid_provider.xml_sps_lib import XMLWithPre
2524

2625
from article import choices
2726
from article.models import (
@@ -42,7 +41,7 @@
4241
from issue.articlemeta.loader import load_issue_sections
4342
from journal.models import Journal
4443
from location.models import Location
45-
from pid_provider.choices import PPXML_STATUS_DONE, PPXML_STATUS_INVALID
44+
from pid_provider.choices import PPXML_STATUS_UNMATCHED_JOURNAL_OR_ISSUE, PPXML_STATUS_INVALID
4645
from pid_provider.models import PidProviderXML
4746
# Researcher no longer used - replaced by ContribPerson
4847
# from researcher.models import Affiliation, Researcher
@@ -70,7 +69,7 @@ def add_error(errors, function_name, error, **kwargs):
7069
errors.append(error_dict)
7170

7271

73-
def load_article(user, xml=None, file_path=None, v3=None, pp_xml=None):
72+
def load_article(user, pp_xml):
7473
"""
7574
Carrega um artigo a partir de XML.
7675
@@ -80,10 +79,7 @@ def load_article(user, xml=None, file_path=None, v3=None, pp_xml=None):
8079
8180
Args:
8281
user: Usuário responsável pela operação (obrigatório)
83-
xml: String contendo o XML do artigo (opcional)
84-
file_path: Caminho para o arquivo XML (opcional)
85-
v3: PID v3 do artigo (opcional)
86-
pp_xml: Objeto PidProviderXML relacionado (opcional)
82+
pp_xml: Objeto PidProviderXML relacionado (obrigatório)
8783
8884
Returns:
8985
Article: Instância do artigo processado com todos os relacionamentos
@@ -98,76 +94,65 @@ def load_article(user, xml=None, file_path=None, v3=None, pp_xml=None):
9894
- O processamento continua mesmo com falhas parciais
9995
- O campo article.valid indica se o processamento foi completo
10096
"""
101-
logging.info(f"load article {pp_xml} {v3} {file_path}")
102-
errors = []
103-
article = None # Inicializar no início
97+
logging.info(f"load article {pp_xml}")
98+
detail = {"pp_xml": str(pp_xml)}
10499

105100
# Validações iniciais
106101
if not user:
107102
raise ValueError("User is required")
108103

109-
if not any([pp_xml, v3, file_path, xml]):
104+
if not pp_xml:
110105
raise ValueError(
111-
"load_article() requires params: pp_xml or v3 or file_path or xml"
106+
"load_article() requires params: pp_xml"
112107
)
113108

114-
if not pp_xml and v3:
115-
try:
116-
pp_xml = PidProviderXML.get_by_pid_v3(pid_v3=v3)
117-
except PidProviderXML.DoesNotExist:
118-
pp_xml = None
119-
120109
try:
121-
if pp_xml:
122-
xml_with_pre = pp_xml.xml_with_pre
123-
elif file_path:
124-
for xml_with_pre in XMLWithPre.create(file_path):
125-
xmltree = xml_with_pre.xmltree
126-
break
127-
elif xml:
128-
xml_with_pre = XMLWithPre("", etree.fromstring(xml))
110+
xml_with_pre = pp_xml.xml_with_pre
129111
except Exception as e:
130-
exc_type, exc_value, exc_traceback = sys.exc_info()
131-
UnexpectedEvent.create(
132-
item=str(pp_xml or v3 or file_path or "xml"),
133-
action="article.sources.xmlsps.load_article",
134-
exception=e,
135-
exc_traceback=exc_traceback,
136-
detail=dict(
137-
function="article.sources.xmlsps.load_article",
138-
xml=f"{xml}",
139-
v3=v3,
140-
file_path=file_path,
141-
pp_xml=str(pp_xml),
142-
),
143-
)
144-
item = str(pp_xml or v3 or file_path or "xml")
145-
if pp_xml:
146-
pp_xml.proc_status = PPXML_STATUS_INVALID
147-
pp_xml.save()
148-
updated = (
149-
Article.objects.filter(pid_v3=pp_xml.v3)
150-
.exclude(
151-
pp_xml=pp_xml,
152-
data_status=choices.DATA_STATUS_INVALID,
153-
)
154-
.update(
155-
pp_xml=pp_xml,
156-
data_status=choices.DATA_STATUS_INVALID,
157-
)
112+
updated = (
113+
Article.objects.filter(pp_xml=pp_xml)
114+
.exclude(
115+
data_status=choices.DATA_STATUS_INVALID,
158116
)
159-
raise ValueError(f"Unable to get XML to load article from {item}: {e}")
117+
.update(
118+
data_status=choices.DATA_STATUS_INVALID,
119+
)
120+
)
121+
errors = [
122+
{
123+
"function": "load_article",
124+
"error_type": e.__class__.__name__,
125+
"error_message": str(e),
126+
"timestamp": datetime.now().isoformat(),
127+
}
128+
]
129+
pp_xml.add_event(name="load_article", proc_status=PPXML_STATUS_INVALID, detail=detail, errors=errors, exceptions=e)
130+
raise ValueError(f"Unable to get XML to load article from {pp_xml}: {e}")
160131

161-
pid_v3 = v3 or xml_with_pre.v3
162132

163133
try:
164-
# Sequência organizada para atribuição de campos do Article
165-
# Do mais simples (campos diretos) para o mais complexo (FKs e M2M)
134+
errors = []
135+
article = None
166136
event = None
137+
167138
xmltree = xml_with_pre.xmltree
168139

140+
pid_v3 = xml_with_pre.v3
169141
sps_pkg_name = xml_with_pre.sps_pkg_name
170-
logging.info(f"Article {pid_v3} {sps_pkg_name}")
142+
143+
logging.info(f"Pid Provider XML: {pid_v3} {sps_pkg_name}")
144+
145+
journal = get_journal(xmltree=xmltree, errors=errors)
146+
if not journal:
147+
raise ValueError(f"Not found journal for pid provider xml: {pid_v3} {sps_pkg_name}")
148+
issue = get_issue(
149+
xmltree=xmltree,
150+
journal=journal,
151+
item=pid_v3,
152+
errors=errors,
153+
)
154+
if not issue:
155+
raise ValueError(f"Not found issue for pid provider xml: {pid_v3} {sps_pkg_name}")
171156

172157
# CRIAÇÃO/OBTENÇÃO DO OBJETO PRINCIPAL
173158
article = Article.create_or_update(
@@ -201,19 +186,9 @@ def load_article(user, xml=None, file_path=None, v3=None, pp_xml=None):
201186
)
202187

203188
# FOREIGN KEYS SIMPLES
204-
article.journal = get_journal(xmltree=xmltree, errors=errors)
205-
if not article.journal:
206-
article.save()
207-
raise ValueError(f"Not found journal for article: {pid_v3}")
208-
article.issue = get_issue(
209-
xmltree=xmltree,
210-
journal=article.journal,
211-
item=pid_v3,
212-
errors=errors,
213-
)
214-
if not article.issue:
215-
article.save()
216-
raise ValueError(f"Not found issue for article: {pid_v3}")
189+
article.journal = journal
190+
article.issue = issue
191+
article.save()
217192

218193
# Salvar uma vez após definir todos os campos simples
219194
logging.info(
@@ -286,19 +261,9 @@ def load_article(user, xml=None, file_path=None, v3=None, pp_xml=None):
286261
if event:
287262
event.finish(errors=errors, exceptions=traceback.format_exc())
288263
raise
289-
UnexpectedEvent.create(
290-
item=str(pp_xml or v3 or file_path or "xml"),
291-
action="article.sources.xmlsps.load_article",
292-
exception=e,
293-
exc_traceback=exc_traceback,
294-
detail=dict(
295-
function="article.sources.xmlsps.load_article",
296-
xml=f"{xml}",
297-
v3=v3,
298-
file_path=file_path,
299-
pp_xml=str(pp_xml),
300-
),
301-
)
264+
265+
pp_xml.add_event(name="load_article", proc_status=PPXML_STATUS_UNMATCHED_JOURNAL_OR_ISSUE, detail=detail, errors=errors, exceptions=e)
266+
302267
raise
303268

304269

pid_provider/choices.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
PPXML_STATUS_DONE = "DONE"
99
PPXML_STATUS_UNDEF = "UNDEF"
1010
PPXML_STATUS_INVALID = "NVALID"
11+
PPXML_STATUS_UNMATCHED_JOURNAL_OR_ISSUE = "UNMATCH"
12+
1113
PPXML_STATUS_DUPLICATED = "DUP"
1214
PPXML_STATUS_DEDUPLICATED = "DEDUP"
1315
PPXML_STATUS = (
@@ -19,4 +21,5 @@
1921
(PPXML_STATUS_INVALID, _("invalid")),
2022
(PPXML_STATUS_DUPLICATED, _("duplicated")),
2123
(PPXML_STATUS_DEDUPLICATED, _("deduplicated")),
24+
(PPXML_STATUS_UNMATCHED_JOURNAL_OR_ISSUE, _("unmatched journal or issue")),
2225
)
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
# Generated by Django 5.2.7 on 2026-03-26 20:52
2+
3+
import django.db.models.deletion
4+
import modelcluster.fields
5+
from django.conf import settings
6+
from django.db import migrations, models
7+
8+
9+
class Migration(migrations.Migration):
    """
    Schema migration for the pid_provider app.

    It performs two operations:
      1. Alters ``PidProviderXML.proc_status`` so its choices include the
         ``"UNMATCH"`` ("unmatched journal or issue") value.
      2. Creates the ``XMLEvent`` model, which is linked to
         ``PidProviderXML`` through a ``ParentalKey`` named ``ppxml``.
    """

    dependencies = [
        ("pid_provider", "0015_alter_xmlversion_file_xmlurl"),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        # 1. Processing-status choices now include "UNMATCH".
        migrations.AlterField(
            model_name="pidproviderxml",
            name="proc_status",
            field=models.CharField(
                verbose_name="processing status",
                max_length=7,
                default="TODO",
                blank=True,
                null=True,
                choices=[
                    ("TODO", "To do"),
                    ("DONE", "Done"),
                    ("WAIT", "waiting"),
                    ("IGNORE", "ignore"),
                    ("UNDEF", "undefined"),
                    ("NVALID", "invalid"),
                    ("DUP", "duplicated"),
                    ("DEDUP", "deduplicated"),
                    ("UNMATCH", "unmatched journal or issue"),
                ],
            ),
        ),
        # 2. New table holding the events of each PidProviderXML.
        #    The order of the entries in ``fields`` is kept as generated.
        migrations.CreateModel(
            name="XMLEvent",
            options={"abstract": False},
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        verbose_name="ID",
                        primary_key=True,
                        auto_created=True,
                        serialize=False,
                    ),
                ),
                (
                    "updated",
                    models.DateTimeField(
                        verbose_name="Last update date", auto_now=True
                    ),
                ),
                ("name", models.CharField(verbose_name="name", max_length=200)),
                ("detail", models.JSONField(null=True, blank=True)),
                (
                    "created",
                    models.DateTimeField(
                        verbose_name="Creation date", auto_now_add=True
                    ),
                ),
                ("completed", models.BooleanField(default=False)),
                (
                    "creator",
                    models.ForeignKey(
                        to=settings.AUTH_USER_MODEL,
                        on_delete=django.db.models.deletion.SET_NULL,
                        related_name="%(class)s_creator",
                        verbose_name="Creator",
                        editable=False,
                        null=True,
                    ),
                ),
                (
                    "ppxml",
                    modelcluster.fields.ParentalKey(
                        to="pid_provider.pidproviderxml",
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="events",
                    ),
                ),
                (
                    "updated_by",
                    models.ForeignKey(
                        to=settings.AUTH_USER_MODEL,
                        on_delete=django.db.models.deletion.SET_NULL,
                        related_name="%(class)s_last_mod_user",
                        verbose_name="Updater",
                        editable=False,
                        blank=True,
                        null=True,
                    ),
                ),
            ],
        ),
    ]

pid_provider/models.py

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
zero_to_none,
3939
QueryBuilderPidProviderXML,
4040
)
41-
from tracker.models import BaseEvent, EventSaveError, UnexpectedEvent
41+
from tracker.models import BaseEvent, UnexpectedEvent
4242

4343
try:
4444
from django_prometheus.models import ExportModelOperationsMixin
@@ -441,12 +441,16 @@ class PidProviderXML(BasePidProviderXML, CommonControlField, ClusterableModel):
441441
FieldPanel("z_links"),
442442
FieldPanel("z_partial_body"),
443443
]
444+
panels_event = [
445+
InlinePanel("events", label=_("Events")),
446+
]
444447

445448
edit_handler = TabbedInterface(
446449
[
447450
ObjectList(panel_a, heading=_("Identification")),
448451
ObjectList(panel_b, heading=_("Other PIDs")),
449452
ObjectList(panel_c, heading=_("Data")),
453+
ObjectList(panels_event, heading=_("Events")),
450454
]
451455
)
452456

@@ -1447,6 +1451,11 @@ def fix_pkg_name(self, pkg_name):
14471451
self.save()
14481452
return True
14491453
return False
1454+
1455+
def add_event(self, name, proc_status, detail=None, errors=None, exceptions=None):
    """
    Update this record's processing status and log a matching event.

    The new ``proc_status`` is assigned and saved first; afterwards an
    ``XMLEvent`` is registered for this record.

    Args:
        name: Event name forwarded to ``XMLEvent.register``.
        proc_status: Value stored in ``self.proc_status`` before saving.
        detail: Optional detail payload forwarded to the event.
        errors: Optional errors forwarded to the event.
        exceptions: Optional exception information forwarded to the event.

    Returns:
        Whatever ``XMLEvent.register`` returns for the new event.
    """
    self.proc_status = proc_status
    self.save()

    event_payload = {
        "detail": detail,
        "errors": errors,
        "exceptions": exceptions,
    }
    return XMLEvent.register(self, name, **event_payload)
14501459

14511460

14521461
class FixPidV2(CommonControlField):
@@ -1748,3 +1757,37 @@ def save_file(self, xml_content, filename=None):
17481757
except Exception as e:
17491758
logging.error(f"Error saving zip file for XMLURL {self.url}: {e}")
17501759
return False
1760+
1761+
1762+
class XMLEvent(BaseEvent, CommonControlField):
    """
    Event log entry attached to a PidProviderXML record.

    Each instance records one step of the XML processing in the PID
    Provider system (for example a registration attempt or a validation
    error) so it can be inspected later for debugging and monitoring.

    Attributes:
        ppxml (ParentalKey): The PidProviderXML this event belongs to. The
            events are exposed on that record as ``events`` and the key is
            declared with ``on_delete=models.CASCADE``.

    Methods:
        register (classmethod): Build an event for a PidProviderXML and
            finish it in one call.

    NOTE(review): the remaining fields and helpers (name, detail, created,
    completed, finish, ...) are not defined in this class; they are
    presumably provided by the base classes - confirm there.
    """

    ppxml = ParentalKey(
        PidProviderXML,
        related_name="events",
        on_delete=models.CASCADE,
    )

    @classmethod
    def register(cls, ppxml, name, detail=None, errors=None, exceptions=None):
        """
        Create an event for ``ppxml`` and immediately finish it.

        Args:
            ppxml: The PidProviderXML instance the event refers to.
            name: Name assigned to the event.
            detail: Optional detail payload passed on to ``finish``.
            errors: Optional errors passed on to ``finish``.
            exceptions: Optional exception information passed on to
                ``finish``.

        Returns:
            The event instance after ``finish`` has been called on it.
        """
        event = cls()
        event.ppxml = ppxml
        event.name = name

        # The event counts as completed only when neither errors nor
        # exceptions were reported.
        succeeded = not (errors or exceptions)

        event.finish(
            completed=succeeded,
            detail=detail,
            errors=errors,
            exceptions=exceptions,
        )
        return event

0 commit comments

Comments
 (0)