Skip to content

Commit de9e01d

Browse files
introduz a capacidade de configurar a verificação de certificados SSL (#1423)
* refactor: define verify=False como padrão em fetch_data e harvesters * feat: permite passar parâmetro verify na carga de coleções * refactor: propaga parâmetro verify nas tasks e fontes de journals * refactor: adiciona suporte a verify nas tasks e loaders de issues * config: atualiza scheduler para desabilitar verificação SSL por padrão
1 parent fcc5529 commit de9e01d

8 files changed

Lines changed: 39 additions & 25 deletions

File tree

bigbang/tasks_scheduler.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -271,15 +271,15 @@ def schedule_load_journal_from_article_meta(username, enabled=False):
271271
"""
272272
Agenda a tarefa de carga de dados de journals obtidos do AM e Core.
273273
274-
Configura verify=True para verificação SSL nas requisições HTTP.
274+
Configura verify=False para verificação SSL nas requisições HTTP.
275275
"""
276276
schedule_task(
277277
task="journal.tasks.load_journal_from_article_meta",
278278
name="Carga de dados de journals obtidos do AM e Core",
279279
kwargs=dict(
280280
load_data=False,
281281
collection_acron="scl",
282-
verify=True,
282+
verify=False,
283283
),
284284
description=_("Carga de dados de journals obtidos do AM e Core"),
285285
priority=1,
@@ -295,15 +295,15 @@ def schedule_collect_journals_from_am(username, enabled=False):
295295
"""
296296
Agenda a tarefa de coleta de journals da fonte AM.
297297
298-
Configura verify=True para verificação SSL nas requisições HTTP.
298+
Configura verify=False para verificação SSL nas requisições HTTP.
299299
"""
300300
schedule_task(
301301
task="journal.tasks.load_journal_from_article_meta",
302302
name="Coleta de journals da fonte AM",
303303
kwargs=dict(
304304
load_data=True,
305305
collection_acron="scl",
306-
verify=True,
306+
verify=False,
307307
),
308308
description=_("Coleta de journals da fonte AM"),
309309
priority=1,

collection/models.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -223,12 +223,12 @@ def __str__(self):
223223
base_form_class = CoreAdminModelForm
224224

225225
@classmethod
226-
def load(cls, user, collections_data=None):
226+
def load(cls, user, collections_data=None, verify=False):
227227
if not collections_data:
228228
collections_data = fetch_data(
229229
"https://articlemeta.scielo.org/api/v1/collection/identifiers/",
230230
json=True,
231-
verify=False,
231+
verify=verify,
232232
)
233233

234234
for collection_data in collections_data:

core/utils/harvesters.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ def __init__(
1919
until_date: Optional[str] = None,
2020
limit: Optional[int] = None,
2121
timeout: int = 30,
22+
verify: bool = False,
2223
):
2324
"""
2425
Inicializa o harvester do ArticleMeta.
@@ -37,6 +38,7 @@ def __init__(
3738
self.until_date = until_date or datetime.utcnow().isoformat()[:10]
3839
self.limit = limit or 1000
3940
self.timeout = timeout
41+
self.verify = verify
4042

4143
def harvest_documents(self) -> Generator[Dict[str, Any], None, None]:
4244
"""
@@ -73,7 +75,7 @@ def harvest_documents(self) -> Generator[Dict[str, Any], None, None]:
7375
logging.info(f"Fetching AM documents from: {url}")
7476

7577
# Faz requisição
76-
response = fetch_data(url, json=True, timeout=self.timeout, verify=False)
78+
response = fetch_data(url, json=True, timeout=self.timeout, verify=self.verify)
7779

7880
# Processa objetos retornados
7981
objects = response.get("objects", [])
@@ -147,6 +149,7 @@ def __init__(
147149
until_date: Optional[str] = None,
148150
limit: int = 100,
149151
timeout: int = 5,
152+
verify: bool = False,
150153
):
151154
"""
152155
Inicializa o harvester do OPAC.
@@ -165,6 +168,7 @@ def __init__(
165168
self.until_date = until_date or datetime.utcnow().isoformat()[:10]
166169
self.limit = limit or 100
167170
self.timeout = timeout or 5
171+
self.verify = verify
168172

169173
def harvest_documents(self) -> Generator[Dict[str, Any], None, None]:
170174
"""
@@ -199,7 +203,7 @@ def harvest_documents(self) -> Generator[Dict[str, Any], None, None]:
199203

200204
# Faz requisição
201205
# verify=False é necessário para evitar erros de SSL em ambientes onde o certificado do OPAC não é reconhecido
202-
response = fetch_data(url, json=True, timeout=self.timeout, verify=False)
206+
response = fetch_data(url, json=True, timeout=self.timeout, verify=self.verify)
203207

204208
# Define total de páginas na primeira iteração
205209
if total_pages is None:

core/utils/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ class NonRetryableError(Exception):
4242
wait=wait_exponential(multiplier=1, min=1, max=5),
4343
stop=stop_after_attempt(5),
4444
)
45-
def fetch_data(url, headers=None, json=False, timeout=FETCH_DATA_TIMEOUT, verify=True):
45+
def fetch_data(url, headers=None, json=False, timeout=FETCH_DATA_TIMEOUT, verify=False):
4646
"""
4747
Get the resource with HTTP
4848
Retry: Wait 2^x * 1 second between each retry starting with 4 seconds,

issue/articlemeta/loader.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,16 @@
1414

1515

1616
def harvest_issue_identifiers(
17-
collection_acron, from_date, until_date, force_update, timeout=30
17+
collection_acron, from_date, until_date, force_update, timeout=30, verify=False
1818
):
19+
# chamado em core/issue/tasks.py
1920
try:
2021
harvester = AMHarvester(
2122
record_type="issue",
2223
collection_acron=collection_acron,
2324
from_date=from_date,
2425
until_date=until_date,
26+
verify=verify
2527
)
2628
yield from harvester.harvest_documents()
2729

@@ -40,7 +42,7 @@ def harvest_issue_identifiers(
4042
)
4143

4244

43-
def harvest_and_load_issue(user, url, code, collection_acron, processing_date, force_update, timeout=30):
45+
def harvest_and_load_issue(user, url, code, collection_acron, processing_date, force_update, timeout=30, verify=False):
4446
if not url:
4547
raise ValueError("URL is required to harvest and load issue")
4648

@@ -50,7 +52,7 @@ def harvest_and_load_issue(user, url, code, collection_acron, processing_date, f
5052
if not collection_acron:
5153
raise ValueError("Collection acronym is required to harvest and load issue")
5254

53-
harvested_data = harvest_issue_data(url, timeout=timeout)
55+
harvested_data = harvest_issue_data(url, timeout=timeout, verify=verify)
5456
am_issue = load_am_issue(
5557
user,
5658
Collection.objects.get(acron3=collection_acron),
@@ -60,16 +62,17 @@ def harvest_and_load_issue(user, url, code, collection_acron, processing_date, f
6062
harvested_data,
6163
force_update=force_update,
6264
timeout=timeout,
65+
verify=verify,
6366
)
6467
if not am_issue:
6568
raise ValueError(f"Unable to create am_issue for {url}")
6669
return create_issue_from_am_issue(user, am_issue)
6770

6871

69-
def harvest_issue_data(url, timeout=30):
72+
def harvest_issue_data(url, timeout=30, verify=False):
7073
try:
7174
item = {}
72-
item["data"] = utils.fetch_data(url, json=True, timeout=timeout, verify=False)
75+
item["data"] = utils.fetch_data(url, json=True, timeout=timeout, verify=verify)
7376
item["status"] = "pending"
7477
return item
7578
except Exception as e:
@@ -96,14 +99,15 @@ def load_am_issue(
9699
force_update,
97100
do_harvesting=False,
98101
timeout=30,
102+
verify=False,
99103
):
100104
try:
101105
if not url:
102106
raise ValueError("URL is required to load AMIssue")
103107

104108
# Corrigido: não redefine harvested_data se já existe
105109
if do_harvesting or not harvested_data:
106-
harvested_data = harvest_issue_data(url, timeout=timeout)
110+
harvested_data = harvest_issue_data(url, timeout=timeout, verify=verify)
107111

108112
return AMIssue.create_or_update(
109113
pid=pid,
@@ -132,7 +136,7 @@ def load_am_issue(
132136
return None
133137

134138

135-
def complete_am_issue(user, am_issue):
139+
def complete_am_issue(user, am_issue, verify=False):
136140
try:
137141
detail = {}
138142

@@ -144,7 +148,7 @@ def complete_am_issue(user, am_issue):
144148
if not am_issue.url:
145149
raise ValueError("am_issue.url is required")
146150

147-
harvested_data = harvest_issue_data(am_issue.url)
151+
harvested_data = harvest_issue_data(am_issue.url, verify=verify)
148152
detail["harvested_data"] = str(harvested_data)
149153
am_issue.status = harvested_data.get("status")
150154
am_issue.data = harvested_data.get("data")
@@ -160,7 +164,7 @@ def complete_am_issue(user, am_issue):
160164
)
161165

162166

163-
def get_issue_data_from_am_issue(am_issue, user=None):
167+
def get_issue_data_from_am_issue(am_issue, user=None, verify=False):
164168
"""
165169
Extrai e ajusta dados do AMIssue para criação de Issue.
166170
@@ -183,7 +187,7 @@ def get_issue_data_from_am_issue(am_issue, user=None):
183187
am_data = am_issue.data
184188
if not am_data:
185189
if user:
186-
complete_am_issue(user, am_issue)
190+
complete_am_issue(user, am_issue, verify=verify)
187191
am_data = am_issue.data
188192

189193
if not am_data:

issue/tasks.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ def load_issue_from_articlemeta(
2828
until_date=None,
2929
force_update=None,
3030
timeout=30,
31+
verify=False,
3132
):
3233
"""
3334
Carrega issues do ArticleMeta para collections específicas.
@@ -53,7 +54,7 @@ def load_issue_from_articlemeta(
5354

5455
# Coletar identificadores de issues
5556
for issue_identifier in harvest_issue_identifiers(
56-
acron3, from_date, until_date, force_update, timeout
57+
acron3, from_date, until_date, force_update, timeout, verify
5758
):
5859
try:
5960
logger.info(f"Scheduling load for issue {issue_identifier.get('code')} in collection {acron3}")
@@ -65,6 +66,7 @@ def load_issue_from_articlemeta(
6566
issue_identifier=issue_identifier,
6667
force_update=force_update,
6768
timeout=timeout,
69+
verify=verify,
6870
)
6971
except Exception as e:
7072
exc_type, exc_value, exc_traceback = sys.exc_info()
@@ -116,6 +118,8 @@ def task_harvest_and_load_issue(
116118
issue_identifier=None,
117119
force_update=None,
118120
timeout=30,
121+
verify=False,
122+
119123
):
120124
"""
121125
Carrega um issue específico do ArticleMeta.
@@ -127,6 +131,7 @@ def task_harvest_and_load_issue(
127131
issue_identifier: Dados do identificador do issue
128132
force_update: Forçar atualização de registros existentes
129133
timeout: Timeout para requisições HTTP
134+
verify: Verificação SSL para requisições HTTP
130135
"""
131136
try:
132137
user = _get_user(request=self.request, user_id=user_id, username=username)
@@ -158,6 +163,7 @@ def task_harvest_and_load_issue(
158163
processing_date=processing_date,
159164
force_update=force_update,
160165
timeout=timeout,
166+
verify=verify,
161167
)
162168

163169
if issue:

journal/sources/article_meta.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def __init__(self, message):
1515
super().__init__(f"Failed to save SciELO Journal from article meta: {message}")
1616

1717

18-
def _get_collection_journals(offset=None, limit=None, collection=None, verify=True):
18+
def _get_collection_journals(offset=None, limit=None, collection=None, verify=False):
1919
limit = limit or 10
2020
offset = f"&offset={offset}" if offset else ""
2121
if not collection:
@@ -30,7 +30,7 @@ def _get_collection_journals(offset=None, limit=None, collection=None, verify=Tr
3030
return data
3131

3232

33-
def _fetch_and_store_journal(collection, issn, obj_collection, user, verify=True):
33+
def _fetch_and_store_journal(collection, issn, obj_collection, user, verify=False):
3434
url_journal = f"https://articlemeta.scielo.org/api/v1/journal/?collection={collection}&issn={issn}"
3535
data_journal = fetch_data(url_journal, json=True, timeout=30, verify=verify)
3636
AMJournal.create_or_update(
@@ -41,7 +41,7 @@ def _fetch_and_store_journal(collection, issn, obj_collection, user, verify=True
4141
)
4242

4343

44-
def process_journal_article_meta(collection, limit, user, journal_issn_list=None, verify=True):
44+
def process_journal_article_meta(collection, limit, user, journal_issn_list=None, verify=False):
4545
obj_collection = Collection.objects.get(acron3=collection)
4646
if journal_issn_list:
4747
for issn in journal_issn_list:

journal/tasks.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def load_journal_from_article_meta(
4949
collection_acron=None,
5050
load_data=None,
5151
journal_issn_list=None,
52-
verify=True,
52+
verify=False,
5353
):
5454
try:
5555
if journal_issn_list and not collection_acron:
@@ -95,7 +95,7 @@ def load_journal_from_article_meta_for_one_collection(
9595
limit=None,
9696
load_data=None,
9797
journal_issn_list=None,
98-
verify=True,
98+
verify=False,
9999
):
100100
user = _get_user(self.request, username=username, user_id=user_id)
101101
try:

0 commit comments

Comments (0)