Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion scholarly/_proxy_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,7 @@ def _handle_captcha2(self, url):
cookie.pop("httpOnly", None)
cookie.pop("expiry", None)
cookie.pop("sameSite", None)
cookie.pop("secure", None) # httpx Cookies.set() does not accept 'secure'
self._session.cookies.set(**cookie)
Comment thread
ipeirotis marked this conversation as resolved.
Outdated

return self._session
Expand Down Expand Up @@ -478,8 +479,11 @@ def _new_session(self, **kwargs):
init_kwargs.update(headers=_HEADERS)

if self._proxy_works:
init_kwargs["proxies"] = proxies #.get("http", None)
self._proxies = proxies
# httpx uses proxy= (single URL), not proxies= (dict)
proxy_url = proxies.get("https://") or proxies.get("http://")
if proxy_url:
init_kwargs["proxy"] = proxy_url
Comment on lines 485 to +490
Copy link

Copilot AI Mar 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In `_new_session()`, proxy handling still relies on converting a `proxies` dict into a single proxy URL. Be careful about callers that pass only `{'http://': ...}` (e.g., `ScraperAPI()`), because the later conversion logic indexes the `https://` entry and can raise `KeyError`. Suggested fix: normalize/validate the dict here (ensure both schemes exist) or choose `https://` with a fallback to `http://` when deriving the proxy URL.

Copilot uses AI. Check for mistakes.
Comment on lines +487 to +490
Copy link

Copilot AI Mar 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PR title/description focuses on stripping the `secure` cookie attribute, but this hunk also changes how proxy configuration is passed into `httpx.Client` (switching behavior around `proxies` vs `proxy`). Please either update the PR description to cover this additional behavior change or split it into a separate PR so it can be reviewed/validated independently.

Copilot uses AI. Check for mistakes.
if self.proxy_mode is ProxyMode.SCRAPERAPI:
# SSL Certificate verification must be disabled for
# ScraperAPI requests to work.
Expand Down
29 changes: 22 additions & 7 deletions test_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os
import sys
from collections import Counter
from scholarly import scholarly, ProxyGenerator
from scholarly import scholarly, ProxyGenerator, MaxTriesExceededException
from scholarly.data_types import Mandate
from scholarly.publication_parser import PublicationParser
import random
Expand All @@ -15,6 +15,12 @@
import pandas as pd
except ImportError:
pd = None
try:
from fp.errors import FreeProxyException
except ImportError:
class FreeProxyException(Exception):
"""Placeholder when free-proxy package is not installed."""
pass
Comment thread
ipeirotis marked this conversation as resolved.
Outdated


class TestLuminati(unittest.TestCase):
Expand Down Expand Up @@ -100,9 +106,12 @@ def setUpClass(cls):
scholarly.set_timeout(5)
scholarly.set_retries(5)

pg = ProxyGenerator()
pg.FreeProxies()
scholarly.use_proxy(pg, ProxyGenerator())
try:
pg = ProxyGenerator()
pg.FreeProxies()
scholarly.use_proxy(pg, ProxyGenerator())
except (FreeProxyException, MaxTriesExceededException) as e:
raise unittest.SkipTest(f"No working free proxy available: {e}") from e

# Try storing the file temporarily as `scholarly.csv` and delete it.
# If there exists already a file with that name, generate a random name
Expand Down Expand Up @@ -611,12 +620,18 @@ def setUpClass(cls):
cls.connection_method = os.getenv("CONNECTION_METHOD")
else:
cls.connection_method = "none"
scholarly.use_proxy(None)
try:
scholarly.use_proxy(None)
except (FreeProxyException, MaxTriesExceededException) as e:
raise unittest.SkipTest(f"No working free proxy available: {e}") from e
return

# Use dual proxies for unit testing
secondary_proxy_generator = ProxyGenerator()
secondary_proxy_generator.FreeProxies()
try:
secondary_proxy_generator = ProxyGenerator()
secondary_proxy_generator.FreeProxies()
except (FreeProxyException, MaxTriesExceededException) as e:
raise unittest.SkipTest(f"No working free proxy available: {e}") from e

proxy_generator = ProxyGenerator()
if cls.connection_method == "tor":
Expand Down
Loading