Skip to content

Commit 20b72df

Browse files
authored
fix(utils): properly url encode all params and fix tests (#15)
1 parent 7faa894 commit 20b72df

3 files changed

Lines changed: 16 additions & 29 deletions

File tree

scrapingbee/utils.py

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -8,10 +8,6 @@
88
DEFAULT_HEADERS = {"User-Agent": f"ScrapingBee-Python/{__version__}"}
99

1010

11-
def process_url(url: str) -> str:
12-
return urllib.parse.quote(url)
13-
14-
1511
def process_js_snippet(js_snippet: str) -> str:
1612
return base64.b64encode(js_snippet.encode()).decode()
1713

@@ -34,7 +30,7 @@ def process_cookies(cookies: dict) -> str:
3430

3531
def process_json_stringify_param(param: dict, param_name: str) -> str:
3632
if isinstance(param, dict):
37-
return urllib.parse.quote(json.dumps(param))
33+
return json.dumps(param)
3834
else:
3935
raise ValueError(f"{param_name} must be a dict or a stringified JSON")
4036

@@ -44,8 +40,6 @@ def process_params(params: dict) -> dict:
4440
for k, v in params.items():
4541
if v in (None, '', [], {}):
4642
continue
47-
elif k == 'url':
48-
new_params[k] = process_url(v)
4943
elif k == 'js_snippet':
5044
new_params[k] = process_js_snippet(v)
5145
elif k == 'cookies':
@@ -71,6 +65,6 @@ def get_scrapingbee_url(api_url: str, api_key: str, url: str, params: dict) -> s
7165
spb_params = process_params(all_params)
7266

7367
# Format url query string
74-
qs = '&'.join(f'{k}={v}' for k, v in spb_params.items())
68+
qs = urllib.parse.urlencode(spb_params)
7569

7670
return f'{api_url}?{qs}'

tests/test_client.py

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ def test_get(mock_session, client):
1919
mock_session.return_value.request.assert_called_with(
2020
'GET',
2121
'https://app.scrapingbee.com/api/v1/'
22-
'?api_key=API_KEY&url=https%3A//httpbin.org',
22+
'?api_key=API_KEY&url=https%3A%2F%2Fhttpbin.org',
2323
data=None,
2424
headers=DEFAULT_HEADERS
2525
)
@@ -33,7 +33,7 @@ def test_get_with_params(mock_session, client):
3333
mock_session.return_value.request.assert_called_with(
3434
'GET',
3535
'https://app.scrapingbee.com/api/v1/'
36-
'?api_key=API_KEY&url=https%3A//httpbin.org&render_js=True',
36+
'?api_key=API_KEY&url=https%3A%2F%2Fhttpbin.org&render_js=True',
3737
data=None,
3838
headers=DEFAULT_HEADERS,
3939
)
@@ -47,7 +47,7 @@ def test_get_with_headers(mock_session, client):
4747
mock_session.return_value.request.assert_called_with(
4848
'GET',
4949
'https://app.scrapingbee.com/api/v1/'
50-
'?api_key=API_KEY&url=https%3A//httpbin.org&forward_headers=True',
50+
'?api_key=API_KEY&url=https%3A%2F%2Fhttpbin.org&forward_headers=True',
5151
data=None,
5252
headers={'Spb-Content-Type': 'text/html; charset=utf-8',
5353
**DEFAULT_HEADERS},
@@ -65,7 +65,7 @@ def test_get_with_cookies(mock_session, client):
6565
mock_session.return_value.request.assert_called_with(
6666
'GET',
6767
'https://app.scrapingbee.com/api/v1/'
68-
'?api_key=API_KEY&url=https%3A//httpbin.org&cookies=name_1=value_1;name_2=value_2',
68+
'?api_key=API_KEY&url=https%3A%2F%2Fhttpbin.org&cookies=name_1%3Dvalue_1%3Bname_2%3Dvalue_2',
6969
data=None,
7070
headers=DEFAULT_HEADERS,
7171
)
@@ -84,9 +84,9 @@ def test_get_with_extract_rules(mock_session, client):
8484
mock_session.return_value.request.assert_called_with(
8585
'GET',
8686
'https://app.scrapingbee.com/api/v1/'
87-
'?api_key=API_KEY&url=https%3A//httpbin.org&'
88-
'extract_rules=%7B%22title%22%3A%20%22h1%22%2C%20%22'
89-
'subtitle%22%3A%20%22%23subtitle%22%7D',
87+
'?api_key=API_KEY&url=https%3A%2F%2Fhttpbin.org&'
88+
'extract_rules=%7B%22title%22%3A+%22h1%22%2C+%22'
89+
'subtitle%22%3A+%22%23subtitle%22%7D',
9090
data=None,
9191
headers=DEFAULT_HEADERS,
9292
)
@@ -106,8 +106,8 @@ def test_get_with_js_scenario(mock_session, client):
106106
mock_session.return_value.request.assert_called_with(
107107
'GET',
108108
'https://app.scrapingbee.com/api/v1/'
109-
'?api_key=API_KEY&url=https%3A//httpbin.org&'
110-
'js_scenario=%7B%22instructions%22%3A%20%5B%7B%22click%22%3A%20%22%23buttonId%22%7D%5D%7D',
109+
'?api_key=API_KEY&url=https%3A%2F%2Fhttpbin.org&'
110+
'js_scenario=%7B%22instructions%22%3A+%5B%7B%22click%22%3A+%22%23buttonId%22%7D%5D%7D',
111111
data=None,
112112
headers=DEFAULT_HEADERS,
113113
)
@@ -120,7 +120,7 @@ def test_post(mock_session, client):
120120

121121
mock_session.return_value.request.assert_called_with(
122122
'POST',
123-
'https://app.scrapingbee.com/api/v1/?api_key=API_KEY&url=https%3A//httpbin.org',
123+
'https://app.scrapingbee.com/api/v1/?api_key=API_KEY&url=https%3A%2F%2Fhttpbin.org',
124124
data={'KEY_1': 'VALUE_1'},
125125
headers=DEFAULT_HEADERS
126126
)

tests/test_utils.py

Lines changed: 4 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
from scrapingbee.utils import (
2-
process_url,
32
process_js_snippet,
43
process_json_stringify_param,
54
process_headers,
@@ -9,12 +8,6 @@
98
)
109

1110

12-
def test_process_url():
13-
'''It should encode the url'''
14-
output = process_url('https://example.com?p=1')
15-
assert output == 'https%3A//example.com%3Fp%3D1'
16-
17-
1811
def test_process_js_snippet():
1912
'''It should encode JavaScript code'''
2013
output = process_js_snippet(
@@ -46,7 +39,7 @@ def test_process_extract_rules():
4639
output = process_json_stringify_param({
4740
'title': '.title'
4841
}, 'extract_rules')
49-
assert output == '%7B%22title%22%3A%20%22.title%22%7D'
42+
assert output == '{"title": ".title"}'
5043

5144

5245
def test_process_js_scenario():
@@ -56,7 +49,7 @@ def test_process_js_scenario():
5649
{"click": "#buttonId"}
5750
]
5851
}, 'js_scenario')
59-
assert output == '%7B%22instructions%22%3A%20%5B%7B%22click%22%3A%20%22%23buttonId%22%7D%5D%7D'
52+
assert output == '{"instructions": [{"click": "#buttonId"}]}'
6053

6154

6255
def test_process_params():
@@ -71,7 +64,7 @@ def test_get_scrapingbee_url():
7164
'https://app.scrapingbee.com/api/v1/',
7265
'API_KEY',
7366
'https://httpbin.org',
74-
{'render_js': True}
67+
{'render_js': True, 'wait_for': '#foo'}
7568
)
7669
assert output == 'https://app.scrapingbee.com/api/v1/' \
77-
'?api_key=API_KEY&url=https%3A//httpbin.org&render_js=True'
70+
'?api_key=API_KEY&url=https%3A%2F%2Fhttpbin.org&render_js=True&wait_for=%23foo'

0 commit comments

Comments
 (0)