Skip to content

Commit 530446b

Browse files
authored
Add js_scenario param (#9)
1 parent bace54e commit 530446b

7 files changed

Lines changed: 55 additions & 19 deletions

File tree

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ lint: ## Lint code
88
flake8 --config flake8 scrapingbee/ tests/ setup.py
99

1010
test: ## Run tests
11-
pytest tests/
11+
python -m pytest tests/
1212

1313
build: ## Build a binary wheel and a source tarball
1414
python setup.py sdist bdist_wheel

README.md

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,14 +42,17 @@ Signup to ScrapingBee to [get your API key](https://app.scrapingbee.com/account/
4242
'extract_rules': {'title': 'h1'},
4343
# Wrap response in JSON
4444
'json_response': False,
45-
# JavaScript snippet to execute (clicking on a button, scrolling ...)
46-
'js_snippet': '',
47-
# Scrolling to the end of the page before returning your results
48-
'js_scroll': False,
49-
# The time to wait between each scroll
50-
'js_scroll_wait': 1000,
51-
# The number of scrolls you want to make
52-
'js_scroll_count': 1,
45+
# Interact with the webpage you want to scrape
46+
'js_scenario': {
47+
"instructions": [
48+
{"wait_for": "#slow_button"},
49+
{"click": "#slow_button"},
50+
{"scroll_x": 1000},
51+
{"wait": 1000},
52+
{"scroll_x": 1000},
53+
{"wait": 1000},
54+
]
55+
},
5356
# Use premium proxies to bypass difficult to scrape websites (10-25 credits/request)
5457
'premium_proxy': False,
5558
# Execute JavaScript code with a Headless Browser (5 credits/request)

RELEASE.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
A new package is automatically uploaded to PyPI when a new tag is pushed to Github. To release a new version follow the steps:
44

5-
1. Update the version number X.X.X in [setup.py](setup.py) and push the change.
5+
1. Update the version number X.X.X in [scrapingbee/__version__.py](scrapingbee/__version__.py) and push the change.
66

77
2. Create a tag with the same version number.
88

scrapingbee/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '1.1.6'
1+
__version__ = '1.1.7'

scrapingbee/utils.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,11 @@ def process_cookies(cookies: dict) -> str:
2525
return cookies
2626

2727

28-
def process_extract_rules(extract_rules: dict) -> str:
29-
if isinstance(extract_rules, dict):
30-
return urllib.parse.quote(json.dumps(extract_rules))
28+
def process_json_stringify_param(param: dict, param_name: str) -> str:
29+
if isinstance(param, dict):
30+
return urllib.parse.quote(json.dumps(param))
3131
else:
32-
raise ValueError("extract_rules must be a dict or a stringified JSON")
32+
raise ValueError(f"{param_name} must be a dict or a stringified JSON")
3333

3434

3535
def process_params(params: dict) -> dict:
@@ -44,7 +44,9 @@ def process_params(params: dict) -> dict:
4444
elif k == 'cookies':
4545
new_params[k] = process_cookies(v)
4646
elif k == 'extract_rules':
47-
new_params[k] = process_extract_rules(v)
47+
new_params[k] = process_json_stringify_param(v, 'extract_rules')
48+
elif k == 'js_scenario':
49+
new_params[k] = process_json_stringify_param(v, 'js_scenario')
4850
else:
4951
new_params[k] = v
5052
return new_params

tests/test_client.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,27 @@ def test_get_with_extract_rules(mock_request, client):
9292
)
9393

9494

95+
@mock.patch('scrapingbee.client.request')
96+
def test_get_with_js_scenario(mock_request, client):
97+
'''It should format the js_scenario and add it to the url'''
98+
client.get('https://httpbin.org', params={
99+
'js_scenario': {
100+
'instructions': [
101+
{"click": "#buttonId"}
102+
]
103+
}
104+
})
105+
106+
mock_request.assert_called_with(
107+
'GET',
108+
'https://app.scrapingbee.com/api/v1/'
109+
'?api_key=API_KEY&url=https%3A//httpbin.org&'
110+
'js_scenario=%7B%22instructions%22%3A%20%5B%7B%22click%22%3A%20%22%23buttonId%22%7D%5D%7D',
111+
data=None,
112+
headers=default_headers,
113+
)
114+
115+
95116
@mock.patch('scrapingbee.client.request')
96117
def test_post(mock_request, client):
97118
'''It should make a POST request with some data'''

tests/test_utils.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
from scrapingbee.utils import (
22
process_url,
33
process_js_snippet,
4+
process_json_stringify_param,
45
process_headers,
56
process_cookies,
6-
process_extract_rules,
77
process_params,
88
get_scrapingbee_url
99
)
@@ -40,12 +40,22 @@ def test_process_cookies():
4040

4141
def test_process_extract_rules():
4242
'''It should format extract_rules to a stringified JSON'''
43-
output = process_extract_rules({
43+
output = process_json_stringify_param({
4444
'title': '.title'
45-
})
45+
}, 'extract_rules')
4646
assert output == '%7B%22title%22%3A%20%22.title%22%7D'
4747

4848

49+
def test_process_js_scenario():
50+
'''It should format js_scenario to a stringified JSON'''
51+
output = process_json_stringify_param({
52+
'instructions': [
53+
{"click": "#buttonId"}
54+
]
55+
}, 'js_scenario')
56+
assert output == '%7B%22instructions%22%3A%20%5B%7B%22click%22%3A%20%22%23buttonId%22%7D%5D%7D'
57+
58+
4959
def test_process_params():
5060
'''It should keep boolean parameters'''
5161
output = process_params({'render_js': True})

0 commit comments

Comments
 (0)