File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -8,7 +8,7 @@ lint: ## Lint code
88 flake8 --config flake8 scrapingbee/ tests/ setup.py
99
1010test : # # Run tests
11- pytest tests/
11+ python -m pytest tests/
1212
1313build : # # Build a binary wheel and a source tarball
1414 python setup.py sdist bdist_wheel
Original file line number Diff line number Diff line change @@ -42,14 +42,17 @@ Signup to ScrapingBee to [get your API key](https://app.scrapingbee.com/account/
4242 ' extract_rules' : {' title' : ' h1' },
4343 # Wrap response in JSON
4444 ' json_response' : False ,
45- # JavaScript snippet to execute (clicking on a button, scrolling ...)
46- ' js_snippet' : ' ' ,
47- # Scrolling to the end of the page before returning your results
48- ' js_scroll' : False ,
49- # The time to wait between each scroll
50- ' js_scroll_wait' : 1000 ,
51- # The number of scrolls you want to make
52- ' js_scroll_count' : 1 ,
45+ # Interact with the webpage you want to scrape
46+ ' js_scenario' : {
47+ " instructions" : [
48+ {" wait_for" : " #slow_button" },
49+ {" click" : " #slow_button" },
50+ {" scroll_x" : 1000 },
51+ {" wait" : 1000 },
52+ {" scroll_x" : 1000 },
53+ {" wait" : 1000 },
54+ ]
55+ },
5356 # Use premium proxies to bypass difficult to scrape websites (10-25 credits/request)
5457 ' premium_proxy' : False ,
5558 # Execute JavaScript code with a Headless Browser (5 credits/request)
Original file line number Diff line number Diff line change 22
33A new package is automatically uploaded to PyPI when a new tag is pushed to GitHub. To release a new version, follow these steps:
44
5- 1 . Update the version number X.X.X in [ setup .py] ( setup .py) and push the change.
5+ 1 . Update the version number X.X.X in [ scrapingbee/ __ version __ .py] ( scrapingbee/__version__ .py) and push the change.
66
772 . Create a tag with the same version number.
88
Original file line number Diff line number Diff line change 1- __version__ = '1.1.6 '
1+ __version__ = '1.1.7 '
Original file line number Diff line number Diff line change @@ -25,11 +25,11 @@ def process_cookies(cookies: dict) -> str:
2525 return cookies
2626
2727
28- def process_extract_rules ( extract_rules : dict ) -> str :
29- if isinstance (extract_rules , dict ):
30- return urllib .parse .quote (json .dumps (extract_rules ))
28+ def process_json_stringify_param ( param : dict , param_name : str ) -> str :
29+ if isinstance (param , dict ):
30+ return urllib .parse .quote (json .dumps (param ))
3131 else :
32- raise ValueError ("extract_rules must be a dict or a stringified JSON" )
32+ raise ValueError (f" { param_name } must be a dict or a stringified JSON" )
3333
3434
3535def process_params (params : dict ) -> dict :
@@ -44,7 +44,9 @@ def process_params(params: dict) -> dict:
4444 elif k == 'cookies' :
4545 new_params [k ] = process_cookies (v )
4646 elif k == 'extract_rules' :
47- new_params [k ] = process_extract_rules (v )
47+ new_params [k ] = process_json_stringify_param (v , 'extract_rules' )
48+ elif k == 'js_scenario' :
49+ new_params [k ] = process_json_stringify_param (v , 'js_scenario' )
4850 else :
4951 new_params [k ] = v
5052 return new_params
Original file line number Diff line number Diff line change @@ -92,6 +92,27 @@ def test_get_with_extract_rules(mock_request, client):
9292 )
9393
9494
95+ @mock .patch ('scrapingbee.client.request' )
96+ def test_get_with_js_scenario (mock_request , client ):
97+ '''It should format the js_scenario and add it to the url'''
98+ client .get ('https://httpbin.org' , params = {
99+ 'js_scenario' : {
100+ 'instructions' : [
101+ {"click" : "#buttonId" }
102+ ]
103+ }
104+ })
105+
106+ mock_request .assert_called_with (
107+ 'GET' ,
108+ 'https://app.scrapingbee.com/api/v1/'
109+ '?api_key=API_KEY&url=https%3A//httpbin.org&'
110+ 'js_scenario=%7B%22instructions%22%3A%20%5B%7B%22click%22%3A%20%22%23buttonId%22%7D%5D%7D' ,
111+ data = None ,
112+ headers = default_headers ,
113+ )
114+
115+
95116@mock .patch ('scrapingbee.client.request' )
96117def test_post (mock_request , client ):
97118 '''It should make a POST request with some data'''
Original file line number Diff line number Diff line change 11from scrapingbee .utils import (
22 process_url ,
33 process_js_snippet ,
4+ process_json_stringify_param ,
45 process_headers ,
56 process_cookies ,
6- process_extract_rules ,
77 process_params ,
88 get_scrapingbee_url
99)
@@ -40,12 +40,22 @@ def test_process_cookies():
4040
4141def test_process_extract_rules ():
4242 '''It should format extract_rules to a stringified JSON'''
43- output = process_extract_rules ({
43+ output = process_json_stringify_param ({
4444 'title' : '.title'
45- })
45+ }, 'extract_rules' )
4646 assert output == '%7B%22title%22%3A%20%22.title%22%7D'
4747
4848
49+ def test_process_js_scenario ():
50+ '''It should format js_scenario to a stringified JSON'''
51+ output = process_json_stringify_param ({
52+ 'instructions' : [
53+ {"click" : "#buttonId" }
54+ ]
55+ }, 'js_scenario' )
56+ assert output == '%7B%22instructions%22%3A%20%5B%7B%22click%22%3A%20%22%23buttonId%22%7D%5D%7D'
57+
58+
4959def test_process_params ():
5060 '''It should keep boolean parameters'''
5161 output = process_params ({'render_js' : True })
You can’t perform that action at this time.
0 commit comments