Skip to content

Commit fbcf741

Browse files
author
Aaron Gonzales
committed
some linting
1 parent 6a130de commit fbcf741

4 files changed

Lines changed: 135 additions & 87 deletions

File tree

tools/twitter_search.py

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,34 +3,32 @@
33
import json
44
import sys
55
import logging
6-
import configparser
7-
from io import StringIO
6+
from twittersearch.result_stream import ResultStream
7+
from twittersearch.api_utils import *
8+
from twittersearch.utils import *
89

910
logger = logging.getLogger()
1011
# we want to leave this here and have it command-line configurable via the
1112
# --debug flag
1213
logging.basicConfig(level=os.environ.get("LOGLEVEL", "ERROR"))
1314

14-
from twittersearch.result_stream import ResultStream
15-
from twittersearch.api_utils import *
16-
from twittersearch.utils import *
1715

1816

1917
REQUIRED_KEYS = {"pt_rule", "endpoint"}
2018

2119
def parse_cmd_args():
2220
twitter_parser = argparse.ArgumentParser()
21+
help_msg = """configuration file with all parameters. Far,
22+
easier to use than the command-line args version.,
23+
If a valid file is found, all args will be populated,
24+
from there. Remaining command-line args,
25+
will overrule args found in the config,
26+
file."""
2327

2428
twitter_parser.add_argument("--config-file",
2529
dest="config_filename",
2630
default=None,
27-
help=("configuration file with all parameters. Far",
28-
"easier to use than the command-line args version.",
29-
"If a valid file is found, all args will be populated",
30-
"from there. Remaining command-line args",
31-
"will overrule args found in the config",
32-
"file."))
33-
31+
help=help_msg)
3432
twitter_parser.add_argument("--endpoint",
3533
dest="endpoint",
3634
default=None,
@@ -54,17 +52,20 @@ def parse_cmd_args():
5452
twitter_parser.add_argument("--count-bucket",
5553
dest="count_bucket",
5654
default=None,
57-
help=("Bucket size for counts query. Options",
58-
"are day, hour, minute (default is 'day')."))
55+
help=("Bucket size for counts API. Options:",
56+
"day, hour, minute (default is 'day')."))
5957

6058
twitter_parser.add_argument("--start-datetime",
6159
dest="from_date",
6260
default=None,
63-
help="Start of datetime window, format 'YYYY-mm-DDTHH:MM' (default: 30 days ago)")
61+
help="Start of datetime window, format "
62+
"'YYYY-mm-DDTHH:MM' (default: -30 days)")
6463

6564
twitter_parser.add_argument("--end-datetime", dest="to_date",
6665
default=None,
67-
help="End of datetime window, format 'YYYY-mm-DDTHH:MM' (default: most recent activities)")
66+
help="End of datetime window, format "
67+
"'YYYY-mm-DDTHH:MM' (default: most recent"
68+
" date)")
6869

6970
twitter_parser.add_argument("--filter-rule", dest="pt_rule",
7071
default=None,
@@ -77,13 +78,15 @@ def parse_cmd_args():
7778
twitter_parser.add_argument("--max-tweets", dest="max_tweets",
7879
default=500,
7980
type=int,
80-
help="Maximum results to return for all pages; see -a option")
81+
help="Maximum results to return for all pages;"
82+
"see -a option")
8183

8284
twitter_parser.add_argument("--max-pages",
8385
dest="max_pages",
8486
type=int,
8587
default=None,
86-
help="Maximum number of pages to use for this session.")
88+
help="Maximum number of pages to use "
89+
"for this session.")
8790

8891
twitter_parser.add_argument("--results-per-file", dest="results_per_file",
8992
default=0,
@@ -93,8 +96,8 @@ def parse_cmd_args():
9396
twitter_parser.add_argument("--filename-prefix",
9497
dest="filename_prefix",
9598
default=None,
96-
help="prefix for the filename where tweet json data will be stored."
97-
)
99+
help="prefix for the filename where tweet "
100+
" json data will be stored.")
98101

99102
twitter_parser.add_argument("--no-print-stream",
100103
dest="print_stream",
@@ -135,7 +138,7 @@ def main():
135138

136139
if len(dict_filter(config_dict).keys() & REQUIRED_KEYS) < len(REQUIRED_KEYS):
137140
print(REQUIRED_KEYS - dict_filter(config_dict).keys())
138-
logger.error("ERROR: not enough arguments present for the program to work")
141+
logger.error("ERROR: not enough arguments for the program to work")
139142
sys.exit(1)
140143

141144
stream_params = gen_params_from_config(config_dict)
@@ -158,5 +161,6 @@ def main():
158161
if config_dict["print_stream"] is True:
159162
print(json.dumps(tweet))
160163

164+
161165
if __name__ == '__main__':
162166
main()

twittersearch/api_utils.py

Lines changed: 69 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,49 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Module containing the various functions that are used for API calls,
4+
rule generation, and related.
5+
"""
6+
17
import re
28
import datetime
39
import logging
4-
import sys
5-
import configparser
610
try:
711
import ujson as json
812
except ImportError:
913
import json
1014

1115
__all__ = ["gen_rule_payload", "gen_params_from_config",
12-
"validate_count_api", "GNIP_RESP_CODES"]
16+
"validate_count_api", "GNIP_RESP_CODES", "change_to_count_endpoint"]
1317

1418
logger = logging.getLogger(__name__)
1519

1620
GNIP_RESP_CODES = {
17-
'200': 'OK: The request was successful. The JSON response will be similar to the following:',
21+
'200': ("OK: The request was successful. "
22+
"The JSON response will be similar to the following:"),
1823

19-
'400': ("Bad Request: Generally, this response occurs due to the presence of "
20-
"invalid JSON in the request, or where the request failed to send any JSON payload."),
24+
'400': ("Bad Request: Generally, this response occurs due "
25+
"to the presence of invalid JSON in the request, "
26+
"or where the request failed to send any JSON payload."),
2127

2228
'401': ("Unauthorized: HTTP authentication failed due to invalid "
23-
"credentials. Log in to console.gnip.com with your credentials to ensure"
24-
" you are using them correctly with your request. "),
29+
"credentials. Log in to console.gnip.com with your credentials "
30+
"to ensure you are using them correctly with your request. "),
2531
'404': ("Not Found: The resource was not found at the URL to which the "
2632
"request was sent, likely because an incorrect URL was used."),
33+
2734
'422': ("Unprocessable Entity: This is returned due to invalid parameters "
28-
"in a query or when a query is too complex for us to process. – e.g. "
29-
" invalid PowerTrack rules or too many phrase operators, rendering a "
30-
" query too complex."),
35+
"in a query or when a query is too complex for us to process. "
36+
"–e.g. invalid PowerTrack rules or too many phrase operators,"
37+
" rendering a query too complex."),
3138
'429': ("Unknown Code: Your app has exceeded the limit on connection "
32-
"requests. The corresponding JSON message will look similar to the "
33-
"following:"),
39+
"requests. The corresponding JSON message will look "
40+
"similar to the following:"),
3441
'500': ("Internal Server Error: There was an error on Gnip's side. Retry "
3542
"your request using an exponential backoff pattern."),
36-
'502': ("Proxy Error: There was an error on Gnip's side. Retry your request "
37-
"using an exponential backoff pattern."),
38-
'503': ("Service Unavailable: There was an error on Gnip's side. Retry your "
39-
"request using an exponential backoff pattern.")
43+
'502': ("Proxy Error: There was an error on Gnip's side. Retry your "
44+
"request using an exponential backoff pattern."),
45+
'503': ("Service Unavailable: There was an error on Gnip's side. "
46+
"Retry your request using an exponential backoff pattern.")
4047
}
4148

4249

@@ -48,7 +55,8 @@ def convert_utc_time(datetime_str):
4855
Args:
4956
datetime_str (str): the datestring, which can either be in GNIP API
5057
Format (YYYYmmDDHHSS), ISO date format (YYYY-mm-DD), ISO datetime
51-
format (YYYY-mm-DD HH:mm), or command-line ISO format (YYYY-mm-DDTHH:mm)
58+
format (YYYY-mm-DD HH:mm),
59+
or command-line ISO format (YYYY-mm-DDTHH:mm)
5260
5361
Returns:
5462
string of GNIP API formatted date.
@@ -71,23 +79,32 @@ def convert_utc_time(datetime_str):
7179
else:
7280
try:
7381
if "T" in datetime_str:
74-
datetime_str = datetime_str.replace('T', ' ') # command line with 'T'
82+
# command line with 'T'
83+
datetime_str = datetime_str.replace('T', ' ')
7584
_date = datetime.datetime.strptime(datetime_str, "%Y-%m-%d %H:%M")
7685
except ValueError:
7786
_date = datetime.datetime.strptime(datetime_str, "%Y-%m-%d")
7887
return _date.strftime("%Y%m%d%H%M")
7988

8089

8190
def change_to_count_endpoint(endpoint):
91+
"""Utility function to change a normal endpoint to a ``count`` api
92+
endpoint. Returns the same endpoint if it's already a valid count endpoint.
93+
Args:
94+
endpoint (str): your api endpoint
95+
96+
Returns:
97+
str: the modified endpoint for a count endpoint.
98+
"""
99+
82100
tokens = filter(lambda x: x != '', re.split("[/:]", endpoint))
83-
tokens = list(filter(lambda x: x != "https", tokens))
84-
last = tokens[-1].split('.')[0] # removes .json on the endpoint, saving
85-
tokens[-1] = last # changes from *.json -> '' since we are going to change the input
101+
filt_tokens = list(filter(lambda x: x != "https", tokens))
102+
last = filt_tokens[-1].split('.')[0] # removes .json on the endpoint
103+
filt_tokens[-1] = last # changes from *.json -> '' for changing input
86104
if last == 'counts':
87105
return endpoint
88106
else:
89-
return "https://" + '/'.join(tokens) + '/' + "counts.json"
90-
107+
return "https://" + '/'.join(filt_tokens) + '/' + "counts.json"
91108

92109

93110
def gen_rule_payload(pt_rule, max_results=500,
@@ -99,14 +116,19 @@ def gen_rule_payload(pt_rule, max_results=500,
99116
Generates the dict or json payload for a PowerTrack rule.
100117
101118
Args:
102-
pt_rule (str): the string version of a powertrack rule, e.g., "kanye west has:geo". Accepts multi-line strings for ease of entry.
103-
max_results (int): max results for the batch. Defaults to 500 to reduce API call usage.
104-
from_date (str or None): date format as specified by `convert_utc_time` for the starting time of your search.
105-
106-
to_date (str or None): date format as specified by `convert_utc_time` for the end time of your search.
107-
108-
count_bucket (str or None): if using the counts api endpoint, will define the count bucket for which tweets are aggregated.
109-
stringify (bool): specifies the return type, `dict` or json-formatted `str`.
119+
pt_rule (str): The string version of a powertrack rule,
120+
e.g., "kanye west has:geo". Accepts multi-line strings
121+
for ease of entry.
122+
max_results (int): max results for the batch.
123+
Defaults to 500 to reduce API call usage.
124+
from_date (str or None): Date format as specified by
125+
`convert_utc_time` for the starting time of your search.
126+
to_date (str or None): date format as specified by `convert_utc_time`
127+
for the end time of your search.
128+
count_bucket (str or None): If using the counts api endpoint,
129+
will define the count bucket for which tweets are aggregated.
130+
stringify (bool): specifies the return type, `dict`
131+
or json-formatted `str`.
110132
111133
Example:
112134
@@ -117,10 +139,8 @@ def gen_rule_payload(pt_rule, max_results=500,
117139
'{"query":"kanye west has:geo","maxResults":100,"toDate":"201708220000","fromDate":"201708210000"}'
118140
"""
119141

120-
pt_rule = ' '.join(pt_rule.split()) # allows multi-line strings
121-
payload = {"query": pt_rule,
122-
"maxResults": max_results,
123-
}
142+
pt_rule = ' '.join(pt_rule.split()) # allows multi-line strings
143+
payload = {"query": pt_rule, "maxResults": max_results}
124144
if to_date:
125145
payload["toDate"] = convert_utc_time(to_date)
126146
if from_date:
@@ -130,7 +150,8 @@ def gen_rule_payload(pt_rule, max_results=500,
130150
payload["bucket"] = count_bucket
131151
del payload["maxResults"]
132152
else:
133-
logger.error("invalid count bucket: provided {}".format(count_bucket))
153+
logger.error("invalid count bucket: provided {}"
154+
.format(count_bucket))
134155
raise ValueError
135156
if tag:
136157
payload["tag"] = tag
@@ -144,20 +165,18 @@ def gen_params_from_config(config_dict):
144165
"""
145166

146167
if config_dict.get("count_bucket"):
147-
logger.warn("change your endpoint to the count endpoint; this is"
148-
" default behavior when the count bucket field is defined")
168+
logger.warning("change your endpoint to the count endpoint; this is "
169+
"default behavior when the count bucket "
170+
"field is defined")
149171
endpoint = change_to_count_endpoint(config_dict.get("endpoint"))
150172
else:
151173
endpoint = config_dict.get("endpoint")
152174

153-
154175
rule = gen_rule_payload(pt_rule=config_dict["pt_rule"],
155176
from_date=config_dict.get("from_date", None),
156177
to_date=config_dict.get("to_date", None),
157-
max_results=int(config_dict.get("max_results", None)),
158-
count_bucket=config_dict.get("count_bucket", None)
159-
)
160-
178+
max_results=int(config_dict.get("max_results")),
179+
count_bucket=config_dict.get("count_bucket", None))
161180

162181
_dict = {"endpoint": endpoint,
163182
"username": config_dict.get("username"),
@@ -166,13 +185,16 @@ def gen_params_from_config(config_dict):
166185
"rule_payload": rule,
167186
"results_per_file": int(config_dict.get("results_per_file")),
168187
"max_tweets": int(config_dict.get("max_tweets")),
169-
"max_pages": config_dict.get("max_pages", None)
170-
}
188+
"max_pages": config_dict.get("max_pages", None)}
171189
return _dict
172190

173191

174192
def validate_count_api(rule_payload, endpoint):
175-
rule = rule_payload if isinstance(rule_payload, dict) else json.loads(rule_payload)
193+
"""
194+
Ensures that the counts api is set correctly in a payload.
195+
"""
196+
rule = (rule_payload if isinstance(rule_payload, dict)
197+
else json.loads(rule_payload))
176198
bucket = rule.get('bucket')
177199
counts = set(endpoint.split("/")) & {"counts.json"}
178200
if len(counts) == 0:

0 commit comments

Comments (0)