# -*- coding: utf-8 -*-
"""
Module containing the various functions that are used for API calls,
rule generation, and related utilities.
"""

17import re
28import datetime
39import logging
4- import sys
5- import configparser
610try :
711 import ujson as json
812except ImportError :
913 import json
1014
# Public API of this module: the names exported by a star-import.
__all__ = ["gen_rule_payload", "gen_params_from_config",
           "validate_count_api", "GNIP_RESP_CODES", "change_to_count_endpoint"]
1317
# Module-level logger, named after this module so that applications can
# configure its verbosity independently.
logger = logging.getLogger(__name__)
1519
# Human-readable explanations for the HTTP status codes handled by this
# module, keyed by the status code as a *string* (e.g. '404', not 404).
GNIP_RESP_CODES = {
    '200': ("OK: The request was successful. "
            "The JSON response will be similar to the following:"),

    '400': ("Bad Request: Generally, this response occurs due "
            "to the presence of invalid JSON in the request, "
            "or where the request failed to send any JSON payload."),

    '401': ("Unauthorized: HTTP authentication failed due to invalid "
            "credentials. Log in to console.gnip.com with your credentials "
            "to ensure you are using them correctly with your request. "),

    '404': ("Not Found: The resource was not found at the URL to which the "
            "request was sent, likely because an incorrect URL was used."),

    '422': ("Unprocessable Entity: This is returned due to invalid parameters "
            "in a query or when a query is too complex for us to process. "
            "–e.g. invalid PowerTrack rules or too many phrase operators,"
            " rendering a query too complex."),

    '429': ("Unknown Code: Your app has exceeded the limit on connection "
            "requests. The corresponding JSON message will look "
            "similar to the following:"),

    '500': ("Internal Server Error: There was an error on Gnip's side. Retry "
            "your request using an exponential backoff pattern."),

    '502': ("Proxy Error: There was an error on Gnip's side. Retry your "
            "request using an exponential backoff pattern."),

    '503': ("Service Unavailable: There was an error on Gnip's side. "
            "Retry your request using an exponential backoff pattern.")
}
4148
4249
@@ -48,7 +55,8 @@ def convert_utc_time(datetime_str):
4855 Args:
4956 datetime_str (str): the datestring, which can either be in GNIP API
5057 Format (YYYYmmDDHHSS), ISO date format (YYYY-mm-DD), ISO datetime
51- format (YYYY-mm-DD HH:mm), or command-line ISO format (YYYY-mm-DDTHH:mm)
58+ format (YYYY-mm-DD HH:mm),
59+ or command-line ISO format (YYYY-mm-DDTHH:mm)
5260
5361 Returns:
5462 string of GNIP API formatted date.
@@ -71,23 +79,32 @@ def convert_utc_time(datetime_str):
7179 else :
7280 try :
7381 if "T" in datetime_str :
74- datetime_str = datetime_str .replace ('T' , ' ' ) # command line with 'T'
82+ # command line with 'T'
83+ datetime_str = datetime_str .replace ('T' , ' ' )
7584 _date = datetime .datetime .strptime (datetime_str , "%Y-%m-%d %H:%M" )
7685 except ValueError :
7786 _date = datetime .datetime .strptime (datetime_str , "%Y-%m-%d" )
7887 return _date .strftime ("%Y%m%d%H%M" )
7988
8089
def change_to_count_endpoint(endpoint):
    """Utility function to change a normal endpoint to a ``count`` api
    endpoint. Returns the same endpoint if it's already a valid count endpoint.

    Args:
        endpoint (str): your api endpoint

    Returns:
        str: the modified endpoint for a count endpoint. The input is
        returned unchanged when its final path component (ignoring any
        extension such as ``.json``) is already ``counts``.

    Raises:
        IndexError: if ``endpoint`` contains no path components at all
            (e.g. an empty string).
    """
    # Break the URL on "/" and ":" and discard the empty pieces produced by
    # "://" as well as the "https" scheme token; the scheme is re-added below.
    tokens = [tok for tok in re.split(r"[/:]", endpoint)
              if tok not in ("", "https")]

    # Drop the extension of the final component ("peregrine.json" ->
    # "peregrine") so that "counts.json" can be appended after it.
    last = tokens[-1].split('.')[0]
    if last == 'counts':
        return endpoint

    tokens[-1] = last
    return "https://" + '/'.join(tokens) + '/' + "counts.json"
91108
92109
93110def gen_rule_payload (pt_rule , max_results = 500 ,
@@ -99,14 +116,19 @@ def gen_rule_payload(pt_rule, max_results=500,
99116 Generates the dict or json payload for a PowerTrack rule.
100117
101118 Args:
102- pt_rule (str): the string version of a powertrack rule, e.g., "kanye west has:geo". Accepts multi-line strings for ease of entry.
103- max_results (int): max results for the batch. Defaults to 500 to reduce API call usage.
104- from_date (str or None): date format as specified by `convert_utc_time` for the starting time of your search.
105-
106- to_date (str or None): date format as specified by `convert_utc_time` for the end time of your search.
107-
108- count_bucket (str or None): if using the counts api endpoint, will define the count bucket for which tweets are aggregated.
109- stringify (bool): specifies the return type, `dict` or json-formatted `str`.
119+ pt_rule (str): The string version of a powertrack rule,
120+ e.g., "kanye west has:geo". Accepts multi-line strings
121+ for ease of entry.
122+ max_results (int): max results for the batch.
123+ Defaults to 500 to reduce API call usage.
124+ from_date (str or None): Date format as specified by
125+ `convert_utc_time` for the starting time of your search.
126+ to_date (str or None): date format as specified by `convert_utc_time`
127+ for the end time of your search.
128+ count_bucket (str or None): If using the counts api endpoint,
129+ will define the count bucket for which tweets are aggregated.
130+ stringify (bool): specifies the return type, `dict`
131+ or json-formatted `str`.
110132
111133 Example:
112134
@@ -117,10 +139,8 @@ def gen_rule_payload(pt_rule, max_results=500,
117139 '{"query":"kanye west has:geo","maxResults":100,"toDate":"201708220000","fromDate":"201708210000"}'
118140 """
119141
120- pt_rule = ' ' .join (pt_rule .split ()) # allows multi-line strings
121- payload = {"query" : pt_rule ,
122- "maxResults" : max_results ,
123- }
142+ pt_rule = ' ' .join (pt_rule .split ()) # allows multi-line strings
143+ payload = {"query" : pt_rule , "maxResults" : max_results }
124144 if to_date :
125145 payload ["toDate" ] = convert_utc_time (to_date )
126146 if from_date :
@@ -130,7 +150,8 @@ def gen_rule_payload(pt_rule, max_results=500,
130150 payload ["bucket" ] = count_bucket
131151 del payload ["maxResults" ]
132152 else :
133- logger .error ("invalid count bucket: provided {}" .format (count_bucket ))
153+ logger .error ("invalid count bucket: provided {}"
154+ .format (count_bucket ))
134155 raise ValueError
135156 if tag :
136157 payload ["tag" ] = tag
@@ -144,20 +165,18 @@ def gen_params_from_config(config_dict):
144165 """
145166
146167 if config_dict .get ("count_bucket" ):
147- logger .warn ("change your endpoint to the count endpoint; this is"
148- " default behavior when the count bucket field is defined" )
168+ logger .warning ("change your endpoint to the count endpoint; this is "
169+ "default behavior when the count bucket "
170+ "field is defined" )
149171 endpoint = change_to_count_endpoint (config_dict .get ("endpoint" ))
150172 else :
151173 endpoint = config_dict .get ("endpoint" )
152174
153-
154175 rule = gen_rule_payload (pt_rule = config_dict ["pt_rule" ],
155176 from_date = config_dict .get ("from_date" , None ),
156177 to_date = config_dict .get ("to_date" , None ),
157- max_results = int (config_dict .get ("max_results" , None )),
158- count_bucket = config_dict .get ("count_bucket" , None )
159- )
160-
178+ max_results = int (config_dict .get ("max_results" )),
179+ count_bucket = config_dict .get ("count_bucket" , None ))
161180
162181 _dict = {"endpoint" : endpoint ,
163182 "username" : config_dict .get ("username" ),
@@ -166,13 +185,16 @@ def gen_params_from_config(config_dict):
166185 "rule_payload" : rule ,
167186 "results_per_file" : int (config_dict .get ("results_per_file" )),
168187 "max_tweets" : int (config_dict .get ("max_tweets" )),
169- "max_pages" : config_dict .get ("max_pages" , None )
170- }
188+ "max_pages" : config_dict .get ("max_pages" , None )}
171189 return _dict
172190
173191
174192def validate_count_api (rule_payload , endpoint ):
175- rule = rule_payload if isinstance (rule_payload , dict ) else json .loads (rule_payload )
193+ """
194+ Ensures that the counts api is set correctly in a payload.
195+ """
196+ rule = (rule_payload if isinstance (rule_payload , dict )
197+ else json .loads (rule_payload ))
176198 bucket = rule .get ('bucket' )
177199 counts = set (endpoint .split ("/" )) & {"counts.json" }
178200 if len (counts ) == 0 :
0 commit comments