1- """OGRe Twitter Interface
1+ """
2+ OGRe Twitter Interface
23
34:func:`twitter` : method for fetching data from Twitter
4-
55"""
66
77import base64
88import hashlib
99import logging
1010import sys
11+ import time
1112import urllib
1213from datetime import datetime
13- from time import time
1414from twython import Twython
1515from ogre .validation import sanitize
16- from snowflake2time .snowflake import *
16+ from ogre .exceptions import OGReError , OGReLimitError
17+ from snowflake2time .snowflake import snowflake2utc , utc2snowflake
1718
1819
1920def sanitize_twitter (
@@ -25,7 +26,8 @@ def sanitize_twitter(
2526 interval = None
2627):
2728
28- """Validate and prepare parameters for use in Twitter data retrieval.
29+ """
30+ Validate and prepare parameters for use in Twitter data retrieval.
2931
3032 .. seealso:: :meth:`ogre.validation.validate` describes the format each
3133 parameter must have.
@@ -57,7 +59,6 @@ def sanitize_twitter(
5759
5860 :rtype: tuple
5961 :returns: Each passed parameter is returned (in order) in the proper format.
60-
6162 """
6263
6364 clean_keys = {}
@@ -138,7 +139,8 @@ def twitter(
138139 ** kwargs
139140):
140141
141- """Fetch Tweets from the Twitter API.
142+ """
143+ Fetch Tweets from the Twitter API.
142144
143145 .. seealso:: :meth:`sanitize_twitter` describes more about
144146 the format each parameter must have.
@@ -217,7 +219,7 @@ def twitter(
217219 :type network: callable
218220 :param network: Specify a network access point (for dependency injection).
219221
220- :raises: TwythonError
222+ :raises: OGReError, OGReLimitError, TwythonError
221223
222224 :rtype: list
223225 :returns: GeoJSON Feature(s)
@@ -226,7 +228,6 @@ def twitter(
226228 how to build queries for Twitter using the `keyword` parameter.
227229 More information may also be found at
228230 https://dev.twitter.com/docs/api/1.1/get/search/tweets.
229-
230231 """
231232
232233 keychain , kinds , q , remaining , geocode , (since_id , max_id ) = \
@@ -239,46 +240,45 @@ def twitter(
239240 interval = interval
240241 )
241242
243+ modifiers = {
244+ "api" : Twython ,
245+ "fail_hard" : False ,
246+ "network" : urllib .urlopen ,
247+ "query_limit" : 450 , # Twitter allows 450 queries every 15 minutes.
248+ "secure" : True ,
249+ "strict_media" : False
250+ }
251+ for modifier , _ in modifiers .items ():
252+ if kwargs .get (modifier ) is not None :
253+ modifiers [modifier ] = kwargs [modifier ]
254+
242255 qid = hashlib .md5 (
243256 str (time .time ()) +
244257 str (q ) +
245258 str (remaining ) +
246259 str (geocode ) +
247260 str (since_id ) +
248- str (max_id )
261+ str (max_id ) +
262+ str (kwargs )
249263 ).hexdigest ()
250- logging .basicConfig (
251- filename = "OGRe.log" ,
252- level = logging .ERROR ,
253- format = "%(asctime)s %(levelname)s:%(message)s" ,
254- datefmt = "%Y/%m/%d %H:%M:%S %Z"
255- )
256- log = logging .getLogger (__name__ )
257- if kwargs .get ("test" , False ):
258- log .setLevel (logging .DEBUG )
259- log .info (qid + " Request: Twitter TEST " + kwargs .get ("test_message" , "" ))
260- log .debug (
261- qid + " Status:" +
262- " media(" + str (media )+ ")" +
263- " keyword(" + str (q )+ ")" +
264- " quantity(" + str (remaining )+ ")" +
265- " location(" + str (geocode )+ ")" +
266- " interval(" + str (since_id )+ "," + str (max_id )+ ")" +
267- " kwargs(" + str (kwargs )+ ")"
268- )
269- else :
270- log .setLevel (logging .INFO )
271- log .info (qid + " Request: Twitter" )
272264
273- maximum_queries = kwargs .get ("query_limit" )
274- if maximum_queries is None :
275- maximum_queries = 450 # Twitter allows 450 queries every 15 minutes.
265+ log = logging .getLogger (__name__ )
266+ log .info (qid + " Request: Twitter" )
267+ log .debug (
268+ qid + " Status:" +
269+ " media(" + str (media )+ ")" +
270+ " keyword(" + str (q )+ ")" +
271+ " quantity(" + str (remaining )+ ")" +
272+ " location(" + str (geocode )+ ")" +
273+ " interval(" + str (since_id )+ "," + str (max_id )+ ")" +
274+ " kwargs(" + str (kwargs )+ ")"
275+ )
276276
277- if not kinds or remaining < 1 or maximum_queries < 1 :
277+ if not kinds or remaining < 1 or modifiers [ "query_limit" ] < 1 :
278278 log .info (qid + " Success: No results were requested." )
279279 return []
280280
281- api = kwargs . get ( "api" , Twython ) (
281+ api = modifiers [ "api" ] (
282282 keychain ["consumer_key" ],
283283 access_token = keychain ["access_token" ]
284284 )
@@ -288,18 +288,28 @@ def twitter(
288288 limit = int (
289289 limits ["resources" ]["search" ]["/search/tweets" ]["remaining" ]
290290 )
291+ reset = int (
292+ limits ["resources" ]["search" ]["/search/tweets" ]["reset" ]
293+ )
291294 if limit < 1 :
292- log .info (qid + " Failure: Queries are being limited." )
295+ message = "Queries are being limited."
296+ log .info (qid + " Failure: " + message )
297+ if modifiers ["fail_hard" ]:
298+ raise OGReLimitError (
299+ source = "Twitter" ,
300+ message = message ,
301+ reset = reset
302+ )
293303 else :
294304 log .debug (qid + " Status: " + str (limit )+ " queries remain." )
295- if limit < maximum_queries :
296- maximum_queries = limit
305+ if limit < modifiers [ "query_limit" ] :
306+ modifiers [ "query_limit" ] = limit
297307 except KeyError :
298308 log .warn (qid + " Unobtainable Rate Limit" )
299309 total = remaining
300310
301311 collection = []
302- for query in range (maximum_queries ):
312+ for query in range (modifiers [ "query_limit" ] ):
303313 count = min (remaining , 100 ) # Twitter accepts a max count of 100.
304314 try :
305315 results = api .search (
@@ -318,12 +328,15 @@ def twitter(
318328 )
319329 raise
320330 if results .get ("statuses" ) is None :
331+ message = "The request is too complex."
321332 log .info (
322333 qid + " Failure: " +
323334 str (query + 1 )+ " queries produced " +
324335 str (len (collection ))+ " results. " +
325- "The request is too complex."
336+ message
326337 )
338+ if modifiers ["fail_hard" ]:
339+ raise OGReError (source = "Twitter" , message = message )
327340 break
328341 for tweet in results ["statuses" ]:
329342 if tweet .get ("coordinates" ) is None or tweet .get ("id" ) is None :
@@ -349,23 +362,20 @@ def twitter(
349362 if tweet .get ("text" ) is not None :
350363 feature ["properties" ]["text" ] = tweet ["text" ]
351364 if "image" in kinds :
352- if not kwargs . get ( "strict_media" , False ) :
365+ if not modifiers [ "strict_media" ] :
353366 if tweet .get ("text" ) is not None :
354367 feature ["properties" ]["text" ] = tweet ["text" ]
355368 if tweet .get ("entities" , {}).get ("media" ) is not None :
356369 for entity in tweet ["entities" ]["media" ]:
357370 if entity .get ("type" ) is not None :
358371 if entity ["type" ].lower () == "photo" :
359372 media_url = "media_url_https"
360- if not kwargs . get ( "secure" , True ) :
373+ if not modifiers [ "secure" ] :
361374 media_url = "media_url"
362375 if entity .get (media_url ) is not None :
363376 feature ["properties" ]["image" ] = \
364377 base64 .b64encode (
365- kwargs .get (
366- "network" ,
367- urllib .urlopen
368- )(
378+ modifiers ["network" ](
369379 entity [media_url ]
370380 ).read ()
371381 )
@@ -398,7 +408,7 @@ def twitter(
398408 .split ("max_id=" )[1 ]
399409 .split ("&" )[0 ]
400410 )
401- if query + 1 >= maximum_queries :
411+ if query + 1 >= modifiers [ "query_limit" ] :
402412 outcome = "Success" if len (collection ) > 0 else "Failure"
403413 log .info (
404414 qid + " " + outcome + ": " +
0 commit comments