1- """Download service."""
1+ #!/usr/bin/env python
2+
3+ # Licensed under the Apache License, Version 2.0 (the "License");
4+ # you may not use this file except in compliance with the License.
5+ # You may obtain a copy of the License at
6+ #
7+ # http://www.apache.org/licenses/LICENSE-2.0
8+ #
9+ # Unless required by applicable law or agreed to in writing, software
10+ # distributed under the License is distributed on an "AS IS" BASIS,
11+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+ # See the License for the specific language governing permissions and
13+ # limitations under the License.
14+
15+ __author__ = "John Wieczorek"
16+ __contributors__ = "Aaron Steele, John Wieczorek"
17+ __copyright__ = "Copyright 2016 vertnet.org"
18+ __version__ = "download.py 2016-09-14T11:47+02:00"
219
320# Removing dependency on Files API due to its deprecation by Google
421import cloudstorage as gcs
1027from google .appengine .api import taskqueue
1128from google .appengine .api import search
1229from vertnet .service import util as vnutil
30+ from vertnet .service .util import UTIL_VERSION
1331from vertnet .service import search as vnsearch
1432import webapp2
1533import json
1634import logging
1735import uuid
1836import sys
37+ import gc
1938
20- DOWNLOAD_VERSION = 'download.py 2015-09-03T10:52:50+02:00'
21-
39+ DOWNLOAD_VERSION = __version__
2240SEARCH_CHUNK_SIZE = 1000 # limit on documents in a search result: rows per file
2341OPTIMUM_CHUNK_SIZE = 500 # See api_cnt_performance_analysis.pdf at https://goo.gl/xbLIGz
2442COMPOSE_FILE_LIMIT = 32 # limit on the number of files in a single compose request
2543COMPOSE_OBJECT_LIMIT = 1024 # limit on the number of files in a composition
2644TEMP_BUCKET = 'vn-dltest' # bucket for temp compositions
2745DOWNLOAD_BUCKET = 'vn-downloads2' # production bucket for downloads
28- FILE_EXTENSION = 'tsv '
46+ FILE_EXTENSION = 'txt '
2947
3048def _tsv (json ):
3149 # These should be the names of the original fields in the index document.
@@ -40,6 +58,9 @@ def _tsv(json):
4058 values .append (unicode (json [x ]).rstrip ())
4159 else :
4260 values .append (u'' )
61+ # logging.debug('%s: JSON: %s' % (DOWNLOAD_VERSION, json))
62+ # logging.debug('%s: DOWNLOAD_FIELDS: %s' % (UTIL_VERSION, download_fields))
63+ # logging.debug('%s: VALUES: %s' % (DOWNLOAD_VERSION, values))
4364 return u'\t ' .join (values ).encode ('utf-8' )
4465
4566def _get_tsv_chunk (records ):
@@ -92,6 +113,8 @@ def _queue(self, q, email, name, latlon, fromapi, source, countonly):
92113 latlon = latlon , fileindex = 0 , reccount = 0 , requesttime = requesttime ,
93114 source = source , fromapi = fromapi )
94115
116+ # Attempt to keep memory usage at a minimum
117+ gc .collect ()
95118 if countonly is not None and len (countonly )> 0 :
96119 taskqueue .add (url = '/service/download/count' , params = params ,
97120 queue_name = "count" )
@@ -159,6 +182,7 @@ def get(self):
159182 self .response .headers ['Content-Disposition' ] = "attachment; filename=%s" \
160183 % filename
161184 records , cursor , count , query_version = vnsearch .query (q , count )
185+ # logging.debug('%s: RECORDS: %s' % (DOWNLOAD_VERSION, records))
162186
163187 # Build dictionary for search counts
164188 res_counts = vnutil .search_resource_counts (records )
@@ -210,6 +234,8 @@ def post(self):
210234 cursor = self .request .get ('cursor' )
211235 email = self .request .get ('email' )
212236
237+ # Attempt to keep memory usage at a minimum
238+ gc .collect ()
213239 if cursor :
214240 curs = search .Cursor (web_safe_string = cursor )
215241 else :
@@ -279,6 +305,8 @@ def post(self):
279305 else :
280306 curs = None
281307
308+ # Attempt to keep memory usage at a minimum
309+ gc .collect ()
282310 # Write single chunk to file, GCS does not support append
283311 records , next_cursor , count , query_version = \
284312 vnsearch .query (q , SEARCH_CHUNK_SIZE , curs = curs )
@@ -317,9 +345,9 @@ def post(self):
317345 f .write ('%s\n ' % vnutil .download_header ())
318346 f .write (chunk )
319347 success = True
320- logging .info ('Download chunk saved to %s: Total %s records. Has next \
321- cursor: %s \n Version: %s'
322- % (filename , reccount , not next_cursor is None , DOWNLOAD_VERSION ))
348+ # logging.info('Download chunk saved to %s: Total %s records. Has next \
349+ # cursor: %s \nVersion: %s'
350+ # % (filename, reccount, not next_cursor is None, DOWNLOAD_VERSION))
323351 except Exception , e :
324352 logging .error ("Error writing chunk to FILE: %s for\n QUERY: %s \
325353 Error: %s\n Version: %s" % (filename , q , e , DOWNLOAD_VERSION ) )
@@ -404,6 +432,8 @@ def post(self):
404432 total_files_to_compose = int (self .request .get ('fileindex' ))+ 1
405433 compositions = total_files_to_compose
406434
435+ # Attempt to keep memory usage at a minimum
436+ gc .collect ()
407437 # Get the application default credentials.
408438 credentials = GoogleCredentials .get_application_default ()
409439
@@ -499,8 +529,9 @@ def post(self):
try:
    gcs.copy2(src, dest)
except Exception as e:
    # The copy failure is logged and not re-raised, so the statements that
    # follow this handler still run.
    # The message has to be passed to logging.error(): calling it with no
    # arguments raises TypeError inside this handler, which would replace
    # the original copy error with an unrelated one and lose the details.
    s = 'Error copying %s to %s\n ' % (src, dest)
    s += 'Error: %s Version: %s' % (e, DOWNLOAD_VERSION)
    logging.error(s)
504535
505536 # Change the ACL so that the download file is publicly readable.
506537 mbody = acl_update_request ()
@@ -519,7 +550,7 @@ def post(self):
519550 mail .send_mail (sender = "VertNet Downloads <vertnetinfo@vertnet.org>" ,
520551 to = email , subject = "Your truncated VertNet download is ready!" ,
521552 body = """
522- Your VertNet download file is now available for a limited time at
553+ Your VertNet download file is now available for a limited time (roughly 60 days) at
523554https://storage.googleapis.com/%s/%s.\n
524555The results in this file are not complete based on your query\n
525556%s\n
@@ -537,7 +568,7 @@ def post(self):
537568 mail .send_mail (sender = "VertNet Downloads <vertnetinfo@vertnet.org>" ,
538569 to = email , subject = "Your VertNet download is ready!" ,
539570 body = """
540- Your VertNet download file is now available for a limited time at
571+ Your VertNet download file is now available for a limited time (roughly 60 days) at
541572https://storage.googleapis.com/%s/%s.\n
542573Query: %s\n Matching records: %s\n Request submitted: %s\n Request fulfilled: %s"""
543574 % (DOWNLOAD_BUCKET , composed_filename , q , reccount , requesttime ,
@@ -559,6 +590,8 @@ def post(self):
559590 composed_filename = '%s.%s' % (filepattern ,FILE_EXTENSION )
560591 total_files_to_compose = int (self .request .get ('fileindex' ))
561592
593+ # Attempt to keep memory usage at a minimum
594+ gc .collect ()
562595 # Get the application default credentials.
563596 credentials = GoogleCredentials .get_application_default ()
564597
0 commit comments