Skip to content

Commit 2249389

Browse files
author
tucotuco
committed
Long-awaited merge of developments with traits back into master. This commit reflects the state of deployment as of 2016-09-15.
2 parents eccd011 + 94b34dd commit 2249389

28 files changed

+1181
-383
lines changed

app-dev.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ handlers:
4646

4747
- url: /img
4848
static_dir: www/img
49+
50+
- url: /favicon\.ico
51+
static_files: img/favicon.ico
52+
upload: img/favicon\.ico
4953

5054
# APIs
5155

app.yaml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,13 @@ libraries:
2828
# - name: webapp2
2929
# version: "2.5.1"
3030

31+
# Try the following to see if it overcomes the out of memory issues in the traits version
32+
# of the app. It didn't.
33+
#instance_class: B2
34+
#basic_scaling:
35+
# max_instances: 4
36+
# idle_timeout: 5m
37+
3138
handlers:
3239

3340
# Static assets
@@ -46,6 +53,10 @@ handlers:
4653

4754
- url: /img
4855
static_dir: www/img
56+
57+
- url: /favicon\.ico
58+
static_files: img/favicon.ico
59+
upload: img/favicon\.ico
4960

5061
# APIs
5162

appengine_config.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,12 @@ def namespace_manager_default_namespace_for_request():
4646
# [dwca-reader-clj "0.10.1-SNAPSHOT"]
4747
# return 'index-2014-02-11a'
4848
# DwC2015-style index with traits and days of year
49-
return 'index-2014-02-11'
49+
# return 'index-2014-02-11'
50+
# DwC2015-style index with traits and Atom fields for booleans and some others
51+
return 'index-2013-08-08'
52+
# DwC2015-style small-size test index with lengthtype in addition to traits and
53+
# Atom fields
54+
#return 'index-2014-02-06t2'
5055

5156
engineauth = {
5257
# Login uri. The user will be returned here if an error occurs.

portal-web.yaml

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
# This file is part of VertNet: https://github.com/VertNet/webapp
2+
#
3+
# VertNet is free software: you can redistribute it and/or modify
4+
# it under the terms of the GNU General Public License as published by
5+
# the Free Software Foundation, either version 3 of the License, or
6+
# (at your option) any later version.
7+
#
8+
# VertNet is distributed in the hope that it will be useful,
9+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11+
# GNU General Public License for more details.
12+
#
13+
# You should have received a copy of the GNU General Public License
14+
# along with VertNet. If not, see: http://www.gnu.org/licenses
15+
16+
# This is the App Engine configuration file for local development.
17+
# Note that static assets are sourced from www/ which contains full
18+
# sourcecode without minification.
19+
20+
# CODEBASE INFO
21+
# Repository: VertNet/webapp
22+
# Pushed to branch feature/laura-trait-ui 2016-09-14T23:30+02:00
23+
# Last pushed to master branch:
24+
#
25+
# DEPLOYMENT INFO
26+
# Last deployed tuco version: 2016-09-15T20:51+02:00 index-2013-08-08
27+
# Last deployed pagodarose version: 2016-09-15T20:51+02:00 index-2013-08-08
28+
# Last deployed prod version: 2016-09-15T20:51+02:00 index-2013-08-08 (from VertNet/webapp repo)
29+
# command: gcloud app deploy portal-web.yaml
30+
# args:
31+
# --project vertnet-portal # Optional, no need if gcloud is configured to use this project
32+
# --version prod|dev # Indicates version to use. If 'prod', --promote flag should be used
33+
# --promote # Optional, removes the version id from the deployment URL. Skip for testing and/or dev version
34+
35+
# This is the App Engine configuration file for using gcloud.
36+
# To deploy for production at http://portal-web.vertnet-portal.appspot.com:
37+
# gcloud app deploy portal-web.yaml --version prod --promote
38+
#
39+
# Or to deploy for testing at http://tuco.portal-web.vertnet-portal.appspot.com
40+
# gcloud app deploy portal-web.yaml --version tuco --no-promote
41+
42+
# Or to deploy for testing at http://pagodarose.vertnet-portal.appspot.com
43+
# gcloud app deploy portal-web.yaml --version pagodarose --no-promote
44+
45+
service: default
46+
runtime: python27
47+
api_version: 1
48+
threadsafe: true
49+
50+
libraries:
51+
- name: jinja2
52+
version: "latest"
53+
# - name: webapp2
54+
# version: "2.5.1"
55+
56+
handlers:
57+
58+
# Static assets
59+
60+
- url: /js/lib/*.
61+
static_dir: www/lib
62+
63+
- url: /js
64+
static_dir: www/js
65+
66+
- url: /css
67+
static_dir: www/css
68+
69+
- url: /fonts
70+
static_dir: www/fonts
71+
72+
- url: /img
73+
static_dir: www/img
74+
75+
# APIs
76+
77+
# - url: /api/search.*
78+
# script: vertnet.service.api.handlers
79+
80+
# - url: /api/download.*
81+
# script: vertnet.service.api.handlers
82+
83+
- url: /api/user/get
84+
script: vertnet.service.user.handler
85+
86+
- url: /api/github.*
87+
script: vertnet.service.github.handler
88+
89+
- url: /service/download.*
90+
script: vertnet.service.download.api
91+
92+
# - url: /apitracker.*
93+
# script: vertnet.service.tracker.api
94+
95+
- url: /service/rpc/record.*
96+
script: vertnet.service.record.rpc
97+
98+
- url: /service/organization.*
99+
script: vertnet.service.organization.rpc
100+
101+
- url: /api/user.*
102+
script: vertnet.service.user.handler
103+
104+
- url: /api/organization.*
105+
script: vertnet.api.organization.rpc
106+
107+
# - url: /api/stats.*
108+
# script: vertnet.api.stats.rpc
109+
110+
- url: /service/stats.*
111+
script: vertnet.service.stats.main
112+
113+
# - url: /service/repochecker.*
114+
# script: vertnet.service.repochecker.main
115+
116+
# CRON tasks
117+
118+
# - url: /tasks/daily_portal_stats.*
119+
# script: vertnet.service.tasks.daily_portal_stats.main
120+
121+
# Main handler
122+
123+
- url: /.*
124+
script: app.handler

queue.yaml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,11 @@ queue:
7575
- name: usagestatsqueue
7676
rate: 35/s
7777
# index-clean is dangerous - turn it on only if you really need to
78-
#- name: index-clean
79-
# rate: 35/s
80-
# retry_parameters:
81-
# task_retry_limit: 7
82-
# task_age_limit: 60m
83-
# min_backoff_seconds: 30
84-
# max_backoff_seconds: 960
85-
# max_doublings: 7
78+
- name: index-clean
79+
rate: 35/s
80+
retry_parameters:
81+
task_retry_limit: 7
82+
task_age_limit: 60m
83+
min_backoff_seconds: 30
84+
max_backoff_seconds: 960
85+
max_doublings: 7

templates/base.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
<html lang="en">
2323
<head>
2424
{% block head %}
25-
<title>VertNet</title>
25+
<title>VertNet Search Portal</title>
2626
<meta charset="utf-8">
2727
<meta http-equiv="Accept-Encoding" content="gzip, deflate"/>
2828
<meta name="viewport" content="width=device-width, initial-scale=1.0">

vertnet/service/download.py

Lines changed: 44 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,21 @@
1-
"""Download service."""
1+
#!/usr/bin/env python
2+
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
__author__ = "John Wieczorek"
16+
__contributors__ = "Aaron Steele, John Wieczorek"
17+
__copyright__ = "Copyright 2016 vertnet.org"
18+
__version__ = "download.py 2016-09-14T11:47+02:00"
219

320
# Removing dependency on Files API due to its deprecation by Google
421
import cloudstorage as gcs
@@ -10,22 +27,23 @@
1027
from google.appengine.api import taskqueue
1128
from google.appengine.api import search
1229
from vertnet.service import util as vnutil
30+
from vertnet.service.util import UTIL_VERSION
1331
from vertnet.service import search as vnsearch
1432
import webapp2
1533
import json
1634
import logging
1735
import uuid
1836
import sys
37+
import gc
1938

20-
DOWNLOAD_VERSION='download.py 2015-09-03T10:52:50+02:00'
21-
39+
DOWNLOAD_VERSION=__version__
2240
SEARCH_CHUNK_SIZE=1000 # limit on documents in a search result: rows per file
2341
OPTIMUM_CHUNK_SIZE=500 # See api_cnt_performance_analysis.pdf at https://goo.gl/xbLIGz
2442
COMPOSE_FILE_LIMIT=32 # limit on the number of files in a single compose request
2543
COMPOSE_OBJECT_LIMIT=1024 # limit on the number of files in a composition
2644
TEMP_BUCKET='vn-dltest' # bucket for temp compositions
2745
DOWNLOAD_BUCKET='vn-downloads2' # production bucket for downloads
28-
FILE_EXTENSION='tsv'
46+
FILE_EXTENSION='txt'
2947

3048
def _tsv(json):
3149
# These should be the names of the original fields in the index document.
@@ -40,6 +58,9 @@ def _tsv(json):
4058
values.append(unicode(json[x]).rstrip())
4159
else:
4260
values.append(u'')
61+
# logging.debug('%s: JSON: %s' % (DOWNLOAD_VERSION, json))
62+
# logging.debug('%s: DOWNLOAD_FIELDS: %s' % (UTIL_VERSION, download_fields))
63+
# logging.debug('%s: VALUES: %s' % (DOWNLOAD_VERSION, values))
4364
return u'\t'.join(values).encode('utf-8')
4465

4566
def _get_tsv_chunk(records):
@@ -92,6 +113,8 @@ def _queue(self, q, email, name, latlon, fromapi, source, countonly):
92113
latlon=latlon, fileindex=0, reccount=0, requesttime=requesttime,
93114
source=source, fromapi=fromapi)
94115

116+
# Attempt to keep memory usage at a minimum
117+
gc.collect()
95118
if countonly is not None and len(countonly)>0:
96119
taskqueue.add(url='/service/download/count', params=params,
97120
queue_name="count")
@@ -159,6 +182,7 @@ def get(self):
159182
self.response.headers['Content-Disposition'] = "attachment; filename=%s" \
160183
% filename
161184
records, cursor, count, query_version = vnsearch.query(q, count)
185+
# logging.debug('%s: RECORDS: %s' % (DOWNLOAD_VERSION, records))
162186

163187
# Build dictionary for search counts
164188
res_counts = vnutil.search_resource_counts(records)
@@ -210,6 +234,8 @@ def post(self):
210234
cursor = self.request.get('cursor')
211235
email = self.request.get('email')
212236

237+
# Attempt to keep memory usage at a minimum
238+
gc.collect()
213239
if cursor:
214240
curs = search.Cursor(web_safe_string=cursor)
215241
else:
@@ -279,6 +305,8 @@ def post(self):
279305
else:
280306
curs = None
281307

308+
# Attempt to keep memory usage at a minimum
309+
gc.collect()
282310
# Write single chunk to file, GCS does not support append
283311
records, next_cursor, count, query_version = \
284312
vnsearch.query(q, SEARCH_CHUNK_SIZE, curs=curs)
@@ -317,9 +345,9 @@ def post(self):
317345
f.write('%s\n' % vnutil.download_header())
318346
f.write(chunk)
319347
success = True
320-
logging.info('Download chunk saved to %s: Total %s records. Has next \
321-
cursor: %s \nVersion: %s'
322-
% (filename, reccount, not next_cursor is None, DOWNLOAD_VERSION))
348+
# logging.info('Download chunk saved to %s: Total %s records. Has next \
349+
#cursor: %s \nVersion: %s'
350+
# % (filename, reccount, not next_cursor is None, DOWNLOAD_VERSION))
323351
except Exception, e:
324352
logging.error("Error writing chunk to FILE: %s for\nQUERY: %s \
325353
Error: %s\nVersion: %s" % (filename, q, e, DOWNLOAD_VERSION) )
@@ -404,6 +432,8 @@ def post(self):
404432
total_files_to_compose = int(self.request.get('fileindex'))+1
405433
compositions=total_files_to_compose
406434

435+
# Attempt to keep memory usage at a minimum
436+
gc.collect()
407437
# Get the application default credentials.
408438
credentials = GoogleCredentials.get_application_default()
409439

@@ -499,8 +529,9 @@ def post(self):
499529
try:
500530
gcs.copy2(src, dest)
501531
except Exception, e:
502-
logging.error("Error copying %s to %s \nError: %s\
503-
Version: %s" % (src, dest, e, DOWNLOAD_VERSION) )
532+
s = 'Error copying %s to %s\n' % (src, dest)
533+
s += 'Error: %s Version: %s' % (e, DOWNLOAD_VERSION)
534+
# Log the assembled message; logging.error() with no msg raises TypeError and would mask the copy failure.
logging.error(s)
504535

505536
# Change the ACL so that the download file is publicly readable.
506537
mbody=acl_update_request()
@@ -519,7 +550,7 @@ def post(self):
519550
mail.send_mail(sender="VertNet Downloads <vertnetinfo@vertnet.org>",
520551
to=email, subject="Your truncated VertNet download is ready!",
521552
body="""
522-
Your VertNet download file is now available for a limited time at
553+
Your VertNet download file is now available for a limited time (roughly 60 days) at
523554
https://storage.googleapis.com/%s/%s.\n
524555
The results in this file are not complete based on your query\n
525556
%s\n
@@ -537,7 +568,7 @@ def post(self):
537568
mail.send_mail(sender="VertNet Downloads <vertnetinfo@vertnet.org>",
538569
to=email, subject="Your VertNet download is ready!",
539570
body="""
540-
Your VertNet download file is now available for a limited time at
571+
Your VertNet download file is now available for a limited time (roughly 60 days) at
541572
https://storage.googleapis.com/%s/%s.\n
542573
Query: %s\nMatching records: %s\nRequest submitted: %s\nRequest fulfilled: %s"""
543574
% (DOWNLOAD_BUCKET, composed_filename, q, reccount, requesttime,
@@ -559,6 +590,8 @@ def post(self):
559590
composed_filename='%s.%s' % (filepattern,FILE_EXTENSION)
560591
total_files_to_compose = int(self.request.get('fileindex'))
561592

593+
# Attempt to keep memory usage at a minimum
594+
gc.collect()
562595
# Get the application default credentials.
563596
credentials = GoogleCredentials.get_application_default()
564597

0 commit comments

Comments
 (0)