Skip to content

Commit 226e772

Browse files
Merge pull request #237 from scholarly-python-package/develop
Develop
2 parents e645619 + 1bd48e0 commit 226e772

File tree

6 files changed

+20
-4
lines changed

6 files changed

+20
-4
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -85,6 +85,7 @@ print([citation['bib']['title'] for citation in scholarly.citedby(pub)])
8585
>>> author = scholarly.search_author_id('Smr99uEAAAAJ')
8686
>>> scholarly.pprint(author)
8787
{'affiliation': 'Professor of Vision Science, UC Berkeley',
88+
'email_domain': '@berkeley.edu',
8889
'filled': False,
8990
'interests': ['vision science', 'psychology', 'human factors', 'neuroscience'],
9091
'name': 'Martin Banks',

scholarly/author_parser.py

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,16 @@ def _fill_basics(self, soup, author):
7676
author['affiliation'] = soup.find('div', class_='gsc_prf_il').text
7777
author['interests'] = [i.text.strip() for i in
7878
soup.find_all('a', class_='gsc_prf_inta')]
79-
79+
if author['source'] == AuthorSource.AUTHOR_PROFILE_PAGE:
80+
email = soup.find('div', id="gsc_prf_ivh", class_="gsc_prf_il")
81+
if email.text != "No verified email":
82+
author['email_domain'] = '@'+email.text.split(" ")[3]
83+
if author['source'] == AuthorSource.CO_AUTHORS_LIST:
84+
picture = soup.find('img', id="gsc_prf_pup-img").get('src')
85+
if "avatar_scholar" in picture:
86+
picture = _HOST.format(picture)
87+
author['url_picture'] = picture
88+
8089
def _fill_indices(self, soup, author):
8190
index = soup.find_all('td', class_='gsc_rsb_std')
8291
if index:

scholarly/data_types.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -182,7 +182,7 @@ class Author(TypedDict, total=False):
182182
:param scholar_id: The id of the author on Google Scholar
183183
:param name: The name of the author
184184
:param affiliation: The affiliation of the author
185-
:param email_domain: The email domain of the author (source: SEARCH_AUTHOR_SNIPPETS)
185+
:param email_domain: The email domain of the author (source: SEARCH_AUTHOR_SNIPPETS, AUTHOR_PROFILE_PAGE)
186186
:param url_picture: The URL for the picture of the author
187187
:param citedby: The number of citations to all publications. (source: SEARCH_AUTHOR_SNIPPETS)
188188
:param filled: The set of sections filled out of the total set of sections that can be filled

scholarly/publication_parser.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -309,7 +309,12 @@ def fill(self, publication: Publication)->Publication:
309309
# number of citation per year
310310
years = [int(y.text) for y in soup.find_all(class_='gsc_vcd_g_t')]
311311
cites = [int(c.text) for c in soup.find_all(class_='gsc_vcd_g_al')]
312-
publication['cites_per_year'] = dict(zip(years, cites))
312+
cites_year = [int(c.get('href')[-4:]) for c in soup.find_all(class_='gsc_vcd_g_a')]
313+
nonzero_cites_per_year = dict(zip(cites_year, cites))
314+
res_dict = {}
315+
for year in years:
316+
res_dict[year] = (nonzero_cites_per_year[year] if year in nonzero_cites_per_year else 0)
317+
publication['cites_per_year'] = res_dict
313318

314319
if soup.find('div', class_='gsc_vcd_title_ggi'):
315320
publication['eprint_url'] = soup.find(

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55

66
setuptools.setup(
77
name='scholarly',
8-
version='1.0.2',
8+
version='1.0.3',
99
author='Steven A. Cholewiak, Panos Ipeirotis, Victor Silva',
1010
author_email='steven@cholewiak.com, panos@stern.nyu.edu, vsilva@ualberta.ca',
1111
description='Simple access to Google Scholar authors and citations',

test_module.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,7 @@ def setUp(self):
3939
proxy_generator.Tor_Internal(tor_cmd = tor_cmd)
4040
scholarly.use_proxy(proxy_generator)
4141
elif self.connection_method == "luminati":
42+
scholarly.set_retries(10)
4243
proxy_generator.Luminati(usr=os.getenv("USERNAME"),passwd=os.getenv("PASSWORD"),proxy_port = os.getenv("PORT"))
4344
scholarly.use_proxy(proxy_generator)
4445
elif self.connection_method == "freeproxy":

0 commit comments

Comments
 (0)