Skip to content

Commit 720a522

Browse files
authored
Merge pull request #341 from scholarly-python-package/develop
Releasing 1.4.2
2 parents b0aa82a + e3ba7e5 commit 720a522

File tree

8 files changed

+227
-101
lines changed

8 files changed

+227
-101
lines changed

.github/workflows/pythonpackage.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,5 +47,6 @@ jobs:
4747
PASSWORD: ${{ secrets.PASSWORD }}
4848
PORT: ${{ secrets.PORT }}
4949
USERNAME: ${{ secrets.USERNAME }}
50+
SCRAPER_API_KEY: ${{ secrets.SCRAPER_API_KEY }}
5051
run: |
5152
python3 -m unittest -v test_module.py

docs/quickstart.rst

Lines changed: 72 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ Search by keyword and return a generator of Author objects.
123123
'source': 'SEARCH_AUTHOR_SNIPPETS',
124124
'url_picture': 'https://scholar.google.com/citations?view_op=medium_photo&user=lHrs3Y4AAAAJ'}
125125
126-
``search_pubs``
126+
``search_pubs``
127127
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
128128
Search for articles/publications and return generator of Publication objects.
129129
#############################################################################
@@ -363,7 +363,8 @@ Using proxies
363363
-------------
364364

365365
In general, Google Scholar does not like bots, and can often block
366-
scholarly. We are actively working towards making scholarly more robust
366+
scholarly, especially those pages that contain ``scholar?`` in the URL.
367+
We are actively working towards making scholarly more robust
367368
towards that front.
368369

369370
The most common solution for avoiding network issues is to use proxies
@@ -390,12 +391,18 @@ come from the ProxyGenerator class:
390391
- Tor\_Internal()
391392
- Tor\_External()
392393
- Luminati()
394+
- ScraperAPI()
393395
- FreeProxies()
394-
- SingleProxy() Example:
396+
- SingleProxy()
397+
398+
All of these methods return ``True`` if the proxy was set up successfully, which
399+
you can check before beginning to use it with the ``use_proxy`` method.
400+
401+
Example:
395402

396403
.. code:: python
397404
398-
pg.SingleProxy(http = <your http proxy>, https = <your https proxy>)
405+
success = pg.SingleProxy(http = <your http proxy>, https = <your https proxy>)
399406
400407
Finally set scholarly to use this proxy for your actions
401408

@@ -438,7 +445,7 @@ default password, but you may want to change it for your installation.)
438445
from scholarly import scholarly, ProxyGenerator
439446
440447
pg = ProxyGenerator()
441-
pg.Tor_External(tor_sock_port=9050, tor_control_port=9051, tor_password="scholarly_password")
448+
success = pg.Tor_External(tor_sock_port=9050, tor_control_port=9051, tor_password="scholarly_password")
442449
scholarly.use_proxy(pg)
443450
444451
author = next(scholarly.search_author('Steven A Cholewiak'))
@@ -458,26 +465,7 @@ executable in your system.
458465
from scholarly import scholarly, ProxyGenerator
459466
460467
pg = ProxyGenerator()
461-
pg.Tor_Internal(tor_cmd = "tor")
462-
scholarly.use_proxy(pg)
463-
464-
author = next(scholarly.search_author('Steven A Cholewiak'))
465-
scholarly.pprint(author)
466-
467-
``FreeProxies``
468-
^^^^^^^^^^^^^^^^^^^^
469-
pg.FreeProxies()
470-
################
471-
472-
This uses the ``free-proxy`` pip library to add a proxy to your
473-
configuration.
474-
475-
.. code:: python
476-
477-
from scholarly import scholarly, ProxyGenerator
478-
479-
pg = ProxyGenerator()
480-
pg.FreeProxies()
468+
success = pg.Tor_Internal(tor_cmd = "tor")
481469
scholarly.use_proxy(pg)
482470
483471
author = next(scholarly.search_author('Steven A Cholewiak'))
@@ -502,7 +490,7 @@ You can use your own configuration
502490

503491
.. code:: python
504492
505-
pg.Luminati(usr= "your_username",passwd ="your_password", port = "your_port" )
493+
success = pg.Luminati(usr= "your_username",passwd ="your_password", port = "your_port" )
506494
507495
Or alternatively you can use the environment variables set in your .env
508496
file
@@ -519,6 +507,61 @@ file
519507
author = next(scholarly.search_author('Steven A Cholewiak'))
520508
scholarly.pprint(author)
521509
510+
``ScraperAPI``
511+
^^^^^^^^^^^^^^
512+
pg.ScraperAPI()
513+
###############
514+
515+
.. code:: python
516+
517+
from scholarly import scholarly, ProxyGenerator
518+
519+
pg = ProxyGenerator()
520+
521+
You will have to provide your ScraperAPI key
522+
523+
.. code:: python
524+
525+
success = pg.ScraperAPI(YOUR_SCRAPER_API_KEY)
526+
527+
Or alternatively you can use the environment variables as in the case of the Luminati example.
528+
529+
If you have Startup or higher paid plans, you can use additional options that are allowed for your plan.
530+
531+
.. code:: python
532+
533+
success = pg.ScraperAPI(YOUR_SCRAPER_API_KEY, country_code='fr', premium=True, render=True)
534+
535+
See https://www.scraperapi.com/pricing/ to see which options are enabled for your plan.
536+
537+
Finally, you can route your query through the ScraperAPI proxy
538+
539+
.. code:: python
540+
541+
scholarly.use_proxy(pg)
542+
543+
author = next(scholarly.search_author('Steven A Cholewiak'))
544+
scholarly.pprint(author)
545+
546+
``FreeProxies``
547+
^^^^^^^^^^^^^^^^^^^^
548+
pg.FreeProxies()
549+
################
550+
551+
This uses the ``free-proxy`` pip library to add a proxy to your
552+
configuration.
553+
554+
.. code:: python
555+
556+
from scholarly import scholarly, ProxyGenerator
557+
558+
pg = ProxyGenerator()
559+
success = pg.FreeProxies()
560+
scholarly.use_proxy(pg)
561+
562+
author = next(scholarly.search_author('Steven A Cholewiak'))
563+
scholarly.pprint(author)
564+
522565
``SingleProxy``
523566
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
524567
pg.SingleProxy(http: str, https:str)
@@ -531,7 +574,7 @@ If you want to use a proxy of your choice, feel free to use this option.
531574
from scholarly import scholarly, ProxyGenerator
532575
533576
pg = ProxyGenerator()
534-
pg.SingleProxy(http = <your http proxy>, https = <your https proxy>)
577+
success = pg.SingleProxy(http = <your http proxy>, https = <your https proxy>)
535578
scholarly.use_proxy(pg)
536579
537580
author = next(scholarly.search_author('Steven A Cholewiak'))
@@ -556,7 +599,8 @@ the working directory of the ``test_module.py`` as:
556599
557600
Define the connection method for the Tests, among these options:
558601

559-
- luminati (if you have a luminati proxy service)
602+
- luminati (if you have a Luminati proxy service)
603+
- scraperapi (if you have a ScraperAPI proxy service)
560604
- freeproxy
561605
- tor
562606
- tor\_internal

scholarly/_navigator.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ class DOSException(Exception):
3535
"""DOS attack was detected."""
3636

3737
class MaxTriesExceededException(Exception):
38-
pass
38+
"""Maximum number of tries by scholarly reached"""
3939

4040
class Singleton(type):
4141
_instances = {}
@@ -82,27 +82,28 @@ def _new_session(self):
8282
self.got_403 = False
8383
self._session = self.pm._new_session()
8484

85-
85+
8686
def _get_page(self, pagerequest: str) -> str:
8787
"""Return the data from a webpage
8888
8989
:param pagerequest: the page url
9090
:type pagerequest: str
9191
:returns: the text from a webpage
9292
:rtype: {str}
93-
:raises: Exception
93+
:raises: MaxTriesExceededException, DOSException
9494
"""
9595
self.logger.info("Getting %s", pagerequest)
9696
resp = None
9797
tries = 0
98+
if self.pm._use_scraperapi:
99+
self.set_timeout(60)
98100
timeout=self._TIMEOUT
99101
while tries < self._max_retries:
100102
try:
101103
w = random.uniform(1,2)
102104
time.sleep(w)
103-
104105
resp = self._session.get(pagerequest, timeout=timeout)
105-
self.logger.info("Session proxy config is {}".format(self._session.proxies))
106+
self.logger.debug("Session proxy config is {}".format(self._session.proxies))
106107

107108
has_captcha = self._requests_has_captcha(resp.text)
108109

@@ -125,7 +126,7 @@ def _get_page(self, pagerequest: str) -> str:
125126
time.sleep(w)
126127
self._new_session()
127128
self.got_403 = True
128-
129+
129130
continue # Retry request within same session
130131
else:
131132
self.logger.info("We can use another connection... let's try that.")
@@ -215,7 +216,7 @@ def _get_soup(self, url: str) -> BeautifulSoup:
215216
def search_authors(self, url: str)->Author:
216217
"""Generator that returns Author objects from the author search page"""
217218
soup = self._get_soup(url)
218-
219+
219220
author_parser = AuthorParser(self)
220221
while True:
221222
rows = soup.find_all('div', 'gsc_1usr')

0 commit comments

Comments
 (0)