[MediaWiki-commits] [Gerrit] pywikibot/core[master]: Add retry logic to timed-out requests

2017-08-13 Thread jenkins-bot (Code Review)
jenkins-bot has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/371697 )

Change subject: Add retry logic to timed-out requests
..


Add retry logic to timed-out requests

Reuses the same basic retry logic as api.py. The retry logic is needed
for SPARQL queries which time out at the HTTP level but actually complete
successfully on the server, so the result is cached and returned the next
time you make the same query.

Change-Id: I2e4feff5338eef3c669ec4f0e5bef8412f12bbfb
---
M pywikibot/data/api.py
M pywikibot/data/sparql.py
M pywikibot/exceptions.py
3 files changed, 57 insertions(+), 18 deletions(-)

Approvals:
  jenkins-bot: Verified
  Xqt: Looks good to me, approved



diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py
index 94be83c..6dc86d6 100644
--- a/pywikibot/data/api.py
+++ b/pywikibot/data/api.py
@@ -35,8 +35,7 @@
 from pywikibot.comms import http
 from pywikibot.exceptions import (
 Server504Error, Server414Error, FatalServerError, NoUsername,
-Error,
-InvalidTitle
+Error, TimeoutError, InvalidTitle
 )
 from pywikibot.tools import (
 MediaWikiVersion, deprecated, itergroup, ip, PY2, getargspec,
@@ -1241,11 +1240,6 @@
 def __len__(self):
 """Return the number of enabled and disabled options."""
 return len(self._enabled) + len(self._disabled)
-
-
-class TimeoutError(Error):
-
-"""API request failed with a timeout error."""
 
 
 class EnableSSLSiteWrapper(object):
diff --git a/pywikibot/data/sparql.py b/pywikibot/data/sparql.py
index b577bb0..392f8a8 100644
--- a/pywikibot/data/sparql.py
+++ b/pywikibot/data/sparql.py
@@ -9,14 +9,18 @@
 
 import json
 import sys
+import time
 if sys.version_info[0] > 2:
 from urllib.parse import quote
 else:
 from urllib2 import quote
 
-from pywikibot import Site, Error
+from requests.exceptions import Timeout
+
+from pywikibot import config, warning, Site
 from pywikibot.comms import http
 from pywikibot.tools import UnicodeMixin, py2_encode_utf_8
+from pywikibot.exceptions import Error, TimeoutError
 
 DEFAULT_HEADERS = {'cache-control': 'no-cache',
'Accept': 'application/sparql-results+json'}
@@ -29,7 +33,8 @@
 This class allows to run SPARQL queries against any SPARQL endpoint.
 """
 
-def __init__(self, endpoint=None, entity_url=None, repo=None):
+def __init__(self, endpoint=None, entity_url=None, repo=None,
+ max_retries=None, retry_wait=None):
 """
 Create endpoint.
 
@@ -38,9 +43,16 @@
 @param entity_url: URL prefix for any entities returned in a query.
 @type entity_url: string
 @param repo: The Wikibase site which we want to run queries on. If
- provided this overrides any value in endpoint and 
entity_url.
- Defaults to Wikidata.
+provided this overrides any value in endpoint and entity_url.
+Defaults to Wikidata.
 @type repo: pywikibot.site.DataSite
+@param max_retries: (optional) Maximum number of times to retry after
+   errors, defaults to config.max_retries.
+@type max_retries: int
+@param retry_wait: (optional) Minimum time in seconds to wait after an
+   error, defaults to config.retry_wait seconds (doubles each retry
+   until max of 120 seconds is reached).
+@type retry_wait: float
 """
 # default to Wikidata
 if not repo and not endpoint:
@@ -67,6 +79,15 @@
 self.entity_url = entity_url
 
 self.last_response = None
+
+if max_retries is None:
+self.max_retries = config.max_retries
+else:
+self.max_retries = max_retries
+if retry_wait is None:
+self.retry_wait = config.retry_wait
+else:
+self.retry_wait = retry_wait
 
 def get_last_response(self):
 """
@@ -120,13 +141,28 @@
 @type query: string
 """
 url = '%s?query=%s' % (self.endpoint, quote(query))
-self.last_response = http.fetch(url, headers=headers)
-if not self.last_response.content:
-return None
-try:
-return json.loads(self.last_response.content)
-except ValueError:
-return None
+while True:
+try:
+self.last_response = http.fetch(url, headers=headers)
+if not self.last_response.content:
+return None
+try:
+return json.loads(self.last_response.content)
+except ValueError:
+return None
+except Timeout:
+self.wait()
+continue
+
+def wait(self):
+"""Determine how long to wait after a failed request."""
+self.max_retries -= 1
+if self.max_retries < 0:
+raise TimeoutError('Maximum retries attempted without success.')

[MediaWiki-commits] [Gerrit] pywikibot/core[master]: Add retry logic to timed-out requests

2017-08-12 Thread Lokal Profil (Code Review)
Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/371697 )

Change subject: Add retry logic to timed-out requests
..

Add retry logic to timed-out requests

Reuses the same basic retry logic as api.py. The retry logic is needed
for SPARQL queries which time out at the HTTP level but actually complete
successfully on the server, so the result is cached and returned the next
time you make the same query.

Change-Id: I2e4feff5338eef3c669ec4f0e5bef8412f12bbfb
---
M pywikibot/data/api.py
M pywikibot/data/sparql.py
M pywikibot/exceptions.py
3 files changed, 55 insertions(+), 16 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core 
refs/changes/97/371697/1

diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py
index f905bc6..45975e1 100644
--- a/pywikibot/data/api.py
+++ b/pywikibot/data/api.py
@@ -35,8 +35,7 @@
 from pywikibot.comms import http
 from pywikibot.exceptions import (
 Server504Error, Server414Error, FatalServerError, NoUsername,
-Error,
-InvalidTitle
+Error, TimeoutError, InvalidTitle
 )
 from pywikibot.tools import (
 MediaWikiVersion, deprecated, itergroup, ip, PY2, getargspec,
@@ -1241,11 +1240,6 @@
 def __len__(self):
 """Return the number of enabled and disabled options."""
 return len(self._enabled) + len(self._disabled)
-
-
-class TimeoutError(Error):
-
-"""API request failed with a timeout error."""
 
 
 class EnableSSLSiteWrapper(object):
diff --git a/pywikibot/data/sparql.py b/pywikibot/data/sparql.py
index b577bb0..b02d038 100644
--- a/pywikibot/data/sparql.py
+++ b/pywikibot/data/sparql.py
@@ -9,14 +9,18 @@
 
 import json
 import sys
+import time
 if sys.version_info[0] > 2:
 from urllib.parse import quote
 else:
 from urllib2 import quote
 
-from pywikibot import Site, Error
+from requests.exceptions import Timeout
+
+from pywikibot import config, warning, Site
 from pywikibot.comms import http
 from pywikibot.tools import UnicodeMixin, py2_encode_utf_8
+from pywikibot.exceptions import Error, TimeoutError
 
 DEFAULT_HEADERS = {'cache-control': 'no-cache',
'Accept': 'application/sparql-results+json'}
@@ -29,7 +33,8 @@
 This class allows to run SPARQL queries against any SPARQL endpoint.
 """
 
-def __init__(self, endpoint=None, entity_url=None, repo=None):
+def __init__(self, endpoint=None, entity_url=None, repo=None,
+ max_retries=None, retry_wait=None):
 """
 Create endpoint.
 
@@ -41,6 +46,13 @@
  provided this overrides any value in endpoint and 
entity_url.
  Defaults to Wikidata.
 @type repo: pywikibot.site.DataSite
+@param max_retries: (optional) Maximum number of times to retry after
+   errors, defaults to 25
+@type max_retries: int
+@param retry_wait: (optional) Minimum time to wait after an error,
+   defaults to 5 seconds (doubles each retry until max of 120 is
+   reached)
+@type retry_wait: float
 """
 # default to Wikidata
 if not repo and not endpoint:
@@ -67,6 +79,15 @@
 self.entity_url = entity_url
 
 self.last_response = None
+
+if max_retries is None:
+self.max_retries = config.max_retries
+else:
+self.max_retries = max_retries
+if retry_wait is None:
+self.retry_wait = config.retry_wait
+else:
+self.retry_wait = retry_wait
 
 def get_last_response(self):
 """
@@ -120,13 +141,28 @@
 @type query: string
 """
 url = '%s?query=%s' % (self.endpoint, quote(query))
-self.last_response = http.fetch(url, headers=headers)
-if not self.last_response.content:
-return None
-try:
-return json.loads(self.last_response.content)
-except ValueError:
-return None
+while True:
+try:
+self.last_response = http.fetch(url, headers=headers)
+if not self.last_response.content:
+return None
+try:
+return json.loads(self.last_response.content)
+except ValueError:
+return None
+except Timeout:
+self.wait()
+continue
+
+def wait(self):
+"""Determine how long to wait after a failed request."""
+self.max_retries -= 1
+if self.max_retries < 0:
+raise TimeoutError('Maximum retries attempted without success.')
+warning('Waiting {0} seconds before retrying.'.format(self.retry_wait))
+time.sleep(self.retry_wait)
+# double the next wait, but do not exceed 120 seconds
+self.retry_wait = min(120, self.retry_wait * 2)
 
 def ask(self, query, headers=DEFAULT_HEADERS):
 """
diff --git