[MediaWiki-commits] [Gerrit] pywikibot/core[master]: Add retry logic to timed-out requests
jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/371697 ) Change subject: Add retry logic to timeouted requests .. Add retry logic to timeouted requests Reuses the same basic logic as api.py. The retry logic is needed for sparql queries which time out (http-wise) but actually terminate successfully so are cached the next time you make the same query. Change-Id: I2e4feff5338eef3c669ec4f0e5bef8412f12bbfb --- M pywikibot/data/api.py M pywikibot/data/sparql.py M pywikibot/exceptions.py 3 files changed, 57 insertions(+), 18 deletions(-) Approvals: jenkins-bot: Verified Xqt: Looks good to me, approved diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py index 94be83c..6dc86d6 100644 --- a/pywikibot/data/api.py +++ b/pywikibot/data/api.py @@ -35,8 +35,7 @@ from pywikibot.comms import http from pywikibot.exceptions import ( Server504Error, Server414Error, FatalServerError, NoUsername, -Error, -InvalidTitle +Error, TimeoutError, InvalidTitle ) from pywikibot.tools import ( MediaWikiVersion, deprecated, itergroup, ip, PY2, getargspec, @@ -1241,11 +1240,6 @@ def __len__(self): """Return the number of enabled and disabled options.""" return len(self._enabled) + len(self._disabled) - - -class TimeoutError(Error): - -"""API request failed with a timeout error.""" class EnableSSLSiteWrapper(object): diff --git a/pywikibot/data/sparql.py b/pywikibot/data/sparql.py index b577bb0..392f8a8 100644 --- a/pywikibot/data/sparql.py +++ b/pywikibot/data/sparql.py @@ -9,14 +9,18 @@ import json import sys +import time if sys.version_info[0] > 2: from urllib.parse import quote else: from urllib2 import quote -from pywikibot import Site, Error +from requests.exceptions import Timeout + +from pywikibot import config, warning, Site from pywikibot.comms import http from pywikibot.tools import UnicodeMixin, py2_encode_utf_8 +from pywikibot.exceptions import Error, TimeoutError DEFAULT_HEADERS = {'cache-control': 'no-cache', 'Accept': 
'application/sparql-results+json'} @@ -29,7 +33,8 @@ This class allows to run SPARQL queries against any SPARQL endpoint. """ -def __init__(self, endpoint=None, entity_url=None, repo=None): +def __init__(self, endpoint=None, entity_url=None, repo=None, + max_retries=None, retry_wait=None): """ Create endpoint. @@ -38,9 +43,16 @@ @param entity_url: URL prefix for any entities returned in a query. @type entity_url: string @param repo: The Wikibase site which we want to run queries on. If - provided this overrides any value in endpoint and entity_url. - Defaults to Wikidata. +provided this overrides any value in endpoint and entity_url. +Defaults to Wikidata. @type repo: pywikibot.site.DataSite +@param max_retries: (optional) Maximum number of times to retry after + errors, defaults to config.max_retries. +@type max_retries: int +@param retry_wait: (optional) Minimum time in seconds to wait after an + error, defaults to config.retry_wait seconds (doubles each retry + until max of 120 seconds is reached). 
+@type retry_wait: float """ # default to Wikidata if not repo and not endpoint: @@ -67,6 +79,15 @@ self.entity_url = entity_url self.last_response = None + +if max_retries is None: +self.max_retries = config.max_retries +else: +self.max_retries = max_retries +if retry_wait is None: +self.retry_wait = config.retry_wait +else: +self.retry_wait = retry_wait def get_last_response(self): """ @@ -120,13 +141,28 @@ @type query: string """ url = '%s?query=%s' % (self.endpoint, quote(query)) -self.last_response = http.fetch(url, headers=headers) -if not self.last_response.content: -return None -try: -return json.loads(self.last_response.content) -except ValueError: -return None +while True: +try: +self.last_response = http.fetch(url, headers=headers) +if not self.last_response.content: +return None +try: +return json.loads(self.last_response.content) +except ValueError: +return None +except Timeout: +self.wait() +continue + +def wait(self): +"""Determine how long to wait after a failed request.""" +self.max_retries -= 1 +if self.max_retries < 0: +raise TimeoutError('Maximum retries attempted without
[MediaWiki-commits] [Gerrit] pywikibot/core[master]: Add retry logic to timed-out requests
Lokal Profil has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/371697 ) Change subject: Add retry logic to timeouted requests .. Add retry logic to timeouted requests Reuses the same basic logic as api.py. The retry logic is needed for sparql queries which time out (http-wise) but actually terminate successfully so are cached the next time you make the same query. Change-Id: I2e4feff5338eef3c669ec4f0e5bef8412f12bbfb --- M pywikibot/data/api.py M pywikibot/data/sparql.py M pywikibot/exceptions.py 3 files changed, 55 insertions(+), 16 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/97/371697/1 diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py index f905bc6..45975e1 100644 --- a/pywikibot/data/api.py +++ b/pywikibot/data/api.py @@ -35,8 +35,7 @@ from pywikibot.comms import http from pywikibot.exceptions import ( Server504Error, Server414Error, FatalServerError, NoUsername, -Error, -InvalidTitle +Error, TimeoutError, InvalidTitle ) from pywikibot.tools import ( MediaWikiVersion, deprecated, itergroup, ip, PY2, getargspec, @@ -1241,11 +1240,6 @@ def __len__(self): """Return the number of enabled and disabled options.""" return len(self._enabled) + len(self._disabled) - - -class TimeoutError(Error): - -"""API request failed with a timeout error.""" class EnableSSLSiteWrapper(object): diff --git a/pywikibot/data/sparql.py b/pywikibot/data/sparql.py index b577bb0..b02d038 100644 --- a/pywikibot/data/sparql.py +++ b/pywikibot/data/sparql.py @@ -9,14 +9,18 @@ import json import sys +import time if sys.version_info[0] > 2: from urllib.parse import quote else: from urllib2 import quote -from pywikibot import Site, Error +from requests.exceptions import Timeout + +from pywikibot import config, warning, Site from pywikibot.comms import http from pywikibot.tools import UnicodeMixin, py2_encode_utf_8 +from pywikibot.exceptions import Error, TimeoutError DEFAULT_HEADERS = {'cache-control': 'no-cache', 'Accept': 
'application/sparql-results+json'} @@ -29,7 +33,8 @@ This class allows to run SPARQL queries against any SPARQL endpoint. """ -def __init__(self, endpoint=None, entity_url=None, repo=None): +def __init__(self, endpoint=None, entity_url=None, repo=None, + max_retries=None, retry_wait=None): """ Create endpoint. @@ -41,6 +46,13 @@ provided this overrides any value in endpoint and entity_url. Defaults to Wikidata. @type repo: pywikibot.site.DataSite +@param max_retries: (optional) Maximum number of times to retry after + errors, defaults to 25 +@type max_retries: int +@param retry_wait: (optional) Minimum time to wait after an error, + defaults to 5 seconds (doubles each retry until max of 120 is + reached) +@type retry_wait: float """ # default to Wikidata if not repo and not endpoint: @@ -67,6 +79,15 @@ self.entity_url = entity_url self.last_response = None + +if max_retries is None: +self.max_retries = config.max_retries +else: +self.max_retries = max_retries +if retry_wait is None: +self.retry_wait = config.retry_wait +else: +self.retry_wait = retry_wait def get_last_response(self): """ @@ -120,13 +141,28 @@ @type query: string """ url = '%s?query=%s' % (self.endpoint, quote(query)) -self.last_response = http.fetch(url, headers=headers) -if not self.last_response.content: -return None -try: -return json.loads(self.last_response.content) -except ValueError: -return None +while True: +try: +self.last_response = http.fetch(url, headers=headers) +if not self.last_response.content: +return None +try: +return json.loads(self.last_response.content) +except ValueError: +return None +except Timeout: +self.wait() +continue + +def wait(self): +"""Determine how long to wait after a failed request.""" +self.max_retries -= 1 +if self.max_retries < 0: +raise TimeoutError('Maximum retries attempted without success.') +warning('Waiting {0} seconds before retrying.'.format(self.retry_wait)) +time.sleep(self.retry_wait) +# double the next wait, but do not exceed 120 seconds 
+self.retry_wait = min(120, self.retry_wait * 2) def ask(self, query, headers=DEFAULT_HEADERS): """ diff --git