jenkins-bot has submitted this change and it was merged.
Change subject: [FEAT] Support HTTP GET requests
......................................................................
[FEAT] Support HTTP GET requests
This uses an HTTP GET request instead of an HTTP POST whenever possible,
unless one of the following conditions applies:
- The action is one of the actions which require a POST request (e.g.
action=upload). If the action is query, one of the query modules
requires a POST request.
- The config variable (maximum_GET_length) is lower than the length of
the actual param string. The family can override this maximum length.
If it is 0 (ints < 0 are also accepted, but those don't make sense),
it'll never use a GET request.
- If after the first request the server returns a 414 it'll switch to
the POST request mode and retry it.
- The site is not using SSL (GET requests without SSL can be allowed via
the config variable enable_GET_without_SSL).
The Request instance itself has an attribute 'use_get' which can
override this behaviour. If it's True, the first try will always be a
GET request (even if the action doesn't support it); if it's False, it'll
never try a GET request; and if it's None, it'll check the action and,
for action=query, the query modules to see whether a POST is required.
The value of 'True' is defined to allow calling action=paraminfo inside
the request, which would itself call action=paraminfo if it didn't know
that this doesn't require a POST request. It also ignores 'meta'
parameters as those are all GETable and because ParamInfo requires
meta=siteinfo and meta=userinfo.
Change-Id: Ib045fecedd9638f4b3bcbc40bc7b37ebfea63c42
---
M pywikibot/comms/http.py
M pywikibot/config2.py
M pywikibot/data/api.py
M pywikibot/exceptions.py
M pywikibot/family.py
5 files changed, 73 insertions(+), 4 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/comms/http.py b/pywikibot/comms/http.py
index 660e597..f3e93c8 100644
--- a/pywikibot/comms/http.py
+++ b/pywikibot/comms/http.py
@@ -66,7 +66,9 @@
from urllib2 import quote
from pywikibot import config
-from pywikibot.exceptions import FatalServerError, Server504Error
+from pywikibot.exceptions import (
+ FatalServerError, Server504Error, Server414Error
+)
from pywikibot.comms import threadedhttp
from pywikibot.tools import deprecate_arg
import pywikibot.version
@@ -260,6 +262,9 @@
if request.data[0].status == 504:
raise Server504Error("Server %s timed out" % site.hostname())
+ if request.data[0].status == 414:
+ raise Server414Error('Too long GET request')
+
# HTTP status 207 is also a success status for Webdav FINDPROP,
# used by the version module.
if request.data[0].status not in (200, 207):
diff --git a/pywikibot/config2.py b/pywikibot/config2.py
index e24ebca..0020d5d 100644
--- a/pywikibot/config2.py
+++ b/pywikibot/config2.py
@@ -89,6 +89,14 @@
# number of days to cache namespaces, api configuration, etc.
API_config_expiry = 30
+# The maximum number of bytes which uses a GET request, if not positive
+# it'll always use POST requests
+maximum_GET_length = 255
+# Some networks modify GET requests when they are not encrypted, to avoid
+# bug reports related to that disable those. If we are confident that bug
+# related to this are really because of the network this could be changed.
+enable_GET_without_SSL = False
+
# Solve captchas in the webbrowser. Setting this to False will result in the
# exception CaptchaError being thrown if a captcha is encountered.
solve_captcha = True
diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py
index 01d36d9..3eb69a1 100644
--- a/pywikibot/data/api.py
+++ b/pywikibot/data/api.py
@@ -26,7 +26,9 @@
import pywikibot
from pywikibot import config, login
from pywikibot.tools import MediaWikiVersion as LV, deprecated, itergroup
-from pywikibot.exceptions import Server504Error, FatalServerError, Error
+from pywikibot.exceptions import (
+ Server504Error, Server414Error, FatalServerError, Error
+)
import sys
@@ -297,6 +299,7 @@
params = {
'expiry': config.API_config_expiry,
+ 'use_get': True, # Request need ParamInfo to determine use_get
'site': self.site,
'action': 'paraminfo',
}
@@ -534,6 +537,9 @@
@kwarg retry_wait: (optional) Minimum time to wait after an error,
defaults to 5 seconds (doubles each retry until max of 120 is
reached)
+ @kwarg use_get: (optional) Use HTTP GET request if possible. If False
+ it uses a POST request. If None, it'll try to determine via
+ action=paraminfo if the action requires a POST.
@kwarg format: (optional) Defaults to "json"
"""
try:
@@ -549,6 +555,7 @@
else:
self.mime = kwargs.pop('mime', False)
self.throttle = kwargs.pop('throttle', True)
+ self.use_get = kwargs.pop('use_get', None)
self.max_retries = kwargs.pop("max_retries",
pywikibot.config.max_retries)
self.retry_wait = kwargs.pop("retry_wait", pywikibot.config.retry_wait)
self._params = {}
@@ -877,6 +884,27 @@
"""
self._add_defaults()
+ if (not config.enable_GET_without_SSL and
+ self.site.protocol() != 'https'):
+ use_get = False
+ elif self.use_get is None:
+ if self.action == 'query':
+ # for queries check the query module
+ modules = set()
+ for mod_type_name in ('list', 'prop', 'generator'):
+ modules.update(self._params.get(mod_type_name, []))
+ else:
+ modules = set([self.action])
+ if modules:
+ self.site._paraminfo.fetch(modules)
+ use_get = all(['mustbeposted' not in self.site._paraminfo[mod]
+ for mod in modules])
+ else:
+ # If modules is empty, just 'meta' was given, which doesn't
+ # require POSTs, and is required for ParamInfo
+ use_get = True
+ else:
+ use_get = self.use_get
while True:
paramstring = self._http_param_string()
simulate = self._simulate(self.action)
@@ -892,17 +920,35 @@
if self.mime:
(headers, body) = Request._build_mime_request(
self._encoded_items(), self.mime_params)
+ use_get = False # MIME requests require HTTP POST
else:
headers = {'Content-Type':
'application/x-www-form-urlencoded'}
- body = paramstring
+ if (not self.site.maximum_GET_length() or
+ self.site.maximum_GET_length() < len(paramstring)):
+ use_get = False
+ if use_get:
+ uri = '{0}?{1}'.format(uri, paramstring)
+ body = None # default in httplib2
+ else:
+ body = paramstring
rawdata = http.request(
- self.site, uri, method="POST",
+ self.site, uri, method='GET' if use_get else 'POST',
headers=headers, body=body)
except Server504Error:
pywikibot.log(u"Caught HTTP 504 error; retrying")
self.wait()
continue
+ except Server414Error:
+ if use_get:
+ pywikibot.log('Caught HTTP 414 error; retrying')
+ use_get = False
+ self.wait()
+ continue
+ else:
+ pywikibot.warning('Caught HTTP 414 error, although not '
+ 'using GET.')
+ raise
except FatalServerError:
# This error is not going to be fixed by just waiting
pywikibot.error(traceback.format_exc())
diff --git a/pywikibot/exceptions.py b/pywikibot/exceptions.py
index 298ef6c..b5013df 100644
--- a/pywikibot/exceptions.py
+++ b/pywikibot/exceptions.py
@@ -338,6 +338,13 @@
pass
+class Server414Error(Error):
+
+ """Server returned with HTTP 414 code."""
+
+ pass
+
+
class BadTitle(Error):
"""Server responded with BadTitle."""
diff --git a/pywikibot/family.py b/pywikibot/family.py
index f2f13e3..9daf228 100644
--- a/pywikibot/family.py
+++ b/pywikibot/family.py
@@ -1086,6 +1086,9 @@
else:
return code
+ def maximum_GET_length(self, code):
+ return config.maximum_GET_length
+
def dbName(self, code):
# returns the name of the MySQL database
return '%s%s' % (code, self.name)
--
To view, visit https://gerrit.wikimedia.org/r/173055
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ib045fecedd9638f4b3bcbc40bc7b37ebfea63c42
Gerrit-PatchSet: 6
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <[email protected]>
Gerrit-Reviewer: John Vandenberg <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: Merlijn van Deen <[email protected]>
Gerrit-Reviewer: XZise <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
Pywikibot-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits