XZise has uploaded a new change for review.
https://gerrit.wikimedia.org/r/173055
Change subject: [WIP] [FEAT] Support HTTP GET requests
......................................................................
[WIP] [FEAT] Support HTTP GET requests
This uses an HTTP GET request instead of an HTTP POST where possible, unless
one of the following conditions applies:
- The action is one of the actions which require a POST request (e.g.
action=upload), or the action is query and one of the requested query
modules requires a POST request.
It is intended to use action=paraminfo to determine this, but that is not
implemented yet. I pulled the data for all actions/query modules from
the English Wikipedia and, if an entry doesn't require a POST request,
added it to one of the sets.
- The config variable (maximum_GET_length) is lower than the length of the
actual param string. The family can overwrite this maximum length. If it
is 0 (it also accepts ints < 0, but those don't make sense), it'll never
use a GET request.
- If after the first request the server returns a 414 it'll switch to
the POST request mode and retry it.
The Request instance itself has an attribute 'use_get' which can
overwrite this behaviour. If it's True the first try will always be a
GET request (even if the action doesn't support it), if it's False it'll
never try it with a GET request, and if it's None it'll check the action
and, for action=query, also the query modules.
The value 'True' is defined to allow calling action=paraminfo inside the
request itself: determining the request method via paraminfo would
otherwise recursively issue another action=paraminfo request, so that
call must be able to skip the check (paraminfo doesn't require POST).
Change-Id: Ib045fecedd9638f4b3bcbc40bc7b37ebfea63c42
---
M pywikibot/comms/http.py
M pywikibot/config2.py
M pywikibot/data/api.py
M pywikibot/exceptions.py
M pywikibot/family.py
5 files changed, 103 insertions(+), 4 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core
refs/changes/55/173055/1
diff --git a/pywikibot/comms/http.py b/pywikibot/comms/http.py
index 660e597..0e613c7 100644
--- a/pywikibot/comms/http.py
+++ b/pywikibot/comms/http.py
@@ -66,7 +66,7 @@
from urllib2 import quote
from pywikibot import config
-from pywikibot.exceptions import FatalServerError, Server504Error
+from pywikibot.exceptions import FatalServerError, Server504Error,
Server414Error
from pywikibot.comms import threadedhttp
from pywikibot.tools import deprecate_arg
import pywikibot.version
@@ -260,6 +260,9 @@
if request.data[0].status == 504:
raise Server504Error("Server %s timed out" % site.hostname())
+ if request.data[0].status == 414:
+ raise Server414Error('To long GET request')
+
# HTTP status 207 is also a success status for Webdav FINDPROP,
# used by the version module.
if request.data[0].status not in (200, 207):
diff --git a/pywikibot/config2.py b/pywikibot/config2.py
index e24ebca..51323c1 100644
--- a/pywikibot/config2.py
+++ b/pywikibot/config2.py
@@ -89,6 +89,10 @@
# number of days to cache namespaces, api configuration, etc.
API_config_expiry = 30
+# The maximum number of bytes which uses a GET request, if not positive
+# it'll always use POST requests
+maximum_GET_length = 255
+
# Solve captchas in the webbrowser. Setting this to False will result in the
# exception CaptchaError being thrown if a captcha is encountered.
solve_captcha = True
diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py
index 83ef419..92af97f 100644
--- a/pywikibot/data/api.py
+++ b/pywikibot/data/api.py
@@ -26,7 +26,7 @@
import pywikibot
from pywikibot import config, login
from pywikibot.tools import MediaWikiVersion as LV, deprecated
-from pywikibot.exceptions import Server504Error, FatalServerError, Error
+from pywikibot.exceptions import Server504Error, Server414Error,
FatalServerError, Error
import sys
@@ -219,6 +219,9 @@
@kwarg retry_wait: (optional) Minimum time to wait after an error,
defaults to 5 seconds (doubles each retry until max of 120 is
reached)
+ @kwarg use_get: (optional) Use HTTP GET request if possible. If False
+ it uses a POST request. If None, it'll try to determine via
+ action=paraminfo if the action requires a POST.
@kwarg format: (optional) Defaults to "json"
"""
try:
@@ -234,6 +237,7 @@
else:
self.mime = kwargs.pop('mime', False)
self.throttle = kwargs.pop('throttle', True)
+ self.use_get = kwargs.pop('use_get', None)
self.max_retries = kwargs.pop("max_retries",
pywikibot.config.max_retries)
self.retry_wait = kwargs.pop("retry_wait", pywikibot.config.retry_wait)
self._params = {}
@@ -555,6 +559,48 @@
not self._warning_handler(mod, single_warning)):
pywikibot.warning(u"API warning (%s): %s" % (mod,
single_warning))
+ # TODO: Those to sets should be automatically determined using paraminfo
+ # Those sets are not complete, but if an entry is missing it won't fail
+ # because it's using GET instead of POST. They were generated from the
+ # English Wikipedia (1.25wmf7)
+ # All actions which support HTTP GET (except query)
+ _get_actions = set([
+ 'abusefilterevalexpression', 'centralauthtoken', 'feedwatchlist',
+ 'centralnoticeallocations', 'cirrus-settings-dump',
+ 'editmassmessagelist', 'languagesearch', 'clearhasmsg', 'featuredfeed',
+ 'mobileview', 'jsonconfig', 'antispoof', 'fancycaptchareload',
+ 'centralnoticequerycampaign', 'cirrus-config-dump', 'flagconfig',
+ 'feedcontributions', 'cirrus-mapping-dump', 'compare', 'flow', 'help',
+ 'abusefiltercheckmatch', 'liststudents', 'expandtemplates',
+ 'abusefilterchecksyntax', 'flow-parsoid-utils', 'opensearch',
+ 'feedrecentchanges', 'logout', 'imagerotate', 'paraminfo',
+ ])
+ # if action == query, all "querymodules" which support HTTP GET
+ _get_queries = set([
+ # prop=…
+ 'contributors', 'flagged', 'linkshere', 'globalusage', 'extlinks',
+ 'flowinfo', 'duplicatefiles', 'images', 'imageinfo', 'templates',
+ 'pageimages', 'categories', 'coordinates', 'fileusage', 'iwlinks',
+ 'stashimageinfo', 'revisions', 'transcodestatus', 'redirects', 'links',
+ 'videoinfo', 'langlinks', 'transcludedin', 'pageprops', 'categoryinfo',
+ 'deletedrevisions', 'info', 'extracts',
+ # list=…
+ 'allpages', 'oldreviewedpages', 'alltransclusions', 'allimages',
+ 'backlinks', 'alllinks', 'imageusage', 'mmsites', 'recentchanges',
+ 'abuselog', 'alldeletedrevisions', 'betafeatures', 'random',
+ 'globalallusers', 'pageswithprop', 'pagepropnames', 'allcategories',
+ 'centralnoticelogs', 'embeddedin', 'abusefilters', 'prefixsearch',
+ 'querypage', 'logevents', 'watchlist', 'watchlistraw', 'allfileusages',
+ 'wikisets', 'globalgroups', 'checkuserlog', 'iwbacklinks', 'users',
+ 'allusers', 'allredirects', 'deletedrevs', 'exturlusage',
+ 'gadgetcategories', 'filearchive', 'tags', 'globalblocks',
+ 'categorymembers', 'usercontribs', 'blocks', 'gettingstartedgetpages',
+ 'gadgets', 'protectedtitles', 'search', 'langbacklinks', 'geosearch',
+ # meta=…
+ 'userinfo', 'tokens', 'allmessages', 'notifications', 'wikibase',
+ 'filerepoinfo', 'globaluserinfo', 'siteinfo',
+ ])
+
def submit(self):
"""Submit a query and parse the response.
@@ -562,6 +608,22 @@
"""
self._add_defaults()
+ # TODO: Determine if this requires a POST via action=paraminfo
+ if self.use_get is None:
+ if self.action == 'query':
+ # also check "querymodule"
+ use_get = True
+ for querymodule_key in ('list', 'prop', 'meta'):
+ for querymodule in self._params.get(querymodule_key, []):
+ if querymodule not in Request._get_queries:
+ use_get = False
+ break
+ if not use_get:
+ break
+ else:
+ use_get = self.action in Request._get_actions
+ else:
+ use_get = self.use_get
while True:
paramstring = self._http_param_string()
simulate = self._simulate(self.action)
@@ -577,17 +639,37 @@
if self.mime:
(headers, body) = Request._build_mime_request(
self._encoded_items(), self.mime_params)
+ use_get = False # MIME requests require HTTP POST
else:
headers = {'Content-Type':
'application/x-www-form-urlencoded'}
- body = paramstring
+ if (not self.site.maximum_GET_length() or
+ self.site.maximum_GET_length() < len(paramstring)):
+ use_get = False
+ if use_get:
+ # TODO: paramstring is in binary
+ uri = '{0}?{1}'.format(uri, paramstring)
+ # TODO: body so valid? or None?
+ body = ''
+ else:
+ body = paramstring
rawdata = http.request(
- self.site, uri, method="POST",
+ self.site, uri, method='GET' if use_get else 'POST',
headers=headers, body=body)
except Server504Error:
pywikibot.log(u"Caught HTTP 504 error; retrying")
self.wait()
continue
+ except Server414Error:
+ if use_get:
+ pywikibot.log('Caught HTTP 414 error; retrying')
+ use_get = False
+ self.wait()
+ continue
+ else:
+ pywikibot.warning('Caught HTTP 414 error, although not '
+ 'using GET.')
+ raise
except FatalServerError:
# This error is not going to be fixed by just waiting
pywikibot.error(traceback.format_exc())
diff --git a/pywikibot/exceptions.py b/pywikibot/exceptions.py
index 298ef6c..b5013df 100644
--- a/pywikibot/exceptions.py
+++ b/pywikibot/exceptions.py
@@ -338,6 +338,13 @@
pass
+class Server414Error(Error):
+
+ """Server returned with HTTP 414 code."""
+
+ pass
+
+
class BadTitle(Error):
"""Server responded with BadTitle."""
diff --git a/pywikibot/family.py b/pywikibot/family.py
index f2f13e3..9daf228 100644
--- a/pywikibot/family.py
+++ b/pywikibot/family.py
@@ -1086,6 +1086,9 @@
else:
return code
+ def maximum_GET_length(self, code):
+ return config.maximum_GET_length
+
def dbName(self, code):
# returns the name of the MySQL database
return '%s%s' % (code, self.name)
--
To view, visit https://gerrit.wikimedia.org/r/173055
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib045fecedd9638f4b3bcbc40bc7b37ebfea63c42
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits