XZise has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/173055

Change subject: [WIP] [FEAT] Support HTTP GET requests
......................................................................

[WIP] [FEAT] Support HTTP GET requests

This uses an HTTP GET request instead of an HTTP POST, unless one of
the following conditions applies:

- The action is one of the actions which require a POST request (e.g.
  action=upload), or, if the action is query, one of the query modules
  requires a POST request.
  It is intended to use action=paraminfo to determine this, but not
  implemented yet. I pulled the data of all actions/querymodules from
  the English Wikipedia, and if it doesn't require a POST request added
  it to one of the sets.
- The config variable (maximum_GET_length) is lower than the length of
  the actual param string. The family can overwrite this maximum
  length. If it is 0 (it also accepts ints < 0, but those don't make
  sense), it'll never use a GET request.
- If after the first request the server returns a 414 it'll switch to
  the POST request mode and retry it.

The Request instance itself has an attribute 'use_get' which can
overwrite this behaviour. If it's True the first try will always be a
GET request (even if the action doesn't support it), if it's False it'll
never try it with a GET request, and if it's None, it'll check the
action and, if the action is query, the query modules.

The value 'True' exists to allow calling action=paraminfo inside the
request itself: without it, determining whether action=paraminfo
requires a POST would recursively call action=paraminfo, since the
request wouldn't already know that this action doesn't require a POST
request.

Change-Id: Ib045fecedd9638f4b3bcbc40bc7b37ebfea63c42
---
M pywikibot/comms/http.py
M pywikibot/config2.py
M pywikibot/data/api.py
M pywikibot/exceptions.py
M pywikibot/family.py
5 files changed, 103 insertions(+), 4 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core 
refs/changes/55/173055/1

diff --git a/pywikibot/comms/http.py b/pywikibot/comms/http.py
index 660e597..0e613c7 100644
--- a/pywikibot/comms/http.py
+++ b/pywikibot/comms/http.py
@@ -66,7 +66,7 @@
     from urllib2 import quote
 
 from pywikibot import config
-from pywikibot.exceptions import FatalServerError, Server504Error
+from pywikibot.exceptions import FatalServerError, Server504Error, 
Server414Error
 from pywikibot.comms import threadedhttp
 from pywikibot.tools import deprecate_arg
 import pywikibot.version
@@ -260,6 +260,9 @@
     if request.data[0].status == 504:
         raise Server504Error("Server %s timed out" % site.hostname())
 
+    if request.data[0].status == 414:
+        raise Server414Error('To long GET request')
+
     # HTTP status 207 is also a success status for Webdav FINDPROP,
     # used by the version module.
     if request.data[0].status not in (200, 207):
diff --git a/pywikibot/config2.py b/pywikibot/config2.py
index e24ebca..51323c1 100644
--- a/pywikibot/config2.py
+++ b/pywikibot/config2.py
@@ -89,6 +89,10 @@
 # number of days to cache namespaces, api configuration, etc.
 API_config_expiry = 30
 
+# The maximum number of bytes which uses a GET request, if not positive
+# it'll always use POST requests
+maximum_GET_length = 255
+
 # Solve captchas in the webbrowser. Setting this to False will result in the
 # exception CaptchaError being thrown if a captcha is encountered.
 solve_captcha = True
diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py
index 83ef419..92af97f 100644
--- a/pywikibot/data/api.py
+++ b/pywikibot/data/api.py
@@ -26,7 +26,7 @@
 import pywikibot
 from pywikibot import config, login
 from pywikibot.tools import MediaWikiVersion as LV, deprecated
-from pywikibot.exceptions import Server504Error, FatalServerError, Error
+from pywikibot.exceptions import Server504Error, Server414Error, 
FatalServerError, Error
 
 import sys
 
@@ -219,6 +219,9 @@
         @kwarg retry_wait: (optional) Minimum time to wait after an error,
                defaults to 5 seconds (doubles each retry until max of 120 is
                reached)
+        @kwarg use_get: (optional) Use HTTP GET request if possible. If False
+               it uses a POST request. If None, it'll try to determine via
+               action=paraminfo if the action requires a POST.
         @kwarg format: (optional) Defaults to "json"
         """
         try:
@@ -234,6 +237,7 @@
         else:
             self.mime = kwargs.pop('mime', False)
         self.throttle = kwargs.pop('throttle', True)
+        self.use_get = kwargs.pop('use_get', None)
         self.max_retries = kwargs.pop("max_retries", 
pywikibot.config.max_retries)
         self.retry_wait = kwargs.pop("retry_wait", pywikibot.config.retry_wait)
         self._params = {}
@@ -555,6 +559,48 @@
                             not self._warning_handler(mod, single_warning)):
                         pywikibot.warning(u"API warning (%s): %s" % (mod, 
single_warning))
 
+    # TODO: Those to sets should be automatically determined using paraminfo
+    # Those sets are not complete, but if an entry is missing it won't fail
+    # because it's using GET instead of POST. They were generated from the
+    # English Wikipedia (1.25wmf7)
+    # All actions which support HTTP GET (except query)
+    _get_actions = set([
+        'abusefilterevalexpression', 'centralauthtoken', 'feedwatchlist',
+        'centralnoticeallocations', 'cirrus-settings-dump',
+        'editmassmessagelist', 'languagesearch', 'clearhasmsg', 'featuredfeed',
+        'mobileview', 'jsonconfig', 'antispoof', 'fancycaptchareload',
+        'centralnoticequerycampaign', 'cirrus-config-dump', 'flagconfig',
+        'feedcontributions', 'cirrus-mapping-dump', 'compare', 'flow', 'help',
+        'abusefiltercheckmatch', 'liststudents', 'expandtemplates',
+        'abusefilterchecksyntax', 'flow-parsoid-utils', 'opensearch',
+        'feedrecentchanges', 'logout', 'imagerotate', 'paraminfo',
+    ])
+    # if action == query, all "querymodules" which support HTTP GET
+    _get_queries = set([
+        # prop=…
+        'contributors', 'flagged', 'linkshere', 'globalusage', 'extlinks',
+        'flowinfo', 'duplicatefiles', 'images', 'imageinfo', 'templates',
+        'pageimages', 'categories', 'coordinates', 'fileusage', 'iwlinks',
+        'stashimageinfo', 'revisions', 'transcodestatus', 'redirects', 'links',
+        'videoinfo', 'langlinks', 'transcludedin', 'pageprops', 'categoryinfo',
+        'deletedrevisions', 'info', 'extracts',
+        # list=…
+        'allpages', 'oldreviewedpages', 'alltransclusions', 'allimages',
+        'backlinks', 'alllinks', 'imageusage', 'mmsites', 'recentchanges',
+        'abuselog', 'alldeletedrevisions', 'betafeatures', 'random',
+        'globalallusers', 'pageswithprop', 'pagepropnames', 'allcategories',
+        'centralnoticelogs', 'embeddedin', 'abusefilters', 'prefixsearch',
+        'querypage', 'logevents', 'watchlist', 'watchlistraw', 'allfileusages',
+        'wikisets', 'globalgroups', 'checkuserlog', 'iwbacklinks', 'users',
+        'allusers', 'allredirects', 'deletedrevs', 'exturlusage',
+        'gadgetcategories', 'filearchive', 'tags', 'globalblocks',
+        'categorymembers', 'usercontribs', 'blocks', 'gettingstartedgetpages',
+        'gadgets', 'protectedtitles', 'search', 'langbacklinks', 'geosearch',
+        # meta=…
+        'userinfo', 'tokens', 'allmessages', 'notifications', 'wikibase',
+        'filerepoinfo', 'globaluserinfo', 'siteinfo',
+    ])
+
     def submit(self):
         """Submit a query and parse the response.
 
@@ -562,6 +608,22 @@
 
         """
         self._add_defaults()
+        # TODO: Determine if this requires a POST via action=paraminfo
+        if self.use_get is None:
+            if self.action == 'query':
+                # also check "querymodule"
+                use_get = True
+                for querymodule_key in ('list', 'prop', 'meta'):
+                    for querymodule in self._params.get(querymodule_key, []):
+                        if querymodule not in Request._get_queries:
+                            use_get = False
+                            break
+                    if not use_get:
+                        break
+            else:
+                use_get = self.action in Request._get_actions
+        else:
+            use_get = self.use_get
         while True:
             paramstring = self._http_param_string()
             simulate = self._simulate(self.action)
@@ -577,17 +639,37 @@
                 if self.mime:
                     (headers, body) = Request._build_mime_request(
                         self._encoded_items(), self.mime_params)
+                    use_get = False  # MIME requests require HTTP POST
                 else:
                     headers = {'Content-Type': 
'application/x-www-form-urlencoded'}
-                    body = paramstring
+                    if (not self.site.maximum_GET_length() or
+                            self.site.maximum_GET_length() < len(paramstring)):
+                        use_get = False
+                    if use_get:
+                        # TODO: paramstring is in binary
+                        uri = '{0}?{1}'.format(uri, paramstring)
+                        # TODO: body so valid? or None?
+                        body = ''
+                    else:
+                        body = paramstring
 
                 rawdata = http.request(
-                    self.site, uri, method="POST",
+                    self.site, uri, method='GET' if use_get else 'POST',
                     headers=headers, body=body)
             except Server504Error:
                 pywikibot.log(u"Caught HTTP 504 error; retrying")
                 self.wait()
                 continue
+            except Server414Error:
+                if use_get:
+                    pywikibot.log('Caught HTTP 414 error; retrying')
+                    use_get = False
+                    self.wait()
+                    continue
+                else:
+                    pywikibot.warning('Caught HTTP 414 error, although not '
+                                      'using GET.')
+                    raise
             except FatalServerError:
                 # This error is not going to be fixed by just waiting
                 pywikibot.error(traceback.format_exc())
diff --git a/pywikibot/exceptions.py b/pywikibot/exceptions.py
index 298ef6c..b5013df 100644
--- a/pywikibot/exceptions.py
+++ b/pywikibot/exceptions.py
@@ -338,6 +338,13 @@
     pass
 
 
+class Server414Error(Error):
+
+    """Server returned with HTTP 414 code."""
+
+    pass
+
+
 class BadTitle(Error):
 
     """Server responded with BadTitle."""
diff --git a/pywikibot/family.py b/pywikibot/family.py
index f2f13e3..9daf228 100644
--- a/pywikibot/family.py
+++ b/pywikibot/family.py
@@ -1086,6 +1086,9 @@
         else:
             return code
 
+    def maximum_GET_length(self, code):
+        return config.maximum_GET_length
+
     def dbName(self, code):
         # returns the name of the MySQL database
         return '%s%s' % (code, self.name)

-- 
To view, visit https://gerrit.wikimedia.org/r/173055
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib045fecedd9638f4b3bcbc40bc7b37ebfea63c42
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to