Mpaa has submitted this change and it was merged.

Change subject: [FEAT] Chunked uploads
......................................................................


[FEAT] Chunked uploads

This allows chunked uploads by setting the parameter 'chunk_size'
to a value between 0 and the file size (both exclusive). It will
also only work if the site's MediaWiki version is 1.20 or newer.

The upload.py script supports this mode via the '-chunked'
parameter.

This also adds the capability to run an API request without
throttling, so that it doesn't have to wait after each request.

See: https://www.mediawiki.org/wiki/API:Upload#Chunked_uploading

Change-Id: I80b2bba9e63832173d5b697db1f4ea419ca1122f
---
M pywikibot/data/api.py
M pywikibot/site.py
M scripts/upload.py
3 files changed, 156 insertions(+), 36 deletions(-)

Approvals:
  Mpaa: Looks good to me, approved



diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py
index 8132a27..11df17b 100644
--- a/pywikibot/data/api.py
+++ b/pywikibot/data/api.py
@@ -129,6 +129,8 @@
     @param site: The Site to which the request will be submitted. If not
            supplied, uses the user's configured default Site.
     @param mime: If true, send in "multipart/form-data" format (default False)
+    @param mime_params: A dictionary of parameter which should only be
+           transferred via mime mode. If not None sets mime to True.
     @param max_retries: (optional) Maximum number of times to retry after
            errors, defaults to 25
     @param retry_wait: (optional) Minimum time to wait after an error,
@@ -143,7 +145,15 @@
             self.site = kwargs.pop("site")
         except KeyError:
             self.site = pywikibot.Site()
-        self.mime = kwargs.pop("mime", False)
+        if 'mime_params' in kwargs:
+            self.mime_params = kwargs.pop('mime_params')
+            # mime may not be different from mime_params
+            if 'mime' in kwargs and kwargs.pop('mime') != self.mime:
+                raise ValueError('If mime_params is set, mime may not differ '
+                                 'from it.')
+        else:
+            self.mime = kwargs.pop('mime', False)
+        self.throttle = kwargs.pop('throttle', False)
         self.max_retries = kwargs.pop("max_retries", 
pywikibot.config.max_retries)
         self.retry_wait = kwargs.pop("retry_wait", pywikibot.config.retry_wait)
         self.params = {}
@@ -210,6 +220,23 @@
     def iteritems(self):
         return iter(self.params.items())
 
+    @property
+    def mime(self):
+        """Return whether mime parameters are defined."""
+        return self.mime_params is not None
+
+    @mime.setter
+    def mime(self, value):
+        """
+        Change whether mime parameter should be defined.
+
+        This will clear the mime parameters.
+        """
+        try:
+            self.mime_params = dict(value)
+        except TypeError:
+            self.mime_params = {} if value else None
+
     def http_params(self):
         """Return the parameters formatted for inclusion in an HTTP request.
 
@@ -218,7 +245,9 @@
            unicode (may be |-separated list)
            str in site encoding (may be |-separated list)
         """
-
+        if self.mime_params and set(self.params.keys()) & 
set(self.mime_params.keys()):
+            raise ValueError('The mime_params and params may not share the '
+                             'same keys.')
         for key in self.params:
             if isinstance(self.params[key], bytes):
                 self.params[key] = 
self.params[key].decode(self.site.encoding())
@@ -296,6 +325,23 @@
             message = None
         return message == ERR_MSG
 
+    @staticmethod
+    def _generate_MIME_part(key, content, keytype, headers):
+        if not keytype:
+            try:
+                content.encode("ascii")
+                keytype = ("text", "plain")
+            except UnicodeError:
+                keytype = ("application", "octet-stream")
+        submsg = MIMENonMultipart(*keytype)
+        content_headers = {'name': key}
+        if headers:
+            content_headers.update(headers)
+        submsg.add_header("Content-disposition", "form-data",
+                          **content_headers)
+        submsg.set_payload(content)
+        return submsg
+
     def submit(self):
         """Submit a query and parse the response.
 
@@ -308,7 +354,10 @@
             simulate = self._simulate(action)
             if simulate:
                 return simulate
-            self.site.throttle(write=self.write)
+            if self.throttle:
+                self.site.throttle(write=self.write)
+            else:
+                pywikibot.log("Action '{0}' is submitted not 
throttled.".format(action))
             uri = self.site.scriptpath() + "/api.php"
             ssl = False
             if self.site.family.name in config.available_ssl_project:
@@ -328,22 +377,15 @@
                             filetype = mimetypes.guess_type(local_filename)[0] 
\
                                 or 'application/octet-stream'
                             file_content = file(local_filename, "rb").read()
-                            submsg = MIMENonMultipart(*filetype.split("/"))
-                            submsg.add_header("Content-disposition",
-                                              "form-data", name=key,
-                                              filename=local_filename)
-                            submsg.set_payload(file_content)
+                            submsg = Request._generate_MIME_part(
+                                key, file_content, filetype.split('/'),
+                                {'filename': local_filename})
                         else:
-                            try:
-                                self.params[key].encode("ascii")
-                                keytype = ("text", "plain")
-                            except UnicodeError:
-                                keytype = ("application", "octet-stream")
-                            submsg = MIMENonMultipart(*keytype)
-                            submsg.add_header("Content-disposition", 
"form-data",
-                                              name=key)
-                            submsg.set_payload(self.params[key])
+                            submsg = Request._generate_MIME_part(
+                                key, self.params[key], None, None)
                         container.attach(submsg)
+                    for key, value in self.mime_params.items():
+                        container.attach(Request._generate_MIME_part(key, 
*value))
                     # strip the headers to get the HTTP message body
                     body = container.as_string()
                     marker = "\n\n"  # separates headers from body
diff --git a/pywikibot/site.py b/pywikibot/site.py
index fb8202d..21e0989 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -3858,7 +3858,8 @@
 
     @deprecate_arg('imagepage', 'filepage')
     def upload(self, filepage, source_filename=None, source_url=None,
-               comment=None, text=None, watch=False, ignore_warnings=False):
+               comment=None, text=None, watch=False, ignore_warnings=False,
+               chunk_size=0):
         """Upload a file to the wiki.
 
         Either source_filename or source_url, but not both, must be provided.
@@ -3875,7 +3876,11 @@
         @param watch: If true, add filepage to the bot user's watchlist
         @param ignore_warnings: if true, ignore API warnings and force
             upload (for example, to overwrite an existing file); default False
-
+        @param chunk_size: The chunk size in bytesfor chunked uploading (see
+            U{https://www.mediawiki.org/wiki/API:Upload#Chunked_uploading}). It
+            will only upload in chunks, if the version number is 1.20 or higher
+            and the chunk size is positive but lower than the file size.
+        @type chunk_size: int
         """
         upload_warnings = {
             # map API warning codes to user error messages
@@ -3909,18 +3914,51 @@
         if not text:
             text = comment
         token = self.token(filepage, "edit")
+        result = None
         if source_filename:
             # upload local file
             # make sure file actually exists
             if not os.path.isfile(source_filename):
                 raise ValueError("File '%s' does not exist."
                                  % source_filename)
-            # TODO: if file size exceeds some threshold (to be determined),
-            #       upload by chunks (--> os.path.getsize(source_filename))
+            additional_parameters = {}
+            throttle = True
+            filesize = os.path.getsize(source_filename)
+            if (chunk_size > 0 and chunk_size < filesize and
+                    LV(self.version()) >= LV('1.20')):
+                offset = 0
+                file_key = None
+                with open(source_filename, 'rb') as f:
+                    while True:
+                        f.seek(offset)
+                        chunk = f.read(chunk_size)
+                        req = api.Request(site=self, action='upload', 
token=token,
+                                          stash='1', offset=offset, 
filesize=filesize,
+                                          
filename=filepage.title(withNamespace=False),
+                                          mime_params={}, throttle=throttle)
+                        req.mime_params['chunk'] = (chunk, None, {'filename': 
req.params['filename']})
+                        if file_key:
+                            req['filekey'] = file_key
+                        # TODO: Proper error and warning handling
+                        data = req.submit()['upload']
+                        if 'warnings' in data:
+                            result = data
+                            break
+                        file_key = data['filekey']
+                        throttle = False
+                        new_offset = int(data['offset'])
+                        if offset + len(chunk) != new_offset:
+                            pywikibot.warning('Unexpected offset.')
+                        offset = new_offset
+                        if data['result'] != 'Continue':  # finished
+                            additional_parameters['filekey'] = file_key
+                            break
+            else:
+                additional_parameters = {'file': source_filename, 'mime': True}
             req = api.Request(site=self, action="upload", token=token,
                               filename=filepage.title(withNamespace=False),
-                              file=source_filename, comment=comment,
-                              text=text, mime=True)
+                              comment=comment, text=text, throttle=throttle,
+                              **additional_parameters)
         else:
             # upload by URL
             if "upload_by_url" not in self.userinfo["rights"]:
@@ -3930,16 +3968,17 @@
             req = api.Request(site=self, action="upload", token=token,
                               filename=filepage.title(withNamespace=False),
                               url=source_url, comment=comment, text=text)
-        if watch:
-            req["watch"] = ""
-        if ignore_warnings:
-            req["ignorewarnings"] = ""
-        try:
-            result = req.submit()
-        except api.APIError:
-            # TODO: catch and process foreseeable errors
-            raise
-        result = result["upload"]
+        if not result:
+            if watch:
+                req["watch"] = ""
+            if ignore_warnings:
+                req["ignorewarnings"] = ""
+            try:
+                result = req.submit()
+            except api.APIError:
+                # TODO: catch and process foreseeable errors
+                raise
+            result = result["upload"]
         pywikibot.debug(result, _logger)
         if "warnings" in result:
             warning = list(result["warnings"].keys())[0]
diff --git a/scripts/upload.py b/scripts/upload.py
index 8a9a44d..39f9df5 100755
--- a/scripts/upload.py
+++ b/scripts/upload.py
@@ -11,6 +11,15 @@
                 is given
   -abortonwarn: Abort upload on the specified warning type. If no warning type
                 is specified abort on all warnings.
+  -chunked:     Upload the file in chunks (more overhead, but restartable). If
+                no value is specified the chunk size is 1 MiB. The value must
+                be a number which can be preceded by a suffix. The units are:
+                  No suffix: Bytes
+                  'k': Kilobytes (1000 B)
+                  'M': Megabytes (1000000 B)
+                  'Ki': Kibibytes (1024 B)
+                  'Mi': Mebibytes (1024x1024 B)
+                The suffixes are case insenstive.
 
 If any other arguments are given, the first is the URL or filename to upload,
 and the rest is a proposed description to go with the upload. If none of these
@@ -34,6 +43,8 @@
 import urllib
 import urlparse
 import tempfile
+import re
+import math
 import pywikibot
 import pywikibot.data.api
 from pywikibot import config
@@ -43,7 +54,7 @@
     def __init__(self, url, urlEncoding=None, description=u'',
                  useFilename=None, keepFilename=False,
                  verifyDescription=True, ignoreWarning=False,
-                 targetSite=None, uploadByUrl=False, aborts=[]):
+                 targetSite=None, uploadByUrl=False, aborts=[], chunk_size=0):
         """
         @param ignoreWarning: Set this to True if you want to upload even if
             another file would be overwritten or another mistake would be
@@ -58,6 +69,7 @@
         self.verifyDescription = verifyDescription
         self.ignoreWarning = ignoreWarning
         self.aborts = aborts
+        self.chunk_size = chunk_size
         if config.upload_to_commons:
             self.targetSite = targetSite or pywikibot.Site('commons',
                                                            'commons')
@@ -224,7 +236,8 @@
                 else:
                     temp = self.url
                 site.upload(imagepage, source_filename=temp,
-                            ignore_warnings=self.ignoreWarning)
+                            ignore_warnings=self.ignoreWarning,
+                            chunk_size=self.chunk_size)
 
         except pywikibot.data.api.UploadWarning as warn:
             pywikibot.output(u"We got a warning message: 
{0}".format(warn.message))
@@ -266,6 +279,8 @@
     useFilename = None
     verifyDescription = True
     aborts = set()
+    chunk_size = 0
+    chunk_size_regex = re.compile(r'^-chunked(?::(\d+(?:\.\d+)?)[ 
\t]*(k|ki|m|mi)?b?)?$', re.I)
 
     # process all global bot args
     # returns a list of non-global args, i.e. args for upload.py
@@ -282,6 +297,30 @@
                     aborts.add(arg[len('-abortonwarn:'):])
                 else:
                     aborts = True
+            elif arg.startswith('-chunked'):
+                match = chunk_size_regex.match(arg)
+                if match:
+                    if match.group(1):  # number was in there
+                        base = float(match.group(1))
+                        if match.group(2):  # suffix too
+                            suffix = match.group(2).lower()
+                            if suffix == "k":
+                                suffix = 1000
+                            elif suffix == "m":
+                                suffix = 1000000
+                            elif suffix == "ki":
+                                suffix = 1 << 10
+                            elif suffix == "mi":
+                                suffix = 1 << 20
+                            else:
+                                pass  # huh?
+                        else:
+                            suffix = 1
+                        chunk_size = math.trunc(base * suffix)
+                    else:
+                        chunk_size = 1 << 20  # default to 1 MiB
+                else:
+                    pywikibot.error('Chunk size parameter is not valid.')
             elif url == u'':
                 url = arg
             else:
@@ -290,7 +329,7 @@
     bot = UploadRobot(url, description=description, useFilename=useFilename,
                       keepFilename=keepFilename,
                       verifyDescription=verifyDescription,
-                      aborts=aborts)
+                      aborts=aborts, chunk_size=chunk_size)
     bot.run()
 
 if __name__ == "__main__":

-- 
To view, visit https://gerrit.wikimedia.org/r/156030
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I80b2bba9e63832173d5b697db1f4ea419ca1122f
Gerrit-PatchSet: 4
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <[email protected]>
Gerrit-Reviewer: John Vandenberg <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: Merlijn van Deen <[email protected]>
Gerrit-Reviewer: Mpaa <[email protected]>
Gerrit-Reviewer: XZise <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
Pywikibot-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits

Reply via email to