jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/633311 )

Change subject: [IMPR] Replace deprecated urllib.request.URLopener
......................................................................

[IMPR] Replace deprecated urllib.request.URLopener

Bug: T255575
Change-Id: I7c76bca366d72486234b44d1b773aa270d6494a0
---
M pywikibot/specialbots/_upload.py
1 file changed, 66 insertions(+), 58 deletions(-)
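
For context: urllib.request.URLopener has been deprecated since Python 3.3.
The patch replaces it with pywikibot.comms.http.fetch(), which is backed by
requests. A minimal sketch of the migration pattern, where file_url and data
are illustrative names rather than part of the patch:

    # before: URLopener, deprecated since Python 3.3
    from contextlib import closing
    from urllib.request import URLopener

    uo = URLopener()
    with closing(uo.open(file_url)) as infile:
        data = infile.read()

    # after: pywikibot's http wrapper around requests
    import pywikibot.comms.http as http

    r = http.fetch(file_url, stream=True)
    r.data.raise_for_status()  # HTTP errors surface as exceptions
    data = r.data.content      # r.data is a requests.Response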

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/pywikibot/specialbots/_upload.py b/pywikibot/specialbots/_upload.py
index 745eb73..ca53d19 100644
--- a/pywikibot/specialbots/_upload.py
+++ b/pywikibot/specialbots/_upload.py
@@ -10,18 +10,20 @@
 # Distributed under the terms of the MIT license.
 #
 import os
+import requests
 import tempfile

-from contextlib import closing
+from pathlib import Path
 from urllib.parse import urlparse
-from urllib.request import URLopener

 import pywikibot
+import pywikibot.comms.http as http
 import pywikibot.data.api

-from pywikibot import config

+from pywikibot import config
 from pywikibot.bot import BaseBot, QuitKeyboardInterrupt
+from pywikibot.exceptions import FatalServerError
 from pywikibot.tools import deprecated_args
 from pywikibot.tools.formatter import color_format

@@ -115,62 +117,65 @@
             pywikibot.warning('file_url is not given. '
                               'Set to self.url by default.')
         pywikibot.output('Reading file {}'.format(file_url))
-        resume = False
-        rlen = 0
-        _contents = None
-        dt = 15
-        uo = URLopener()
-        retrieved = False

-        while not retrieved:
-            if resume:
-                pywikibot.output('Resume download...')
-                uo.addheader('Range', 'bytes={}-'.format(rlen))
-
-            with closing(uo.open(file_url)) as infile:
-                info = infile.info()
-
-                info_get = info.get
-                content_type = info_get('Content-Type')
-                content_len = info_get('Content-Length')
-                accept_ranges = info_get('Accept-Ranges')
-
-                if 'text/html' in content_type:
-                    pywikibot.output(
-                        "Couldn't download the image: "
-                        'the requested URL was not found on server.')
-                    return
-
-                valid_ranges = accept_ranges == 'bytes'
-
-                if resume:
-                    _contents += infile.read()
-                else:
-                    _contents = infile.read()
-
-            retrieved = True
-            if content_len:
-                rlen = len(_contents)
-                content_len = int(content_len)
-                if rlen < content_len:
-                    retrieved = False
-                    pywikibot.output(
-                        'Connection closed at byte {} ({} left)'
-                        .format(rlen, content_len))
-                    if valid_ranges and rlen > 0:
-                        resume = True
-                    pywikibot.output('Sleeping for {} seconds...'.format(dt))
-                    pywikibot.sleep(dt)
-                    if dt <= 60:
-                        dt += 15
-                    elif dt < 360:
-                        dt += 60
-            else:
-                pywikibot.log(
-                    'WARNING: length check of retrieved data not possible.')
         handle, tempname = tempfile.mkstemp()
-        with os.fdopen(handle, 'wb') as t:
-            t.write(_contents)
+        path = Path(tempname)
+        size = 0
+
+        dt_gen = iter((15, 30, 45, 60, 120, 180, 240, 300))
+        while True:
+            file_len = path.stat().st_size
+            if file_len:
+                pywikibot.output('Download resumed.')
+                headers = {'Range': 'bytes={}-'.format(file_len)}
+            else:
+                headers = {}
+
+            with open(path, 'ab') as fd:
+                os.lseek(handle, file_len, 0)
+                try:
+                    r = http.fetch(file_url, stream=True, headers=headers)
+                    response = r.data
+                    response.raise_for_status()
+
+                    # get download info, if available
+                    # Note: this check is not enough to exclude pages,
+                    #       e.g. 'application/json' is also not a media type
+                    if 'text/' in response.headers['Content-Type']:
+                        raise FatalServerError('The requested URL was not '
+                                               'found on server.')
+                    size = max(size,
+                               int(response.headers.get('Content-Length', 0)))
+
+                    # stream content to temp file (in chunks of 1 MiB)
+                    for chunk in response.iter_content(chunk_size=1024 * 1024):
+                        fd.write(chunk)
+
+                # raised if the connection is lost in response.iter_content()
+                except requests.ConnectionError:
+                    fd.flush()
+                    pywikibot.output('Connection closed at byte {}'
+                                     .format(path.stat().st_size))
+                # raised from response.raise_for_status()
+                except requests.HTTPError as e:
+                    # exit criterion if size is not available:
+                    # an error on the last iteration is OK, since we request
+                    #    {'Range': 'bytes=file_len-'} beyond the end of file
+                    if response.status_code == 416 and path.stat().st_size:
+                        break
+                    else:
+                        raise FatalServerError(str(e)) from e
+
+            if size and size == path.stat().st_size:
+                break
+            try:
+                dt = next(dt_gen)
+                pywikibot.output('Sleeping for {} seconds ...'.format(dt))
+                pywikibot.sleep(dt)
+            except StopIteration:
+                raise FatalServerError('Download failed, too many retries!')
+
+        pywikibot.output('Downloaded {} bytes'.format(path.stat().st_size))
         return tempname

     def _handle_warning(self, warning):
@@ -397,7 +402,10 @@

         ignore_warnings = self.ignore_warning is True or self._handle_warnings
         if '://' in file_url and not site.has_right('upload_by_url'):
-            file_url = self.read_file_content(file_url)
+            try:
+                file_url = self.read_file_content(file_url)
+            except FatalServerError:
+                return None

         try:
             success = imagepage.upload(file_url,

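For readers outside pywikibot, the retry-and-resume logic above can be
summarized as a self-contained sketch built directly on requests. The
function name download_with_resume and the timeout value are illustrative;
the patch itself uses pywikibot.comms.http.fetch() and FatalServerError:

    import time
    from pathlib import Path

    import requests

    def download_with_resume(url, tempname):
        """Download url to tempname, resuming via Range headers."""
        path = Path(tempname)
        path.touch()
        size = 0
        backoff = iter((15, 30, 45, 60, 120, 180, 240, 300))
        while True:
            file_len = path.stat().st_size
            headers = {'Range': 'bytes={}-'.format(file_len)} \
                if file_len else {}
            with open(path, 'ab') as fd:
                try:
                    response = requests.get(url, stream=True,
                                            headers=headers, timeout=30)
                    response.raise_for_status()
                    # the first response carries the full Content-Length;
                    # resumed responses only report the remaining bytes
                    size = max(size,
                               int(response.headers.get('Content-Length', 0)))
                    for chunk in response.iter_content(
                            chunk_size=1024 * 1024):
                        fd.write(chunk)
                except requests.ConnectionError:
                    pass  # retry below, resuming from the current file size
                except requests.HTTPError:
                    # 416 after a partial download means the Range request
                    # ran past the end of an already complete file
                    if response.status_code == 416 and path.stat().st_size:
                        break
                    raise
            if size and size == path.stat().st_size:
                break
            try:
                time.sleep(next(backoff))
            except StopIteration:
                raise RuntimeError('Download failed, too many retries!')
        return tempname
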
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/633311
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I7c76bca366d72486234b44d1b773aa270d6494a0
Gerrit-Change-Number: 633311
Gerrit-PatchSet: 7
Gerrit-Owner: Mpaa <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged