Hi there! We're trying to remove Python 2 support in Debian, so I have started working on converting Yum to Python 3. This starts with urlgrabber, which Yum depends on.
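In case it helps review: the bulk of the diff is a mechanical application of the usual "runs on 2 and 3" idioms, with six papering over the renamed stdlib modules. A minimal sketch of the pattern, illustrative only (the fetch/to_utf8 helpers below are not part of the patch; they assume python-six is installed):

    # Idioms used throughout the patch; valid on both Python 2 and 3.
    import six
    from six.moves import urllib

    def fetch(url):
        try:
            # six.moves.urllib aliases urllib/urllib2/urlparse to urllib.*
            return urllib.request.urlopen(url).read()
        except urllib.error.URLError as e:   # "except ... as" syntax
            print('fetch failed: %s' % e)    # print() as a function
            return None

    def to_utf8(obj, errors='replace'):
        # six.text_type is unicode on Python 2 and str on Python 3
        if isinstance(obj, six.text_type):
            obj = obj.encode('utf-8', errors)
        return obj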
Here's the patch. Please let me know your thoughts on it as soon as you can, so we can move forward in Debian without removing Yum.

Cheers,

Thomas Goirand (zigo)

P.S.: Please CC me on replies; I'm not sure I'm subscribed.
Description: Add Python 3 compatibility
Author: Thomas Goirand <z...@debian.org>
Forwarded: no
Last-Update: 2019-07-30

--- urlgrabber-3.10.2.orig/setup.py
+++ urlgrabber-3.10.2/setup.py
@@ -55,7 +55,7 @@ classifiers = [
 if __name__ == '__main__':
     config = globals().copy()
     keys = config.keys()
-    for k in keys:
+    for k in list(keys):
         #print '%-20s -> %s' % (k, config[k])
         if k.startswith('_'): del config[k]
--- urlgrabber-3.10.2.orig/test/grabberperf.py
+++ urlgrabber-3.10.2/test/grabberperf.py
@@ -65,9 +65,9 @@ def speedtest(size):
     try:
         from urlgrabber.progress import text_progress_meter
-    except ImportError, e:
+    except ImportError as e:
         tpm = None
-        print 'not using progress meter'
+        print('not using progress meter')
     else:
         tpm = text_progress_meter(fo=open('/dev/null', 'w'))
@@ -83,11 +83,11 @@ def speedtest(size):
     # module.
 
     # get it nicely cached before we start comparing
-    if DEBUG: print 'pre-caching'
+    if DEBUG: print('pre-caching')
     for i in range(100):
         urlgrab(tempsrc, tempdst, copy_local=1, throttle=None, proxies=proxies)
-    if DEBUG: print 'running speed test.'
+    if DEBUG: print('running speed test.')
     reps = 500
     for i in range(reps):
         if DEBUG:
--- urlgrabber-3.10.2.orig/test/munittest.py
+++ urlgrabber-3.10.2/test/munittest.py
@@ -190,7 +190,7 @@ class TestResult:
     def _exc_info_to_string(self, err):
         """Converts a sys.exc_info()-style tuple of values into a string."""
-        return string.join(traceback.format_exception(*err), '')
+        return ''.join(traceback.format_exception(*err))
 
     def __repr__(self):
         return "<%s run=%i errors=%i failures=%i>" % \
@@ -276,7 +276,7 @@ class TestCase:
        the specified test method's docstring.
        """
         doc = self._testMethodDoc
-        return doc and string.strip(string.split(doc, "\n")[0]) or None
+        return doc and doc.split("\n")[0].strip() or None
 
     def id(self):
         return "%s.%s" % (_strclass(self.__class__), self._testMethodName)
@@ -361,15 +361,15 @@ class TestCase:
     def fail(self, msg=None):
         """Fail immediately, with the given message."""
-        raise self.failureException, msg
+        raise self.failureException(msg)
 
     def failIf(self, expr, msg=None):
         "Fail the test if the expression is true."
-        if expr: raise self.failureException, msg
+        if expr: raise self.failureException(msg)
 
     def failUnless(self, expr, msg=None):
         """Fail the test unless the expression is true."""
-        if not expr: raise self.failureException, msg
+        if not expr: raise self.failureException(msg)
 
     def failUnlessRaises(self, excClass, callableObj, *args, **kwargs):
         """Fail unless an exception of class excClass is thrown
@@ -386,7 +386,7 @@ class TestCase:
         else:
             if hasattr(excClass,'__name__'): excName = excClass.__name__
             else: excName = str(excClass)
-            raise self.failureException, excName
+            raise self.failureException(excName)
 
     def failUnlessEqual(self, first, second, msg=None):
         """Fail if the two objects are unequal as determined by the '=='
@@ -442,15 +442,15 @@ class TestCase:
     def skip(self, msg=None):
         """Skip the test"""
-        raise self.skipException, msg
+        raise self.skipException(msg)
 
     def skipIf(self, expr, msg=None):
         "Skip the test if the expression is true."
-        if expr: raise self.skipException, msg
+        if expr: raise self.skipException(msg)
 
     def skipUnless(self, expr, msg=None):
         """Skip the test unless the expression is true."""
-        if not expr: raise self.skipException, msg
+        if not expr: raise self.skipException(msg)
@@ -554,7 +554,7 @@ class FunctionTestCase(TestCase):
     def shortDescription(self):
         if self._description is not None: return self._description
         doc = self._testFunc.__doc__
-        return doc and string.strip(string.split(doc, "\n")[0]) or None
+        return doc and doc.split("\n")[0].strip() or None
@@ -603,7 +603,7 @@ class TestLoader:
         The method optionally resolves the names relative to a given module.
         """
-        parts = string.split(name, '.')
+        parts = name.split('.')
         if module is None:
             if not parts:
-                raise ValueError, "incomplete test name: %s" % name
+                raise ValueError("incomplete test name: %s" % name)
@@ -611,7 +611,7 @@ class TestLoader:
             parts_copy = parts[:]
             while parts_copy:
                 try:
-                    module = __import__(string.join(parts_copy,'.'))
+                    module = __import__('.'.join(parts_copy))
                     break
                 except ImportError:
                     del parts_copy[-1]
@@ -871,7 +871,7 @@ Examples:
                        argv=None, testRunner=None, testLoader=defaultTestLoader):
         if type(module) == type(''):
             self.module = __import__(module)
-            for part in string.split(module,'.')[1:]:
+            for part in module.split('.')[1:]:
                 self.module = getattr(self.module, part)
         else:
             self.module = module
@@ -886,7 +886,7 @@ Examples:
         self.runTests()
 
     def usageExit(self, msg=None):
-        if msg: print msg
-        print self.USAGE % self.__dict__
+        if msg: print(msg)
+        print(self.USAGE % self.__dict__)
         sys.exit(2)
@@ -910,7 +910,7 @@ Examples:
             else:
                 self.testNames = (self.defaultTest,)
             self.createTests()
-        except getopt.error, msg:
+        except getopt.error as msg:
             self.usageExit(msg)
 
     def createTests(self):
--- urlgrabber-3.10.2.orig/test/runtests.py
+++ urlgrabber-3.10.2/test/runtests.py
@@ -54,7 +54,7 @@ def parse_args():
     return (descriptions,verbosity)
 
 def usage():
-    print __doc__
+    print(__doc__)
 
 if __name__ == '__main__':
     main()
--- urlgrabber-3.10.2.orig/test/test_byterange.py
+++ urlgrabber-3.10.2/test/test_byterange.py
@@ -25,7 +25,7 @@
 
 import sys
 
-from cStringIO import StringIO
+from six.moves import cStringIO as StringIO
 from urlgrabber.byterange import RangeableFileObject
 
 from base_test_code import *
--- urlgrabber-3.10.2.orig/test/test_grabber.py
+++ urlgrabber-3.10.2/test/test_grabber.py
@@ -26,7 +26,6 @@
 import sys
 import os
 import string, tempfile, random, cStringIO, os
-import urllib2
 import socket
 
 from base_test_code import *
@@ -37,6 +36,8 @@ from urlgrabber.grabber import URLGrabber
      URLParser
 from urlgrabber.progress import text_progress_meter
 
+from six.moves import urllib
+
 
 class FileObjectTests(TestCase):
     def setUp(self):
@@ -73,7 +74,7 @@ class FileObjectTests(TestCase):
     def test_readlines(self):
         "PyCurlFileObject .readlines() method"
         li = self.wrapper.readlines()
-        self.fo_output.write(string.join(li, ''))
+        self.fo_output.write(''.join(li))
         self.assert_(reference_data == self.fo_output.getvalue())
 
     def test_smallread(self):
@@ -149,7 +150,7 @@ class URLGrabberTestCase(TestCase):
        values into the URLGrabber constructor and checks that
        they've been set properly.
        """
-        opener = urllib2.OpenerDirector()
+        opener = urllib.request.OpenerDirector()
         g = URLGrabber( progress_obj=self.meter,
                         throttle=0.9,
                         bandwidth=20,
@@ -470,7 +471,7 @@ class FTPRegetTests(RegetTestBase, TestCase):
         # this tests to see if the server is available.  If it's not,
         # then these tests will be skipped
         try:
-            fo = urllib2.urlopen(self.url).close()
+            fo = urllib.request.urlopen(self.url).close()
         except IOError:
             self.skip()
@@ -545,7 +546,7 @@ class ProFTPDSucksTests(TestCase):
     def setUp(self):
         self.url = ref_proftp
         try:
-            fo = urllib2.urlopen(self.url).close()
+            fo = urllib.request.urlopen(self.url).close()
         except IOError:
             self.skip()
@@ -592,7 +593,7 @@ class ProxyFTPAuthTests(ProxyHTTPAuthTests):
         if not self.have_proxy():
             self.skip()
         try:
-            fo = urllib2.urlopen(self.url).close()
+            fo = urllib.request.urlopen(self.url).close()
         except IOError:
             self.skip()
         self.g = URLGrabber()
--- urlgrabber-3.10.2.orig/test/test_mirror.py
+++ urlgrabber-3.10.2/test/test_mirror.py
@@ -323,7 +323,7 @@ class HttpReplyCode(TestCase):
 
         # multi
         err = []
-        self.mg.urlgrab('foo', async = True, failfunc = err.append)
+        self.mg.urlgrab('foo', myasync = True, failfunc = err.append)
         urlgrabber.grabber.parallel_wait()
         self.assertEquals([e.exception.errno for e in err], [256])
         self.assertEquals(self.code, 503); del self.code
@@ -374,7 +374,7 @@ class HttpReplyCode(TestCase):
 
         # multi
         opts.checkfunc = checkfunc_grab
-        self.mg.urlgrab('foo', async=True)
+        self.mg.urlgrab('foo', myasync=True)
         try:
             urlgrabber.grabber.parallel_wait()
         except URLGrabError as e:
--- urlgrabber-3.10.2.orig/test/threading/batchgrabber.py
+++ urlgrabber-3.10.2/test/threading/batchgrabber.py
@@ -31,7 +31,7 @@ be pulled in multiple threads.
 
 import os.path, sys
 if __name__ == '__main__':
-    print os.path.dirname(sys.argv[0])
+    print(os.path.dirname(sys.argv[0]))
     sys.path.insert(0, (os.path.dirname(sys.argv[0]) or '.') + '/../..')
 
 from threading import Thread, Semaphore
@@ -81,14 +81,14 @@ class Worker(Thread):
         self.kwargs = kwargs
 
     def run(self):
-        if DEBUG: print "worker thread started."
+        if DEBUG: print("worker thread started.")
        grabber = self.parent.grabber
        progress_obj = grabber.opts.progress_obj
        if isinstance(progress_obj, MultiFileMeter):
            self.kwargs['progress_obj'] = progress_obj.newMeter()
        try:
            rslt = self.parent.grabber.urlgrab(self.url, self.filename, **self.kwargs)
-        except URLGrabError, e:
-            print '%s, %s' % (e, self.url)
+        except URLGrabError as e:
+            print('%s, %s' % (e, self.url))
 
 def main():
@@ -98,13 +98,13 @@ def main():
     g = BatchURLGrabber(keepalive=1, progress_obj=progress_obj)
     for arg in sys.argv[1:]:
         g.urlgrab(arg)
-    if DEBUG: print "before batchgrab"
+    if DEBUG: print("before batchgrab")
     try:
         g.batchgrab()
     except KeyboardInterrupt:
         sys.exit(1)
-    if DEBUG: print "after batchgrab"
+    if DEBUG: print("after batchgrab")
 
 if __name__ == '__main__':
     main()
--- urlgrabber-3.10.2.orig/urlgrabber/__init__.py
+++ urlgrabber-3.10.2/urlgrabber/__init__.py
@@ -52,4 +52,4 @@ __author__ = 'Michael D. Stenner <msten
              'Zdenek Pavlas <zpav...@redhat.com>'
 __url__ = 'http://urlgrabber.baseurl.org/'
 
-from grabber import urlgrab, urlopen, urlread
+from .grabber import urlgrab, urlopen, urlread
--- urlgrabber-3.10.2.orig/urlgrabber/byterange.py
+++ urlgrabber-3.10.2/urlgrabber/byterange.py
@@ -20,22 +20,54 @@
 import os
 import stat
-import urllib
-import urllib2
-import rfc822
+
+from six.moves import urllib
+
+import six
+
+if six.PY3:
+    import email.utils
+    from mailbox import Message
+    from urllib import request
+    from urllib.parse import splitport, splituser, splitpasswd, splitattr, unquote
+    from urllib.request import url2pathname, ftpwrapper as _ftpwrapper
+    from urllib.response import addclosehook, addinfourl
+    BASEHANDLERCLASS = urllib.request.BaseHandler
+    BASEFILEHCLASS = urllib.request.FileHandler
+    BASEFTPHCLASS = urllib.request.FTPHandler
+else:
+    import rfc822
+    import urllib2
+    from mimetools import Message
+    from urllib import (splitport, splituser, splitpasswd, splitattr,
+                        unquote, url2pathname, addclosehook, addinfourl,
+                        ftpwrapper as _ftpwrapper)
+    BASEHANDLERCLASS = urllib2.BaseHandler
+    BASEFILEHCLASS = urllib2.FileHandler
+    BASEFTPHCLASS = urllib2.FTPHandler
+
+
+import ftplib
+import socket
+import sys
+import mimetypes
+
 DEBUG = None
 
 try:
     from cStringIO import StringIO
-except ImportError, msg:
-    from StringIO import StringIO
+except ImportError as msg:
+    try:
+        from StringIO import StringIO
+    except ImportError:
+        from io import StringIO
 
 class RangeError(IOError):
     """Error raised when an unsatisfiable range is requested."""
     pass
 
-class HTTPRangeHandler(urllib2.BaseHandler):
+class HTTPRangeHandler(BASEHANDLERCLASS):
     """Handler that enables HTTP Range headers.
 
     This was extremely simple. The Range header is a HTTP feature to
@@ -48,15 +80,15 @@ class HTTPRangeHandler
         import byterange
 
         range_handler = range.HTTPRangeHandler()
-        opener = urllib2.build_opener(range_handler)
+        opener = urllib.request.build_opener(range_handler)
 
         # install it
-        urllib2.install_opener(opener)
+        urllib.request.install_opener(opener)
 
         # create Request and set Range header
-        req = urllib2.Request('http://www.python.org/')
+        req = urllib.request.Request('http://www.python.org/')
         req.header['Range'] = 'bytes=30-50'
-        f = urllib2.urlopen(req)
+        f = urllib.request.urlopen(req)
     """
 
     def http_error_206(self, req, fp, code, msg, hdrs):
@@ -120,7 +152,7 @@ class RangeableFileObject:
            in self.fo.  This includes methods."""
         if hasattr(self.fo, name):
             return getattr(self.fo, name)
-        raise AttributeError, name
+        raise AttributeError(name)
 
     def tell(self):
         """Return the position within the range.
@@ -211,25 +243,27 @@ class RangeableFileObject:
                 raise RangeError(9, 'Requested Range Not Satisfiable')
             pos+= bufsize
 
-class FileRangeHandler(urllib2.FileHandler):
+class FileRangeHandler(BASEFILEHCLASS):
     """FileHandler subclass that adds Range support.
     This class handles Range headers exactly like an HTTP
     server would.
""" def open_local_file(self, req): import mimetypes - import mimetools host = req.get_host() file = req.get_selector() localfile = urllib.url2pathname(file) stats = os.stat(localfile) size = stats[stat.ST_SIZE] - modified = rfc822.formatdate(stats[stat.ST_MTIME]) + if six.PY3: + modified = email.utils.formatdate(stats[stat.ST_MTIME]) + else: + modified = rfc822.formatdate(stats[stat.ST_MTIME]) mtype = mimetypes.guess_type(file)[0] if host: host, port = urllib.splitport(host) if port or socket.gethostbyname(host) not in self.get_names(): - raise urllib2.URLError('file not on local host') + raise urllib.error.URLError('file not on local host') fo = open(localfile,'rb') brange = req.headers.get('Range',None) brange = range_header_to_tuple(brange) @@ -241,7 +269,7 @@ class FileRangeHandler(urllib2.FileHandl raise RangeError(9, 'Requested Range Not Satisfiable') size = (lb - fb) fo = RangeableFileObject(fo, (fb,lb)) - headers = mimetools.Message(StringIO( + headers = Message(StringIO( 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % (mtype or 'text/plain', size, modified))) return urllib.addinfourl(fo, headers, 'file:'+file) @@ -254,19 +282,11 @@ class FileRangeHandler(urllib2.FileHandl # follows: # -- range support modifications start/end here -from urllib import splitport, splituser, splitpasswd, splitattr, \ - unquote, addclosehook, addinfourl -import ftplib -import socket -import sys -import mimetypes -import mimetools - -class FTPRangeHandler(urllib2.FTPHandler): +class FTPRangeHandler(BASEFTPHCLASS): def ftp_open(self, req): host = req.get_host() if not host: - raise IOError, ('ftp error', 'no host given') + raise IOError('ftp error', 'no host given') host, port = splitport(host) if port is None: port = ftplib.FTP_PORT @@ -274,22 +294,22 @@ class FTPRangeHandler(urllib2.FTPHandler port = int(port) # username/password handling - user, host = splituser(host) + user, host = urllib.splituser(host) if user: - user, passwd = splitpasswd(user) + user, passwd = urllib.splitpasswd(user) else: passwd = None - host = unquote(host) - user = unquote(user or '') - passwd = unquote(passwd or '') + host = urllib.unquote(host) + user = urllib.unquote(user or '') + passwd = urllib.unquote(passwd or '') try: host = socket.gethostbyname(host) - except socket.error, msg: - raise urllib2.URLError(msg) - path, attrs = splitattr(req.get_selector()) + except socket.error as msg: + raise urllib.error.URLError(msg) + path, attrs = urllib.splitattr(req.get_selector()) dirs = path.split('/') - dirs = map(unquote, dirs) + dirs = map(urllib.unquote, dirs) dirs, file = dirs[:-1], dirs[-1] if dirs and not dirs[0]: dirs = dirs[1:] @@ -297,7 +317,7 @@ class FTPRangeHandler(urllib2.FTPHandler fw = self.connect_ftp(user, passwd, host, port, dirs) type = file and 'I' or 'D' for attr in attrs: - attr, value = splitattr(attr) + attr, value = urllib.splitattr(attr) if attr.lower() == 'type' and \ value in ('a', 'A', 'i', 'I', 'd', 'D'): type = value.upper() @@ -336,16 +356,16 @@ class FTPRangeHandler(urllib2.FTPHandler if retrlen is not None and retrlen >= 0: headers += "Content-Length: %d\n" % retrlen sf = StringIO(headers) - headers = mimetools.Message(sf) - return addinfourl(fp, headers, req.get_full_url()) - except ftplib.all_errors, msg: - raise IOError, ('ftp error', msg), sys.exc_info()[2] + headers = Message(sf) + return urllib.addinfourl(fp, headers, req.get_full_url()) + except ftplib.all_errors as msg: + raise IOError(('ftp error', msg), sys.exc_info()[2]) def connect_ftp(self, user, passwd, host, port, 
dirs): fw = ftpwrapper(user, passwd, host, port, dirs) return fw -class ftpwrapper(urllib.ftpwrapper): +class ftpwrapper(BASEFTPHCLASS): # range support note: # this ftpwrapper code is copied directly from # urllib. The only enhancement is to add the rest @@ -364,22 +384,22 @@ class ftpwrapper(urllib.ftpwrapper): # Use nlst to see if the file exists at all try: self.ftp.nlst(file) - except ftplib.error_perm, reason: - raise IOError, ('ftp error', reason), sys.exc_info()[2] + except ftplib.error_perm as reason: + raise IOError(('ftp error', reason), sys.exc_info()[2]) # Restore the transfer mode! self.ftp.voidcmd(cmd) # Try to retrieve as a file try: cmd = 'RETR ' + file conn = self.ftp.ntransfercmd(cmd, rest) - except ftplib.error_perm, reason: + except ftplib.error_perm as reason: if str(reason)[:3] == '501': # workaround for REST not supported error fp, retrlen = self.retrfile(file, type) fp = RangeableFileObject(fp, (rest,'')) return (fp, retrlen) elif str(reason)[:3] != '550': - raise IOError, ('ftp error', reason), sys.exc_info()[2] + raise IOError(('ftp error', reason), sys.exc_info()[2]) if not conn: # Set transfer mode to ASCII! self.ftp.voidcmd('TYPE A') @@ -389,7 +409,7 @@ class ftpwrapper(urllib.ftpwrapper): conn = self.ftp.ntransfercmd(cmd) self.busy = 1 # Pass back both a suitably decorated object and a retrieval length - return (addclosehook(conn[0].makefile('rb'), + return (urllib.addclosehook(conn[0].makefile('rb'), self.endtransfer), conn[1]) --- urlgrabber-3.10.2.orig/urlgrabber/grabber.py +++ urlgrabber-3.10.2/urlgrabber/grabber.py @@ -290,7 +290,7 @@ GENERAL ARGUMENTS (kwargs) What type of name to IP resolving to use, default is to do both IPV4 and IPV6. - async = (key, limit) + myasync = (key, limit) When this option is set, the urlgrab() is not processed immediately but queued. parallel_wait() then processes grabs in parallel, limiting @@ -309,7 +309,7 @@ GENERAL ARGUMENTS (kwargs) default_speed, half_life - These options only affect the async mirror selection code. + These options only affect the myasync mirror selection code. The default_speed option sets the speed estimate for mirrors we have never downloaded from, and defaults to 1 MBps. 
@@ -519,22 +519,25 @@ BANDWIDTH THROTTLING
 
 import os
 import sys
-import urlparse
 import time
 import string
-import urllib
-import urllib2
-from httplib import responses
-import mimetools
-import thread
+
+try:
+    from mimetools import Message
+except ImportError:
+    from mailbox import Message
+
+try:
+    import thread
+except ImportError:
+    import _thread
+
 import types
 import stat
 import pycurl
 from ftplib import parse150
-from StringIO import StringIO
-from httplib import HTTPException
 import socket, select, fcntl
-from byterange import range_tuple_normalize, range_tuple_to_header, RangeError
+from .byterange import range_tuple_normalize, range_tuple_to_header, RangeError
 
 try:
     import xattr
@@ -543,6 +546,14 @@ try:
 except ImportError:
     xattr = None
 
+from six import StringIO
+from six.moves.http_client import HTTPException
+from six.moves.http_client import responses
+from six.moves import urllib
+import subprocess
+
+import six
+
 
 ########################################################################
 #                 MODULE INITIALIZATION
@@ -555,7 +566,7 @@ except:
 try:
     # this part isn't going to do much - need to talk to gettext
-    from i18n import _
-except ImportError, msg:
+    from .i18n import _
+except ImportError as msg:
     def _(st): return st
 
 ########################################################################
@@ -654,9 +665,9 @@ def _(st):
 # These functions are meant to be utilities for the urlgrabber library to use.
 
 def _to_utf8(obj, errors='replace'):
-    '''convert 'unicode' to an encoded utf-8 byte string '''
+    '''convert 'six.text_type' to an encoded utf-8 byte string '''
     # stolen from yum.i18n
-    if isinstance(obj, unicode):
+    if isinstance(obj, six.text_type):
         obj = obj.encode('utf-8', errors)
     return obj
 
@@ -665,7 +676,7 @@ def exception2msg(e):
     try:
         return str(e)
     except UnicodeEncodeError:
         # always use byte strings
-        return unicode(e).encode('utf8')
+        return six.text_type(e).encode('utf8')
 
 ########################################################################
 # END UTILITY FUNCTIONS
@@ -718,10 +729,10 @@ class URLGrabError(IOError):
         You can use these error codes like so:
         try: urlgrab(url)
-        except URLGrabError, e:
+        except URLGrabError as e:
            if e.errno == 3: ...
            # or
-           print e.strerror
+           print(e.strerror)
            # or simply
-           print e  #### print '[Errno %i] %s' % (e.errno, e.strerror)
+           print(e)  #### print('[Errno %i] %s' % (e.errno, e.strerror))
     """
@@ -811,14 +822,14 @@ class URLParser:
         if opts.prefix:
             url = self.add_prefix(url, opts.prefix)
 
-        parts = urlparse.urlparse(url)
+        parts = urllib.parse.urlparse(url)
         (scheme, host, path, parm, query, frag) = parts
 
-        if not scheme or (len(scheme) == 1 and scheme in string.letters):
+        if not scheme or (len(scheme) == 1 and scheme in string.ascii_letters):
             # if a scheme isn't specified, we guess that it's "file:"
             if url[0] not in '/\\': url = os.path.abspath(url)
-            url = 'file:' + urllib.pathname2url(url)
-            parts = urlparse.urlparse(url)
+            url = 'file:' + urllib.request.pathname2url(url)
+            parts = urllib.parse.urlparse(url)
             quote = 0     # pathname2url quotes, so we won't do it again
 
         if scheme in ['http', 'https']:
@@ -829,7 +840,7 @@ class URLParser:
         if quote:
             parts = self.quote(parts)
-        url = urlparse.urlunparse(parts)
+        url = urllib.parse.urlunparse(parts)
         return url, parts
 
     def add_prefix(self, url, prefix):
@@ -853,7 +864,7 @@ class URLParser:
         passing into urlgrabber.
""" (scheme, host, path, parm, query, frag) = parts - path = urllib.quote(path) + path = urllib.parse.quote(path) return (scheme, host, path, parm, query, frag) hexvals = '0123456789ABCDEF' @@ -870,7 +881,7 @@ class URLParser: (scheme, host, path, parm, query, frag) = parts if ' ' in path: return 1 - ind = string.find(path, '%') + ind = path.find('%') if ind > -1: while ind > -1: if len(path) < ind+3: @@ -879,7 +890,7 @@ class URLParser: if code[0] not in self.hexvals or \ code[1] not in self.hexvals: return 1 - ind = string.find(path, '%', ind+1) + ind = path.find('%', ind+1) return 0 return 1 @@ -899,7 +910,7 @@ class URLGrabberOptions: def __getattr__(self, name): if self.delegate and hasattr(self.delegate, name): return getattr(self.delegate, name) - raise AttributeError, name + raise AttributeError(name) def raw_throttle(self): """Calculate raw throttle value from throttle and bandwidth @@ -957,7 +968,7 @@ class URLGrabberOptions: def _set_attributes(self, **kwargs): """Update object attributes with those provided in kwargs.""" self.__dict__.update(kwargs) - if kwargs.has_key('range'): + if 'range' in kwargs: # normalize the supplied range value self.range = range_tuple_normalize(self.range) if not self.reget in [None, 'simple', 'check_timestamp']: @@ -1014,7 +1025,7 @@ class URLGrabberOptions: self.size = None # if we know how big the thing we're getting is going # to be. this is ultimately a MAXIMUM size for the file self.max_header_size = 2097152 #2mb seems reasonable for maximum header size - self.async = None # blocking by default + self.myasync = None # blocking by default self.mirror_group = None self.max_connections = 5 self.timedhosts = None @@ -1086,10 +1097,10 @@ class URLGrabber(object): r = apply(func, (opts,) + args, {}) if DEBUG: DEBUG.info('success') return r - except URLGrabError, e: + except URLGrabError as e: exception = e callback = opts.failure_callback - except KeyboardInterrupt, e: + except KeyboardInterrupt as e: exception = e callback = opts.interrupt_callback if not callback: @@ -1144,7 +1155,7 @@ class URLGrabber(object): (scheme, host, path, parm, query, frag) = parts opts.find_proxy(url, scheme) if filename is None: - filename = os.path.basename( urllib.unquote(path) ) + filename = os.path.basename( urllib.parse.unquote(path) ) if not filename: # This is better than nothing. filename = 'index.html' @@ -1171,7 +1182,7 @@ class URLGrabber(object): _run_callback(opts.checkfunc, obj) return path - if opts.async: + if opts.myasync: opts.url = url opts.filename = filename opts.size = int(opts.size or 0) @@ -1195,7 +1206,7 @@ class URLGrabber(object): try: return self._retry(opts, retryfunc, url, filename) - except URLGrabError, e: + except URLGrabError as e: _TH.update(url, 0, 0, e) opts.exception = e return _run_callback(opts.failfunc, opts) @@ -1260,13 +1271,13 @@ class PyCurlFileObject(object): self._hdr_dump = '' self._parsed_hdr = None self.url = url - self.scheme = urlparse.urlsplit(self.url)[0] + self.scheme = urllib.parse.urlsplit(self.url)[0] self.filename = filename self.append = False self.reget_time = None self.opts = opts if self.opts.reget == 'check_timestamp': - raise NotImplementedError, "check_timestamp regets are not implemented in this ver of urlgrabber. Please report this." + raise NotImplementedError("check_timestamp regets are not implemented in this ver of urlgrabber. 
Please report this.") self._complete = False self._rbuf = '' self._rbufsize = 1024*8 @@ -1291,7 +1302,7 @@ class PyCurlFileObject(object): if hasattr(self.fo, name): return getattr(self.fo, name) - raise AttributeError, name + raise AttributeError(name) def _retrieve(self, buf): try: @@ -1305,7 +1316,7 @@ class PyCurlFileObject(object): if self.opts.progress_obj: size = self.size + self._reget_length self.opts.progress_obj.start(self._prog_reportname, - urllib.unquote(self.url), + urllib.parse.unquote(self.url), self._prog_basename, size=size, text=self.opts.text) @@ -1323,7 +1334,7 @@ class PyCurlFileObject(object): self.fo.write(buf[max(start, 0):stop]) else: self.fo.write(buf) - except IOError, e: + except IOError as e: self._cb_error = URLGrabError(16, exception2msg(e)) return -1 return len(buf) @@ -1370,7 +1381,7 @@ class PyCurlFileObject(object): if buf.lower().find('location') != -1: location = ':'.join(buf.split(':')[1:]) location = location.strip() - self.scheme = urlparse.urlsplit(location)[0] + self.scheme = urllib.parse.urlsplit(location)[0] self.url = location self._hdr_dump += buf @@ -1390,7 +1401,7 @@ class PyCurlFileObject(object): hdrfp = StringIO() hdrfp.write(self._hdr_dump[statusend:]) hdrfp.seek(0) - self._parsed_hdr = mimetools.Message(hdrfp) + self._parsed_hdr = Message(hdrfp) return self._parsed_hdr hdr = property(_return_hdr_obj) @@ -1519,7 +1530,7 @@ class PyCurlFileObject(object): try: self.curl_obj.perform() - except pycurl.error, e: + except pycurl.error as e: # XXX - break some of these out a bit more clearly # to other URLGrabErrors from # http://curl.haxx.se/libcurl/c/libcurl-errors.html @@ -1527,7 +1538,7 @@ class PyCurlFileObject(object): code = self.http_code errcode = e.args[0] - errurl = urllib.unquote(self.url) + errurl = urllib.parse.unquote(self.url) if self._error[0]: errcode = self._error[0] @@ -1618,7 +1629,7 @@ class PyCurlFileObject(object): if self._error[1]: msg = self._error[1] err = URLGrabError(14, msg) - err.url = urllib.unquote(self.url) + err.url = urllib.parse.unquote(self.url) raise err def _do_open(self): @@ -1685,22 +1696,22 @@ class PyCurlFileObject(object): else: fo = opener.open(req) hdr = fo.info() - except ValueError, e: + except ValueError as e: err = URLGrabError(1, _('Bad URL: %s : %s') % (self.url, e, )) err.url = self.url raise err - except RangeError, e: + except RangeError as e: err = URLGrabError(9, _('%s on %s') % (e, self.url)) err.url = self.url raise err - except urllib2.HTTPError, e: + except urllib.error.HTTPError as e: new_e = URLGrabError(14, _('%s on %s') % (e, self.url)) new_e.code = e.code new_e.exception = e new_e.url = self.url raise new_e - except IOError, e: + except IOError as e: if hasattr(e, 'reason') and isinstance(e.reason, socket.timeout): err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e)) err.url = self.url @@ -1710,12 +1721,12 @@ class PyCurlFileObject(object): err.url = self.url raise err - except OSError, e: + except OSError as e: err = URLGrabError(5, _('%s on %s') % (e, self.url)) err.url = self.url raise err - except HTTPException, e: + except HTTPException as e: err = URLGrabError(7, _('HTTP Exception (%s) on %s: %s') % \ (e.__class__.__name__, self.url, e)) err.url = self.url @@ -1742,7 +1753,7 @@ class PyCurlFileObject(object): (self.filename, mode)) try: self.fo = open(self.filename, mode) - except IOError, e: + except IOError as e: err = URLGrabError(16, _(\ 'error opening local file from %s, IOError: %s') % (self.url, e)) err.url = self.url @@ -1761,7 +1772,7 @@ class 
PyCurlFileObject(object): try: self._do_perform() - except URLGrabError, e: + except URLGrabError as e: self.fo.flush() self.fo.close() raise e @@ -1784,7 +1795,7 @@ class PyCurlFileObject(object): if mod_time != -1: try: os.utime(self.filename, (mod_time, mod_time)) - except OSError, e: + except OSError as e: err = URLGrabError(16, _(\ 'error setting timestamp on file %s from %s, OSError: %s') % (self.filename, self.url, e)) @@ -1793,7 +1804,7 @@ class PyCurlFileObject(object): # re open it try: self.fo = open(self.filename, 'r') - except IOError, e: + except IOError as e: err = URLGrabError(16, _(\ 'error opening file from %s, IOError: %s') % (self.url, e)) err.url = self.url @@ -1839,17 +1850,17 @@ class PyCurlFileObject(object): else: readamount = min(amt, self._rbufsize) try: new = self.fo.read(readamount) - except socket.error, e: + except socket.error as e: err = URLGrabError(4, _('Socket Error on %s: %s') % (self.url, e)) err.url = self.url raise err - except socket.timeout, e: + except socket.timeout as e: raise URLGrabError(12, _('Timeout on %s: %s') % (self.url, e)) err.url = self.url raise err - except IOError, e: + except IOError as e: raise URLGrabError(4, _('IOError on %s: %s') %(self.url, e)) err.url = self.url raise err @@ -1865,7 +1876,7 @@ class PyCurlFileObject(object): #if self.opts.progress_obj: # self.opts.progress_obj.update(self._amount_read) - self._rbuf = string.join(buf, '') + self._rbuf = buf.join('') return def _progress_update(self, download_total, downloaded, upload_total, uploaded): @@ -1909,12 +1920,12 @@ class PyCurlFileObject(object): if not self._complete: self._do_grab() return self.fo.readline() - i = string.find(self._rbuf, '\n') + i = self._rbuf.find('\n') while i < 0 and not (0 < limit <= len(self._rbuf)): L = len(self._rbuf) self._fill_buffer(L + self._rbufsize) if not len(self._rbuf) > L: break - i = string.find(self._rbuf, '\n', L) + i = self._rbuf.find('\n', L) if i < 0: i = len(self._rbuf) else: i = i+1 @@ -2000,7 +2011,7 @@ def _dumps(v): if v is False: return 'False' if type(v) in (int, long, float): return str(v) - if type(v) == unicode: + if type(v) == six.text_type: v = v.encode('UTF8') if type(v) == str: def quoter(c): return _quoter_map.get(c, c) @@ -2009,7 +2020,7 @@ def _dumps(v): return "(%s)" % ','.join(map(_dumps, v)) if type(v) == list: return "[%s]" % ','.join(map(_dumps, v)) - raise TypeError, 'Can\'t serialize %s' % v + raise TypeError('Can\'t serialize %s' % v) def _loads(s): def decode(v): @@ -2067,7 +2078,6 @@ def _readlines(fd): buf += os.read(fd, 4096) return buf[:-1].split('\n') -import subprocess class _ExternalDownloader: def __init__(self): @@ -2136,7 +2146,7 @@ class _ExternalDownloader: if line[5] != '0': ug_err.code = int(line[5]) if DEBUG: DEBUG.info('failure: %s', ug_err) - _TH.update(opts.url, int(line[2]), float(line[3]), ug_err, opts.async[0]) + _TH.update(opts.url, int(line[2]), float(line[3]), ug_err, opts.myasync[0]) ret.append((opts, size, ug_err)) return ret @@ -2152,7 +2162,7 @@ class _ExternalDownloaderPool: self.cache = {} def start(self, opts): - host = urlparse.urlsplit(opts.url).netloc + host = urllib.parse.urlsplit(opts.url).netloc dl = self.cache.pop(host, None) if not dl: dl = _ExternalDownloader() @@ -2175,7 +2185,7 @@ class _ExternalDownloaderPool: ret.extend(done) # dl finished, move it to the cache - host = urlparse.urlsplit(done[0][0].url).netloc + host = urllib.parse.urlsplit(done[0][0].url).netloc if host in self.cache: self.cache[host].abort() self.epoll.unregister(fd) self.cache[host] = 
self.running.pop(fd) @@ -2190,7 +2200,7 @@ class _ExternalDownloaderPool: ##################################################################### -# High level async API +# High level myasync API ##################################################################### _async_queue = [] @@ -2220,13 +2230,13 @@ def parallel_wait(meter=None): opts.tries = tries try: dl.start(opts) - except OSError, e: + except OSError as e: # can't spawn downloader, give up immediately opts.exception = URLGrabError(5, exception2msg(e)) _run_callback(opts.failfunc, opts) return - key, limit = opts.async + key, limit = opts.myasync host_con[key] = host_con.get(key, 0) + 1 if opts.progress_obj: if opts.multi_progress_obj: @@ -2237,13 +2247,13 @@ def parallel_wait(meter=None): def perform(): for opts, size, ug_err in dl.perform(): - key, limit = opts.async + key, limit = opts.myasync host_con[key] -= 1 if ug_err is None: if opts.checkfunc: try: _run_callback(opts.checkfunc, opts) - except URLGrabError, ug_err: pass + except URLGrabError as ug_err: pass if opts.progress_obj: if opts.multi_progress_obj: @@ -2274,7 +2284,7 @@ def parallel_wait(meter=None): if opts.failure_callback: opts.exception = ug_err try: _run_callback(opts.failure_callback, opts) - except URLGrabError, ug_err: + except URLGrabError as ug_err: retry = 0 # no retries if opts.tries < retry and ug_err.errno in opts.retrycodes: if ug_err.errno < 0 and opts.retry_no_cache: @@ -2364,7 +2374,7 @@ def parallel_wait(meter=None): # update the current mirror and limit key = best['mirror'] limit = best.get('kwargs', {}).get('max_connections') - opts.async = key, limit + opts.myasync = key, limit # update URL and proxy url = mg._join_url(key, opts.relative_url) @@ -2373,7 +2383,7 @@ def parallel_wait(meter=None): opts.url = url # check host limit, then start - key, limit = opts.async + key, limit = opts.myasync if key in single: limit = 1 while host_con.get(key, 0) >= (limit or 2): @@ -2382,7 +2392,7 @@ def parallel_wait(meter=None): DEBUG.info('max_connections(%s): %d/%s', key, host_con.get(key, 0), limit) start(opts, 1) - except IOError, e: + except IOError as e: if e.errno != 4: raise raise KeyboardInterrupt @@ -2435,7 +2445,7 @@ class _TH: def update(url, dl_size, dl_time, ug_err, baseurl=None): # Use hostname from URL. If it's a file:// URL, use baseurl. # If no baseurl, do not update timedhosts. - host = urlparse.urlsplit(url).netloc.split('@')[-1] or baseurl + host = urllib.parse.urlsplit(url).netloc.split('@')[-1] or baseurl if not host: return _TH.load() @@ -2467,7 +2477,7 @@ class _TH: _TH.load() # Use just the hostname, unless it's a file:// baseurl. 
-        host = urlparse.urlsplit(baseurl).netloc.split('@')[-1] or baseurl
+        host = urllib.parse.urlsplit(baseurl).netloc.split('@')[-1] or baseurl
         default_speed = default_grabber.opts.default_speed
         try: speed, fail, ts = _TH.hosts[host]
@@ -2483,68 +2493,67 @@ def _main_test():
     try: url, filename = sys.argv[1:3]
     except ValueError:
-        print 'usage:', sys.argv[0], \
-              '<url> <filename> [copy_local=0|1] [close_connection=0|1]'
+        print('usage:', sys.argv[0],
+              '<url> <filename> [copy_local=0|1] [close_connection=0|1]')
         sys.exit()
 
     kwargs = {}
     for a in sys.argv[3:]:
-        k, v = string.split(a, '=', 1)
+        k, v = a.split('=', 1)
         kwargs[k] = int(v)
 
     set_throttle(1.0)
     set_bandwidth(32 * 1024)
-    print "throttle: %s,  throttle bandwidth: %s B/s" % (default_grabber.throttle,
-                                                         default_grabber.bandwidth)
+    print("throttle: %s,  throttle bandwidth: %s B/s" % (default_grabber.throttle,
+                                                         default_grabber.bandwidth))
 
     try: from progress import text_progress_meter
-    except ImportError, e: pass
+    except ImportError as e: pass
     else: kwargs['progress_obj'] = text_progress_meter()
 
-    try: name = apply(urlgrab, (url, filename), kwargs)
-    except URLGrabError, e: print e
-    else: print 'LOCAL FILE:', name
+    try: name = urlgrab(url, filename, **kwargs)
+    except URLGrabError as e: print(e)
+    else: print('LOCAL FILE:', name)
 
 def _retry_test():
     try: url, filename = sys.argv[1:3]
     except ValueError:
-        print 'usage:', sys.argv[0], \
-              '<url> <filename> [copy_local=0|1] [close_connection=0|1]'
+        print('usage:', sys.argv[0],
+              '<url> <filename> [copy_local=0|1] [close_connection=0|1]')
         sys.exit()
 
     kwargs = {}
     for a in sys.argv[3:]:
-        k, v = string.split(a, '=', 1)
+        k, v = a.split('=', 1)
         kwargs[k] = int(v)
 
     try: from progress import text_progress_meter
-    except ImportError, e: pass
+    except ImportError as e: pass
     else: kwargs['progress_obj'] = text_progress_meter()
 
     def cfunc(filename, hello, there='foo'):
-        print hello, there
+        print(hello, there)
        import random
        rnum = random.random()
        if rnum < .5:
-            print 'forcing retry'
+            print('forcing retry')
            raise URLGrabError(-1, 'forcing retry')
        if rnum < .75:
-            print 'forcing failure'
+            print('forcing failure')
            raise URLGrabError(-2, 'forcing immediate failure')
-        print 'success'
+        print('success')
        return
 
     kwargs['checkfunc'] = (cfunc, ('hello',), {'there':'there'})
-    try: name = apply(retrygrab, (url, filename), kwargs)
-    except URLGrabError, e: print e
-    else: print 'LOCAL FILE:', name
+    try: name = retrygrab(url, filename, **kwargs)
+    except URLGrabError as e: print(e)
+    else: print('LOCAL FILE:', name)
 
 def _file_object_test(filename=None):
-    import cStringIO
     if filename is None:
         filename = __file__
-    print 'using file "%s" for comparisons' % filename
+    print('using file "%s" for comparisons' % filename)
     fo = open(filename)
     s_input = fo.read()
     fo.close()
@@ -2553,14 +2562,14 @@ def _file_object_test(filename=None):
                      _test_file_object_readall,
                      _test_file_object_readline,
                      _test_file_object_readlines]:
-        fo_input = cStringIO.StringIO(s_input)
-        fo_output = cStringIO.StringIO()
+        fo_input = StringIO(s_input)
+        fo_output = StringIO()
         wrapper = PyCurlFileObject(fo_input, None, 0)
-        print 'testing %-30s ' % testfunc.__name__,
+        print('testing %-30s ' % testfunc.__name__)
         testfunc(wrapper, fo_output)
         s_output = fo_output.getvalue()
-        if s_output == s_input: print 'passed'
-        else: print 'FAILED'
+        if s_output == s_input: print('passed')
+        else: print('FAILED')
@@ -2580,7 +2589,7 @@ def _test_file_object_readline(wrapper, fo_output):
 
 def _test_file_object_readlines(wrapper, fo_output):
     li = wrapper.readlines()
-    fo_output.write(string.join(li, ''))
+    fo_output.write(''.join(li))
 
 if __name__ == '__main__':
     _main_test()
--- urlgrabber-3.10.2.orig/urlgrabber/mirror.py
+++ urlgrabber-3.10.2/urlgrabber/mirror.py
@@ -100,6 +100,9 @@ from grabber import _run_callback, _do_raise
 from grabber import exception2msg
 from grabber import _TH
 
+import six
+
+
 def _(st):
     return st
@@ -286,7 +289,7 @@ class MirrorGroup:
     def _parse_mirrors(self, mirrors):
         parsed_mirrors = []
         for m in mirrors:
-            if isinstance(m, basestring):
+            if isinstance(m, six.string_types):
                 m = {'mirror': _to_utf8(m)}
             parsed_mirrors.append(m)
         return parsed_mirrors
@@ -423,7 +426,7 @@ class MirrorGroup:
         if DEBUG: DEBUG.info('MIRROR: trying %s -> %s', url, fullurl)
         try:
             return func_ref( *(fullurl,), opts=opts, **kw )
-        except URLGrabError, e:
+        except URLGrabError as e:
             if DEBUG: DEBUG.info('MIRROR: failed')
             gr.errors.append((fullurl, exception2msg(e)))
             obj = CallbackObject()
@@ -437,8 +440,8 @@ class MirrorGroup:
     def urlgrab(self, url, filename=None, **kwargs):
         kw = dict(kwargs)
         kw['filename'] = filename
-        if kw.get('async'):
-            # enable mirror failovers in async path
+        if kw.get('myasync'):
+            # enable mirror failovers in myasync path
             kw['mirror_group'] = self, [], {}, set()
             kw['relative_url'] = url
         else:
@@ -446,7 +449,7 @@ class MirrorGroup:
         func = 'urlgrab'
         try:
             return self._mirror_try(func, url, kw)
-        except URLGrabError, e:
+        except URLGrabError as e:
             obj = CallbackObject(url=url, filename=filename, exception=e, **kwargs)
             return _run_callback(kwargs.get('failfunc', _do_raise), obj)
--- urlgrabber-3.10.2.orig/urlgrabber/progress.py
+++ urlgrabber-3.10.2/urlgrabber/progress.py
@@ -778,7 +778,7 @@ def format_number(number, SI=0, space=' '):
         depth  = depth + 1
         number = number / step
 
-    if type(number) == type(1) or type(number) == type(1L):
+    if type(number) == type(1):
         # it's an int or a long, which means it didn't get divided,
         # which means it's already short enough
         format = '%i%s%s'
@@ -806,7 +806,7 @@ def _tst(fn, cur, tot, beg, size, *args):
     tm.end(size)
 
 def _mtst(datas, *args):
-    print '-' * 79
+    print('-' * 79)
     tm = TextMultiFileMeter(threaded=False)
 
     dl_sizes = {}
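Two conversion details in there are easy to get backwards, so please double-check them while reviewing: string.join(seq, sep) becomes sep.join(seq) (the separator string is the receiver, not the sequence), and on Python 3 the name bound by "except SomeError as e" is deleted when the handler exits, so an exception that must outlive its handler has to be copied into another variable first. A small illustration (not from the patch itself):

    # sep.join(seq): the separator string is the receiver of join().
    assert ''.join(['a', 'b', 'c']) == 'abc'

    # Python 3 deletes the "as" target at the end of the except block;
    # keep an explicit reference if the exception is needed later.
    err = None
    try:
        raise IOError('boom')
    except IOError as e:
        err = e
    print(err)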
_______________________________________________
Yum-devel mailing list
Yum-devel@lists.baseurl.org
http://lists.baseurl.org/mailman/listinfo/yum-devel