On Fri, 2011-10-21 at 16:28 +0200, Zdeněk Pavlas wrote:
> When executed with a single argument 'DOWNLOADER', grabber.py
> parses download requests on stdin, and reports the results to stdout.
> ---
>  urlgrabber/grabber.py |   57 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 files changed, 56 insertions(+), 1 deletions(-)
 Ok, so this is the contained external downloader ...

> diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
> index b64c943..6d75c31 100644
> --- a/urlgrabber/grabber.py
> +++ b/urlgrabber/grabber.py
> @@ -455,7 +455,7 @@ import pycurl
>  from ftplib import parse150
>  from StringIO import StringIO
>  from httplib import HTTPException
> -import socket
> +import socket, select
>  from byterange import range_tuple_normalize, range_tuple_to_header, RangeError
>
>  try:
> @@ -1899,6 +1899,58 @@ class _DirectDownloader:
>              fo._do_close_fo()
>              os.unlink(fo.opts.filename)
>
> +class _ProxyProgress:
> +    def start(*d1, **d2): pass
> +    def update(self, _amount_read):
> +        os.write(1, '%d %d\n' % (self._id, _amount_read))

 Don't you need "end" here too?

> +import simplejson

 Is this really necessary ... how big is the cost?

> +def download_process():
> +    ''' Download process
> +        - watch stdin for new requests, parse & issue them.
> +        - use ProxyProgress to send _amount_read during dl.
> +        - abort on EOF.
> +    '''
> +    dl = _DirectDownloader()
> +    cnt = tout = 0
> +    while True:
> +        fdset = dl.multi.fdset()
> +        fdset[0].append(0)
> +        if 0 in select.select(*(fdset + (tout,)))[0]:

 Again, select.poll() code is going to be 666 times easier to read.

> +            buf = os.read(0, 4096)
> +            if not buf: break # EOF
> +            while buf:
> +                try: line, buf = buf.split('\n', 1)
> +                except ValueError:
> +                    buf += os.read(0, 4096)
> +                    continue

 This is basically a blocking readline() call, which we can probably
live with (although it can suck). But at least put it behind some
method.

> +                # start new download
> +                cnt += 1
> +                opts = URLGrabberOptions()
> +                opts._id = cnt
> +                opts.progress_obj = _ProxyProgress()
> +                opts.progress_obj._id = cnt
> +                for k in line.split(' '):
> +                    k, v = k.split('=', 1)
> +                    v = urllib.unquote(v)
> +                    v = simplejson.loads(v)
> +                    setattr(opts, k, v)
> +                dl.start(opts)
> +
> +                # XXX: likely a CurlMulti() bug
> +                # fdset() is empty shortly after starting a new request.
> +                # Do some polling to work around this.
> +                tout = 10e-3

 Shocker, workarounds for CurlMulti weirdness.

> +        # perform requests
> +        for opts, ug_err, _amount_read in dl.perform():
> +            ug_err = ug_err and '%d %s' % ug_err.args or 'OK'
> +            os.write(1, '%d %d %s\n' % (opts._id, _amount_read, ug_err))
> +        tout = min(tout * 1.1, 5)
> +    dl.abort()
> +    sys.exit(0)
> +
>
>  #####################################################################
>  #  High level async API
> @@ -2122,6 +2174,9 @@ def _test_file_object_readlines(wrapper, fo_output):
>      fo_output.write(string.join(li, ''))
>
>  if __name__ == '__main__':
> +    if sys.argv[1:] == ['DOWNLOADER']:
> +        download_process()

 Is it a big benefit to use __file__ instead of creating something in
libexec/whatever?

>      _main_test()
>      _retry_test()
>      _file_object_test('test')
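
 To be concrete about the poll() comment, below is a rough, untested
sketch of what I have in mind. The _poll_fds() name is mine, and I'm
assuming dl.multi is the usual pycurl CurlMulti, so fdset() hands back
the (read, write, exc) triple of fd lists:

    import select

    def _poll_fds(multi, tout):
        ''' poll() based replacement for the select.select() call:
            watch stdin plus whatever CurlMulti is interested in,
            and return True when stdin is readable. '''
        events = {0: select.POLLIN}  # always watch stdin
        rfds, wfds, efds = multi.fdset()
        # A fd can sit in more than one list, and re-registering a fd
        # replaces its mask, so OR the event bits together first.
        for fd in rfds:
            events[fd] = events.get(fd, 0) | select.POLLIN
        for fd in wfds:
            events[fd] = events.get(fd, 0) | select.POLLOUT
        for fd in efds:
            events[fd] = events.get(fd, 0) | select.POLLPRI
        poller = select.poll()
        for fd, mask in events.items():
            poller.register(fd, mask)
        # poll() takes milliseconds where select() takes seconds.
        for fd, ev in poller.poll(int(tout * 1000)):
            if fd == 0 and ev & (select.POLLIN | select.POLLHUP):
                return True
        return False

 Note the milliseconds vs. seconds conversion, and the POLLHUP check so
that EOF on stdin still wakes us up. Both are the kind of detail that is
much easier to see in one small function than inline in the main loop.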
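
 Similarly, for the blocking readline, something like this (again
untested, and _LineReader is a made-up name) would keep the main loop
readable:

    import os

    class _LineReader:
        ''' Buffer reads from a raw fd and hand back complete lines, so
            the blocking read-until-newline logic lives behind one
            method instead of being open-coded in download_process(). '''
        def __init__(self, fd=0):
            self.fd = fd
            self.buf = ''
        def pending(self):
            # True when a complete line is already buffered.
            return '\n' in self.buf
        def readline(self):
            # Return one line without the trailing newline, or None on EOF.
            while '\n' not in self.buf:
                data = os.read(self.fd, 4096)
                if not data:
                    return None
                self.buf += data
            line, self.buf = self.buf.split('\n', 1)
            return line

 Then the main loop is just "line = reader.readline()" plus an EOF
check per wakeup, draining any extra buffered lines via pending(), and
the request parsing stays where it is.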