On Fri, 2011-10-21 at 16:28 +0200, Zdeněk Pavlas wrote:
> When executed with a single argument 'DOWNLOADER', grabber.py
> parses download requests on stdin, and reports the results to stdout.
> ---
>  urlgrabber/grabber.py |   57 
> ++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 files changed, 56 insertions(+), 1 deletions(-)

 Ok, so this is the contained external downloader ... 

> diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
> index b64c943..6d75c31 100644
> --- a/urlgrabber/grabber.py
> +++ b/urlgrabber/grabber.py
> @@ -455,7 +455,7 @@ import pycurl
>  from ftplib import parse150
>  from StringIO import StringIO
>  from httplib import HTTPException
> -import socket
> +import socket, select
>  from byterange import range_tuple_normalize, range_tuple_to_header, 
> RangeError
>  
>  try:
> @@ -1899,6 +1899,58 @@ class _DirectDownloader:
>              fo._do_close_fo()
>              os.unlink(fo.opts.filename)
>  
> +class _ProxyProgress:
> +    def start(*d1, **d2): pass
> +    def update(self, _amount_read):
> +        os.write(1, '%d %d\n' % (self._id, _amount_read))

 Don't you need "end" here too?

> +import simplejson

 Is this really necessary? How big is the cost of importing it?

> +def download_process():
> +    ''' Download process
> +        - watch stdin for new requests, parse & issue em.
> +        - use ProxyProgress to send _amount_read during dl.
> +        - abort on EOF.
> +    '''
> +    dl = _DirectDownloader()
> +    cnt = tout = 0
> +    while True:
> +        fdset = dl.multi.fdset()
> +        fdset[0].append(0)
> +        if 0 in select.select(*(fdset + (tout,)))[0]:

 Again, select.poll() code is going to be 666 times easier to read.

> +            buf = os.read(0, 4096)
> +            if not buf: break # EOF
> +            while buf:
> +                try: line, buf = buf.split('\n', 1)
> +                except ValueError:
> +                    buf += os.read(0, 4096)
> +                    continue

 This is basically a blocking readline() call, which we can probably
live with (although it can be slow). But at least put it behind some
method.

> +                # start new download
> +                cnt += 1
> +                opts = URLGrabberOptions()
> +                opts._id = cnt
> +                opts.progress_obj = _ProxyProgress()
> +                opts.progress_obj._id = cnt
> +                for k in line.split(' '):
> +                    k, v = k.split('=', 1)
> +                    v = urllib.unquote(v)
> +                    v = simplejson.loads(v)
> +                    setattr(opts, k, v)
> +                dl.start(opts)
> +
> +            # XXX: likely a CurlMulti() bug
> +            # fdset() is empty shortly after starting new request.
> +            # Do some polling to work this around.
> +            tout = 10e-3

 Shocker, workarounds for CurlMulti weirdness.

> +        # perform requests
> +        for opts, ug_err, _amount_read in dl.perform():
> +            ug_err = ug_err and '%d %s' % ug_err.args or 'OK'
> +            os.write(1, '%d %d %s\n' % (opts._id, _amount_read, ug_err))
> +        tout = min(tout * 1.1, 5)
> +    dl.abort()
> +    sys.exit(0)
> +
>  
>  #####################################################################
>  #  High level async API
> @@ -2122,6 +2174,9 @@ def _test_file_object_readlines(wrapper, fo_output):
>      fo_output.write(string.join(li, ''))
>  
>  if __name__ == '__main__':
> +    if sys.argv[1:] == ['DOWNLOADER']:
> +        download_process()

 Is it a big benefit to use __file__ instead of creating something in
libexec/whatever?

>      _main_test()
>      _retry_test()
>      _file_object_test('test')


_______________________________________________
Yum-devel mailing list
Yum-devel@lists.baseurl.org
http://lists.baseurl.org/mailman/listinfo/yum-devel

Reply via email to