I suspect you're going to have to subclass the urllib2 Opener class(es) and use a timeoutsocket where they create sockets. Making this an option upstream would be a valuable addition to Python, FWIW.

The other way to do this would be to ditch urllib2 and write a very simple HTTP client using asyncore (which is far more malleable). One that I created based on some code I found floating around in RDFLib is attached.

# this code based on Daniel Krech's RDFLib HTTP client code (see rdflib.net)

import sys
import socket
import asyncore
import asynchat
import base64
from urlparse import urlparse

# Line-ending characters; CRLF is the terminator used when framing HTTP lines.
CR = "\r"
LF = "\n"
CRLF = CR + LF

class Listener(object):
    """Default receiver for the events reported by an HTTPHandler.

    Every callback gets the request URL as its first argument.  All
    callbacks are no-ops except feed(), which copies the response body to
    standard output.  Subclass and override to consume a response.
    """

    def status(self, url, status):
        """Called with the numeric status code of the response."""
        pass

    def error(self, url, error):
        """Called with a description of a failure for ``url``."""
        pass

    def response_header(self, url, name, value):
        """Called once per response header (lower-cased name, stripped value)."""
        pass

    def done(self, url):
        """Called when the complete response body has been received."""
        pass

    def feed(self, url, data):
        """Write one fragment of the response body to standard output."""
        # Fragments arrive at arbitrary boundaries, so they are written
        # verbatim; a ``print`` statement would append a newline after every
        # fragment and corrupt the body.
        sys.stdout.write(data)
        sys.stdout.flush()

    def close(self, url):
        """Called when the handler closes its connection."""
        pass

class HTTPHandler(asynchat.async_chat, object):
    """Minimal asynchronous HTTP/1.1 GET client built on asynchat.

    The response is parsed incrementally by a small state machine:
    ``self.part`` always holds the bound method that handles the next
    terminator-delimited piece of input (status line, header line, chunk
    size, chunk body, trailer or plain body).  Progress is reported to
    ``listener`` through its status/response_header/feed/done/error/close
    callbacks, each of which receives the request URL as first argument.

    ``async_chat`` is listed before ``object`` so that its methods take
    precedence over ``object``'s and the base order stays valid when
    ``async_chat`` is itself a new-style class.
    """

    def __init__(self, listener, username='', password=None):
        """Create an idle handler.

        listener -- object providing the callbacks described on the class.
        username, password -- optional HTTP Basic credentials; they are
            only sent when ``password`` is truthy.
        """
        asynchat.async_chat.__init__(self)
        self.listener = listener
        self.user_agent = 'Supervisor HTTP Client'
        self.buffer = ''              # data collected since the last terminator
        self.set_terminator(CRLF)     # start out reading CRLF-terminated lines
        self.connected = 0
        self.part = self.status_line  # parser state: handler for the next piece
        self.chunk_size = 0
        self.chunk_read = 0
        self.length_read = 0
        self.length = 0               # Content-Length, or sum of chunk sizes
        self.encoding = None          # value of the Transfer-Encoding header
        self.username = username
        self.password = password
        self.url = None
        self.error_handled = False

    def get(self, url):
        """Start an asynchronous GET request for ``url``.

        Only ``http`` URLs are supported.  This creates the socket and
        initiates the connection; the request itself is sent from
        handle_connect().

        Raises AssertionError if the handler was already given a URL and
        NotImplementedError for any scheme other than ``http``.
        """
        # The former ``assert(cond, msg)`` tested a non-empty tuple and so
        # could never fail; check explicitly (this also survives ``-O``).
        if self.url is not None:
            raise AssertionError("Already doing a get")
        self.url = url
        scheme, host, path, params, query, fragment = urlparse(url)
        if scheme != "http":
            raise NotImplementedError
        self.host = host

        hostname, port = host, 80
        if ":" in host:
            hostname, port_text = host.split(":", 1)
            if port_text:  # "host:" with an empty port keeps the default
                port = int(port_text)

        self.path = self._request_path(path, params, query)
        self.port = port

        self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
        self.connect((hostname, self.port))

    @staticmethod
    def _request_path(path, params, query):
        """Return the request target for the request line.

        An empty path becomes "/", and the ";params" and "?query" parts are
        appended only when they are non-empty.
        """
        target = path or "/"
        if params:
            target = "%s;%s" % (target, params)
        if query:
            target = "%s?%s" % (target, query)
        return target

    def close(self):
        """Notify the listener, drop the channel and close the socket."""
        self.listener.close(self.url)
        self.connected = 0
        self.del_channel()
        self.socket.close()
        self.url = "CLOSED"

    def header(self, name, value):
        """Queue one CRLF-terminated request header line."""
        self.push('%s: %s' % (name, value))
        self.push(CRLF)

    def handle_error(self):
        """Report the exception currently being handled, then close.

        Only the first invocation does anything; later ones return at once.
        """
        if self.error_handled:
            return
        # Mark as handled first so a failure while closing cannot re-enter.
        self.error_handled = True
        t, v = sys.exc_info()[:2]
        msg = 'Cannot connect to %s, error: %s (%s)' % (self.url, t, v)
        self.part = self.ignore
        print(msg)
        # Report before close(), which overwrites self.url with "CLOSED".
        self.listener.error(self.url, msg)
        self.close()

    def handle_connect(self):
        """Send the GET request once the connection is established."""
        self.connected = 1
        method = "GET"
        version = "HTTP/1.1"
        self.push("%s %s %s" % (method, self.path, version))
        self.push(CRLF)
        self.header("Host", self.host)

        self.header('Accept-Encoding', 'chunked')
        self.header('Accept', '*/*')
        self.header('User-agent', self.user_agent)
        if self.password:
            credentials = '%s:%s' % (self.username, self.password)
            # b64encode emits a single line; encodestring would insert line
            # breaks into long credentials and corrupt the header.
            token = base64.b64encode(credentials)
            self.header('Authorization', 'Basic %s' % token)
        # header() already terminated the last header line, so the first
        # CRLF ends the header block; the second one is kept from the
        # original implementation and sends one extra empty line.
        self.push(CRLF)
        self.push(CRLF)

    def feed(self, data):
        """Forward a fragment of the response body to the listener."""
        self.listener.feed(self.url, data)

    def collect_incoming_data(self, bytes):
        """Accumulate incoming data; stream it straight out while in the body."""
        self.buffer = self.buffer + bytes
        if self.part == self.body:
            self.feed(self.buffer)
            self.buffer = ''

    def found_terminator(self):
        """Run the current parser state on the buffered piece, then reset."""
        self.part()
        self.buffer = ''

    def ignore(self):
        """Parser state that discards whatever was received."""
        self.buffer = ''

    def status_line(self):
        """Parse the status line and pick the next parser state.

        Returns (version, status, reason); ``reason`` is '' when the server
        sent no reason phrase.  Raises ValueError for a line that is not an
        HTTP status line.  Any status other than 200 is reported through
        the listener's error callback and closes the connection.
        """
        line = self.buffer
        fields = line.split(None, 2)
        if len(fields) < 2 or not fields[0].startswith('HTTP/'):
            raise ValueError(line)
        version = fields[0]
        status = int(fields[1])
        reason = ''
        if len(fields) == 3:
            reason = fields[2]

        self.listener.status(self.url, status)

        if status == 200:
            self.part = self.headers
        else:
            self.part = self.ignore
            msg = 'Cannot read %s, status code %s' % (self.url, status)
            print(msg)
            self.listener.error(self.url, msg)
            self.close()
        return version, status, reason

    def headers(self):
        """Parse one header line; an empty line switches to the body state."""
        line = self.buffer
        if not line:
            if self.encoding is not None and self.encoding.lower() == "chunked":
                self.part = self.chunked_size
            else:
                self.part = self.body
                # Read exactly Content-Length bytes before body() fires.
                # NOTE(review): length is 0 when no Content-Length header
                # was sent -- confirm how asynchat treats a zero terminator.
                self.set_terminator(self.length)
            return
        if ":" not in line:
            # Not a "name: value" line (e.g. a folded continuation); skip it
            # rather than aborting the whole response.
            return
        name, value = line.split(":", 1)
        if name and value:
            name = name.lower()
            value = value.strip()
            if name == "transfer-encoding":
                self.encoding = value
            elif name == "content-length":
                self.length = int(value)
            self.response_header(name, value)

    def response_header(self, name, value):
        """Forward one parsed response header to the listener."""
        self.listener.response_header(self.url, name, value)

    def body(self):
        """Parser state reached when the fixed-length body is complete."""
        self.done()
        self.close()

    def done(self):
        """Tell the listener that the whole response has been received."""
        self.listener.done(self.url)

    def chunked_size(self):
        """Parse a chunk-size line and arrange to read that many bytes."""
        line = self.buffer
        if not line:
            # Empty line: the CRLF that follows the previous chunk's data.
            return
        # The size is hexadecimal and may be followed by ";extension" text.
        chunk_size = int(line.split(";", 1)[0].strip(), 16)
        if chunk_size == 0:
            self.part = self.trailer
        else:
            self.set_terminator(chunk_size)
            self.part = self.chunked_body
        self.length += chunk_size

    def chunked_body(self):
        """Deliver one complete chunk and go back to reading size lines."""
        line = self.buffer
        self.set_terminator(CRLF)
        self.part = self.chunked_size
        self.feed(line)

    def trailer(self):
        """Consume trailer lines after the last chunk; finish on the empty one."""
        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.6.1
        # trailer        = *(entity-header CRLF)
        line = self.buffer
        # The CRLF terminator is never part of the buffer, so the line that
        # ends the trailer section shows up here as an empty string.
        if not line:
            self.done()
            self.close()

if __name__ == '__main__':
    # Command-line demo: fetch the URL given as the first argument with the
    # default Listener, which writes the response body to standard output.
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s URL\n" % sys.argv[0])
        sys.exit(2)
    url = sys.argv[1]
    listener = Listener()
    handler = HTTPHandler(listener)
    try:
        handler.get(url)
    except Exception:
        # Fetch the exception via sys.exc_info() to avoid the
        # version-specific "except X, e" syntax.
        e = sys.exc_info()[1]
        listener.error(url, "Error connecting '%s'" % e)

    # Run the event loop until the handler has closed its channel.
    asyncore.loop()
    print("-")


On May 29, 2006, at 7:27 PM, Michael Vartanyan wrote:

Hello All,

This is probably more a Python question but maybe you will have a quick solution for me - I guess the greatest *multi-threaded* Python application provides the greatest basis for this problem domain :-)

The situation: an external method is making an HTTP request using urllib2. I don't care about the response, or whether there was any response at all; I just need to send the request through - thus I want this request to time out very fast (let it be 2 seconds). I found no documented way to set the timeout for urllib2; after some googling I found advice to manipulate the timeout on the lower-level socket module, something like this:

import socket
import urllib2

def do_request():
 # Illustrates the workaround being asked about: send an HTTP request with a
 # short timeout by changing the socket module's default timeout.
 timeout = 2  # seconds
 # Sets the default for sockets created afterwards through the socket
 # module, i.e. process-wide -- the drawback described in this message.
 socket.setdefaulttimeout(timeout)
 req = urllib2.Request(url='http://my.site.com/do_something_quick')
 response = urllib2.urlopen(req)  # the response itself is not used

The problem is that this way the default timeout is set for _all_ new sockets created by this Python process using the socket module. Even if I return the default to its previous state after the request it won't help me much - there are three more threads in my Zope that should be able to work with the default timeout. So there are two possible solutions - to find a (preferably documented) way of accessing and parameterizing the socket object created by urllib2 to make a request or to find a way to isolate(??) global module settings between Zope threads.

Zope 2.8.3, Python 2.4.2, FreeBSD 4.10 if this is relevant.

Any hints/TFMs?

Many thanks
Michael

_______________________________________________
Zope maillist  -  Zope@zope.org
http://mail.zope.org/mailman/listinfo/zope
**   No cross posts or HTML encoding!  **
(Related lists - http://mail.zope.org/mailman/listinfo/zope-announce
http://mail.zope.org/mailman/listinfo/zope-dev )


_______________________________________________
Zope maillist  -  Zope@zope.org
http://mail.zope.org/mailman/listinfo/zope
**   No cross posts or HTML encoding!  **
(Related lists - 
 http://mail.zope.org/mailman/listinfo/zope-announce
 http://mail.zope.org/mailman/listinfo/zope-dev )

Reply via email to