Python Web Servers and Page Retrievers

Subscriber123 Sun, 08 Apr 2007 14:44:22 -0700

I wrote most of the following script, which is useful both for retrieving
pages from the web and for serving web pages. Since it is so low-level, it is
much more customizable than SimpleHTTPServer, CGIHTTPServer, urllib, or
urllib2 for advanced users. For example, you can easily set your own headers
when retrieving and serving pages, such as the User-Agent header which you
cannot set in either urllib or urllib2.


(sorry for not putting in any comments!)

By the way, I just threw this together quickly, and haven't really had time
to test retrieve() very much. Please let me know if it is buggy.
I guess I should also write a dictToQuery() function. Oh well.


import socket


# Address the server listens on: an empty host string binds to every
# local interface, and 80 is the standard HTTP port.
host=''
port=80

# Listening TCP socket. It is created, bound and put into listening mode
# as soon as this module is loaded; serve() accepts connections from it.
sock=socket.socket(family=socket.AF_INET,type=socket.SOCK_STREAM)
# SO_REUSEADDR lets the address be bound again right after a restart.
sock.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1)
sock.bind((host,port))
# A backlog of 1: serve() handles a single connection per call.
sock.listen(1)

def serve(function=lambda *args:(args[2],200,'OK',{},'')):
    """\
Serve exactly one HTTP request on the module-level listening socket.

    Blocks in sock.accept() until a client connects, then parses the
    request line, the request headers and (when a Content-Length
    header is present) the request body, and calls

        function(method,filename,httpversion,headers,get,post)

    headers is a dict of the request headers, get is the parsed query
    string of the requested path, post is the parsed request body and
    filename has had its query string removed. function() must return
    (httpversion,code,accepted,headers,content) in that order; these
    are written back as the status line, the response headers and the
    body. If you don't pass a function, then
    function=lambda *args:(args[2],200,'OK',{},'')

    The client connection is always closed before returning, even when
    parsing or function() raises; the exception then propagates.
"""
    csock = sock.accept()[0]
    rfile = csock.makefile('r', 0)
    wfile = csock.makefile('w', 0)
    try:
        # Request line: "<method> <path> <version>". partition() keeps a
        # malformed line intact instead of chopping its last character the
        # way slicing with find() == -1 would.
        line = rfile.readline().strip()
        method, _, remainder = line.partition(' ')
        method, remainder = method.strip(), remainder.strip()
        filename, _, httpversion = remainder.partition(' ')
        filename, httpversion = filename.strip(), httpversion.strip()

        # Request headers, up to the first blank line (or end of stream).
        headers = {}
        while True:
            line = rfile.readline().strip()
            print(line)
            if not line:
                break
            key, _, value = line.partition(':')
            headers[key.strip()] = value.strip()

        # Header names are case-insensitive in HTTP, so look the length up
        # without relying on the exact spelling "Content-Length".
        length = 0
        for key in headers:
            if key.lower() == 'content-length':
                try:
                    length = int(headers[key])
                except ValueError:
                    length = 0
        post = ''
        if length > 0:
            post = rfile.read(length)

        get = queryToDict(filename)
        post = queryToDict(post)
        # Hand the callback the bare path, without its query string.
        filename = filename.partition('?')[0]
        print("get: " + repr(get))
        print("post: " + repr(post))

        # Response parts get their own names so the request headers are
        # not shadowed.
        reply = function(method, filename, httpversion, headers, get, post)
        httpversion, code, accepted, outheaders, content = reply
        # NOTE(review): lines are terminated with a bare LF and the body
        # gets one extra trailing LF; HTTP specifies CRLF. The bytes on the
        # wire are deliberately left unchanged here.
        wfile.write("%s %s %s\n" % (httpversion, code, accepted))
        for name in outheaders:
            wfile.write("%s: %s\n" % (name, outheaders[name]))
        wfile.write("\n%s\n" % content)
    finally:
        wfile.close()
        rfile.close()
        csock.close()

def retrieve(host,port=80,method='GET',filename='/',httpversion='HTTP/1.0',
             headers={},post=''):
    """\
Retrieves one web page from:
    http://host:port/filename
with the headers

    Opens a new TCP connection to (host,port), sends the request line,
    the given headers (a dict of name -> value; it is only read, never
    modified) and the post body, then reads the status line and the
    response headers.

    Returns (httpversion,code,accepted,headers,rfile): the three fields
    of the status line as strings, a dict of the response headers, and
    a file object from which the response body can be read. The caller
    should close rfile when done with it.

    If connecting, sending or parsing fails, everything opened here is
    closed and the exception propagates.
"""
    # A dedicated client socket: the module-level sock is bound and
    # listening for serve(), so it cannot also be used to connect out.
    conn = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    rfile = None
    try:
        conn.connect((host, port))
        rfile = conn.makefile('r', 0)
        wfile = conn.makefile('w', 0)
        try:
            wfile.write("%s %s %s\n" % (method, filename, httpversion))
            for name in headers:
                wfile.write("%s: %s\n" % (name, headers[name]))
            wfile.write('\n')
            # NOTE(review): the body line is sent even when post is empty,
            # and no Content-Length header is added for it; the bytes on
            # the wire are deliberately left unchanged here.
            wfile.write("%s\n" % post)
        finally:
            wfile.close()

        # Status line: "<version> <code> <reason phrase>".
        line = rfile.readline().strip()
        httpversion, _, remainder = line.partition(' ')
        httpversion, remainder = httpversion.strip(), remainder.strip()
        code, _, accepted = remainder.partition(' ')
        code, accepted = code.strip(), accepted.strip()

        # Response headers, up to the first blank line (or end of stream).
        response_headers = {}
        while True:
            line = rfile.readline().strip()
            if not line:
                break
            key, _, value = line.partition(':')
            response_headers[key.strip()] = value.strip()
    except Exception:
        if rfile is not None:
            rfile.close()
        raise
    finally:
        # rfile holds its own reference to the connection, so releasing
        # the socket object here does not cut off the response body.
        conn.close()
    return httpversion, code, accepted, response_headers, rfile

def queryToDict(query):
    """Parse a URL or bare query string into a dict of its key=value pairs.

    Everything up to and including the first '?' is discarded, the rest
    is split on '&', and each piece containing '=' is split at its first
    '='. Pieces without '=' are dropped, a repeated key keeps its last
    value, and no URL-decoding is performed.
    """
    _, mark, after = query.partition('?')
    if mark:
        query = after
    pairs = (piece.split('=', 1) for piece in query.split('&') if '=' in piece)
    return dict(pairs)

if __name__=='__main__':
    # Demo server: answer every request with the same small HTML page,
    # echoing back the HTTP version the client used, and number each
    # request on stdout.
    def _go_away(*args):
        return (args[2],200,'OK',{'Content-Type':'text/html'},
                '<h1>Go Away!</h1>')

    i=1
    while True:
        print("\nserve #%d:"%i)
        serve(_go_away)
        i+=1

-- 
http://mail.python.org/mailman/listinfo/python-list

Python Web Servers and Page Retrievers

Reply via email to