Oops, I forgot some important quoting (important for the algorithm,
maybe not actually for the discussion)...

from urllib.parse import urlsplit, urlunsplit
import encodings.idna

# urllib.parse.quote both always returns str, and is not as
# conservative in quoting as required here...
def quote_unsafe_bytes(b):
    """Percent-escape control and non-ASCII bytes, copying the rest as-is.

    Every byte below 0x20 or at/above 0x80 is replaced by its ``%XX``
    escape (two uppercase hex digits); every other byte is kept unchanged.
    Returns a new ``bytes`` object.
    """
    quoted = bytearray()
    for byte in b:
        if 0x20 <= byte < 0x80:
            # Printable-ASCII range (plus DEL): passed through untouched.
            quoted.append(byte)
        else:
            quoted += ('%%%02X' % byte).encode('ASCII')
    return bytes(quoted)

def encode_http_url(url, page_encoding='ASCII', errors='strict'):
    """Encode a text HTTP URL into an ASCII-only ``bytes`` URL.

    Each component is encoded by its own rule:

    * scheme and port -- plain ASCII;
    * host -- IDNA, label by label;
    * userinfo, path and fragment -- UTF-8, then ``quote_unsafe_bytes``;
    * query -- *page_encoding*, then ``quote_unsafe_bytes``.

    Args:
        url: The URL as ``str``.
        page_encoding: Codec used to encode the query string.
        errors: Error handler passed to every ``str.encode`` call except
            the IDNA one (the ``idna`` codec only supports ``'strict'``).

    Returns:
        The reassembled URL as ``bytes``.

    Raises:
        UnicodeError: If a component cannot be encoded under *errors*, or
            the host is not a valid IDNA name.
    """
    scheme, netloc, path, query, fragment = urlsplit(url)

    # Userinfo ends at the LAST '@' (same rule urllib uses), so a stray
    # '@' inside the credentials does not leak into the host.
    auth, _, hostport = netloc.rpartition('@')

    # Split the port off at the last ':'; if what follows contains ']' the
    # colon belongs to a bracketed IPv6 literal and there is no port.
    host, colon, port = hostport.rpartition(':')
    if not colon or ']' in port:
        host, port = hostport, ''

    # The 'idna' codec encodes each dot-separated label on its own and
    # accepts an empty host; encodings.idna.ToASCII handles only ONE label
    # and raises on an empty string.
    netloc_bytes = host.encode('idna')
    if port:
        netloc_bytes += b':' + port.encode('ASCII', errors)
    if auth:
        netloc_bytes = (quote_unsafe_bytes(auth.encode('UTF-8', errors))
                        + b'@' + netloc_bytes)

    # urlunsplit accepts an all-bytes tuple and returns bytes; every part
    # is pure ASCII by this point.
    return urlunsplit((
        scheme.encode('ASCII', errors),
        netloc_bytes,
        quote_unsafe_bytes(path.encode('UTF-8', errors)),
        quote_unsafe_bytes(query.encode(page_encoding, errors)),
        quote_unsafe_bytes(fragment.encode('UTF-8', errors)),
    ))



--
Ian Bicking  |  http://blog.ianbicking.org
_______________________________________________
Python-Dev mailing list
Python-Dev@python.org
http://mail.python.org/mailman/listinfo/python-dev
Unsubscribe: 
http://mail.python.org/mailman/options/python-dev/archive%40mail-archive.com

Reply via email to