Oops, I forgot some important quoting (important for the algorithm, maybe not actually for the discussion)...
from urllib.parse import urlsplit, urlunsplit
import encodings.idna


# urllib.parse.quote always returns str, and is not as conservative in
# quoting as required here, so the escaping is done by hand on bytes.
def quote_unsafe_bytes(b: bytes) -> bytes:
    """Percent-escape the control and non-ASCII bytes of *b*.

    Bytes below 0x20 and bytes at or above 0x80 are replaced by ``%XX``
    (upper-case hex).  Every other byte -- including space, ``%`` and
    0x7F -- is copied through unchanged, so existing escapes in the input
    are not double-quoted.
    """
    result = bytearray()
    for c in b:
        if c < 0x20 or c >= 0x80:
            result.extend(('%%%02X' % c).encode('ASCII'))
        else:
            result.append(c)
    return bytes(result)


def encode_http_url(url: str, page_encoding: str = 'ASCII',
                    errors: str = 'strict') -> bytes:
    """Encode a text URL into the bytes that go on the wire.

    The URL is split into its five components and each one is encoded
    with the rule that applies to it:

    * scheme and port -- ASCII;
    * host -- IDNA, label by label;
    * userinfo, path and fragment -- UTF-8, then percent-escaped with
      :func:`quote_unsafe_bytes`;
    * query -- *page_encoding*, then percent-escaped the same way.

    Args:
        url: The URL as text.
        page_encoding: Encoding used for the query string.
        errors: Error handler passed to every ``str.encode`` call except
            the host, which is always encoded strictly because the
            ``idna`` codec supports no other handler.

    Returns:
        The re-assembled URL as ``bytes``.

    Raises:
        UnicodeError: If a component cannot be encoded with its encoding
            (under the given *errors* handler) or the host is not a valid
            IDNA name.
    """
    scheme, netloc, path, query, fragment = urlsplit(url)
    scheme = scheme.encode('ASCII', errors)

    # Peel "userinfo@" and ":port" off the network location so that only
    # the bare host name is handed to the IDNA codec.
    auth = port = None
    if '@' in netloc:
        auth, netloc = netloc.split('@', 1)
    if ':' in netloc:
        netloc, port = netloc.split(':', 1)

    # Use the codec rather than encodings.idna.ToASCII(): ToASCII() encodes
    # a single label, so it mangles dotted non-ASCII host names and rejects
    # the empty host of a relative URL.  The codec splits on dots itself
    # and maps an empty host to b''.
    netloc = netloc.encode('idna')
    if port:
        netloc = netloc + b':' + port.encode('ASCII', errors)
    if auth:
        netloc = quote_unsafe_bytes(auth.encode('UTF-8', errors)) + b'@' + netloc

    path = quote_unsafe_bytes(path.encode('UTF-8', errors))
    query = quote_unsafe_bytes(query.encode(page_encoding, errors))
    fragment = quote_unsafe_bytes(fragment.encode('UTF-8', errors))

    # urlunsplit() accepts an all-bytes tuple and then returns bytes.
    return urlunsplit((scheme, netloc, path, query, fragment))


# --
# Ian Bicking | http://blog.ianbicking.org
# _______________________________________________
# Python-Dev mailing list
# Python-Dev@python.org
# http://mail.python.org/mailman/listinfo/python-dev
# Unsubscribe: http://mail.python.org/mailman/options/python-dev/archive%40mail-archive.com