https://github.com/python/cpython/commit/0f1f7c7889873deb7c2e2c3f18695bf636e7752c commit: 0f1f7c7889873deb7c2e2c3f18695bf636e7752c branch: main author: Paper Moon <[email protected]> committer: orsenthil <[email protected]> date: 2026-05-30T19:08:18-07:00 summary:
gh-141444: Fix broken URLs and examples in urllib.request.rst (#144863) * Doc: fix broken URLs and examples in urllib.request.rst (gh-141444) * Doc: update urllib.request examples to handle gzip compression --------- Co-authored-by: Senthil Kumaran <[email protected]> files: M Doc/library/urllib.request.rst diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst index 64e915d042d4a0..03518d49d437ce 100644 --- a/Doc/library/urllib.request.rst +++ b/Doc/library/urllib.request.rst @@ -1051,7 +1051,7 @@ AbstractBasicAuthHandler Objects *headers* should be the error headers. *host* is either an authority (e.g. ``"python.org"``) or a URL containing an - authority component (e.g. ``"http://python.org/"``). In either case, the + authority component (e.g. ``"https://python.org/"``). In either case, the authority must not contain a userinfo component (so, ``"python.org"`` and ``"python.org:80"`` are fine, ``"joe:password@python.org"`` is not). @@ -1247,10 +1247,14 @@ This example gets the python.org main page and displays the first 300 bytes of it:: >>> import urllib.request - >>> with urllib.request.urlopen('http://www.python.org/') as f: - ... print(f.read(300)) - ... - b'<!doctype html>\n<!--[if lt IE 7]> <html class="no-js ie6 lt-ie7 lt-ie8 lt-ie9"> <![endif]-->\n<!--[if IE 7]> <html class="no-js ie7 lt-ie8 lt-ie9"> <![endif]-->\n<!--[if IE 8]> <html class="no-js ie8 lt-ie9"> + >>> with urllib.request.urlopen('https://www.python.org/') as f: + ... # The response may be compressed (for example, 'gzip'). + ... print(f.headers.get('Content-Encoding')) + ... data = f.read() + ... if f.headers.get('Content-Encoding') == 'gzip': + ... import gzip + ... data = gzip.decompress(data) + ... print(data[:300].decode('utf-8', errors='replace')) Note that urlopen returns a bytes object. 
This is because there is no way for urlopen to automatically determine the encoding of the byte stream @@ -1267,26 +1271,30 @@ For additional information, see the W3C document: https://www.w3.org/Internation As the python.org website uses *utf-8* encoding as specified in its meta tag, we will use the same for decoding the bytes object:: - >>> with urllib.request.urlopen('http://www.python.org/') as f: - ... print(f.read(100).decode('utf-8')) + >>> with urllib.request.urlopen('https://www.python.org/') as f: + ... # Check for compression and decode appropriately. + ... enc = f.headers.get('Content-Encoding') + ... data = f.read() + ... if enc == 'gzip': + ... import gzip + ... data = gzip.decompress(data) + ... print(data[:100].decode('utf-8', errors='replace')) ... - <!doctype html> - <!--[if lt IE 7]> <html class="no-js ie6 lt-ie7 lt-ie8 lt-ie9"> <![endif]--> - <!- It is also possible to achieve the same result without using the :term:`context manager` approach:: >>> import urllib.request - >>> f = urllib.request.urlopen('http://www.python.org/') + >>> f = urllib.request.urlopen('https://www.python.org/') >>> try: - ... print(f.read(100).decode('utf-8')) + ... enc = f.headers.get('Content-Encoding') + ... data = f.read() + ... if enc == 'gzip': + ... import gzip + ... data = gzip.decompress(data) + ... print(data[:100].decode('utf-8', errors='replace')) ... finally: ... f.close() - ... - <!doctype html> - <!--[if lt IE 7]> <html class="no-js ie6 lt-ie7 lt-ie8 lt-ie9"> <![endif]--> - <!-- In the following example, we are sending a data-stream to the stdin of a CGI and reading the data it returns to us. 
Note that this example will only work @@ -1357,7 +1365,7 @@ Use the *headers* argument to the :class:`Request` constructor, or:: import urllib.request req = urllib.request.Request('http://www.example.com/') - req.add_header('Referer', 'http://www.python.org/') + req.add_header('Referer', 'https://www.python.org/') # Customize the default User-Agent header value: req.add_header('User-Agent', 'urllib-example/0.1 (Contact: . . .)') with urllib.request.urlopen(req) as f: @@ -1386,7 +1394,7 @@ containing parameters:: >>> import urllib.request >>> import urllib.parse >>> params = urllib.parse.urlencode({'spam': 1, 'eggs': 2, 'bacon': 0}) - >>> url = "http://www.musi-cal.com/cgi-bin/query?%s" % params + >>> url = "https://www.python.org/?%s" % params >>> with urllib.request.urlopen(url) as f: ... print(f.read().decode('utf-8')) ... @@ -1398,7 +1406,7 @@ from urlencode is encoded to bytes before it is sent to urlopen as data:: >>> import urllib.parse >>> data = urllib.parse.urlencode({'spam': 1, 'eggs': 2, 'bacon': 0}) >>> data = data.encode('ascii') - >>> with urllib.request.urlopen("http://requestb.in/xrbl82xr", data) as f: + >>> with urllib.request.urlopen("https://httpbin.org/post", data) as f: ... print(f.read().decode('utf-8')) ... @@ -1408,15 +1416,15 @@ environment settings:: >>> import urllib.request >>> proxies = {'http': 'http://proxy.example.com:8080/'} >>> opener = urllib.request.build_opener(urllib.request.ProxyHandler(proxies)) - >>> with opener.open("http://www.python.org") as f: + >>> with opener.open("https://www.python.org") as f: ... f.read().decode('utf-8') ... The following example uses no proxies at all, overriding environment settings:: >>> import urllib.request - >>> opener = urllib.request.build_opener(urllib.request.ProxyHandler({}})) - >>> with opener.open("http://www.python.org/") as f: + >>> opener = urllib.request.build_opener(urllib.request.ProxyHandler({})) + >>> with opener.open("https://www.python.org/") as f: ... 
f.read().decode('utf-8') ... @@ -1449,7 +1457,7 @@ some point in the future. The following example illustrates the most common usage scenario:: >>> import urllib.request - >>> local_filename, headers = urllib.request.urlretrieve('http://python.org/') + >>> local_filename, headers = urllib.request.urlretrieve('https://python.org/') >>> html = open(local_filename) >>> html.close() _______________________________________________ Python-checkins mailing list -- python-checkins@python.org To unsubscribe send an email to python-checkins-leave@python.org https://mail.python.org/mailman3/lists/python-checkins.python.org Member address: [email protected]
