https://github.com/python/cpython/commit/2a58923687cbe102550b275ccf025a1b8d2b417e
commit: 2a58923687cbe102550b275ccf025a1b8d2b417e
branch: 3.12
author: Inada Naoki <songofaca...@gmail.com>
committer: methane <songofaca...@gmail.com>
date: 2024-04-16T18:51:06+09:00
summary:

gh-77102: site: try utf-8 and locale encoding when reading .pth file (GH-117802)

(cherry picked from commit 6dc661bc9f65e9923eafbcdbf18bcc57eebbf6a4)

files:
A Misc/NEWS.d/next/Library/2024-04-12-17-37-11.gh-issue-77102.Mk6X_E.rst
M Lib/site.py

diff --git a/Lib/site.py b/Lib/site.py
index 924b2460d96976..b3a4916161244a 100644
--- a/Lib/site.py
+++ b/Lib/site.py
@@ -179,35 +179,44 @@ def addpackage(sitedir, name, known_paths):
         return
     _trace(f"Processing .pth file: {fullname!r}")
     try:
-        # locale encoding is not ideal especially on Windows. But we have used
-        # it for a long time. setuptools uses the locale encoding too.
-        f = io.TextIOWrapper(io.open_code(fullname), encoding="locale")
+        with io.open_code(fullname) as f:
+            pth_content = f.read()
     except OSError:
         return
-    with f:
-        for n, line in enumerate(f):
-            if line.startswith("#"):
-                continue
-            if line.strip() == "":
+
+    try:
+        pth_content = pth_content.decode()
+    except UnicodeDecodeError:
+        # Fallback to locale encoding for backward compatibility.
+        # We will deprecate this fallback in the future.
+        import locale
+        pth_content = pth_content.decode(locale.getencoding())
+        _trace(f"Cannot read {fullname!r} as UTF-8. "
+               f"Using fallback encoding {locale.getencoding()!r}")
+
+    for n, line in enumerate(pth_content.splitlines(), 1):
+        if line.startswith("#"):
+            continue
+        if line.strip() == "":
+            continue
+        try:
+            if line.startswith(("import ", "import\t")):
+                exec(line)
                 continue
-            try:
-                if line.startswith(("import ", "import\t")):
-                    exec(line)
-                    continue
-                line = line.rstrip()
-                dir, dircase = makepath(sitedir, line)
-                if not dircase in known_paths and os.path.exists(dir):
-                    sys.path.append(dir)
-                    known_paths.add(dircase)
-            except Exception as exc:
-                print("Error processing line {:d} of {}:\n".format(n+1, fullname),
-                      file=sys.stderr)
-                import traceback
-                for record in traceback.format_exception(exc):
-                    for line in record.splitlines():
-                        print('  '+line, file=sys.stderr)
-                print("\nRemainder of file ignored", file=sys.stderr)
-                break
+            line = line.rstrip()
+            dir, dircase = makepath(sitedir, line)
+            if dircase not in known_paths and os.path.exists(dir):
+                sys.path.append(dir)
+                known_paths.add(dircase)
+        except Exception as exc:
+            print(f"Error processing line {n:d} of {fullname}:\n",
+                  file=sys.stderr)
+            import traceback
+            for record in traceback.format_exception(exc):
+                for line in record.splitlines():
+                    print('  '+line, file=sys.stderr)
+            print("\nRemainder of file ignored", file=sys.stderr)
+            break
     if reset:
         known_paths = None
     return known_paths
diff --git a/Misc/NEWS.d/next/Library/2024-04-12-17-37-11.gh-issue-77102.Mk6X_E.rst b/Misc/NEWS.d/next/Library/2024-04-12-17-37-11.gh-issue-77102.Mk6X_E.rst
new file mode 100644
index 00000000000000..6f91251126dc7b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-04-12-17-37-11.gh-issue-77102.Mk6X_E.rst
@@ -0,0 +1,3 @@
+The :mod:`site` module now tries to decode ``.pth`` files as UTF-8 first, and
+falls back to the :term:`locale encoding` if a ``UnicodeDecodeError`` occurs.
+Previously, only the locale encoding was supported.

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to