[Python-checkins] gh-91048: Add filename and line number to external inspection routines (GH-133385)

ambv Sun, 04 May 2025 14:34:08 -0700

https://github.com/python/cpython/commit/3109c47be8fc00df999c5bff01229a6b93513224
commit: 3109c47be8fc00df999c5bff01229a6b93513224
branch: main
author: Pablo Galindo Salgado <[email protected]>
committer: ambv <[email protected]>
date: 2025-05-04T23:33:37+02:00
summary:


gh-91048: Add filename and line number to external inspection routines (GH-133385)

Signed-off-by: Pablo Galindo <[email protected]>

files:
M Lib/asyncio/tools.py
M Lib/test/test_external_inspection.py
M Modules/_remotedebuggingmodule.c

diff --git a/Lib/asyncio/tools.py b/Lib/asyncio/tools.py
index 16440b594ad993..6c1f725e777fb9 100644
--- a/Lib/asyncio/tools.py
+++ b/Lib/asyncio/tools.py
@@ -27,6 +27,7 @@ def _index(result):
         for tid, tname, awaited in tasks:
             id2name[tid] = tname
             for stack, parent_id in awaited:
+                stack = [elem[0] if isinstance(elem, tuple) else elem for elem in stack]
                 awaits.append((parent_id, stack, tid))
     return id2name, awaits
 
@@ -151,6 +152,7 @@ def build_task_table(result):
                     ]
                 )
             for stack, awaiter_id in awaited:
+                stack = [elem[0] if isinstance(elem, tuple) else elem for elem in stack]
                 coroutine_chain = " -> ".join(stack)
                 awaiter_name = id2name.get(awaiter_id, "Unknown")
                 table.append(
diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py
index 0fd704e698b90e..f787190b1ae4e0 100644
--- a/Lib/test/test_external_inspection.py
+++ b/Lib/test/test_external_inspection.py
@@ -4,6 +4,7 @@
 import importlib
 import sys
 import socket
+from asyncio import staggered, taskgroups
 from unittest.mock import ANY
 from test.support import os_helper, SHORT_TIMEOUT, busy_retry
 from test.support.script_helper import make_script
@@ -19,27 +20,33 @@
     from _remotedebugging import get_async_stack_trace
     from _remotedebugging import get_all_awaited_by
 except ImportError:
-    raise unittest.SkipTest(
-        "Test only runs when _remotedebuggingmodule is available")
+    raise unittest.SkipTest("Test only runs when _remotedebuggingmodule is available")
+
 
 def _make_test_script(script_dir, script_basename, source):
     to_return = make_script(script_dir, script_basename, source)
     importlib.invalidate_caches()
     return to_return
 
-skip_if_not_supported = unittest.skipIf((sys.platform != "darwin"
-                                         and sys.platform != "linux"
-                                         and sys.platform != "win32"),
-                                        "Test only runs on Linux, Windows and MacOS")
+
+skip_if_not_supported = unittest.skipIf(
+    (sys.platform != "darwin" and sys.platform != "linux" and sys.platform != "win32"),
+    "Test only runs on Linux, Windows and MacOS",
+)
+
+
 class TestGetStackTrace(unittest.TestCase):
 
     @skip_if_not_supported
-    @unittest.skipIf(sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED,
-                     "Test only runs on Linux with process_vm_readv support")
+    @unittest.skipIf(
+        sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED,
+        "Test only runs on Linux with process_vm_readv support",
+    )
     def test_remote_stack_trace(self):
         # Spawn a process with some realistic Python code
         port = find_unused_port()
-        script = textwrap.dedent(f"""\
+        script = textwrap.dedent(
+            f"""\
             import time, sys, socket
             # Connect to the test process
             sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
@@ -57,7 +64,8 @@ def foo():
                 time.sleep(1000)
 
             bar()
-            """)
+            """
+        )
         stack_trace = None
         with os_helper.temp_dir() as work_dir:
             script_dir = os.path.join(work_dir, "script_pkg")
@@ -66,11 +74,11 @@ def foo():
             # Create a socket server to communicate with the target process
             server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
             server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
-            server_socket.bind(('localhost', port))
+            server_socket.bind(("localhost", port))
             server_socket.settimeout(SHORT_TIMEOUT)
             server_socket.listen(1)
 
-            script_name = _make_test_script(script_dir, 'script', script)
+            script_name = _make_test_script(script_dir, "script", script)
             client_socket = None
             try:
                 p = subprocess.Popen([sys.executable, script_name])
@@ -88,22 +96,24 @@ def foo():
                 p.terminate()
                 p.wait(timeout=SHORT_TIMEOUT)
 
-
             expected_stack_trace = [
-                'foo',
-                'baz',
-                'bar',
-                '<module>'
+                ("foo", script_name, 15),
+                ("baz", script_name, 11),
+                ("bar", script_name, 9),
+                ("<module>", script_name, 17),
             ]
             self.assertEqual(stack_trace, expected_stack_trace)
 
     @skip_if_not_supported
-    @unittest.skipIf(sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED,
-                     "Test only runs on Linux with process_vm_readv support")
+    @unittest.skipIf(
+        sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED,
+        "Test only runs on Linux with process_vm_readv support",
+    )
     def test_async_remote_stack_trace(self):
         # Spawn a process with some realistic Python code
         port = find_unused_port()
-        script = textwrap.dedent(f"""\
+        script = textwrap.dedent(
+            f"""\
             import asyncio
             import time
             import sys
@@ -143,7 +153,8 @@ def new_eager_loop():
                 return loop
 
             asyncio.run(main(), loop_factory={{TASK_FACTORY}})
-            """)
+            """
+        )
         stack_trace = None
         for task_factory_variant in "asyncio.new_event_loop", "new_eager_loop":
             with (
@@ -154,25 +165,24 @@ def new_eager_loop():
                 os.mkdir(script_dir)
                 server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                 server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
-                server_socket.bind(('localhost', port))
+                server_socket.bind(("localhost", port))
                 server_socket.settimeout(SHORT_TIMEOUT)
                 server_socket.listen(1)
                 script_name = _make_test_script(
-                    script_dir, 'script',
-                    script.format(TASK_FACTORY=task_factory_variant))
+                    script_dir,
+                    "script",
+                    script.format(TASK_FACTORY=task_factory_variant),
+                )
                 client_socket = None
                 try:
-                    p = subprocess.Popen(
-                        [sys.executable, script_name]
-                    )
+                    p = subprocess.Popen([sys.executable, script_name])
                     client_socket, _ = server_socket.accept()
                     server_socket.close()
                     response = client_socket.recv(1024)
                     self.assertEqual(response, b"ready")
                     stack_trace = get_async_stack_trace(p.pid)
                 except PermissionError:
-                    self.skipTest(
-                        "Insufficient permissions to read the stack trace")
+                    self.skipTest("Insufficient permissions to read the stack trace")
                 finally:
                     if client_socket is not None:
                         client_socket.close()
@@ -185,23 +195,91 @@ def new_eager_loop():
 
                 root_task = "Task-1"
                 expected_stack_trace = [
-                    ['c5', 'c4', 'c3', 'c2'],
-                    'c2_root',
                     [
-                        [['_aexit', '__aexit__', 'main'], root_task, []],
-                        [['c1'], 'sub_main_1', [[['_aexit', '__aexit__', 'main'], root_task, []]]],
-                        [['c1'], 'sub_main_2', [[['_aexit', '__aexit__', 'main'], root_task, []]]],
-                    ]
+                        ("c5", script_name, 11),
+                        ("c4", script_name, 15),
+                        ("c3", script_name, 18),
+                        ("c2", script_name, 21),
+                    ],
+                    "c2_root",
+                    [
+                        [
+                            [
+                                (
+                                    "TaskGroup._aexit",
+                                    taskgroups.__file__,
+                                    ANY,
+                                ),
+                                (
+                                    "TaskGroup.__aexit__",
+                                    taskgroups.__file__,
+                                    ANY,
+                                ),
+                                ("main", script_name, 27),
+                            ],
+                            "Task-1",
+                            [],
+                        ],
+                        [
+                            [("c1", script_name, 24)],
+                            "sub_main_1",
+                            [
+                                [
+                                    [
+                                        (
+                                            "TaskGroup._aexit",
+                                            taskgroups.__file__,
+                                            ANY,
+                                        ),
+                                        (
+                                            "TaskGroup.__aexit__",
+                                            taskgroups.__file__,
+                                            ANY,
+                                        ),
+                                        ("main", script_name, 27),
+                                    ],
+                                    "Task-1",
+                                    [],
+                                ]
+                            ],
+                        ],
+                        [
+                            [("c1", script_name, 24)],
+                            "sub_main_2",
+                            [
+                                [
+                                    [
+                                        (
+                                            "TaskGroup._aexit",
+                                            taskgroups.__file__,
+                                            ANY,
+                                        ),
+                                        (
+                                            "TaskGroup.__aexit__",
+                                            taskgroups.__file__,
+                                            ANY,
+                                        ),
+                                        ("main", script_name, 27),
+                                    ],
+                                    "Task-1",
+                                    [],
+                                ]
+                            ],
+                        ],
+                    ],
                 ]
                 self.assertEqual(stack_trace, expected_stack_trace)
 
     @skip_if_not_supported
-    @unittest.skipIf(sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED,
-                     "Test only runs on Linux with process_vm_readv support")
+    @unittest.skipIf(
+        sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED,
+        "Test only runs on Linux with process_vm_readv support",
+    )
     def test_asyncgen_remote_stack_trace(self):
         # Spawn a process with some realistic Python code
         port = find_unused_port()
-        script = textwrap.dedent(f"""\
+        script = textwrap.dedent(
+            f"""\
             import asyncio
             import time
             import sys
@@ -225,7 +303,8 @@ async def main():
                     pass
 
             asyncio.run(main())
-            """)
+            """
+        )
         stack_trace = None
         with os_helper.temp_dir() as work_dir:
             script_dir = os.path.join(work_dir, "script_pkg")
@@ -233,10 +312,10 @@ async def main():
             # Create a socket server to communicate with the target process
             server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
             server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
-            server_socket.bind(('localhost', port))
+            server_socket.bind(("localhost", port))
             server_socket.settimeout(SHORT_TIMEOUT)
             server_socket.listen(1)
-            script_name = _make_test_script(script_dir, 'script', script)
+            script_name = _make_test_script(script_dir, "script", script)
             client_socket = None
             try:
                 p = subprocess.Popen([sys.executable, script_name])
@@ -258,17 +337,26 @@ async def main():
             stack_trace[2].sort(key=lambda x: x[1])
 
             expected_stack_trace = [
-                ['gen_nested_call', 'gen', 'main'], 'Task-1', []
+                [
+                    ("gen_nested_call", script_name, 11),
+                    ("gen", script_name, 17),
+                    ("main", script_name, 20),
+                ],
+                "Task-1",
+                [],
             ]
             self.assertEqual(stack_trace, expected_stack_trace)
 
     @skip_if_not_supported
-    @unittest.skipIf(sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED,
-                     "Test only runs on Linux with process_vm_readv support")
+    @unittest.skipIf(
+        sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED,
+        "Test only runs on Linux with process_vm_readv support",
+    )
     def test_async_gather_remote_stack_trace(self):
         # Spawn a process with some realistic Python code
         port = find_unused_port()
-        script = textwrap.dedent(f"""\
+        script = textwrap.dedent(
+            f"""\
             import asyncio
             import time
             import sys
@@ -293,7 +381,8 @@ async def main():
                 await asyncio.gather(c1(), c2())
 
             asyncio.run(main())
-            """)
+            """
+        )
         stack_trace = None
         with os_helper.temp_dir() as work_dir:
             script_dir = os.path.join(work_dir, "script_pkg")
@@ -301,10 +390,10 @@ async def main():
             # Create a socket server to communicate with the target process
             server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
             server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
-            server_socket.bind(('localhost', port))
+            server_socket.bind(("localhost", port))
             server_socket.settimeout(SHORT_TIMEOUT)
             server_socket.listen(1)
-            script_name = _make_test_script(script_dir, 'script', script)
+            script_name = _make_test_script(script_dir, "script", script)
             client_socket = None
             try:
                 p = subprocess.Popen([sys.executable, script_name])
@@ -314,8 +403,7 @@ async def main():
                 self.assertEqual(response, b"ready")
                 stack_trace = get_async_stack_trace(p.pid)
             except PermissionError:
-                self.skipTest(
-                    "Insufficient permissions to read the stack trace")
+                self.skipTest("Insufficient permissions to read the stack trace")
             finally:
                 if client_socket is not None:
                     client_socket.close()
@@ -326,18 +414,23 @@ async def main():
             # sets are unordered, so we want to sort "awaited_by"s
             stack_trace[2].sort(key=lambda x: x[1])
 
-            expected_stack_trace =  [
-                ['deep', 'c1'], 'Task-2', [[['main'], 'Task-1', []]]
+            expected_stack_trace = [
+                [("deep", script_name, ANY), ("c1", script_name, 16)],
+                "Task-2",
+                [[[("main", script_name, 22)], "Task-1", []]],
             ]
             self.assertEqual(stack_trace, expected_stack_trace)
 
     @skip_if_not_supported
-    @unittest.skipIf(sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED,
-                     "Test only runs on Linux with process_vm_readv support")
+    @unittest.skipIf(
+        sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED,
+        "Test only runs on Linux with process_vm_readv support",
+    )
     def test_async_staggered_race_remote_stack_trace(self):
         # Spawn a process with some realistic Python code
         port = find_unused_port()
-        script = textwrap.dedent(f"""\
+        script = textwrap.dedent(
+            f"""\
             import asyncio.staggered
             import time
             import sys
@@ -365,7 +458,8 @@ async def main():
                 )
 
             asyncio.run(main())
-            """)
+            """
+        )
         stack_trace = None
         with os_helper.temp_dir() as work_dir:
             script_dir = os.path.join(work_dir, "script_pkg")
@@ -373,10 +467,10 @@ async def main():
             # Create a socket server to communicate with the target process
             server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
             server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
-            server_socket.bind(('localhost', port))
+            server_socket.bind(("localhost", port))
             server_socket.settimeout(SHORT_TIMEOUT)
             server_socket.listen(1)
-            script_name = _make_test_script(script_dir, 'script', script)
+            script_name = _make_test_script(script_dir, "script", script)
             client_socket = None
             try:
                 p = subprocess.Popen([sys.executable, script_name])
@@ -386,8 +480,7 @@ async def main():
                 self.assertEqual(response, b"ready")
                 stack_trace = get_async_stack_trace(p.pid)
             except PermissionError:
-                self.skipTest(
-                    "Insufficient permissions to read the stack trace")
+                self.skipTest("Insufficient permissions to read the stack trace")
             finally:
                 if client_socket is not None:
                     client_socket.close()
@@ -397,20 +490,35 @@ async def main():
 
             # sets are unordered, so we want to sort "awaited_by"s
             stack_trace[2].sort(key=lambda x: x[1])
-
             expected_stack_trace = [
-                ['deep', 'c1', 'run_one_coro'],
-                    'Task-2',
-                    [[['staggered_race', 'main'], 'Task-1', []]]
+                [
+                    ("deep", script_name, ANY),
+                    ("c1", script_name, 16),
+                    ("staggered_race.<locals>.run_one_coro", staggered.__file__, ANY),
+                ],
+                "Task-2",
+                [
+                    [
+                        [
+                            ("staggered_race", staggered.__file__, ANY),
+                            ("main", script_name, 22),
+                        ],
+                        "Task-1",
+                        [],
+                    ]
+                ],
             ]
             self.assertEqual(stack_trace, expected_stack_trace)
 
     @skip_if_not_supported
-    @unittest.skipIf(sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED,
-                     "Test only runs on Linux with process_vm_readv support")
+    @unittest.skipIf(
+        sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED,
+        "Test only runs on Linux with process_vm_readv support",
+    )
     def test_async_global_awaited_by(self):
         port = find_unused_port()
-        script = textwrap.dedent(f"""\
+        script = textwrap.dedent(
+            f"""\
             import asyncio
             import os
             import random
@@ -475,7 +583,8 @@ async def main():
                         tg.create_task(echo_client_spam(server), name="echo client spam")
 
             asyncio.run(main())
-            """)
+            """
+        )
         stack_trace = None
         with os_helper.temp_dir() as work_dir:
             script_dir = os.path.join(work_dir, "script_pkg")
@@ -483,10 +592,10 @@ async def main():
             # Create a socket server to communicate with the target process
             server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
             server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
-            server_socket.bind(('localhost', port))
+            server_socket.bind(("localhost", port))
             server_socket.settimeout(SHORT_TIMEOUT)
             server_socket.listen(1)
-            script_name = _make_test_script(script_dir, 'script', script)
+            script_name = _make_test_script(script_dir, "script", script)
             client_socket = None
             try:
                 p = subprocess.Popen([sys.executable, script_name])
@@ -506,7 +615,9 @@ async def main():
                         msg = str(re)
                         if msg.startswith("Task list appears corrupted"):
                             continue
-                        elif msg.startswith("Invalid linked list structure reading remote memory"):
+                        elif msg.startswith(
+                            "Invalid linked list structure reading remote memory"
+                        ):
                             continue
                         elif msg.startswith("Unknown error reading memory"):
                             continue
@@ -525,22 +636,62 @@ async def main():
                 # expected: at least 1000 pending tasks
                 self.assertGreaterEqual(len(entries), 1000)
                 # the first three tasks stem from the code structure
-                self.assertIn((ANY, 'Task-1', []), entries)
-                self.assertIn((ANY, 'server task', [[['_aexit', '__aexit__', 'main'], ANY]]), entries)
-                self.assertIn((ANY, 'echo client spam', [[['_aexit', '__aexit__', 'main'], ANY]]), entries)
+                self.assertIn((ANY, "Task-1", []), entries)
+                main_stack = [
+                    (
+                        "TaskGroup._aexit",
+                        taskgroups.__file__,
+                        ANY,
+                    ),
+                    (
+                        "TaskGroup.__aexit__",
+                        taskgroups.__file__,
+                        ANY,
+                    ),
+                    ("main", script_name, 60),
+                ]
+                self.assertIn(
+                    (ANY, "server task", [[main_stack, ANY]]),
+                    entries,
+                )
+                self.assertIn(
+                    (ANY, "echo client spam", [[main_stack, ANY]]),
+                    entries,
+                )
 
-                expected_stack = [[['_aexit', '__aexit__', 'echo_client_spam'], ANY]]
-                tasks_with_stack = [task for task in entries if task[2] == expected_stack]
+                expected_stack = [
+                    [
+                        [
+                            (
+                                "TaskGroup._aexit",
+                                taskgroups.__file__,
+                                ANY,
+                            ),
+                            (
+                                "TaskGroup.__aexit__",
+                                taskgroups.__file__,
+                                ANY,
+                            ),
+                            ("echo_client_spam", script_name, 41),
+                        ],
+                        ANY,
+                    ]
+                ]
+                tasks_with_stack = [
+                    task for task in entries if task[2] == expected_stack
+                ]
                 self.assertGreaterEqual(len(tasks_with_stack), 1000)
 
                 # the final task will have some random number, but it should for
                 # sure be one of the echo client spam horde (In windows this is not true
                 # for some reason)
                 if sys.platform != "win32":
-                    self.assertEqual([[['_aexit', '__aexit__', 'echo_client_spam'], ANY]], entries[-1][2])
+                    self.assertEqual(
+                        expected_stack,
+                        entries[-1][2],
+                    )
             except PermissionError:
-                self.skipTest(
-                    "Insufficient permissions to read the stack trace")
+                self.skipTest("Insufficient permissions to read the stack trace")
             finally:
                 if client_socket is not None:
                     client_socket.close()
@@ -549,11 +700,21 @@ async def main():
                 p.wait(timeout=SHORT_TIMEOUT)
 
     @skip_if_not_supported
-    @unittest.skipIf(sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED,
-                     "Test only runs on Linux with process_vm_readv support")
+    @unittest.skipIf(
+        sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED,
+        "Test only runs on Linux with process_vm_readv support",
+    )
     def test_self_trace(self):
         stack_trace = get_stack_trace(os.getpid())
-        self.assertEqual(stack_trace[0], "test_self_trace")
+        self.assertEqual(
+            stack_trace[0],
+            (
+                "TestGetStackTrace.test_self_trace",
+                __file__,
+                self.test_self_trace.__code__.co_firstlineno + 6,
+            ),
+        )
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Modules/_remotedebuggingmodule.c b/Modules/_remotedebuggingmodule.c
index 0e055ae1604d5f..cffa9a38331dde 100644
--- a/Modules/_remotedebuggingmodule.c
+++ b/Modules/_remotedebuggingmodule.c
@@ -80,37 +80,6 @@ _Py_RemoteDebug_GetAsyncioDebugAddress(proc_handle_t* handle)
     return address;
 }
 
-static int
-read_string(
-    proc_handle_t *handle,
-    _Py_DebugOffsets* debug_offsets,
-    uintptr_t address,
-    char* buffer,
-    Py_ssize_t size
-) {
-    Py_ssize_t len;
-    int result = _Py_RemoteDebug_ReadRemoteMemory(
-        handle,
-        address + debug_offsets->unicode_object.length,
-        sizeof(Py_ssize_t),
-        &len
-    );
-    if (result < 0) {
-        return -1;
-    }
-    if (len >= size) {
-        PyErr_SetString(PyExc_RuntimeError, "Buffer too small");
-        return -1;
-    }
-    size_t offset = debug_offsets->unicode_object.asciiobject_size;
-    result = _Py_RemoteDebug_ReadRemoteMemory(handle, address + offset, len, buffer);
-    if (result < 0) {
-        return -1;
-    }
-    buffer[len] = '\0';
-    return 0;
-}
-
 static inline int
 read_ptr(proc_handle_t *handle, uintptr_t address, uintptr_t *ptr_addr)
 {
@@ -188,20 +157,34 @@ read_py_str(
     uintptr_t address,
     Py_ssize_t max_len
 ) {
-    assert(max_len > 0);
-
     PyObject *result = NULL;
+    char *buf = NULL;
+
+    Py_ssize_t len;
+    int res = _Py_RemoteDebug_ReadRemoteMemory(
+        handle,
+        address + debug_offsets->unicode_object.length,
+        sizeof(Py_ssize_t),
+        &len
+    );
+    if (res < 0) {
+        goto err;
+    }
 
-    char *buf = (char *)PyMem_RawMalloc(max_len);
+    buf = (char *)PyMem_RawMalloc(len+1);
     if (buf == NULL) {
         PyErr_NoMemory();
         return NULL;
     }
-    if (read_string(handle, debug_offsets, address, buf, max_len)) {
+
+    size_t offset = debug_offsets->unicode_object.asciiobject_size;
+    res = _Py_RemoteDebug_ReadRemoteMemory(handle, address + offset, len, buf);
+    if (res < 0) {
         goto err;
     }
+    buf[len] = '\0';
 
-    result = PyUnicode_FromString(buf);
+    result = PyUnicode_FromStringAndSize(buf, len);
     if (result == NULL) {
         goto err;
     }
@@ -211,10 +194,63 @@ read_py_str(
     return result;
 
 err:
+    if (buf != NULL) {
+        PyMem_RawFree(buf);
+    }
+    return NULL;
+}
+
+static PyObject *
+read_py_bytes(
+    proc_handle_t *handle,
+    _Py_DebugOffsets* debug_offsets,
+    uintptr_t address
+) {
+    PyObject *result = NULL;
+    char *buf = NULL;
+
+    Py_ssize_t len;
+    int res = _Py_RemoteDebug_ReadRemoteMemory(
+        handle,
+        address + debug_offsets->bytes_object.ob_size,
+        sizeof(Py_ssize_t),
+        &len
+    );
+    if (res < 0) {
+        goto err;
+    }
+
+    buf = (char *)PyMem_RawMalloc(len+1);
+    if (buf == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+    size_t offset = debug_offsets->bytes_object.ob_sval;
+    res = _Py_RemoteDebug_ReadRemoteMemory(handle, address + offset, len, buf);
+    if (res < 0) {
+        goto err;
+    }
+    buf[len] = '\0';
+
+    result = PyBytes_FromStringAndSize(buf, len);
+    if (result == NULL) {
+        goto err;
+    }
+
     PyMem_RawFree(buf);
+    assert(result != NULL);
+    return result;
+
+err:
+    if (buf != NULL) {
+        PyMem_RawFree(buf);
+    }
     return NULL;
 }
 
+
+
 static long
 read_py_long(proc_handle_t *handle, _Py_DebugOffsets* offsets, uintptr_t address)
 {
@@ -332,6 +368,15 @@ parse_task_name(
     );
 }
 
+static int
+parse_frame_object(
+    proc_handle_t *handle,
+    PyObject** result,
+    struct _Py_DebugOffsets* offsets,
+    uintptr_t address,
+    uintptr_t* previous_frame
+);
+
 static int
 parse_coro_chain(
     proc_handle_t *handle,
@@ -351,22 +396,16 @@ parse_coro_chain(
         return -1;
     }
 
-    uintptr_t gen_name_addr;
-    err = read_py_ptr(
-        handle,
-        coro_address + offsets->gen_object.gi_name,
-        &gen_name_addr);
-    if (err) {
-        return -1;
-    }
-
-    PyObject *name = read_py_str(
-        handle,
-        offsets,
-        gen_name_addr,
-        255
-    );
-    if (name == NULL) {
+    PyObject* name = NULL;
+    uintptr_t prev_frame;
+    if (parse_frame_object(
+                handle,
+                &name,
+                offsets,
+                coro_address + offsets->gen_object.gi_iframe,
+                &prev_frame)
+        < 0)
+    {
         return -1;
     }
 
@@ -743,49 +782,204 @@ parse_task_awaited_by(
     return 0;
 }
 
+typedef struct
+{
+    int lineno;
+    int end_lineno;
+    int column;
+    int end_column;
+} LocationInfo;
+
+static int
+scan_varint(const uint8_t **ptr)
+{
+    unsigned int read = **ptr;
+    *ptr = *ptr + 1;
+    unsigned int val = read & 63;
+    unsigned int shift = 0;
+    while (read & 64) {
+        read = **ptr;
+        *ptr = *ptr + 1;
+        shift += 6;
+        val |= (read & 63) << shift;
+    }
+    return val;
+}
+
 static int
-parse_code_object(
-    proc_handle_t *handle,
-    PyObject* result,
-    struct _Py_DebugOffsets* offsets,
-    uintptr_t address,
-    uintptr_t* previous_frame
-) {
-    uintptr_t address_of_function_name;
-    int bytes_read = _Py_RemoteDebug_ReadRemoteMemory(
+scan_signed_varint(const uint8_t **ptr)
+{
+    unsigned int uval = scan_varint(ptr);
+    if (uval & 1) {
+        return -(int)(uval >> 1);
+    }
+    else {
+        return uval >> 1;
+    }
+}
+
+
+static bool
+parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, LocationInfo* info)
+{
+    const uint8_t* ptr = (const uint8_t*)(linetable);
+    uint64_t addr = 0;
+    info->lineno = firstlineno;
+
+    while (*ptr != '\0') {
+        // See InternalDocs/code_objects.md for where these magic numbers are from
+        // and for the decoding algorithm.
+        uint8_t first_byte = *(ptr++);
+        uint8_t code = (first_byte >> 3) & 15;
+        size_t length = (first_byte & 7) + 1;
+        uintptr_t end_addr = addr + length;
+        switch (code) {
+            case PY_CODE_LOCATION_INFO_NONE: {
+                break;
+            }
+            case PY_CODE_LOCATION_INFO_LONG: {
+                int line_delta = scan_signed_varint(&ptr);
+                info->lineno += line_delta;
+                info->end_lineno = info->lineno + scan_varint(&ptr);
+                info->column = scan_varint(&ptr) - 1;
+                info->end_column = scan_varint(&ptr) - 1;
+                break;
+            }
+            case PY_CODE_LOCATION_INFO_NO_COLUMNS: {
+                int line_delta = scan_signed_varint(&ptr);
+                info->lineno += line_delta;
+                info->column = info->end_column = -1;
+                break;
+            }
+            case PY_CODE_LOCATION_INFO_ONE_LINE0:
+            case PY_CODE_LOCATION_INFO_ONE_LINE1:
+            case PY_CODE_LOCATION_INFO_ONE_LINE2: {
+                int line_delta = code - 10;
+                info->lineno += line_delta;
+                info->end_lineno = info->lineno;
+                info->column = *(ptr++);
+                info->end_column = *(ptr++);
+                break;
+            }
+            default: {
+                uint8_t second_byte = *(ptr++);
+                assert((second_byte & 128) == 0);
+                info->column = code << 3 | (second_byte >> 4);
+                info->end_column = info->column + (second_byte & 15);
+                break;
+            }
+        }
+        if (addr <= addrq && end_addr > addrq) {
+            return true;
+        }
+        addr = end_addr;
+    }
+    return false;
+}
+
+static int
+read_remote_pointer(proc_handle_t *handle, uintptr_t address, uintptr_t *out_ptr, const char *error_message)
+{
+    int bytes_read = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(void *), out_ptr);
+    if (bytes_read < 0) {
+        return -1;
+    }
+
+    if ((void *)(*out_ptr) == NULL) {
+        PyErr_SetString(PyExc_RuntimeError, error_message);
+        return -1;
+    }
+
+    return 0;
+}
+
+static int
+read_instruction_ptr(proc_handle_t *handle, struct _Py_DebugOffsets *offsets,
+                     uintptr_t current_frame, uintptr_t *instruction_ptr)
+{
+    return read_remote_pointer(
         handle,
-        address + offsets->code_object.name,
-        sizeof(void*),
-        &address_of_function_name
+        current_frame + offsets->interpreter_frame.instr_ptr,
+        instruction_ptr,
+        "No instruction ptr found"
     );
-    if (bytes_read < 0) {
+}
+
+static int
+parse_code_object(proc_handle_t *handle,
+                  PyObject **result,
+                  struct _Py_DebugOffsets *offsets,
+                  uintptr_t address,
+                  uintptr_t current_frame,
+                  uintptr_t *previous_frame)
+{
+    uintptr_t addr_func_name, addr_file_name, addr_linetable, instruction_ptr;
+
+    if (read_remote_pointer(handle, address + offsets->code_object.qualname, &addr_func_name, "No function name found") < 0 ||
+        read_remote_pointer(handle, address + offsets->code_object.filename, &addr_file_name, "No file name found") < 0 ||
+        read_remote_pointer(handle, address + offsets->code_object.linetable, &addr_linetable, "No linetable found") < 0 ||
+        read_instruction_ptr(handle, offsets, current_frame, &instruction_ptr) < 0) {
         return -1;
     }
 
-    if ((void*)address_of_function_name == NULL) {
-        PyErr_SetString(PyExc_RuntimeError, "No function name found");
+    int firstlineno;
+    if (_Py_RemoteDebug_ReadRemoteMemory(handle,
+                                         address + offsets->code_object.firstlineno,
+                                         sizeof(int),
+                                         &firstlineno) < 0) {
         return -1;
     }
 
-    PyObject* py_function_name = read_py_str(
-        handle, offsets, address_of_function_name, 256);
-    if (py_function_name == NULL) {
+    PyObject *py_linetable = read_py_bytes(handle, offsets, addr_linetable);
+    if (!py_linetable) {
+        return -1;
+    }
+
+    uintptr_t addr_code_adaptive = address + offsets->code_object.co_code_adaptive;
+    ptrdiff_t addrq = (uint16_t *)instruction_ptr - (uint16_t *)addr_code_adaptive;
+
+    LocationInfo info;
+    parse_linetable(addrq, PyBytes_AS_STRING(py_linetable), firstlineno, &info);
+    Py_DECREF(py_linetable);  // Done with linetable
+
+    PyObject *py_line = PyLong_FromLong(info.lineno);
+    if (!py_line) {
         return -1;
     }
 
-    if (PyList_Append(result, py_function_name) == -1) {
-        Py_DECREF(py_function_name);
+    PyObject *py_func_name = read_py_str(handle, offsets, addr_func_name, 256);
+    if (!py_func_name) {
+        Py_DECREF(py_line);
         return -1;
     }
-    Py_DECREF(py_function_name);
 
+    PyObject *py_file_name = read_py_str(handle, offsets, addr_file_name, 256);
+    if (!py_file_name) {
+        Py_DECREF(py_line);
+        Py_DECREF(py_func_name);
+        return -1;
+    }
+
+    PyObject *result_tuple = PyTuple_New(3);
+    if (!result_tuple) {
+        Py_DECREF(py_line);
+        Py_DECREF(py_func_name);
+        Py_DECREF(py_file_name);
+        return -1;
+    }
+
+    PyTuple_SET_ITEM(result_tuple, 0, py_func_name);  // steals ref
+    PyTuple_SET_ITEM(result_tuple, 1, py_file_name);  // steals ref
+    PyTuple_SET_ITEM(result_tuple, 2, py_line);       // steals ref
+
+    *result = result_tuple;
     return 0;
 }
 
 static int
 parse_frame_object(
     proc_handle_t *handle,
-    PyObject* result,
+    PyObject** result,
     struct _Py_DebugOffsets* offsets,
     uintptr_t address,
     uintptr_t* previous_frame
@@ -826,13 +1020,13 @@ parse_frame_object(
     }
 
     return parse_code_object(
-        handle, result, offsets, address_of_code_object, previous_frame);
+        handle, result, offsets, address_of_code_object, address, previous_frame);
 }
 
 static int
 parse_async_frame_object(
     proc_handle_t *handle,
-    PyObject* result,
+    PyObject** result,
     struct _Py_DebugOffsets* offsets,
     uintptr_t address,
     uintptr_t* previous_frame,
@@ -882,7 +1076,7 @@ parse_async_frame_object(
     }
 
     if (parse_code_object(
-        handle, result, offsets, *code_object, previous_frame)) {
+        handle, result, offsets, *code_object, address, previous_frame)) {
         return -1;
     }
 
@@ -1353,9 +1547,10 @@ get_stack_trace(PyObject* self, PyObject* args)
     }
 
     while ((void*)address_of_current_frame != NULL) {
+        PyObject* frame_info = NULL;
         if (parse_frame_object(
                     handle,
-                    result,
+                    &frame_info,
                     &local_debug_offsets,
                     address_of_current_frame,
                     &address_of_current_frame)
@@ -1364,6 +1559,19 @@ get_stack_trace(PyObject* self, PyObject* args)
             Py_DECREF(result);
             goto result_err;
         }
+
+        if (!frame_info) {
+            continue;
+        }
+
+        if (PyList_Append(result, frame_info) == -1) {
+            Py_DECREF(result);
+            goto result_err;
+        }
+
+        Py_DECREF(frame_info);
+        frame_info = NULL;
+
     }
 
 result_err:
@@ -1485,9 +1693,10 @@ get_async_stack_trace(PyObject* self, PyObject* args)
 
     uintptr_t address_of_code_object;
     while ((void*)address_of_current_frame != NULL) {
+        PyObject* frame_info = NULL;
         int res = parse_async_frame_object(
             handle,
-            calls,
+            &frame_info,
             &local_debug_offsets,
             address_of_current_frame,
             &address_of_current_frame,
@@ -1499,6 +1708,18 @@ get_async_stack_trace(PyObject* self, PyObject* args)
             goto result_err;
         }
 
+        if (!frame_info) {
+            continue;
+        }
+
+        if (PyList_Append(calls, frame_info) == -1) {
+            Py_DECREF(calls);
+            goto result_err;
+        }
+
+        Py_DECREF(frame_info);
+        frame_info = NULL;
+
         if (address_of_code_object == address_of_running_task_code_obj) {
             break;
         }

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

[Python-checkins] gh-91048: Add filename and line number to external inspection routines (GH-133385)

Reply via email to