https://github.com/python/cpython/commit/9af7a20caeb2912a05dd0fa07bbb4bfe7fb874e4
commit: 9af7a20caeb2912a05dd0fa07bbb4bfe7fb874e4
branch: main
author: Pablo Galindo Salgado <[email protected]>
committer: pablogsal <[email protected]>
date: 2025-12-24T12:38:17Z
summary:

gh-136186: Fix flaky tests in test_external_inspection (#143110)

files:
M Lib/test/test_external_inspection.py

diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py
index 4c502cd1de7418..b1a3a8e65a9802 100644
--- a/Lib/test/test_external_inspection.py
+++ b/Lib/test/test_external_inspection.py
@@ -253,6 +253,31 @@ def get_all_awaited_by(pid):
     raise RuntimeError("Failed to get all awaited_by after retries")
 
 
+def _get_stack_trace_with_retry(unwinder, timeout=SHORT_TIMEOUT, condition=None):
+    """Get stack trace from an existing unwinder with retry for transient errors.
+
+    This handles the case where we want to reuse an existing RemoteUnwinder
+    instance but still handle transient failures like "Failed to parse initial
+    frame in chain" that can occur when sampling at an inopportune moment.
+    If condition is provided, keeps retrying until condition(traces) is True.
+    """
+    last_error = None
+    for _ in busy_retry(timeout):
+        try:
+            traces = unwinder.get_stack_trace()
+            if condition is None or condition(traces):
+                return traces
+            # Condition not met yet, keep retrying
+        except TRANSIENT_ERRORS as e:
+            last_error = e
+            continue
+    if last_error:
+        raise RuntimeError(
+            f"Failed to get stack trace after retries: {last_error}"
+        )
+    raise RuntimeError("Condition never satisfied within timeout")
+
+
 # ============================================================================
 # Base test class with shared infrastructure
 # ============================================================================
@@ -1708,16 +1733,16 @@ def main_work():
 
                     # Get stack trace with all threads
                     unwinder_all = RemoteUnwinder(p.pid, all_threads=True)
-                    for _ in range(MAX_TRIES):
-                        all_traces = unwinder_all.get_stack_trace()
-                        found = self._find_frame_in_trace(
-                            all_traces,
-                            lambda f: f.funcname == "main_work"
-                            and f.location.lineno > 12,
-                        )
-                        if found:
-                            break
-                        time.sleep(RETRY_DELAY)
+                    for _ in busy_retry(SHORT_TIMEOUT):
+                        with contextlib.suppress(*TRANSIENT_ERRORS):
+                            all_traces = unwinder_all.get_stack_trace()
+                            found = self._find_frame_in_trace(
+                                all_traces,
+                                lambda f: f.funcname == "main_work"
+                                and f.location.lineno > 12,
+                            )
+                            if found:
+                                break
                     else:
                         self.fail(
                             "Main thread did not start its busy work on time"
@@ -1727,7 +1752,7 @@ def main_work():
                     unwinder_gil = RemoteUnwinder(
                         p.pid, only_active_thread=True
                     )
-                    gil_traces = unwinder_gil.get_stack_trace()
+                    gil_traces = _get_stack_trace_with_retry(unwinder_gil)
 
                     # Count threads
                     total_threads = sum(
@@ -2002,15 +2027,15 @@ def busy():
                         mode=mode,
                         skip_non_matching_threads=False,
                     )
-                    for _ in range(MAX_TRIES):
-                        traces = unwinder.get_stack_trace()
-                        statuses = self._get_thread_statuses(traces)
+                    for _ in busy_retry(SHORT_TIMEOUT):
+                        with contextlib.suppress(*TRANSIENT_ERRORS):
+                            traces = unwinder.get_stack_trace()
+                            statuses = self._get_thread_statuses(traces)
 
-                        if check_condition(
-                            statuses, sleeper_tid, busy_tid
-                        ):
-                            break
-                        time.sleep(0.5)
+                            if check_condition(
+                                statuses, sleeper_tid, busy_tid
+                            ):
+                                break
 
                     return statuses, sleeper_tid, busy_tid
             finally:
@@ -2154,29 +2179,29 @@ def busy_thread():
                         mode=PROFILING_MODE_ALL,
                         skip_non_matching_threads=False,
                     )
-                    for _ in range(MAX_TRIES):
-                        traces = unwinder.get_stack_trace()
-                        statuses = self._get_thread_statuses(traces)
-
-                        # Check ALL mode provides both GIL and CPU info
-                        if (
-                            sleeper_tid in statuses
-                            and busy_tid in statuses
-                            and not (
-                                statuses[sleeper_tid]
-                                & THREAD_STATUS_ON_CPU
-                            )
-                            and not (
-                                statuses[sleeper_tid]
-                                & THREAD_STATUS_HAS_GIL
-                            )
-                            and (statuses[busy_tid] & THREAD_STATUS_ON_CPU)
-                            and (
-                                statuses[busy_tid] & THREAD_STATUS_HAS_GIL
-                            )
-                        ):
-                            break
-                        time.sleep(0.5)
+                    for _ in busy_retry(SHORT_TIMEOUT):
+                        with contextlib.suppress(*TRANSIENT_ERRORS):
+                            traces = unwinder.get_stack_trace()
+                            statuses = self._get_thread_statuses(traces)
+
+                            # Check ALL mode provides both GIL and CPU info
+                            if (
+                                sleeper_tid in statuses
+                                and busy_tid in statuses
+                                and not (
+                                    statuses[sleeper_tid]
+                                    & THREAD_STATUS_ON_CPU
+                                )
+                                and not (
+                                    statuses[sleeper_tid]
+                                    & THREAD_STATUS_HAS_GIL
+                                )
+                                and (statuses[busy_tid] & THREAD_STATUS_ON_CPU)
+                                and (
+                                    statuses[busy_tid] & THREAD_STATUS_HAS_GIL
+                                )
+                            ):
+                                break
 
                     self.assertIsNotNone(
                         sleeper_tid, "Sleeper thread id not received"
@@ -2300,18 +2325,18 @@ def test_thread_status_exception_detection(self):
                 mode=PROFILING_MODE_ALL,
                 skip_non_matching_threads=False,
             )
-            for _ in range(MAX_TRIES):
-                traces = unwinder.get_stack_trace()
-                statuses = self._get_thread_statuses(traces)
-
-                if (
-                    exception_tid in statuses
-                    and normal_tid in statuses
-                    and (statuses[exception_tid] & THREAD_STATUS_HAS_EXCEPTION)
-                    and not (statuses[normal_tid] & THREAD_STATUS_HAS_EXCEPTION)
-                ):
-                    break
-                time.sleep(0.5)
+            for _ in busy_retry(SHORT_TIMEOUT):
+                with contextlib.suppress(*TRANSIENT_ERRORS):
+                    traces = unwinder.get_stack_trace()
+                    statuses = self._get_thread_statuses(traces)
+
+                    if (
+                        exception_tid in statuses
+                        and normal_tid in statuses
+                        and (statuses[exception_tid] & THREAD_STATUS_HAS_EXCEPTION)
+                        and not (statuses[normal_tid] & THREAD_STATUS_HAS_EXCEPTION)
+                    ):
+                        break
 
             self.assertIn(exception_tid, statuses)
             self.assertIn(normal_tid, statuses)
@@ -2343,18 +2368,18 @@ def test_thread_status_exception_mode_filtering(self):
                 mode=PROFILING_MODE_EXCEPTION,
                 skip_non_matching_threads=True,
             )
-            for _ in range(MAX_TRIES):
-                traces = unwinder.get_stack_trace()
-                statuses = self._get_thread_statuses(traces)
-
-                if exception_tid in statuses:
-                    self.assertNotIn(
-                        normal_tid,
-                        statuses,
-                        "Normal thread should be filtered out in exception mode",
-                    )
-                    return
-                time.sleep(0.5)
+            for _ in busy_retry(SHORT_TIMEOUT):
+                with contextlib.suppress(*TRANSIENT_ERRORS):
+                    traces = unwinder.get_stack_trace()
+                    statuses = self._get_thread_statuses(traces)
+
+                    if exception_tid in statuses:
+                        self.assertNotIn(
+                            normal_tid,
+                            statuses,
+                            "Normal thread should be filtered out in exception mode",
+                        )
+                        return
 
             self.fail("Never found exception thread in exception mode")
 
@@ -2497,8 +2522,23 @@ def _run_scenario_process(self, scenario):
             finally:
                 _cleanup_sockets(client_socket, server_socket)
 
-    def _check_exception_status(self, p, thread_tid, expect_exception):
-        """Helper to check if thread has expected exception status."""
+    def _check_thread_status(
+        self, p, thread_tid, condition, condition_name="condition"
+    ):
+        """Helper to check thread status with a custom condition.
+
+        This waits until we see 3 consecutive samples where the condition
+        returns True, which confirms the thread has reached and is stable
+        in the expected state. Samples that don't match are ignored (the
+        thread may not have reached the expected state yet).
+
+        Args:
+            p: Process object with pid attribute
+            thread_tid: Thread ID to check
+            condition: Callable(statuses, thread_tid) -> bool that returns
+                       True when the thread is in the expected state
+            condition_name: Description of condition for error messages
+        """
         unwinder = RemoteUnwinder(
             p.pid,
             all_threads=True,
@@ -2506,40 +2546,37 @@ def _check_exception_status(self, p, thread_tid, expect_exception):
             skip_non_matching_threads=False,
         )
 
-        # Collect multiple samples for reliability
-        results = []
-        for _ in range(MAX_TRIES):
-            try:
+        # Wait for 3 consecutive samples matching expected state
+        matching_samples = 0
+        for _ in busy_retry(SHORT_TIMEOUT):
+            with contextlib.suppress(*TRANSIENT_ERRORS):
                 traces = unwinder.get_stack_trace()
-            except TRANSIENT_ERRORS:
-                time.sleep(RETRY_DELAY)
-                continue
-            statuses = self._get_thread_statuses(traces)
-
-            if thread_tid in statuses:
-                has_exc = bool(statuses[thread_tid] & THREAD_STATUS_HAS_EXCEPTION)
-                results.append(has_exc)
+                statuses = self._get_thread_statuses(traces)
 
-                if len(results) >= 3:
-                    break
+                if thread_tid in statuses:
+                    if condition(statuses, thread_tid):
+                        matching_samples += 1
+                        if matching_samples >= 3:
+                            return  # Success - confirmed stable in expected state
+                    else:
+                        # Thread not yet in expected state, reset counter
+                        matching_samples = 0
 
-            time.sleep(RETRY_DELAY)
+        self.fail(
+            f"Thread did not stabilize in expected state "
+            f"({condition_name}) within timeout"
+        )
 
-        # Check majority of samples match expected
-        if not results:
-            self.fail("Never found target thread in stack traces")
+    def _check_exception_status(self, p, thread_tid, expect_exception):
+        """Helper to check if thread has expected exception status."""
+        def condition(statuses, tid):
+            has_exc = bool(statuses[tid] & THREAD_STATUS_HAS_EXCEPTION)
+            return has_exc == expect_exception
 
-        majority = sum(results) > len(results) // 2
-        if expect_exception:
-            self.assertTrue(
-                majority,
-                f"Thread should have HAS_EXCEPTION flag, got {results}"
-            )
-        else:
-            self.assertFalse(
-                majority,
-                f"Thread should NOT have HAS_EXCEPTION flag, got {results}"
-            )
+        self._check_thread_status(
+            p, thread_tid, condition,
+            condition_name=f"expect_exception={expect_exception}"
+        )
 
     @unittest.skipIf(
         sys.platform not in ("linux", "darwin", "win32"),
@@ -3445,7 +3482,7 @@ def test_get_stats(self):
             _wait_for_signal(client_socket, b"ready")
 
             # Take a sample
-            unwinder.get_stack_trace()
+            _get_stack_trace_with_retry(unwinder)
 
             stats = unwinder.get_stats()
             client_socket.sendall(b"done")

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to