(airflow) branch main updated: Improve detection of the airflow code vs. test code in db isolation (#41127)

potiuk Tue, 30 Jul 2024 11:30:37 -0700

This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git



The following commit(s) were added to refs/heads/main by this push:
     new 4e70337baa Improve detection of the airflow code vs. test code in db 
isolation (#41127)
4e70337baa is described below

commit 4e70337baa96476a8b6f13e136d6c8a9a4f8f3a1
Author: Jarek Potiuk <[email protected]>
AuthorDate: Tue Jul 30 20:30:25 2024 +0200

    Improve detection of the airflow code vs. test code in db isolation (#41127)
    
    The current code has only been checking when the session is created
    in tests - to see if the airflow code is not using the session.
    However, sometimes when a session is created in the tests and passed
    to "airflow" code (say running a task) - the session can be
    passed directly, and it does not matter where the session is
    created but where it is used. This PR implements checking of
    that - and handles the edge cases better.
    
    The message is also improved in this case - we show the exact
    case where the session was used in airflow, on top of the
    whole stacktrace so you should be able to see easily what
    the problem is.
    
    Related: #41067
---
 airflow/settings.py | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/airflow/settings.py b/airflow/settings.py
index eb4053f50e..e1915ab807 100644
--- a/airflow/settings.py
+++ b/airflow/settings.py
@@ -293,6 +293,7 @@ class TracebackSession:
 
 AIRFLOW_PATH = os.path.dirname(os.path.dirname(__file__))
 AIRFLOW_TESTS_PATH = os.path.join(AIRFLOW_PATH, "tests")
+AIRFLOW_SETTINGS_PATH = os.path.join(AIRFLOW_PATH, "airflow", "settings.py")
 
 
 class TracebackSessionForTests:
@@ -309,26 +310,31 @@ class TracebackSessionForTests:
     db_session_class = None
 
     def __init__(self):
-        self.traceback = traceback.extract_stack()
         self.current_db_session = TracebackSessionForTests.db_session_class()
+        self.created_traceback = traceback.extract_stack()
 
     def __getattr__(self, item):
-        if self.is_called_from_test_code():
+        test_code, frame_summary = self.is_called_from_test_code()
+        if test_code:
             return getattr(self.current_db_session, item)
         raise RuntimeError(
             "TracebackSessionForTests object was used but internal API is 
enabled. "
-            "Only test code is allowed to use this object. "
+            "Only test code is allowed to use this object.\n"
+            f"Called from:\n    {frame_summary.filename}: 
{frame_summary.lineno}{frame_summary.colno}\n"
+            f"     {frame_summary.line}\n\n"
             "You'll need to ensure you are making only RPC calls with this 
object. "
-            "The stack list below will show where the TracebackSession object 
was created."
-            + "\n".join(traceback.format_list(self.traceback))
+            "The stack list below will show where the TracebackSession object 
was called:\n"
+            + "".join(traceback.format_list(self.traceback))
+            + "\n\nThe stack list below will show where the TracebackSession 
object was created:\n"
+            + "".join(traceback.format_list(self.created_traceback))
         )
 
     def remove(*args, **kwargs):
         pass
 
-    def is_called_from_test_code(self) -> bool:
+    def is_called_from_test_code(self) -> tuple[bool, traceback.FrameSummary | 
None]:
         """
-        Check if the object was created from test code.
+        Check if the traceback session was used from the test code.
 
         This is done by checking if the first "airflow" filename in the 
traceback
         is "airflow/tests" or "regular airflow".
@@ -336,12 +342,21 @@ class TracebackSessionForTests:
         :meta: private
         :return: True if the object was created from test code, False 
otherwise.
         """
-        for tb in self.traceback:
+        self.traceback = traceback.extract_stack()
+        if any(filename.endswith("conftest.py") for filename, _, _, _ in 
self.traceback):
+            return True, None
+        for tb in self.traceback[::-1]:
+            # Skip first two settings.py file (will be always here - because 
we call it from here
+            if tb.filename == AIRFLOW_SETTINGS_PATH:
+                continue
             if tb.filename.startswith(AIRFLOW_PATH):
-                # if this is the also "test" code, we are good, otherwise we 
are in Airflow code
-                return tb.filename.startswith(AIRFLOW_TESTS_PATH)
+                if tb.filename.startswith(AIRFLOW_TESTS_PATH):
+                    return True, None
+                else:
+                    return False, tb
         # if it is from elsewhere.... Why???? We should return False in order 
to crash to find out
-        return False
+        # The traceback line will be always 3rd (two bottom ones are Airflow)
+        return False, self.traceback[-2]
 
 
 def _is_sqlite_db_path_relative(sqla_conn_str: str) -> bool:

(airflow) branch main updated: Improve detection of the airflow code vs. test code in db isolation (#41127)

Reply via email to