kaxil commented on a change in pull request #20443:
URL: https://github.com/apache/airflow/pull/20443#discussion_r774029297



##########
File path: airflow/listeners/events.py
##########
@@ -0,0 +1,59 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import logging
+
+from sqlalchemy import event
+from sqlalchemy.orm import Session
+
+from airflow.listeners.listener import get_listener_manager
+from airflow.models import TaskInstance
+from airflow.utils.state import State
+
+
+def register_task_instance_state_events():
+    logger = logging.getLogger()
+
+    @event.listens_for(Session, 'after_flush', propagate=True)
+    def on_task_instance_state_session_flush(session, flush_context):
+        """
+        Listens for session.flush() events that modify TaskInstance's state, 
and notify listeners that listen
+        for that event. Doing it this way enable us to be stateless in the 
SQLAlchemy event listener.
+        """
+        for state in flush_context.states:
+            if isinstance(state.object, TaskInstance) and session.is_modified(
+                state.object, include_collections=False
+            ):
+                added, unchanged, deleted = 
flush_context.get_attribute_history(state, 'state')
+
+                logger.debug(f"session flush listener: added {added} unchanged 
{unchanged} deleted {deleted}")

Review comment:
       ```suggestion
                   logger.debug("Session flush listener: added %s unchanged %s 
deleted %s", added, unchanged, deleted)
   ```
   
   

##########
File path: airflow/listeners/listener.py
##########
@@ -0,0 +1,66 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pluggy
+
+from airflow.listeners import spec
+
+
+class Listener:
+    """Class used as a namespace for listener hook implementation namespace"""
+
+
+_listener_manager = None
+
+
+class ListenerManager:
+    """Class that manager registration of listeners and provides hook property 
for calling them"""

Review comment:
       ```suggestion
       """Class that manages registration of listeners and provides hook 
property for calling them"""
   ```

##########
File path: airflow/listeners/listener.py
##########
@@ -0,0 +1,66 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pluggy
+
+from airflow.listeners import spec
+
+
+class Listener:
+    """Class used as a namespace for listener hook implementation namespace"""
+
+
+_listener_manager = None
+
+
+class ListenerManager:
+    """Class that manager registration of listeners and provides hook property 
for calling them"""
+
+    def __init__(self):
+        self.pm = pluggy.PluginManager("airflow")
+        self.pm.add_hookspecs(spec)
+        self.listener_names = set()

Review comment:
       ```suggestion
           self.listener_names: Set[str] = set()
   ```

##########
File path: airflow/settings.py
##########
@@ -543,6 +543,9 @@ def initialize():
 # loaded from module.
 LAZY_LOAD_PROVIDERS = conf.getboolean('core', 'lazy_discover_providers', 
fallback=True)
 
+# By default running listeners on scheduler is disabled. Set it to True if you 
want to execute them.
+EXECUTE_LISTENERS_ON_SCHEDULER = conf.getboolean('core', 
'execute_listeners_on_scheduler', fallback=False)

Review comment:
       This option also needs to be added to 
https://github.com/apache/airflow/blob/main/airflow/config_templates/default_airflow.cfg
 and 
https://github.com/apache/airflow/blob/main/airflow/config_templates/config.yml 
as well, because the configuration reference docs are built automatically from those files - 
https://airflow.apache.org/docs/apache-airflow/stable/configurations-ref.html

##########
File path: airflow/utils/orm_event_handlers.py
##########
@@ -86,3 +86,9 @@ def after_cursor_execute(conn, cursor, statement, parameters, 
context, executema
                 stack_info,
                 statement.replace("\n", " "),
             )
+
+    if conf.getboolean('core', 'execute_listeners_on_scheduler', 
fallback=False):

Review comment:
       ```suggestion
       if settings.EXECUTE_LISTENERS_ON_SCHEDULER:
   ```

##########
File path: airflow/listeners/listener.py
##########
@@ -0,0 +1,66 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pluggy
+
+from airflow.listeners import spec
+
+
+class Listener:
+    """Class used as a namespace for listener hook implementation namespace"""
+
+
+_listener_manager = None
+
+
+class ListenerManager:
+    """Class that manager registration of listeners and provides hook property 
for calling them"""
+
+    def __init__(self):
+        self.pm = pluggy.PluginManager("airflow")
+        self.pm.add_hookspecs(spec)
+        self.listener_names = set()
+
+    def has_listeners(self) -> bool:
+        return len(self.pm.get_plugins()) > 0
+
+    @property
+    def hook(self):

Review comment:
       Can we add type hints for this property?

##########
File path: tests/listeners/test_listeners.py
##########
@@ -0,0 +1,127 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import os
+
+import pluggy
+import pytest as pytest
+
+os.environ["AIRFLOW__CORE__EXECUTE_LISTENERS_ON_SCHEDULER"] = "True"

Review comment:
       This will cause issues if this file is ever imported by any other 
tests -- unlikely, but still possible. 
   
   Can we instead use a module-level pytest fixture that patches `os.environ`?

##########
File path: airflow/jobs/local_task_job.py
##########
@@ -291,3 +293,19 @@ def _update_dagrun_state_for_paused_dag(self, 
session=None):
             if dag_run:
                 dag_run.dag = dag
                 dag_run.update_state(session=session, execute_callbacks=True)
+
+    @staticmethod
+    def _enable_task_listeners():
+        """
+        Check if we have any registered listeners, then register sqlalchemy 
hooks for
+        TI state change if we do.
+        """
+        from airflow.plugins_manager import integrate_listener_plugins
+
+        integrate_listener_plugins()
+        from airflow.listeners.listener import get_listener_manager
+
+        if get_listener_manager().has_listeners():
+            from airflow.listeners.events import 
register_task_instance_state_events

Review comment:
       Is there a reason why the imports are in this order? If so, can we 
add a comment explaining it, please?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to