kaxil commented on a change in pull request #20443: URL: https://github.com/apache/airflow/pull/20443#discussion_r774029297
########## File path: airflow/listeners/events.py ########## @@ -0,0 +1,59 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import logging + +from sqlalchemy import event +from sqlalchemy.orm import Session + +from airflow.listeners.listener import get_listener_manager +from airflow.models import TaskInstance +from airflow.utils.state import State + + +def register_task_instance_state_events(): + logger = logging.getLogger() + + @event.listens_for(Session, 'after_flush', propagate=True) + def on_task_instance_state_session_flush(session, flush_context): + """ + Listens for session.flush() events that modify TaskInstance's state, and notify listeners that listen + for that event. Doing it this way enable us to be stateless in the SQLAlchemy event listener. 
+ """ + for state in flush_context.states: + if isinstance(state.object, TaskInstance) and session.is_modified( + state.object, include_collections=False + ): + added, unchanged, deleted = flush_context.get_attribute_history(state, 'state') + + logger.debug(f"session flush listener: added {added} unchanged {unchanged} deleted {deleted}") Review comment: ```suggestion logger.debug("Session flush listener: added %s unchanged %s deleted %s", added, unchanged, deleted) ``` ########## File path: airflow/listeners/listener.py ########## @@ -0,0 +1,66 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pluggy + +from airflow.listeners import spec + + +class Listener: + """Class used as a namespace for listener hook implementation namespace""" + + +_listener_manager = None + + +class ListenerManager: + """Class that manager registration of listeners and provides hook property for calling them""" Review comment: ```suggestion """Class that manages registration of listeners and provides hook property for calling them""" ``` ########## File path: airflow/listeners/listener.py ########## @@ -0,0 +1,66 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pluggy + +from airflow.listeners import spec + + +class Listener: + """Class used as a namespace for listener hook implementation namespace""" + + +_listener_manager = None + + +class ListenerManager: + """Class that manager registration of listeners and provides hook property for calling them""" + + def __init__(self): + self.pm = pluggy.PluginManager("airflow") + self.pm.add_hookspecs(spec) + self.listener_names = set() Review comment: ```suggestion self.listener_names: Set[str] = set() ``` ########## File path: airflow/settings.py ########## @@ -543,6 +543,9 @@ def initialize(): # loaded from module. LAZY_LOAD_PROVIDERS = conf.getboolean('core', 'lazy_discover_providers', fallback=True) +# By default running listeners on scheduler is disabled. Set it to True if you want to execute them. 
+EXECUTE_LISTENERS_ON_SCHEDULER = conf.getboolean('core', 'execute_listeners_on_scheduler', fallback=False) Review comment: It needs to be added to https://github.com/apache/airflow/blob/main/airflow/config_templates/default_airflow.cfg and https://github.com/apache/airflow/blob/main/airflow/config_templates/config.yml too as we have got automated docs building from there - https://airflow.apache.org/docs/apache-airflow/stable/configurations-ref.html ########## File path: airflow/utils/orm_event_handlers.py ########## @@ -86,3 +86,9 @@ def after_cursor_execute(conn, cursor, statement, parameters, context, executema stack_info, statement.replace("\n", " "), ) + + if conf.getboolean('core', 'execute_listeners_on_scheduler', fallback=False): Review comment: ```suggestion if settings.EXECUTE_LISTENERS_ON_SCHEDULER: ``` ########## File path: airflow/listeners/listener.py ########## @@ -0,0 +1,66 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import pluggy + +from airflow.listeners import spec + + +class Listener: + """Class used as a namespace for listener hook implementation namespace""" + + +_listener_manager = None + + +class ListenerManager: + """Class that manager registration of listeners and provides hook property for calling them""" + + def __init__(self): + self.pm = pluggy.PluginManager("airflow") + self.pm.add_hookspecs(spec) + self.listener_names = set() + + def has_listeners(self) -> bool: + return len(self.pm.get_plugins()) > 0 + + @property + def hook(self): Review comment: Can we add TypeHints for this property ########## File path: tests/listeners/test_listeners.py ########## @@ -0,0 +1,127 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import os + +import pluggy +import pytest as pytest + +os.environ["AIRFLOW__CORE__EXECUTE_LISTENERS_ON_SCHEDULER"] = "True" Review comment: This will cause issues if for some reason this file is imported by any other tests, unlikely but still. 
Can we instead use a pytest module-level fixture and patch `os.environ`? ########## File path: airflow/jobs/local_task_job.py ########## @@ -291,3 +293,19 @@ def _update_dagrun_state_for_paused_dag(self, session=None): if dag_run: dag_run.dag = dag dag_run.update_state(session=session, execute_callbacks=True) + + @staticmethod + def _enable_task_listeners(): + """ + Check if we have any registered listeners, then register sqlalchemy hooks for + TI state change if we do. + """ + from airflow.plugins_manager import integrate_listener_plugins + + integrate_listener_plugins() + from airflow.listeners.listener import get_listener_manager + + if get_listener_manager().has_listeners(): + from airflow.listeners.events import register_task_instance_state_events Review comment: Is there a reason why the imports are in this order? If so, can we add comments explaining it, please? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
