Lee-W commented on code in PR #43826: URL: https://github.com/apache/airflow/pull/43826#discussion_r1841686146
########## airflow/dag_processing/collection.py: ########## @@ -425,3 +427,51 @@ def add_task_asset_references( for task_id, asset_id in referenced_outlets if (task_id, asset_id) not in orm_refs ) + + def add_asset_trigger_references( + self, assets: dict[tuple[str, str], AssetModel], *, session: Session + ) -> None: + # Update references from assets being used + for name_uri, asset in self.assets.items(): + asset_model = assets[name_uri] + trigger_class_path_to_asset_dict: dict[str, BaseTrigger] = { + trigger.serialize()[0]: trigger for trigger in asset.watchers + } + + trigger_class_paths_from_asset: set[str] = set(trigger_class_path_to_asset_dict.keys()) + trigger_class_paths_from_asset_model: set[str] = { + trigger.classpath for trigger in asset_model.triggers + } + + # Optimization: no diff between the DB and DAG definitions, no update needed + if trigger_class_paths_from_asset == trigger_class_paths_from_asset_model: + continue + + refs_to_add = trigger_class_paths_from_asset - trigger_class_paths_from_asset_model + refs_to_remove = trigger_class_paths_from_asset_model - trigger_class_paths_from_asset + + # Remove old references + asset_model.triggers = [ + trigger for trigger in asset_model.triggers if trigger.classpath not in refs_to_remove + ] + + # Add new references + for trigger_class_path in refs_to_add: + trigger_model = session.scalar( + select(Trigger).where(Trigger.classpath == trigger_class_path).limit(1) + ) + + # Create the trigger in the DB if it does not exist + if not trigger_model: + trigger_model = Trigger.from_object(trigger_class_path_to_asset_dict[trigger_class_path]) + session.add(trigger_model) Review Comment: Not sure whether collecting all the models together and using `add_all` would be better 🤔 ########## airflow/assets/__init__.py: ########## @@ -301,10 +328,14 @@ def __init__( self.uri = _sanitize_uri(_validate_non_empty_identifier(self, fields["uri"], uri)) self.group = _validate_identifier(self, fields["group"], group) if group else 
self.asset_type self.extra = _set_extra_default(extra) + self.watchers = watchers or [] def __fspath__(self) -> str: return self.uri + def __hash__(self) -> int: Review Comment: It was removed after discussing with @uranusjr. It seems to introduce more trouble than benefit. Is there any reason we want to add it here? ########## airflow/assets/__init__.py: ########## @@ -266,20 +269,43 @@ class Asset(os.PathLike, BaseAsset): uri: str group: str extra: dict[str, Any] + watchers: list[BaseTrigger] = [] Review Comment: Do we need to use `field` instead? Not sure how attrs deals with default values. ########## airflow/dag_processing/collection.py: ########## @@ -425,3 +427,51 @@ def add_task_asset_references( for task_id, asset_id in referenced_outlets if (task_id, asset_id) not in orm_refs ) + + def add_asset_trigger_references( + self, assets: dict[tuple[str, str], AssetModel], *, session: Session + ) -> None: + # Update references from assets being used + for name_uri, asset in self.assets.items(): + asset_model = assets[name_uri] + trigger_class_path_to_asset_dict: dict[str, BaseTrigger] = { + trigger.serialize()[0]: trigger for trigger in asset.watchers + } + + trigger_class_paths_from_asset: set[str] = set(trigger_class_path_to_asset_dict.keys()) + trigger_class_paths_from_asset_model: set[str] = { + trigger.classpath for trigger in asset_model.triggers + } + + # Optimization: no diff between the DB and DAG definitions, no update needed + if trigger_class_paths_from_asset == trigger_class_paths_from_asset_model: + continue + + refs_to_add = trigger_class_paths_from_asset - trigger_class_paths_from_asset_model + refs_to_remove = trigger_class_paths_from_asset_model - trigger_class_paths_from_asset + + # Remove old references + asset_model.triggers = [ + trigger for trigger in asset_model.triggers if trigger.classpath not in refs_to_remove + ] + + # Add new references + for trigger_class_path in refs_to_add: + trigger_model = session.scalar( + 
select(Trigger).where(Trigger.classpath == trigger_class_path).limit(1) + ) + + # Create the trigger in the DB if it does not exist + if not trigger_model: + trigger_model = Trigger.from_object(trigger_class_path_to_asset_dict[trigger_class_path]) + session.add(trigger_model) + + asset_model.triggers.append(trigger_model) + + # Remove references from assets no longer used + all_assets = session.scalars(select(AssetModel)) + # orphan_assets = set() + for asset_model in all_assets: + if (asset_model.name, asset_model.uri) not in self.assets: + asset_model.triggers = [] + # orphan_assets.add(asset_model.id) Review Comment: ```suggestion ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@airflow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org