Lee-W commented on code in PR #43826:
URL: https://github.com/apache/airflow/pull/43826#discussion_r1841686146


##########
airflow/dag_processing/collection.py:
##########
@@ -425,3 +427,51 @@ def add_task_asset_references(
                 for task_id, asset_id in referenced_outlets
                 if (task_id, asset_id) not in orm_refs
             )
+
+    def add_asset_trigger_references(
+        self, assets: dict[tuple[str, str], AssetModel], *, session: Session
+    ) -> None:
+        # Update references from assets being used
+        for name_uri, asset in self.assets.items():
+            asset_model = assets[name_uri]
+            trigger_class_path_to_asset_dict: dict[str, BaseTrigger] = {
+                trigger.serialize()[0]: trigger for trigger in asset.watchers
+            }
+
+            trigger_class_paths_from_asset: set[str] = 
set(trigger_class_path_to_asset_dict.keys())
+            trigger_class_paths_from_asset_model: set[str] = {
+                trigger.classpath for trigger in asset_model.triggers
+            }
+
+            # Optimization: no diff between the DB and DAG definitions, no 
update needed
+            if trigger_class_paths_from_asset == 
trigger_class_paths_from_asset_model:
+                continue
+
+            refs_to_add = trigger_class_paths_from_asset - 
trigger_class_paths_from_asset_model
+            refs_to_remove = trigger_class_paths_from_asset_model - 
trigger_class_paths_from_asset
+
+            # Remove old references
+            asset_model.triggers = [
+                trigger for trigger in asset_model.triggers if 
trigger.classpath not in refs_to_remove
+            ]
+
+            # Add new references
+            for trigger_class_path in refs_to_add:
+                trigger_model = session.scalar(
+                    select(Trigger).where(Trigger.classpath == 
trigger_class_path).limit(1)
+                )
+
+                # Create the trigger in the DB if it does not exist
+                if not trigger_model:
+                    trigger_model = 
Trigger.from_object(trigger_class_path_to_asset_dict[trigger_class_path])
+                    session.add(trigger_model)

Review Comment:
   Not sure whether collecting all the models together and using `add_all` 
would be better 🤔 



##########
airflow/assets/__init__.py:
##########
@@ -301,10 +328,14 @@ def __init__(
         self.uri = _sanitize_uri(_validate_non_empty_identifier(self, 
fields["uri"], uri))
         self.group = _validate_identifier(self, fields["group"], group) if 
group else self.asset_type
         self.extra = _set_extra_default(extra)
+        self.watchers = watchers or []
 
     def __fspath__(self) -> str:
         return self.uri
 
+    def __hash__(self) -> int:

Review Comment:
   It was removed after discussing with @uranusjr. It seemed to introduce more 
trouble than benefit. Is there any reason we want to add it here?



##########
airflow/assets/__init__.py:
##########
@@ -266,20 +269,43 @@ class Asset(os.PathLike, BaseAsset):
     uri: str
     group: str
     extra: dict[str, Any]
+    watchers: list[BaseTrigger] = []

Review Comment:
   Do we need to use `field` instead? Not sure how attrs deals with default values.



##########
airflow/dag_processing/collection.py:
##########
@@ -425,3 +427,51 @@ def add_task_asset_references(
                 for task_id, asset_id in referenced_outlets
                 if (task_id, asset_id) not in orm_refs
             )
+
+    def add_asset_trigger_references(
+        self, assets: dict[tuple[str, str], AssetModel], *, session: Session
+    ) -> None:
+        # Update references from assets being used
+        for name_uri, asset in self.assets.items():
+            asset_model = assets[name_uri]
+            trigger_class_path_to_asset_dict: dict[str, BaseTrigger] = {
+                trigger.serialize()[0]: trigger for trigger in asset.watchers
+            }
+
+            trigger_class_paths_from_asset: set[str] = 
set(trigger_class_path_to_asset_dict.keys())
+            trigger_class_paths_from_asset_model: set[str] = {
+                trigger.classpath for trigger in asset_model.triggers
+            }
+
+            # Optimization: no diff between the DB and DAG definitions, no 
update needed
+            if trigger_class_paths_from_asset == 
trigger_class_paths_from_asset_model:
+                continue
+
+            refs_to_add = trigger_class_paths_from_asset - 
trigger_class_paths_from_asset_model
+            refs_to_remove = trigger_class_paths_from_asset_model - 
trigger_class_paths_from_asset
+
+            # Remove old references
+            asset_model.triggers = [
+                trigger for trigger in asset_model.triggers if 
trigger.classpath not in refs_to_remove
+            ]
+
+            # Add new references
+            for trigger_class_path in refs_to_add:
+                trigger_model = session.scalar(
+                    select(Trigger).where(Trigger.classpath == 
trigger_class_path).limit(1)
+                )
+
+                # Create the trigger in the DB if it does not exist
+                if not trigger_model:
+                    trigger_model = 
Trigger.from_object(trigger_class_path_to_asset_dict[trigger_class_path])
+                    session.add(trigger_model)
+
+                asset_model.triggers.append(trigger_model)
+
+        # Remove references from assets no longer used
+        all_assets = session.scalars(select(AssetModel))
+        # orphan_assets = set()
+        for asset_model in all_assets:
+            if (asset_model.name, asset_model.uri) not in self.assets:
+                asset_model.triggers = []
+                # orphan_assets.add(asset_model.id)

Review Comment:
   ```suggestion
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@airflow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to