tatiana commented on code in PR #50392:
URL: https://github.com/apache/airflow/pull/50392#discussion_r2084127402


##########
providers/databricks/src/airflow/providers/databricks/hooks/databricks_sql.py:
##########
@@ -309,3 +318,83 @@ def bulk_dump(self, table, tmp_file):
 
     def bulk_load(self, table, tmp_file):
         raise NotImplementedError()
+
+    def get_openlineage_database_info(self, connection) -> DatabaseInfo:
+        from airflow.providers.openlineage.sqlparser import DatabaseInfo
+
+        return DatabaseInfo(
+            scheme=self.get_openlineage_database_dialect(connection),
+            authority=self._get_openlineage_authority(connection),
+            database=self.catalog,
+            information_schema_columns=[
+                "table_schema",
+                "table_name",
+                "column_name",
+                "ordinal_position",
+                "data_type",
+                "table_catalog",
+            ],
+            is_information_schema_cross_db=True,
+        )
+
+    def get_openlineage_database_dialect(self, _) -> str:
+        return "databricks"
+
+    def get_openlineage_default_schema(self) -> str | None:
+        return self.schema or "default"
+
+    def _get_openlineage_authority(self, _) -> str | None:
+        return self.host
+
+    def get_openlineage_database_specific_lineage(self, task_instance) -> OperatorLineage | None:
+        """
+        Generate OpenLineage metadata for a Databricks task instance based on executed query IDs.
+
+        If a single query ID is present, attach an `ExternalQueryRunFacet` to the lineage metadata.
+        If multiple query IDs are present, emits separate OpenLineage events for each query instead.
+
+        Note that `get_openlineage_database_specific_lineage` is usually called after task's execution,

Review Comment:
   The docstring says this method is "usually" called after the task's execution. In which circumstances would it not be called after task execution?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to