JDarDagran commented on code in PR #35794:
URL: https://github.com/apache/airflow/pull/35794#discussion_r1402686751
##########
airflow/providers/amazon/aws/hooks/redshift_sql.py:
##########
@@ -174,3 +175,63 @@ def get_conn(self) -> RedshiftConnection:
conn_kwargs_dejson = self.conn.extra_dejson
conn_kwargs: dict = {**conn_params, **conn_kwargs_dejson}
return redshift_connector.connect(**conn_kwargs)
+
+ def get_openlineage_database_info(self, connection) -> DatabaseInfo:
+ """Returns Redshift specific information for OpenLineage."""
+ from airflow.providers.openlineage.sqlparser import DatabaseInfo
+
+ authority = self._get_openlineage_redshift_authority_part(connection)
+
+ return DatabaseInfo(
+ scheme="redshift",
+ authority=authority,
+ database=connection.schema,
+ information_schema_table_name="SVV_REDSHIFT_COLUMNS",
+ information_schema_columns=[
+ "schema_name",
+ "table_name",
+ "column_name",
+ "ordinal_position",
+ "data_type",
+ "database_name",
+ ],
+ is_information_schema_cross_db=True,
+ use_flat_cross_db_query=True,
+ )
+
+ def _get_openlineage_redshift_authority_part(self, connection) -> str:
+ from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
+
+ port = connection.port or 5439
+
+ cluster_identifier = None
+
+ if connection.extra_dejson.get("iam", False):
+ cluster_identifier =
connection.extra_dejson.get("cluster_identifier")
+ region_name = AwsBaseHook(aws_conn_id=self.aws_conn_id).region_name
Review Comment:
It's not very well documented in `redshift-connector` what the precedence is.
Now I think these are the cases:
1. IAM = True:
a. `cluster_identifier` from `connect` argument, rest from default AWS
profile.
b. `cluster_identifier` but also `access_key_id`,
`secret_access_key`, `session_token`, `region` from `connect` arguments
c. `cluster_identifier` from `connect` argument but without credentials
passed explicitly; not sure whether `region` takes precedence over the value
from the AWS default profile
2. IAM = False: attempt to retrieve region name from hostname
For 1.c, I think `get_iam_token` does not include `region` as an argument, so it
defaults to the AWS default profile's value.
Feedback from anyone closer to this connector implementation would be really
helpful :)
I was looking at
https://github.com/aws/amazon-redshift-python-driver/blob/master/README.rst
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]