Brooke-white commented on a change in pull request #18447: URL: https://github.com/apache/airflow/pull/18447#discussion_r722744704
########## File path: airflow/providers/amazon/aws/operators/redshift.py ########## @@ -0,0 +1,73 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from typing import List, Optional, Union + +from airflow.models import BaseOperator +from airflow.providers.amazon.aws.hooks.redshift import RedshiftSQLHook + + +class RedshiftSQLOperator(BaseOperator): + """ + Executes SQL Statements against an Amazon Redshift cluster + + .. seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:RedshiftSQLOperator` + + :param sql: the sql code to be executed + :type sql: Can receive a str representing a sql statement, + a list of str (sql statements) Review comment: fixed in 2456aaf118ae3955809ccda187cd16e32e1172d6 ########## File path: docs/apache-airflow-providers-amazon/connections/redshift.rst ########## @@ -0,0 +1,82 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. _howto/connection:redshift: + +Amazon Redshift Connection +========================== + +The Redshift connection type enables integrations with Redshift. + +Authenticating to Amazon Redshift +--------------------------------- + +Authentication may be performed using any of the authentication methods supported by `redshift_connector <https://github.com/aws/amazon-redshift-python-driver>`_ such as via direct credentials, IAM authentication, or using an Identity Provider (IdP) plugin. + +Default Connection IDs +----------------------- + +The default connection ID is ``redshift_default``. + +Configuring the Connection +-------------------------- + + +User + Specify the username to use for authentication with Amazon Redshift. + +Password + Specify the password to use for authentication with Amazon Redshift. + +Host + Specify the Amazon Redshift hostname. + +Database + Specify the Amazon Redshift database name. + +Extra + Specify the extra parameters (as a json dictionary) that can be used in + Amazon Redshift connection. For a complete list of supported parameters + please see the `documentation <https://github.com/aws/amazon-redshift-python-driver#connection-parameters>`_ + for redshift_connector. + + +When specifying the connection in an environment variable you should specify +it using URI syntax. + +Note that all components of the URI should be URL-encoded. + +Examples +-------- + +Database Authentication + +.. 
code-block:: bash + + AIRFLOW_CONN_REDSHIFT_DEFAULT=redshift://awsuser:passw...@redshift-cluster-1.123456789.us-west-1.redshift.amazonaws.com:5439/?database=dev&ssl=True + +IAM Authentication using AWS Profile + +.. code-block:: bash + + AIRFLOW_CONN_REDSHIFT_DEFAULT=redshift://:@:/?database=dev&iam=True&db_user=awsuser&cluster_identifier=redshift-cluster-1&profile=default Review comment: Thank you for noticing this detail! I've taken your advice and reworked these examples to use `Connection` and `get_uri()` in 6cfa652, so readers can see what the generated URL looks like. This approach is much more readable than the prior. Thanks for sharing re: SSM, it would definitely be useful in simplifying this use case :) ########## File path: airflow/providers/amazon/aws/operators/redshift.py ########## @@ -0,0 +1,73 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from typing import List, Optional, Union + +from airflow.models import BaseOperator +from airflow.providers.amazon.aws.hooks.redshift import RedshiftSQLHook + + +class RedshiftSQLOperator(BaseOperator): + """ + Executes SQL Statements against an Amazon Redshift cluster + + .. 
seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:RedshiftSQLOperator` + + :param sql: the sql code to be executed + :type sql: Can receive a str representing a sql statement, + a list of str (sql statements) + :param redshift_conn_id: reference to + :ref:`Amazon Redshift connection id<howto/connection:redshift>` + :type redshift_conn_id: str + :param parameters: (optional) the parameters to render the SQL query with. + :type parameters: dict or iterable + :param autocommit: if True, each command is automatically committed. + (default value: False) + :type autocommit: bool + """ + + template_fields = ('sql',) + template_ext = ('.sql',) + + def __init__( + self, + *, + sql: Union[str, List[str]], + redshift_conn_id: str = 'redshift_default', + parameters: Optional[dict] = None, Review comment: good catch -- thanks for noticing this. fixed in d7899a7 ########## File path: tests/providers/amazon/aws/hooks/test_redshift.py ########## @@ -103,3 +107,67 @@ def test_cluster_status_returns_available_cluster(self): hook = RedshiftHook(aws_conn_id='aws_default') status = hook.cluster_status('test_cluster') assert status == 'available' + + +class TestRedshiftSQLHookConn(unittest.TestCase): + def setUp(self): + super().setUp() + + self.connection = Connection( + conn_type='redshift', login='login', password='password', host='host', port=5439, schema="dev" + ) + + self.db_hook = RedshiftSQLHook() + self.db_hook.get_connection = mock.Mock() + self.db_hook.get_connection.return_value = self.connection + + def test_get_uri(self): + expected = 'redshift+redshift_connector://login:password@host:5439/dev' + x = self.db_hook.get_uri() + assert x == expected + + @mock.patch('airflow.providers.amazon.aws.hooks.redshift.redshift_connector.connect') + def test_get_conn(self, mock_connect): + self.db_hook.get_conn() + mock_connect.assert_called_once_with( + user='login', password='password', host='host', port=5439, database='dev' 
+ ) + + @mock.patch('airflow.providers.amazon.aws.hooks.redshift.redshift_connector.connect') + def test_get_conn_extra(self, mock_connect): + self.connection.extra = json.dumps( + { + "iam": True, + "cluster_identifier": "my-test-cluster", + "profile": "default", + } + ) + self.db_hook.get_conn() + mock_connect.assert_called_once_with( + user='login', + password='password', + host='host', + port=5439, + cluster_identifier="my-test-cluster", + profile="default", + database='dev', + iam=True, + ) + + @parameterized.expand( + [ + ({}, {}, {}), + ({"login": "test"}, {}, {"user": "test"}), + ({}, {"user": "test"}, {"user": "test"}), + ({"login": "original"}, {"user": "overridden"}, {"user": "overridden"}), + ({"login": "test1"}, {"password": "test2"}, {"user": "test1", "password": "test2"}), + ], + ) + @mock.patch('airflow.providers.amazon.aws.hooks.redshift.redshift_connector.connect') + def test_get_conn_overrides_correctly(self, test_args, test_kwargs, expected_call_args, mock_connect): Review comment: good point -- i've fixed this in 3821f40 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
