ashb commented on a change in pull request #4996: [AIRFLOW-4184] Add an athena
helper to insert into table
URL: https://github.com/apache/airflow/pull/4996#discussion_r270354946
##########
File path: airflow/contrib/hooks/aws_athena_hook.py
##########
@@ -148,3 +156,105 @@ def stop_query(self, query_execution_id):
:return: dict
"""
return
self.conn.stop_query_execution(QueryExecutionId=query_execution_id)
+
+
+class AWSAthenaHelpers(AWSAthenaHook):
+ """
+ The Athena Helpers contains helper methods to execute queries against
+ Athena. The methods can be used directly by operators.
+ """
+
+ def __init__(self, aws_conn_id='aws_default', region_name=None, *args,
**kwargs):
+ super(AWSAthenaHelpers, self).__init__(
+ aws_conn_id=aws_conn_id, region_name=region_name, **kwargs)
+ self.region_name = region_name
+ self.s3_hook = None
+ self.glue_hook = None
+
+ def get_s3_hook(self):
+ """
+ check if s3 hook exists already or create one and return it
+ :return: s3 hook
+ """
+ if not self.s3_hook:
+ self.s3_hook = S3Hook(
+ aws_conn_id=self.aws_conn_id, verify=self.verify)
+ return self.s3_hook
+
+ def get_glue_hook(self):
+ """
+ check if glue hook exists already or create one and return it
+ :return: glue hook
+ """
+ if not self.glue_hook:
+ self.glue_hook = AwsGlueCatalogHook(
+ aws_conn_id=self.aws_conn_id, region_name=self.region_name)
+ return self.glue_hook
+
+ def run_insert_into_table(self, src_db, src_table, dst_db, dst_table,
mode='error'):
+ """
+ insert data in s3 from the source table to the destination table
Review comment:
Why is this going via S3 rather than issuing an Athena query?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services