RosterIn commented on a change in pull request #4996: [AIRFLOW-4184] Add an
athena helper to insert into table
URL: https://github.com/apache/airflow/pull/4996#discussion_r279673416
##########
File path: airflow/contrib/hooks/aws_athena_hook.py
##########
@@ -148,3 +156,105 @@ def stop_query(self, query_execution_id):
:return: dict
"""
return
self.conn.stop_query_execution(QueryExecutionId=query_execution_id)
+
+
+class AWSAthenaHelpers(AWSAthenaHook):
+ """
+ The Athena Helpers contains helper methods to execute queries against
+ Athena. The methods can be used directly by operators.
+ """
+
+ def __init__(self, aws_conn_id='aws_default', region_name=None, *args,
**kwargs):
+ super(AWSAthenaHelpers, self).__init__(
+ aws_conn_id=aws_conn_id, region_name=region_name, **kwargs)
+ self.region_name = region_name
+ self.s3_hook = None
+ self.glue_hook = None
+
+ def get_s3_hook(self):
+ """
+ check if s3 hook exists already or create one and return it
+ :return: s3 hook
+ """
+ if not self.s3_hook:
+ self.s3_hook = S3Hook(
+ aws_conn_id=self.aws_conn_id, verify=self.verify)
+ return self.s3_hook
+
+ def get_glue_hook(self):
+ """
+ check if glue hook exists already or create one and return it
+ :return: glue hook
+ """
+ if not self.glue_hook:
+ self.glue_hook = AwsGlueCatalogHook(
+ aws_conn_id=self.aws_conn_id, region_name=self.region_name)
+ return self.glue_hook
+
+ def run_insert_into_table(self, src_db, src_table, dst_db, dst_table,
mode='error'):
+ """
+ insert data in s3 from the source table to the destination table
Review comment:
@bryanyang0528
Can you please explain the use case for this?
When I want to add data to an Athena table, I just upload the files to the
table's existing S3 directory, and it works like a charm because Athena scans
the whole folder.
For cases where I upload files to a different path, I only need to add a
partition to the table that points at the new location:
`ALTER TABLE ____ ADD IF NOT EXISTS PARTITION (dt=____) LOCATION 's3://____'`
https://stackoverflow.com/questions/50164744/how-to-efficiently-append-new-data-to-table-in-aws-athena
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services