ashb commented on a change in pull request #4996: [AIRFLOW-4184] Add an athena 
helper to insert into table
URL: https://github.com/apache/airflow/pull/4996#discussion_r289008153
 
 

 ##########
 File path: airflow/contrib/hooks/aws_athena_hook.py
 ##########
 @@ -148,3 +156,105 @@ def stop_query(self, query_execution_id):
         :return: dict
         """
         return 
self.conn.stop_query_execution(QueryExecutionId=query_execution_id)
+
+
+class AWSAthenaHelpers(AWSAthenaHook):
+    """
+    The Athena Helpers contains helper methods to execute queries against
+    Athena. The methods can be used directly by operators.
+    """
+
+    def __init__(self, aws_conn_id='aws_default', region_name=None, *args, 
**kwargs):
+        super(AWSAthenaHelpers, self).__init__(
+            aws_conn_id=aws_conn_id, region_name=region_name, **kwargs)
+        self.region_name = region_name
+        self.s3_hook = None
+        self.glue_hook = None
+
+    def get_s3_hook(self):
+        """
+        check if s3 hook exists already or create one and return it
+        :return: s3 hook
+        """
+        if not self.s3_hook:
+            self.s3_hook = S3Hook(
+                aws_conn_id=self.aws_conn_id, verify=self.verify)
+        return self.s3_hook
+
+    def get_glue_hook(self):
+        """
+        check if glue hook exists already or create one and return it
+        :return: glue hook
+        """
+        if not self.glue_hook:
+            self.glue_hook = AwsGlueCatalogHook(
+                aws_conn_id=self.aws_conn_id, region_name=self.region_name)
+        return self.glue_hook
+
+    def run_insert_into_table(self, src_db, src_table, dst_db, dst_table, 
mode='error'):
+        """
+        insert data in s3 from the source table to the destination table
 
 Review comment:
   If (and it is still an if) we add this to Airflow, this function belongs in 
an Operator, not a Hook.
   
   Additionally you are putting S3 files in place but doing nothing to update 
the Glue catalog.
   
   Overall I am skeptical that this approach is the right way of doing it. In 
the past I have done this sort of thing from an EMR cluster using the 
Glue-compatible hive metastore to insert into tables.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to