romibuzi commented on code in PR #27893:
URL: https://github.com/apache/airflow/pull/27893#discussion_r1032472780
##########
airflow/providers/amazon/aws/hooks/glue.py:
##########
@@ -92,10 +93,51 @@ def __init__(
kwargs["client_type"] = "glue"
super().__init__(*args, **kwargs)
+ def create_glue_job_config(self) -> dict:
+ if self.s3_bucket is None:
+ raise AirflowException("Could not initialize glue job, error: Specify Parameter `s3_bucket`")
+
+ default_command = {
+ "Name": "glueetl",
+ "ScriptLocation": self.script_location,
+ }
+ command = self.create_job_kwargs.pop("Command", default_command)
+
+ s3_log_path = f"s3://{self.s3_bucket}/{self.s3_glue_logs}{self.job_name}"
+ execution_role = self.get_iam_execution_role()
+
+ if "WorkerType" in self.create_job_kwargs and "NumberOfWorkers" in
self.create_job_kwargs:
+ return dict(
+ Name=self.job_name,
+ Description=self.desc,
+ LogUri=s3_log_path,
+ Role=execution_role["Role"]["Arn"],
+ ExecutionProperty={"MaxConcurrentRuns":
self.concurrent_run_limit},
+ Command=command,
+ MaxRetries=self.retry_limit,
+ **self.create_job_kwargs,
+ )
+ else:
+ return dict(
+ Name=self.job_name,
+ Description=self.desc,
+ LogUri=s3_log_path,
+ Role=execution_role["Role"]["Arn"],
+ ExecutionProperty={"MaxConcurrentRuns":
self.concurrent_run_limit},
+ Command=command,
+ MaxRetries=self.retry_limit,
+ MaxCapacity=self.num_of_dpus,
+ **self.create_job_kwargs,
+ )
+
+ @cached_property
+ def glue_client(self):
+ """:return: AWS Glue client"""
+ return self.get_conn()
Review Comment:
Oh good catch! I have removed it and made use of `AwsBaseHook.get_conn()`
instead
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]