ashb commented on a change in pull request #11612:
URL: https://github.com/apache/airflow/pull/11612#discussion_r507609214
##########
File path: airflow/providers/amazon/aws/hooks/sagemaker.py
##########
@@ -105,18 +105,18 @@ def secondary_training_status_message(job_description,
prev_description):
:return: Job status string to be printed.
"""
- if (
- job_description is None
- or job_description.get('SecondaryStatusTransitions') is None
- or len(job_description.get('SecondaryStatusTransitions')) == 0
- ):
+ if job_description is None:
+ return ''
+
+ secondary_status_transitions =
job_description.get('SecondaryStatusTransitions')
+ if secondary_status_transitions is None or
len(secondary_status_transitions) == 0:
Review comment:
```suggestion
if len(job_description.get('SecondaryStatusTransitions', [])) == 0:
```
##########
File path: airflow/providers/amazon/aws/operators/sagemaker_base.py
##########
@@ -41,12 +41,12 @@ class SageMakerBaseOperator(BaseOperator):
integer_fields = [] # type: Iterable[Iterable[str]]
@apply_defaults
- def __init__(self, *, config, aws_conn_id='aws_default', **kwargs):
+ def __init__(self, *, config: dict, aws_conn_id: str = 'aws_default',
**kwargs):
super().__init__(**kwargs)
self.aws_conn_id = aws_conn_id
self.config = config
- self.hook = None
+ self.hook = SageMakerHook(aws_conn_id=self.aws_conn_id)
Review comment:
Please don't do this -- hooks can instantiate resources, and so should
not be created in operator constructors, but only created when needed.
Either annotate this as a `self.hook: Optional[SageMakerHook] = None`
or
```python
@cached_property
def hook(self):
return SageMakerHook(aws_conn_id=self.aws_conn_id)
```
(you'll need an import for that last one, we already use this elsewhere,
aws/base_hook.py for instance.
##########
File path: airflow/providers/amazon/aws/hooks/sagemaker.py
##########
@@ -298,7 +298,7 @@ def multi_stream_iter(self, log_group, streams,
positions=None):
self.logs_hook.get_log_events(log_group, s,
positions[s].timestamp, positions[s].skip)
for s in streams
]
- events = []
+ events: List[Optional[Any]] = []
Review comment:
`Optional[Any]` seems a bit odd -- Any would do.
##########
File path: airflow/providers/amazon/aws/sensors/sagemaker_training.py
##########
@@ -72,6 +75,15 @@ def get_sagemaker_response(self):
if self.print_log:
if not self.log_resource_inited:
self.init_log_resource(self.get_hook())
+ if (
+ self.instance_count is None
+ or self.state is None
+ or self.last_description is None
+ or self.last_describe_job_call is None
+ ):
+ raise AirflowException(
+ "instance_count, state, last_description,
last_describe_job_call must be specified"
Review comment:
Are you sure about this?
If this is the case, we should raise this in the constructor so it is a DAG
parse time error, rather than a run/execution time error
##########
File path: airflow/providers/amazon/aws/sensors/sagemaker_training.py
##########
@@ -72,6 +75,15 @@ def get_sagemaker_response(self):
if self.print_log:
if not self.log_resource_inited:
self.init_log_resource(self.get_hook())
+ if (
+ self.instance_count is None
+ or self.state is None
+ or self.last_description is None
+ or self.last_describe_job_call is None
+ ):
+ raise AirflowException(
Review comment:
```suggestion
raise ValueError(
```
is more appropriate than an AirflowException.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]