josh-fell commented on a change in pull request #21500:
URL: https://github.com/apache/airflow/pull/21500#discussion_r804755557
##########
File path: airflow/providers/amazon/aws/sensors/s3.py
##########
@@ -77,31 +77,24 @@ def __init__(
**kwargs,
):
super().__init__(**kwargs)
-
self.bucket_name = bucket_name
self.bucket_key = bucket_key
self.wildcard_match = wildcard_match
self.aws_conn_id = aws_conn_id
self.verify = verify
self.hook: Optional[S3Hook] = None
- def poke(self, context: 'Context'):
-
+ def _resolve_bucket_and_key(self):
+ """If key is URI, parse bucket"""
if self.bucket_name is None:
- parsed_url = urlparse(self.bucket_key)
- if parsed_url.netloc == '':
- raise AirflowException('If key is a relative path from root,
please provide a bucket_name')
- self.bucket_name = parsed_url.netloc
- self.bucket_key = parsed_url.path.lstrip('/')
+ self.bucket_name, self.bucket_key =
S3Hook.parse_s3_url(self.bucket_key)
else:
parsed_url = urlparse(self.bucket_key)
if parsed_url.scheme != '' or parsed_url.netloc != '':
- raise AirflowException(
- 'If bucket_name is provided, bucket_key'
- ' should be relative path from root'
- ' level, rather than a full s3:// url'
- )
+ raise AirflowException('If bucket_name provided, bucket_key
must be relative path, not URI.')
Review comment:
```suggestion
raise AirflowException('If bucket_name is provided,
bucket_key must be a relative path, not a URI.')
```
##########
File path: airflow/providers/amazon/aws/sensors/s3.py
##########
@@ -77,31 +77,24 @@ def __init__(
**kwargs,
):
super().__init__(**kwargs)
-
self.bucket_name = bucket_name
self.bucket_key = bucket_key
self.wildcard_match = wildcard_match
self.aws_conn_id = aws_conn_id
self.verify = verify
self.hook: Optional[S3Hook] = None
- def poke(self, context: 'Context'):
-
+ def _resolve_bucket_and_key(self):
+ """If key is URI, parse bucket"""
if self.bucket_name is None:
- parsed_url = urlparse(self.bucket_key)
- if parsed_url.netloc == '':
- raise AirflowException('If key is a relative path from root,
please provide a bucket_name')
- self.bucket_name = parsed_url.netloc
- self.bucket_key = parsed_url.path.lstrip('/')
+ self.bucket_name, self.bucket_key =
S3Hook.parse_s3_url(self.bucket_key)
else:
parsed_url = urlparse(self.bucket_key)
Review comment:
Doesn't have to happen in this PR, but I wonder if
`S3Hook.parse_s3_url()` could be augmented with some sort of configurable check
for validating a URI vs. a relative path? That would centralize some of this logic
there without having `urlparse()` in different places. Anyway, just a thought.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]