ashb commented on a change in pull request #17937:
URL: https://github.com/apache/airflow/pull/17937#discussion_r723132242
##########
File path: airflow/providers/amazon/aws/transfers/ftp_to_s3.py
##########
@@ -53,52 +64,89 @@ class FTPToS3Operator(BaseOperator):
:type acl_policy: str
"""
- template_fields = (
- 's3_bucket',
- 's3_key',
- 'ftp_path',
- )
+ template_fields = ('ftp_path', 's3_bucket', 's3_key', 'ftp_filenames',
's3_filenames')
def __init__(
self,
- s3_bucket,
- s3_key,
- ftp_path,
- ftp_conn_id='ftp_default',
- aws_conn_id='aws_default',
- replace=False,
- encrypt=False,
- gzip=False,
- acl_policy=None,
- *args,
+ *,
+ ftp_path: str,
+ s3_bucket: str,
+ s3_key: str,
+ ftp_filenames: Optional[Union[str, List[str]]] = None,
+ s3_filenames: Optional[Union[str, List[str]]] = None,
+ ftp_conn_id: str = 'ftp_default',
+ aws_conn_id: str = 'aws_default',
+ replace: bool = False,
+ encrypt: bool = False,
+ gzip: bool = False,
+ acl_policy: str = None,
**kwargs,
):
- super().__init__(*args, **kwargs)
+ super().__init__(**kwargs)
+ self.ftp_path = ftp_path
self.s3_bucket = s3_bucket
self.s3_key = s3_key
- self.ftp_path = ftp_path
+ self.ftp_filenames = ftp_filenames
+ self.s3_filenames = s3_filenames
self.aws_conn_id = aws_conn_id
self.ftp_conn_id = ftp_conn_id
self.replace = replace
self.encrypt = encrypt
self.gzip = gzip
self.acl_policy = acl_policy
- def execute(self, context):
- s3_hook = S3Hook(self.aws_conn_id)
- ftp_hook = FTPHook(ftp_conn_id=self.ftp_conn_id)
+ self.ftp_hook = FTPHook(ftp_conn_id=self.ftp_conn_id)
+
+ def __upload_to_s3_from_ftp(self, remote_filename, s3_file_key):
+ s3 = S3Hook(self.aws_conn_id)
with NamedTemporaryFile() as local_tmp_file:
- ftp_hook.retrieve_file(
- remote_full_path=self.ftp_path,
local_full_path_or_buffer=local_tmp_file.name
+ self.ftp_hook.retrieve_file(
+ remote_full_path=remote_filename,
local_full_path_or_buffer=local_tmp_file.name
)
- s3_hook.load_file(
+ s3.load_file(
filename=local_tmp_file.name,
- key=self.s3_key,
+ key=s3_file_key,
bucket_name=self.s3_bucket,
replace=self.replace,
encrypt=self.encrypt,
gzip=self.gzip,
acl_policy=self.acl_policy,
)
+ self.log.info(f'File upload to {s3_file_key}')
+
+ def execute(self, context):
+ if self.ftp_filenames:
+ if isinstance(self.ftp_filenames, str):
+ self.log.info(f'Getting files in {self.ftp_path}')
+
+ list_dir = self.ftp_hook.list_directory(
+ path=self.ftp_path,
+ )
+
+ if self.ftp_filenames == 'all':
Review comment:
```suggestion
if self.ftp_filenames == '*':
```
Using a string of `all` here feels odd/error-prone.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@airflow.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org