phanikumv commented on code in PR #31360:
URL: https://github.com/apache/airflow/pull/31360#discussion_r1227644081


##########
airflow/providers/sftp/operators/sftp.py:
##########
@@ -188,3 +191,85 @@ def execute(self, context: Any) -> str | list[str] | None:
             raise AirflowException(f"Error while transferring {file_msg}, 
error: {str(e)}")
 
         return self.local_filepath
+
+    def get_openlineage_facets_on_start(self):
+        try:
+            from openlineage.client.run import Dataset
+
+            from airflow.providers.openlineage.extractors import 
OperatorLineage
+        except ImportError:
+            return None
+
+        scheme = "file"
+        local_host = socket.gethostname()
+        try:
+            local_host = socket.gethostbyname(local_host)
+        except Exception as e:
+            self.log.warning(
+                f"Failed to resolve local hostname. Using the hostname got by 
socket.gethostbyname() without resolution. {e}",  # noqa: E501
+                exc_info=True,
+            )
+
+        if self.sftp_hook is not None:
+            hook = self.sftp_hook
+        elif self.ssh_hook is not None:
+            hook = self.ssh_hook
+        else:
+            hook = SFTPHook(ssh_conn_id=self.ssh_conn_id)
+
+        if self.remote_host is not None:
+            remote_host = self.remote_host
+        else:
+            remote_host = hook.get_connection(hook.ssh_conn_id).host
+        try:
+            remote_host = socket.gethostbyname(remote_host)
+        except Exception as e:
+            self.log.warning(
+                f"Failed to resolve remote hostname. Using the provided 
hostname without resolution. {e}",  # noqa: E501
+                exc_info=True,
+            )
+
+        if hasattr(hook, "port"):
+            remote_port = hook.port
+        elif hasattr(hook, "ssh_hook"):
+            remote_port = hook.ssh_hook.port
+
+        # Since v4.1.0, SFTPOperator accepts both a string (single file) and a 
list of
+        # strings (multiple files) as local_filepath and remote_filepath, and 
internally
+        # keeps them as list in both cases. But before 4.1.0, only single 
string is
+        # allowed. So we consider both cases here for backward compatibility.
+        if isinstance(self.local_filepath, str):
+            local_filepath = [self.local_filepath]
+        else:
+            local_filepath = self.local_filepath
+        if isinstance(self.remote_filepath, str):
+            remote_filepath = [self.remote_filepath]
+        else:
+            remote_filepath = self.remote_filepath
+
+        local_datasets = [
+            Dataset(namespace=self._get_namespace(scheme, local_host, None, 
path), name=path)
+            for path in local_filepath
+        ]
+        remote_datasets = [
+            Dataset(namespace=self._get_namespace(scheme, remote_host, 
remote_port, path), name=path)
+            for path in remote_filepath
+        ]
+        print(remote_datasets)

Review Comment:
   Please remove the leftover debugging `print(remote_datasets)` call here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to