GitHub user Susmit07 edited a comment on the discussion: S3 multipart upload 
for parquet

In a nutshell, the docs should be updated with the following:

S3 PutObject API:
-------------------

```
    /**
     * Streams the HDFS file located at `hdfsFilePath` as a source of `ByteString` elements.
     *
     * The file system handle is obtained from a newly constructed Hadoop `Configuration`.
     */
    private def streamHDFSFile: Source[ByteString, _] = {
      val conf = new Configuration()
      val sourcePath = new Path(hdfsFilePath)

      // Resolve the file system for this configuration and hand it to the HDFS connector.
      HdfsSource.data(FileSystem.get(conf), sourcePath)
    }
```

```
    /**
     * Returns the length that HDFS reports for the file at `hdfsFilePath`.
     *
     * The value is meant to be used as the content length of the upload.
     */
    private def getHDFSFileSize: Long = {
      val conf = new Configuration()
      val target = new Path(hdfsFilePath)
      val status = FileSystem.get(conf).getFileStatus(target)
      status.getLen
    }
```

```
    /**
     * Uploads the HDFS file to S3 with a single `putObject` request.
     *
     * The object is sent as `application/octet-stream` with empty `S3Headers`,
     * to `bucket` under the key `bucketKey`.
     *
     * @param contentLength length of the object being uploaded, passed through to `S3.putObject`
     * @return a future that completes with `Done` after the upload has emitted its object
     *         metadata, or fails if the upload stream fails
     */
    private def uploadSmallFileToS3(contentLength: Long): Future[_] = {
      val upload = S3.putObject(
        bucket,
        bucketKey,
        streamHDFSFile,
        contentLength,
        ContentTypes.`application/octet-stream`,
        S3Headers.empty
      )

      // Run the stream; the first (and only expected) element is the uploaded object's metadata.
      upload.runWith(Sink.head).map { objectMetadata =>
        // eTag is optional; unwrap it so the message shows the tag itself rather than "Some(...)".
        val eTag = objectMetadata.eTag.getOrElse("<unknown>")
        println(s"Uploaded successfully with ETag: $eTag")
        Done
      }
    }
```

Configuration:
---------------

application.conf (in src/main/resources):

```
s3 {
  region = "us-west-2"
  endpoint = ""
  credentials {
    access-key-id = ""
    secret-access-key = ""
  }
  # Very important: without path-style access, bucket discovery fails.
  path-style-access = true
  marker {
    bucket = ${S3_MARKER_BUCKET}
  }
  sink {
    bucket = ""
  }
  ssl-enabled = true
}
```
Jokes aside, I hope I get an acknowledgement :)

GitHub link: 
https://github.com/apache/pekko-connectors/discussions/870#discussioncomment-10996643

----
This is an automatically sent email for [email protected].
To unsubscribe, please send an email to: 
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to