rohangarg commented on code in PR #13741:
URL: https://github.com/apache/druid/pull/13741#discussion_r1098343416


##########
extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnector.java:
##########
@@ -78,35 +100,117 @@ public InputStream readRange(String path, long from, long 
size) throws IOExcepti
           size
       );
     }
-    return buildInputStream(new GetObjectRequest(config.getBucket(), 
objectPath(path)).withRange(from, from + size - 1));
+    return buildInputStream(
+        new GetObjectRequest(config.getBucket(), 
objectPath(path)).withRange(from, from + size - 1),
+        path
+    );
   }
 
-  private RetryingInputStream buildInputStream(GetObjectRequest 
getObjectRequest) throws IOException
+  private InputStream buildInputStream(GetObjectRequest getObjectRequest, 
String path)
   {
-    return new RetryingInputStream<>(
-        getObjectRequest,
-        new ObjectOpenFunction<GetObjectRequest>()
-        {
-          @Override
-          public InputStream open(GetObjectRequest object)
-          {
-            return s3Client.getObject(object).getObjectContent();
-          }
+    // fetch the size of the whole object to make chunks
+    long readEnd;
+    AtomicLong currReadStart = new AtomicLong(0);
+    if (getObjectRequest.getRange() != null) {
+      currReadStart.set(getObjectRequest.getRange()[0]);
+      readEnd = getObjectRequest.getRange()[1] + 1;
+    } else {
+      readEnd = this.s3Client.getObjectMetadata(config.getBucket(), 
objectPath(path)).getInstanceLength();
+    }
 
-          @Override
-          public InputStream open(GetObjectRequest object, long offset)
-          {
-            final GetObjectRequest offsetObjectRequest = new GetObjectRequest(
-                object.getBucketName(),
-                object.getKey()
+    // build a sequence input stream from chunks
+    return new SequenceInputStream(new Enumeration<InputStream>()
+    {
+      @Override
+      public boolean hasMoreElements()
+      {
+        // don't stop until the whole object is downloaded
+        return currReadStart.get() < readEnd;
+      }
+
+      @Override
+      public InputStream nextElement()
+      {
+        File outFile = new File(config.getTempDir().getAbsolutePath(), 
UUID.randomUUID().toString());
+        // in a single chunk, only download a maximum of 
DOWNLOAD_MAX_CHUNK_SIZE
+        long endPoint = Math.min(currReadStart.get() + 
DOWNLOAD_MAX_CHUNK_SIZE, readEnd) - 1;
+        try {
+          if (!outFile.createNewFile()) {
+            throw new IOE(
+                StringUtils.format(
+                    "Could not create temporary file [%s] for copying [%s]",
+                    outFile.getAbsolutePath(),
+                    objectPath(path)
+                )
             );
-            offsetObjectRequest.setRange(offset);
-            return open(offsetObjectRequest);
           }
-        },
-        S3Utils.S3RETRY,
-        config.getMaxRetry()
-    );
+          FileUtils.copyLarge(
+              () -> new RetryingInputStream<>(
+                  new GetObjectRequest(
+                      config.getBucket(),
+                      objectPath(path)
+                  ).withRange(currReadStart.get(), endPoint),
+                  new ObjectOpenFunction<GetObjectRequest>()
+                  {
+                    @Override
+                    public InputStream open(GetObjectRequest object)
+                    {
+                      return s3Client.getObject(object).getObjectContent();
+                    }
+
+                    @Override
+                    public InputStream open(GetObjectRequest object, long 
offset)
+                    {
+                      if (object.getRange() != null) {
+                        long[] oldRange = object.getRange();
+                        object.setRange(oldRange[0] + offset, oldRange[1]);
+                      } else {
+                        object.setRange(offset);
+                      }
+                      return open(object);
+                    }
+                  },
+                  S3Utils.S3RETRY,
+                  config.getMaxRetry()
+              ),
+              outFile,
+              new byte[8 * 1024],
+              Predicates.alwaysFalse(),
+              1,
+              StringUtils.format("Retrying copying of [%s] to [%s]", 
objectPath(path), outFile.getAbsolutePath())
+          );
+        }
+        catch (IOException e) {
+          throw new UncheckedIOException(e);
+        }
+        try {
+          AtomicBoolean isClosed = new AtomicBoolean(false);
+          return new FileInputStream(outFile)
+          {
+            @Override
+            public void close() throws IOException
+            {
+              // close should be idempotent
+              if (isClosed.get()) {
+                return;
+              }

Review Comment:
   I agree too that if close is being called multiple times, it could indicate 
a loose contract in the execution layer. I will check that independently - 
I am trying to push this change through since it unblocks the use of the 
durable storage feature in MSQ.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to