This is an automated email from the ASF dual-hosted git repository.
zuston pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git
The following commit(s) were added to refs/heads/master by this push:
new 9572b842 [ISSUE-451][Improvement] Read HDFS data files with random
sequence to distribute pressure (#452)
9572b842 is described below
commit 9572b8423fff8f3dfa52411dae1caa4d33f08663
Author: Junfan Zhang <[email protected]>
AuthorDate: Tue Jan 3 16:26:22 2023 +0800
[ISSUE-451][Improvement] Read HDFS data files with random sequence to
distribute pressure (#452)
### What changes were proposed in this pull request?
[Improvement] Read HDFS data files with random sequence to distribute
pressure #452
### Why are the changes needed?
PR https://github.com/apache/incubator-uniffle/pull/396 added support for
concurrently writing a single partition's data into multiple HDFS files, so it
is better for the client to read those HDFS data files in random order to distribute the pressure.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Existing UTs
---
.../uniffle/storage/handler/impl/HdfsClientReadHandler.java | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git
a/storage/src/main/java/org/apache/uniffle/storage/handler/impl/HdfsClientReadHandler.java
b/storage/src/main/java/org/apache/uniffle/storage/handler/impl/HdfsClientReadHandler.java
index 259fc7ac..8c76b368 100644
---
a/storage/src/main/java/org/apache/uniffle/storage/handler/impl/HdfsClientReadHandler.java
+++
b/storage/src/main/java/org/apache/uniffle/storage/handler/impl/HdfsClientReadHandler.java
@@ -18,8 +18,9 @@
package org.apache.uniffle.storage.handler.impl;
import java.io.FileNotFoundException;
-import java.util.Comparator;
+import java.util.Collections;
import java.util.List;
+import java.util.stream.Collectors;
import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
@@ -143,7 +144,10 @@ public class HdfsClientReadHandler extends
AbstractClientReadHandler {
LOG.warn("Can't create ShuffleReaderHandler for " + filePrefix, e);
}
}
-
readHandlers.sort(Comparator.comparing(HdfsShuffleReadHandler::getFilePrefix));
+ Collections.shuffle(readHandlers);
+ LOG.info("Reading order of HDFS files with name prefix: {}",
+ readHandlers.stream().map(x ->
x.filePrefix).collect(Collectors.toList())
+ );
}
}