This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 22c7c6136 [core] Adjust VectoredReadable to better read performance (#3430)
22c7c6136 is described below
commit 22c7c6136619563fe026dd854a7a05c10ee30ea8
Author: Jingsong Lee <[email protected]>
AuthorDate: Thu May 30 15:28:21 2024 +0800
[core] Adjust VectoredReadable to better read performance (#3430)
---
.../apache/paimon/benchmark/TableReadBenchmark.java | 13 +++++++++++++
.../org/apache/paimon/fs/VectoredReadUtils.java | 21 ++++++++++++++++++++-
.../java/org/apache/paimon/fs/VectoredReadable.java | 2 +-
3 files changed, 34 insertions(+), 2 deletions(-)
diff --git
a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/TableReadBenchmark.java
b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/TableReadBenchmark.java
index 5ceaa95f4..2c6a00dfa 100644
---
a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/TableReadBenchmark.java
+++
b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/TableReadBenchmark.java
@@ -94,6 +94,19 @@ public class TableReadBenchmark extends TableBenchmark {
*/
}
+ @Test
+ public void testOrcReadProjection1() throws Exception {
+ innerTestProjection(
+ Collections.singletonMap("orc", prepareData(orc(), "orc")), new int[] {10});
+ /*
+ * OpenJDK 64-Bit Server VM 1.8.0_292-b10 on Mac OS X 10.16
+ * Apple M1 Pro
+ * read: Best/Avg Time(ms) Row Rate(K/s) Per Row(ns) Relative
+ * ------------------------------------------------------------------------------------------------
+ * OPERATORTEST_read_read-orc 716 / 728 4187.4 238.8 1.0X
+ */
+ }
+
private Options orc() {
Options options = new Options();
options.set(CoreOptions.FILE_FORMAT, CoreOptions.FILE_FORMAT_ORC);
diff --git
a/paimon-common/src/main/java/org/apache/paimon/fs/VectoredReadUtils.java
b/paimon-common/src/main/java/org/apache/paimon/fs/VectoredReadUtils.java
index de7974aca..156bdb81f 100644
--- a/paimon-common/src/main/java/org/apache/paimon/fs/VectoredReadUtils.java
+++ b/paimon-common/src/main/java/org/apache/paimon/fs/VectoredReadUtils.java
@@ -19,6 +19,7 @@
package org.apache.paimon.fs;
import org.apache.paimon.utils.BlockingExecutor;
+import org.apache.paimon.utils.IOUtils;
import java.io.EOFException;
import java.io.IOException;
@@ -46,10 +47,17 @@ public class VectoredReadUtils {
return;
}
+ List<? extends FileRange> sortRanges = validateAndSortRanges(ranges);
List<CombinedRange> combinedRanges =
- mergeSortedRanges(validateAndSortRanges(ranges), readable.minSeekForVectorReads());
+ mergeSortedRanges(sortRanges, readable.minSeekForVectorReads());
int parallelism = readable.parallelismForVectorReads();
+
+ if (combinedRanges.size() == 1) {
+ fallbackToReadSequence(readable, sortRanges);
+ return;
+ }
+
BlockingExecutor executor = new BlockingExecutor(IO_THREAD_POOL, parallelism);
long batchSize = readable.batchSizeForVectorReads();
for (CombinedRange combinedRange : combinedRanges) {
@@ -69,6 +77,17 @@ public class VectoredReadUtils {
}
}
+ private static void fallbackToReadSequence(
+ VectoredReadable readable, List<? extends FileRange> ranges) throws IOException {
+ SeekableInputStream in = (SeekableInputStream) readable;
+ for (FileRange range : ranges) {
+ byte[] bytes = new byte[range.getLength()];
+ in.seek(range.getOffset());
+ IOUtils.readFully(in, bytes);
+ range.getData().complete(bytes);
+ }
+ }
+
private static void readSingleRange(VectoredReadable readable, FileRange range) {
if (range.getLength() == 0) {
range.getData().complete(new byte[0]);
diff --git
a/paimon-common/src/main/java/org/apache/paimon/fs/VectoredReadable.java
b/paimon-common/src/main/java/org/apache/paimon/fs/VectoredReadable.java
index a3d3faace..2aa28b662 100644
--- a/paimon-common/src/main/java/org/apache/paimon/fs/VectoredReadable.java
+++ b/paimon-common/src/main/java/org/apache/paimon/fs/VectoredReadable.java
@@ -59,7 +59,7 @@ public interface VectoredReadable {
/** The batch size of data read by a single parallelism. */
default int batchSizeForVectorReads() {
- return 1024 * 1024;
+ return 4 * 1024 * 1024;
}
/** The read parallelism for vector reads. */