This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 22c7c6136 [core] Adjust VectoredReadable to better read performance (#3430)
22c7c6136 is described below

commit 22c7c6136619563fe026dd854a7a05c10ee30ea8
Author: Jingsong Lee <[email protected]>
AuthorDate: Thu May 30 15:28:21 2024 +0800

    [core] Adjust VectoredReadable to better read performance (#3430)
---
 .../apache/paimon/benchmark/TableReadBenchmark.java | 13 +++++++++++++
 .../org/apache/paimon/fs/VectoredReadUtils.java     | 21 ++++++++++++++++++++-
 .../java/org/apache/paimon/fs/VectoredReadable.java |  2 +-
 3 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/TableReadBenchmark.java b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/TableReadBenchmark.java
index 5ceaa95f4..2c6a00dfa 100644
--- a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/TableReadBenchmark.java
+++ b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/TableReadBenchmark.java
@@ -94,6 +94,19 @@ public class TableReadBenchmark extends TableBenchmark {
          */
     }
 
+    @Test
+    public void testOrcReadProjection1() throws Exception {
+        innerTestProjection(
+                Collections.singletonMap("orc", prepareData(orc(), "orc")), new int[] {10});
+        /*
+         * OpenJDK 64-Bit Server VM 1.8.0_292-b10 on Mac OS X 10.16
+         * Apple M1 Pro
+         * read:                            Best/Avg Time(ms)    Row Rate(K/s)      Per Row(ns)   Relative
+         * ------------------------------------------------------------------------------------------------
+         * OPERATORTEST_read_read-orc            716 /  728           4187.4            238.8       1.0X
+         */
+    }
+
     private Options orc() {
         Options options = new Options();
         options.set(CoreOptions.FILE_FORMAT, CoreOptions.FILE_FORMAT_ORC);
diff --git a/paimon-common/src/main/java/org/apache/paimon/fs/VectoredReadUtils.java b/paimon-common/src/main/java/org/apache/paimon/fs/VectoredReadUtils.java
index de7974aca..156bdb81f 100644
--- a/paimon-common/src/main/java/org/apache/paimon/fs/VectoredReadUtils.java
+++ b/paimon-common/src/main/java/org/apache/paimon/fs/VectoredReadUtils.java
@@ -19,6 +19,7 @@
 package org.apache.paimon.fs;
 
 import org.apache.paimon.utils.BlockingExecutor;
+import org.apache.paimon.utils.IOUtils;
 
 import java.io.EOFException;
 import java.io.IOException;
@@ -46,10 +47,17 @@ public class VectoredReadUtils {
             return;
         }
 
+        List<? extends FileRange> sortRanges = validateAndSortRanges(ranges);
         List<CombinedRange> combinedRanges =
-                mergeSortedRanges(validateAndSortRanges(ranges), readable.minSeekForVectorReads());
+                mergeSortedRanges(sortRanges, readable.minSeekForVectorReads());
 
         int parallelism = readable.parallelismForVectorReads();
+
+        if (combinedRanges.size() == 1) {
+            fallbackToReadSequence(readable, sortRanges);
+            return;
+        }
+
         BlockingExecutor executor = new BlockingExecutor(IO_THREAD_POOL, parallelism);
         long batchSize = readable.batchSizeForVectorReads();
         for (CombinedRange combinedRange : combinedRanges) {
@@ -69,6 +77,17 @@ public class VectoredReadUtils {
         }
     }
 
+    private static void fallbackToReadSequence(
+            VectoredReadable readable, List<? extends FileRange> ranges) throws IOException {
+        SeekableInputStream in = (SeekableInputStream) readable;
+        for (FileRange range : ranges) {
+            byte[] bytes = new byte[range.getLength()];
+            in.seek(range.getOffset());
+            IOUtils.readFully(in, bytes);
+            range.getData().complete(bytes);
+        }
+    }
+
     private static void readSingleRange(VectoredReadable readable, FileRange range) {
         if (range.getLength() == 0) {
             range.getData().complete(new byte[0]);
diff --git a/paimon-common/src/main/java/org/apache/paimon/fs/VectoredReadable.java b/paimon-common/src/main/java/org/apache/paimon/fs/VectoredReadable.java
index a3d3faace..2aa28b662 100644
--- a/paimon-common/src/main/java/org/apache/paimon/fs/VectoredReadable.java
+++ b/paimon-common/src/main/java/org/apache/paimon/fs/VectoredReadable.java
@@ -59,7 +59,7 @@ public interface VectoredReadable {
 
     /** The batch size of data read by a single parallelism. */
     default int batchSizeForVectorReads() {
-        return 1024 * 1024;
+        return 4 * 1024 * 1024;
     }
 
     /** The read parallelism for vector reads. */

Reply via email to