This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 8f6948093 [core][lookup] Add writer benchmark for sort and hash file store (#3828)
8f6948093 is described below

commit 8f69480934e59da4e7b4163c1594d7d49d0a525e
Author: Fang Yong <[email protected]>
AuthorDate: Mon Jul 29 11:42:04 2024 +0800

    [core][lookup] Add writer benchmark for sort and hash file store (#3828)
---
 .../benchmark/lookup/AbstractLookupBenchmark.java  |  39 +++++++
 .../{ => lookup}/LookupBloomFilterBenchmark.java   |  23 +---
 .../benchmark/lookup/LookupWriterBenchmark.java    | 116 +++++++++++++++++++++
 3 files changed, 159 insertions(+), 19 deletions(-)

diff --git a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/AbstractLookupBenchmark.java b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/AbstractLookupBenchmark.java
new file mode 100644
index 000000000..652142941
--- /dev/null
+++ b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/AbstractLookupBenchmark.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.benchmark.lookup;
+
+/** Abstract benchmark class for lookup. */
+abstract class AbstractLookupBenchmark {
+    protected static final int[] VALUE_LENGTHS = {0, 500, 1000, 2000, 4000};
+
+    protected byte[][] generateSequenceInputs(int start, int end) {
+        int count = end - start;
+        byte[][] result = new byte[count][4];
+        for (int i = 0; i < count; i++) {
+            result[i] = intToByteArray(i);
+        }
+        return result;
+    }
+
+    protected byte[] intToByteArray(int value) {
+        return new byte[] {
+            (byte) (value >>> 24), (byte) (value >>> 16), (byte) (value >>> 8), (byte) value
+        };
+    }
+}
diff --git a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/LookupBloomFilterBenchmark.java b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/LookupBloomFilterBenchmark.java
similarity index 87%
rename from paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/LookupBloomFilterBenchmark.java
rename to paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/LookupBloomFilterBenchmark.java
index a65f1364f..68f9b38ff 100644
--- a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/LookupBloomFilterBenchmark.java
+++ b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/LookupBloomFilterBenchmark.java
@@ -16,8 +16,9 @@
  * limitations under the License.
  */
 
-package org.apache.paimon.benchmark;
+package org.apache.paimon.benchmark.lookup;
 
+import org.apache.paimon.benchmark.Benchmark;
 import org.apache.paimon.io.cache.CacheManager;
 import org.apache.paimon.lookup.hash.HashLookupStoreFactory;
 import org.apache.paimon.lookup.hash.HashLookupStoreReader;
@@ -36,7 +37,7 @@ import java.util.UUID;
 import java.util.concurrent.ThreadLocalRandom;
 
 /** Benchmark for measure the bloom filter for lookup. */
-public class LookupBloomFilterBenchmark {
+public class LookupBloomFilterBenchmark extends AbstractLookupBenchmark {
 
     @TempDir Path tempDir;
     ThreadLocalRandom rnd = ThreadLocalRandom.current();
@@ -57,15 +58,6 @@ public class LookupBloomFilterBenchmark {
                 "lookup", generateSequenceInputs(0, 100000), generateRandomInputs(100000, 200000));
     }
 
-    private byte[][] generateSequenceInputs(int start, int end) {
-        int count = end - start;
-        byte[][] result = new byte[count][4];
-        for (int i = 0; i < count; i++) {
-            result[i] = intToByteArray(i);
-        }
-        return result;
-    }
-
     private byte[][] generateRandomInputs(int start, int end) {
         int count = end - start;
         byte[][] result = new byte[count][4];
@@ -75,18 +67,11 @@ public class LookupBloomFilterBenchmark {
         return result;
     }
 
-    public byte[] intToByteArray(int value) {
-        return new byte[] {
-            (byte) (value >>> 24), (byte) (value >>> 16), (byte) (value >>> 8), (byte) value
-        };
-    }
-
     public void innerTest(String name, byte[][] inputs, byte[][] probe) throws Exception {
         Benchmark benchmark =
                 new Benchmark(name, probe.length).setNumWarmupIters(1).setOutputPerIteration(true);
 
-        int[] valueLengths = {0, 500, 1000, 2000};
-        for (int valueLength : valueLengths) {
+        for (int valueLength : VALUE_LENGTHS) {
             HashLookupStoreReader reader = writeData(null, inputs, valueLength);
 
             benchmark.addCase(
diff --git a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/LookupWriterBenchmark.java b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/LookupWriterBenchmark.java
new file mode 100644
index 000000000..84a1d6cd4
--- /dev/null
+++ b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/LookupWriterBenchmark.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.benchmark.lookup;
+
+import org.apache.paimon.CoreOptions;
+import org.apache.paimon.benchmark.Benchmark;
+import org.apache.paimon.data.serializer.RowCompactedSerializer;
+import org.apache.paimon.io.cache.CacheManager;
+import org.apache.paimon.lookup.LookupStoreFactory;
+import org.apache.paimon.lookup.LookupStoreWriter;
+import org.apache.paimon.options.MemorySize;
+import org.apache.paimon.testutils.junit.parameterized.ParameterizedTestExtension;
+import org.apache.paimon.testutils.junit.parameterized.Parameters;
+import org.apache.paimon.types.IntType;
+import org.apache.paimon.types.RowType;
+
+import org.junit.jupiter.api.TestTemplate;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.junit.jupiter.api.io.TempDir;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.UUID;
+
+import static org.apache.paimon.CoreOptions.LOOKUP_LOCAL_FILE_TYPE;
+
+/** Benchmark for measuring the throughput of writing for lookup. */
+@ExtendWith(ParameterizedTestExtension.class)
+public class LookupWriterBenchmark extends AbstractLookupBenchmark {
+
+    private final int recordCount;
+    @TempDir Path tempDir;
+
+    public LookupWriterBenchmark(int recordCount) {
+        this.recordCount = recordCount;
+    }
+
+    @Parameters(name = "record-count-{0}")
+    public static List<Integer> getVarSeg() {
+        return Arrays.asList(1000000, 5000000, 10000000, 15000000, 20000000);
+    }
+
+    @TestTemplate
+    void testLookupWriter() {
+        writeLookupDataBenchmark(generateSequenceInputs(0, recordCount));
+    }
+
+    public void writeLookupDataBenchmark(byte[][] inputs) {
+        Benchmark benchmark =
+                new Benchmark("writer-" + inputs.length, inputs.length)
+                        .setNumWarmupIters(1)
+                        .setOutputPerIteration(true);
+        for (int valueLength : VALUE_LENGTHS) {
+            for (CoreOptions.LookupLocalFileType fileType :
+                    CoreOptions.LookupLocalFileType.values()) {
+                CoreOptions options =
+                        CoreOptions.fromMap(
+                                Collections.singletonMap(
+                                        LOOKUP_LOCAL_FILE_TYPE.key(), fileType.name()));
+                benchmark.addCase(
+                        String.format(
+                                "%s-write-%dB-value-%d-num",
+                                fileType.name(), valueLength, inputs.length),
+                        5,
+                        () -> {
+                            try {
+                                writeData(options, inputs, valueLength);
+                            } catch (IOException e) {
+                                throw new RuntimeException(e);
+                            }
+                        });
+            }
+        }
+
+        benchmark.run();
+    }
+
+    private void writeData(CoreOptions options, byte[][] inputs, int valueLength)
+            throws IOException {
+        byte[] value = new byte[valueLength];
+        Arrays.fill(value, (byte) 1);
+        LookupStoreFactory factory =
+                LookupStoreFactory.create(
+                        options,
+                        new CacheManager(MemorySize.ofMebiBytes(10)),
+                        new RowCompactedSerializer(RowType.of(new IntType()))
+                                .createSliceComparator());
+
+        File file = new File(tempDir.toFile(), UUID.randomUUID().toString());
+        LookupStoreWriter writer = factory.createWriter(file, null);
+        for (byte[] input : inputs) {
+            writer.put(input, value);
+        }
+        writer.close();
+    }
+}

Reply via email to