This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 8f6948093 [core][lookup] Add writer benchmark for sort and hash file
store (#3828)
8f6948093 is described below
commit 8f69480934e59da4e7b4163c1594d7d49d0a525e
Author: Fang Yong <[email protected]>
AuthorDate: Mon Jul 29 11:42:04 2024 +0800
[core][lookup] Add writer benchmark for sort and hash file store (#3828)
---
.../benchmark/lookup/AbstractLookupBenchmark.java | 39 +++++++
.../{ => lookup}/LookupBloomFilterBenchmark.java | 23 +---
.../benchmark/lookup/LookupWriterBenchmark.java | 116 +++++++++++++++++++++
3 files changed, 159 insertions(+), 19 deletions(-)
diff --git
a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/AbstractLookupBenchmark.java
b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/AbstractLookupBenchmark.java
new file mode 100644
index 000000000..652142941
--- /dev/null
+++
b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/AbstractLookupBenchmark.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.benchmark.lookup;
+
+/** Abstract benchmark class for lookup, holding the value sizes and key generators it shares. */
+abstract class AbstractLookupBenchmark {
+    /** Value sizes, in bytes, that the lookup benchmarks iterate over. */
+    protected static final int[] VALUE_LENGTHS = {0, 500, 1000, 2000, 4000};
+
+    /**
+     * Generates one 4-byte key for every integer in {@code [start, end)}, in ascending order.
+     *
+     * @param start first integer to encode (inclusive)
+     * @param end end of the range (exclusive)
+     * @return {@code end - start} keys, where element {@code i} encodes {@code start + i}
+     * @throws IllegalArgumentException if {@code end} is smaller than {@code start}
+     */
+    protected byte[][] generateSequenceInputs(int start, int end) {
+        if (end < start) {
+            throw new IllegalArgumentException(
+                    "end (" + end + ") must not be smaller than start (" + start + ")");
+        }
+        int count = end - start;
+        // Every row is assigned below, so only the outer array is allocated up front.
+        byte[][] result = new byte[count][];
+        for (int i = 0; i < count; i++) {
+            // Offset by start: encoding i alone would silently ignore a non-zero start.
+            result[i] = intToByteArray(start + i);
+        }
+        return result;
+    }
+
+    /**
+     * Encodes an int as four bytes, most significant byte first.
+     *
+     * @param value the int to encode
+     * @return a new 4-byte array holding {@code value} in big-endian order
+     */
+    protected byte[] intToByteArray(int value) {
+        return new byte[] {
+            (byte) (value >>> 24), (byte) (value >>> 16), (byte) (value >>> 8), (byte) value
+        };
+    }
+}
diff --git
a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/LookupBloomFilterBenchmark.java
b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/LookupBloomFilterBenchmark.java
similarity index 87%
rename from
paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/LookupBloomFilterBenchmark.java
rename to
paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/LookupBloomFilterBenchmark.java
index a65f1364f..68f9b38ff 100644
---
a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/LookupBloomFilterBenchmark.java
+++
b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/LookupBloomFilterBenchmark.java
@@ -16,8 +16,9 @@
* limitations under the License.
*/
-package org.apache.paimon.benchmark;
+package org.apache.paimon.benchmark.lookup;
+import org.apache.paimon.benchmark.Benchmark;
import org.apache.paimon.io.cache.CacheManager;
import org.apache.paimon.lookup.hash.HashLookupStoreFactory;
import org.apache.paimon.lookup.hash.HashLookupStoreReader;
@@ -36,7 +37,7 @@ import java.util.UUID;
import java.util.concurrent.ThreadLocalRandom;
/** Benchmark for measure the bloom filter for lookup. */
-public class LookupBloomFilterBenchmark {
+public class LookupBloomFilterBenchmark extends AbstractLookupBenchmark {
@TempDir Path tempDir;
ThreadLocalRandom rnd = ThreadLocalRandom.current();
@@ -57,15 +58,6 @@ public class LookupBloomFilterBenchmark {
"lookup", generateSequenceInputs(0, 100000),
generateRandomInputs(100000, 200000));
}
- private byte[][] generateSequenceInputs(int start, int end) {
- int count = end - start;
- byte[][] result = new byte[count][4];
- for (int i = 0; i < count; i++) {
- result[i] = intToByteArray(i);
- }
- return result;
- }
-
private byte[][] generateRandomInputs(int start, int end) {
int count = end - start;
byte[][] result = new byte[count][4];
@@ -75,18 +67,11 @@ public class LookupBloomFilterBenchmark {
return result;
}
- public byte[] intToByteArray(int value) {
- return new byte[] {
- (byte) (value >>> 24), (byte) (value >>> 16), (byte) (value >>>
8), (byte) value
- };
- }
-
public void innerTest(String name, byte[][] inputs, byte[][] probe) throws
Exception {
Benchmark benchmark =
new Benchmark(name,
probe.length).setNumWarmupIters(1).setOutputPerIteration(true);
- int[] valueLengths = {0, 500, 1000, 2000};
- for (int valueLength : valueLengths) {
+ for (int valueLength : VALUE_LENGTHS) {
HashLookupStoreReader reader = writeData(null, inputs,
valueLength);
benchmark.addCase(
diff --git
a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/LookupWriterBenchmark.java
b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/LookupWriterBenchmark.java
new file mode 100644
index 000000000..84a1d6cd4
--- /dev/null
+++
b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/LookupWriterBenchmark.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.benchmark.lookup;
+
+import org.apache.paimon.CoreOptions;
+import org.apache.paimon.benchmark.Benchmark;
+import org.apache.paimon.data.serializer.RowCompactedSerializer;
+import org.apache.paimon.io.cache.CacheManager;
+import org.apache.paimon.lookup.LookupStoreFactory;
+import org.apache.paimon.lookup.LookupStoreWriter;
+import org.apache.paimon.options.MemorySize;
+import
org.apache.paimon.testutils.junit.parameterized.ParameterizedTestExtension;
+import org.apache.paimon.testutils.junit.parameterized.Parameters;
+import org.apache.paimon.types.IntType;
+import org.apache.paimon.types.RowType;
+
+import org.junit.jupiter.api.TestTemplate;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.junit.jupiter.api.io.TempDir;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.UUID;
+
+import static org.apache.paimon.CoreOptions.LOOKUP_LOCAL_FILE_TYPE;
+
+/** Benchmark for measuring the throughput of writing for lookup. */
+@ExtendWith(ParameterizedTestExtension.class)
+public class LookupWriterBenchmark extends AbstractLookupBenchmark {
+
+    /** Number of sequential keys written by each benchmark case. */
+    private final int recordCount;
+
+    /** Directory that receives the lookup files written by the benchmark. */
+    @TempDir Path tempDir;
+
+    /**
+     * Creates the benchmark for one parameter value.
+     *
+     * @param recordCount number of records to write, one of the values from {@link #getVarSeg()}
+     */
+    public LookupWriterBenchmark(int recordCount) {
+        this.recordCount = recordCount;
+    }
+
+    /** Returns the record counts the benchmark is parameterized over. */
+    @Parameters(name = "record-count-{0}")
+    public static List<Integer> getVarSeg() {
+        return Arrays.asList(1000000, 5000000, 10000000, 15000000, 20000000);
+    }
+
+    /** Runs the writer benchmark over {@code recordCount} sequential keys starting at 0. */
+    @TestTemplate
+    void testLookupWriter() {
+        writeLookupDataBenchmark(generateSequenceInputs(0, recordCount));
+    }
+
+    /**
+     * Registers one benchmark case per combination of value length and lookup local file type,
+     * then runs them all.
+     *
+     * @param inputs the keys written by every case
+     */
+    public void writeLookupDataBenchmark(byte[][] inputs) {
+        Benchmark benchmark =
+                new Benchmark("writer-" + inputs.length, inputs.length)
+                        .setNumWarmupIters(1)
+                        .setOutputPerIteration(true);
+        for (int valueLength : VALUE_LENGTHS) {
+            for (CoreOptions.LookupLocalFileType fileType :
+                    CoreOptions.LookupLocalFileType.values()) {
+                CoreOptions options =
+                        CoreOptions.fromMap(
+                                Collections.singletonMap(
+                                        LOOKUP_LOCAL_FILE_TYPE.key(), fileType.name()));
+                benchmark.addCase(
+                        String.format(
+                                "%s-write-%dB-value-%d-num",
+                                fileType.name(), valueLength, inputs.length),
+                        // NOTE(review): presumably the number of measured iterations; confirm
+                        // against Benchmark#addCase.
+                        5,
+                        () -> {
+                            try {
+                                writeData(options, inputs, valueLength);
+                            } catch (IOException e) {
+                                // The case body cannot throw checked exceptions.
+                                throw new RuntimeException(e);
+                            }
+                        });
+            }
+        }
+
+        benchmark.run();
+    }
+
+    /**
+     * Writes every key in {@code inputs}, paired with the same {@code valueLength}-byte value, to
+     * a new uniquely named file under {@link #tempDir}.
+     *
+     * @param options options used to create the lookup store factory
+     * @param inputs keys to write, in iteration order
+     * @param valueLength size in bytes of the value stored for every key
+     * @throws IOException if writing to or closing the store fails
+     */
+    private void writeData(CoreOptions options, byte[][] inputs, int valueLength)
+            throws IOException {
+        byte[] value = new byte[valueLength];
+        Arrays.fill(value, (byte) 1);
+        LookupStoreFactory factory =
+                LookupStoreFactory.create(
+                        options,
+                        new CacheManager(MemorySize.ofMebiBytes(10)),
+                        new RowCompactedSerializer(RowType.of(new IntType()))
+                                .createSliceComparator());
+
+        File file = new File(tempDir.toFile(), UUID.randomUUID().toString());
+        LookupStoreWriter writer = factory.createWriter(file, null);
+        try {
+            for (byte[] input : inputs) {
+                writer.put(input, value);
+            }
+        } finally {
+            // Close on the failure path too, so a failed put does not leave the writer open.
+            writer.close();
+        }
+    }
+}