This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new e6df3b72e [core] Add TableFormatBenchmark to test format only
e6df3b72e is described below
commit e6df3b72e6cba499d53f91e0911564e02598ca75
Author: Jingsong <[email protected]>
AuthorDate: Tue Mar 26 16:58:56 2024 +0800
[core] Add TableFormatBenchmark to test format only
---
.../apache/paimon/benchmark/TableBenchmark.java | 14 ++--
...eadBenchmark.java => TableFormatBenchmark.java} | 81 +++++++---------------
.../paimon/benchmark/TableReadBenchmark.java | 33 ++++++---
3 files changed, 56 insertions(+), 72 deletions(-)
diff --git a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/TableBenchmark.java b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/TableBenchmark.java
index 0df1bc2ec..c7bd2c22e 100644
--- a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/TableBenchmark.java
+++ b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/TableBenchmark.java
@@ -41,8 +41,6 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
-import static java.util.Collections.singletonList;
-
/** Base class for table benchmark. */
public class TableBenchmark {
@@ -53,6 +51,11 @@ public class TableBenchmark {
private final RandomDataGenerator random = new RandomDataGenerator();
protected Table createTable(Options tableOptions, String tableName) throws
Exception {
+ return createTable(tableOptions, tableName,
Collections.singletonList("k"));
+ }
+
+ protected Table createTable(Options tableOptions, String tableName,
List<String> primaryKeys)
+ throws Exception {
Options catalogOptions = new Options();
catalogOptions.set(CatalogOptions.WAREHOUSE,
tempFile.toUri().toString());
Catalog catalog =
CatalogFactory.createCatalog(CatalogContext.create(catalogOptions));
@@ -66,12 +69,7 @@ public class TableBenchmark {
}
tableOptions.set(CoreOptions.SNAPSHOT_NUM_RETAINED_MAX, 10);
Schema schema =
- new Schema(
- fields,
- Collections.emptyList(),
- singletonList("k"),
- tableOptions.toMap(),
- "");
+ new Schema(fields, Collections.emptyList(), primaryKeys,
tableOptions.toMap(), "");
Identifier identifier = Identifier.create(database, tableName);
catalog.createTable(identifier, schema, false);
return catalog.getTable(identifier);
diff --git a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/TableReadBenchmark.java b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/TableFormatBenchmark.java
similarity index 51%
copy from paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/TableReadBenchmark.java
copy to paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/TableFormatBenchmark.java
index f089ef234..c5648a785 100644
--- a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/TableReadBenchmark.java
+++ b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/TableFormatBenchmark.java
@@ -18,7 +18,6 @@
package org.apache.paimon.benchmark;
-import org.apache.paimon.CoreOptions;
import org.apache.paimon.data.InternalRow;
import org.apache.paimon.options.Options;
import org.apache.paimon.reader.RecordReader;
@@ -31,89 +30,59 @@ import org.apache.paimon.table.source.Split;
import org.junit.jupiter.api.Test;
-import java.util.LinkedHashMap;
+import java.util.Collections;
import java.util.List;
-import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
/** Benchmark for table read. */
-public class TableReadBenchmark extends TableBenchmark {
+public class TableFormatBenchmark extends TableBenchmark {
- private final int rowCount = 1000000;
+ private final int rowCount = 10000000;
@Test
public void testRead() throws Exception {
- Map<String, Table> tables = new LinkedHashMap<>();
- tables.put("orc", prepareData(orc(), "orc"));
- tables.put("parquet", prepareData(parquet(), "parquet"));
- tables.put("avro", prepareData(avro(), "avro"));
-
- innerTest(tables);
+ innerTest(prepareData());
/*
* OpenJDK 64-Bit Server VM 1.8.0_292-b10 on Mac OS X 10.16
* Apple M1 Pro
* read: Best/Avg Time(ms) Row Rate(K/s)
Per Row(ns) Relative
*
------------------------------------------------------------------------------------------------
- * OPERATORTEST_read_read-orc 1046 / 1295 2867.3
348.8 1.0X
- * OPERATORTEST_read_read-parquet 3076 / 5295 975.4
1025.2 0.3X
- * OPERATORTEST_read_read-avro 4156 / 4362 721.8
1385.5 0.3X
+ * OPERATORTEST_read_read-orc 11314 / 11366 2651.6
377.1 1.0X
*/
}
- private Options orc() {
- Options options = new Options();
- options.set(CoreOptions.FILE_FORMAT, CoreOptions.FileFormatType.ORC);
- return options;
- }
-
- private Options parquet() {
- Options options = new Options();
- options.set(CoreOptions.FILE_FORMAT,
CoreOptions.FileFormatType.PARQUET);
- return options;
- }
-
- private Options avro() {
- Options options = new Options();
- options.set(CoreOptions.FILE_FORMAT, CoreOptions.FileFormatType.AVRO);
- return options;
- }
-
- private void innerTest(Map<String, Table> tables) {
+ private void innerTest(Table table) {
int readTime = 3;
Benchmark benchmark =
new Benchmark("read", readTime * rowCount)
.setNumWarmupIters(1)
.setOutputPerIteration(true);
- for (String name : tables.keySet()) {
- benchmark.addCase(
- "read-" + name,
- 5,
- () -> {
- Table table = tables.get(name);
- for (int i = 0; i < readTime; i++) {
- List<Split> splits =
table.newReadBuilder().newScan().plan().splits();
- AtomicLong readCount = new AtomicLong(0);
- try {
- for (Split split : splits) {
- RecordReader<InternalRow> reader =
-
table.newReadBuilder().newRead().createReader(split);
- reader.forEachRemaining(row ->
readCount.incrementAndGet());
- }
- System.out.printf("Finish read %d rows.\n",
readCount.get());
- } catch (Exception e) {
- throw new RuntimeException(e);
+ benchmark.addCase(
+ "read",
+ 5,
+ () -> {
+ for (int i = 0; i < readTime; i++) {
+ List<Split> splits =
table.newReadBuilder().newScan().plan().splits();
+ AtomicLong readCount = new AtomicLong(0);
+ try {
+ for (Split split : splits) {
+ RecordReader<InternalRow> reader =
+
table.newReadBuilder().newRead().createReader(split);
+ reader.forEachRemaining(row ->
readCount.incrementAndGet());
}
+ System.out.printf("Finish read %d rows.\n",
readCount.get());
+ } catch (Exception e) {
+ throw new RuntimeException(e);
}
- });
- }
+ }
+ });
benchmark.run();
}
- private Table prepareData(Options options, String tableName) throws
Exception {
- options.set(CoreOptions.BUCKET, 1);
- Table table = createTable(options, tableName);
+ private Table prepareData() throws Exception {
+ Table table = createTable(new Options(), "table",
Collections.emptyList());
StreamWriteBuilder writeBuilder = table.newStreamWriteBuilder();
StreamTableWrite write = writeBuilder.newWrite();
StreamTableCommit commit = writeBuilder.newCommit();
diff --git a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/TableReadBenchmark.java b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/TableReadBenchmark.java
index f089ef234..f0af590a0 100644
--- a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/TableReadBenchmark.java
+++ b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/TableReadBenchmark.java
@@ -31,7 +31,7 @@ import org.apache.paimon.table.source.Split;
import org.junit.jupiter.api.Test;
-import java.util.LinkedHashMap;
+import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
@@ -43,20 +43,37 @@ public class TableReadBenchmark extends TableBenchmark {
private final int rowCount = 1000000;
@Test
- public void testRead() throws Exception {
- Map<String, Table> tables = new LinkedHashMap<>();
- tables.put("orc", prepareData(orc(), "orc"));
- tables.put("parquet", prepareData(parquet(), "parquet"));
- tables.put("avro", prepareData(avro(), "avro"));
-
- innerTest(tables);
+ public void testOrcRead() throws Exception {
+ innerTest(Collections.singletonMap("orc", prepareData(orc(), "orc")));
/*
* OpenJDK 64-Bit Server VM 1.8.0_292-b10 on Mac OS X 10.16
* Apple M1 Pro
* read: Best/Avg Time(ms) Row Rate(K/s)
Per Row(ns) Relative
*
------------------------------------------------------------------------------------------------
* OPERATORTEST_read_read-orc 1046 / 1295 2867.3
348.8 1.0X
+ */
+ }
+
+ @Test
+ public void testParquetRead() throws Exception {
+ innerTest(Collections.singletonMap("parquet", prepareData(parquet(),
"parquet")));
+ /*
+ * OpenJDK 64-Bit Server VM 1.8.0_292-b10 on Mac OS X 10.16
+ * Apple M1 Pro
+ * read: Best/Avg Time(ms) Row Rate(K/s)
Per Row(ns) Relative
+ *
------------------------------------------------------------------------------------------------
* OPERATORTEST_read_read-parquet 3076 / 5295 975.4
1025.2 0.3X
+ */
+ }
+
+ @Test
+ public void testAvroRead() throws Exception {
+ innerTest(Collections.singletonMap("avro", prepareData(avro(),
"avro")));
+ /*
+ * OpenJDK 64-Bit Server VM 1.8.0_292-b10 on Mac OS X 10.16
+ * Apple M1 Pro
+ * read: Best/Avg Time(ms) Row Rate(K/s)
Per Row(ns) Relative
+ *
------------------------------------------------------------------------------------------------
* OPERATORTEST_read_read-avro 4156 / 4362 721.8
1385.5 0.3X
*/
}