wangyum commented on pull request #30517:
URL: https://github.com/apache/spark/pull/30517#issuecomment-743969298
Parquet column index benchmark code and result:
```scala
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.execution.benchmark
import java.io.File
import scala.util.Random
import org.apache.spark.SparkConf
import org.apache.spark.benchmark.Benchmark
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.functions.{monotonically_increasing_id,
timestamp_seconds}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.ParquetOutputTimestampType
import org.apache.spark.sql.types.{ByteType, Decimal, DecimalType}
/**
 * Benchmark to measure read performance with Parquet column index.
 * To run this benchmark:
 * {{{
 *   1. without sbt: bin/spark-submit --class <this class> <spark sql test jar>
 *   2. build/sbt "sql/test:runMain <this class>"
 *   3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>"
 *      Results will be written to "benchmarks/ParquetFilterPushdownBenchmark-results.txt".
 * }}}
 */
object ParquetFilterPushdownBenchmark extends SqlBasedBenchmark {
/**
 * Builds the session used by every benchmark case: a single-threaded local master,
 * plus memory and compression settings that are applied only when not already set.
 */
override def getSparkSession: SparkSession = {
  // Settings that act as defaults; an existing value for any of these keys wins.
  val fallbackSettings = Seq(
    "spark.driver.memory" -> "3g",
    "spark.executor.memory" -> "3g",
    "orc.compression" -> "snappy",
    "spark.sql.parquet.compression.codec" -> "snappy")

  // Since `spark.master` always exists, it is overridden unconditionally.
  val baseConf = new SparkConf()
    .setAppName(this.getClass.getSimpleName)
    .set("spark.master", "local[1]")

  val conf = fallbackSettings.foldLeft(baseConf) { case (acc, (key, value)) =>
    acc.setIfMissing(key, value)
  }
  SparkSession.builder().config(conf).getOrCreate()
}
// Row count of each generated benchmark table (15 * 2^20 rows).
private val numRows: Int = 15 * 1024 * 1024
// Number of filler columns (c1..c<width>) generated next to the `value` column.
private val width: Int = 5
// Half of the row count; used as the probe value in the point-lookup filters.
private val mid: Int = numRows / 2
/**
 * Evaluates `f` and afterwards drops the named temp views from the session catalog,
 * whether or not `f` threw.
 *
 * @param tableNames temp view names to drop once `f` has finished
 * @param f the body to run while the views are expected to exist
 */
def withTempTable(tableNames: String*)(f: => Unit): Unit = {
  try {
    f
  } finally {
    tableNames.foreach { viewName =>
      spark.catalog.dropTempView(viewName)
    }
  }
}
/**
 * Generates a table of `numRows` rows with `width` string columns of random longs and a
 * `value` column holding a monotonically increasing id, sorts it by `value`, and hands it
 * to `saveAsTable`.
 *
 * @param dir directory passed through to `saveAsTable`
 * @param numRows number of rows to generate
 * @param width number of filler columns `c1..c<width>`
 * @param useStringForValue when true the `value` column is cast to string; otherwise it
 *                          keeps the type produced by `monotonically_increasing_id()`
 */
private def prepareTable(
    dir: File, numRows: Int, width: Int, useStringForValue: Boolean): Unit = {
  import spark.implicits._

  val idCol = monotonically_increasing_id()
  val valueCol = if (useStringForValue) idCol.cast("string") else idCol

  // Each filler column is the random long (the mapped dataset's `value` column) as a string.
  val fillerExprs = for (i <- 1 to width) yield s"CAST(value AS STRING) c$i"

  val randomLongs = spark.range(numRows).map(_ => Random.nextLong)
  val df = randomLongs
    .selectExpr(fillerExprs: _*)
    .withColumn("value", valueCol)
    .sort("value")

  saveAsTable(df, dir)
}
/**
 * Generates a table whose `value` column takes only `numDistinctValues` distinct string
 * values (`id % numDistinctValues`), plus `width` string columns of `rand()` output, sorted
 * by `value`, and hands it to `saveAsTable` with `useDictionary = true`.
 *
 * @param dir directory passed through to `saveAsTable`
 * @param numRows number of rows to generate
 * @param numDistinctValues modulus applied to `id` to build the `value` column
 * @param width number of filler columns `c1..c<width>`
 */
private def prepareStringDictTable(
    dir: File, numRows: Int, numDistinctValues: Int, width: Int): Unit = {
  // Index 0 yields the low-cardinality `value` column; every other index a filler column.
  val projection = (0 to width).map { i =>
    if (i == 0) {
      s"CAST(id % $numDistinctValues AS STRING) AS value"
    } else {
      s"CAST(rand() AS STRING) c$i"
    }
  }

  val sorted = spark.range(numRows)
    .selectExpr(projection: _*)
    .sort("value")

  saveAsTable(sorted, dir, useDictionary = true)
}
/**
 * Writes `df` as Parquet under `<dir>/parquet` (overwriting any previous content), then
 * reads it back and registers it as the temp view `parquetTable`.
 *
 * @param df data to persist
 * @param dir parent directory for the Parquet output
 * @param useDictionary accepted for callers, but not read by this implementation
 */
private def saveAsTable(df: DataFrame, dir: File, useDictionary: Boolean = false): Unit = {
  val parquetPath = dir.getCanonicalPath + "/parquet"

  df.write.mode("overwrite").parquet(parquetPath)

  // Benchmarks query the files on disk, not the in-memory `df`.
  val persisted = spark.read.parquet(parquetPath)
  persisted.createOrReplaceTempView("parquetTable")
}
/**
 * Runs one benchmark that executes the same filtered query against `parquetTable` twice:
 * once with `parquet.filter.columnindex.enabled` set to false and once with it set to true.
 *
 * @param values passed to `Benchmark` as its value count (callers pass the table row count)
 * @param title benchmark title shown in the result header
 * @param whereExpr SQL predicate placed after `WHERE`
 * @param selectExpr SQL projection placed after `SELECT`; defaults to `*`
 */
def filterPushDownBenchmark(
    values: Int,
    title: String,
    whereExpr: String,
    selectExpr: String = "*"): Unit = {
  val benchmark = new Benchmark(title, values, minNumIters = 5, output = output)

  Seq(false, true).foreach { columnIndexEnabled =>
    val name = s"Parquet Vectorized ${if (columnIndexEnabled) "(columnIndex)" else ""}"
    benchmark.addCase(name) { _ =>
      withSQLConf("parquet.filter.columnindex.enabled" -> s"$columnIndexEnabled") {
        // Keep the query in a single-line literal: a plain string literal may not span
        // lines, and the pasted original was broken between WHERE and the predicate.
        spark.sql(s"SELECT $selectExpr FROM parquetTable WHERE $whereExpr").noop()
      }
    }
  }

  benchmark.run()
}
/**
 * Runs the filter cases for an integer-typed `value` column: predicates titled as selecting
 * 0 rows, 1 row, 10/50/90 percent of the rows, and all rows.
 *
 * @param numRows row count of the table; also used to derive the percentage bounds
 * @param width number of filler columns `c1..c<width>` aggregated in the range cases
 * @param mid probe value used by the 0-row and 1-row predicates
 */
private def runIntBenchmark(numRows: Int, width: Int, mid: Int): Unit = {
  // The replace() shortens "a < value AND value < b" to "a < value < b" in titles only;
  // the predicate sent to Spark keeps the full form.
  Seq("value IS NULL", s"$mid < value AND value < $mid").foreach { whereExpr =>
    val title = s"Select 0 int row ($whereExpr)".replace("value AND value", "value")
    filterPushDownBenchmark(numRows, title, whereExpr)
  }

  Seq(
    s"value = $mid",
    s"value <=> $mid",
    s"$mid <= value AND value <= $mid",
    s"${mid - 1} < value AND value < ${mid + 1}"
  ).foreach { whereExpr =>
    val title = s"Select 1 int row ($whereExpr)".replace("value AND value", "value")
    filterPushDownBenchmark(numRows, title, whereExpr)
  }

  // Aggregate every column so the range cases read all columns of the matching rows.
  val selectExpr = (1 to width).map(i => s"MAX(c$i)").mkString("", ",", ", MAX(value)")

  Seq(10, 50, 90).foreach { percent =>
    val upperBound = numRows * percent / 100
    filterPushDownBenchmark(
      numRows,
      s"Select $percent% int rows (value < $upperBound)",
      s"value < $upperBound",
      selectExpr
    )
  }

  Seq("value IS NOT NULL", "value > -1", "value != -1").foreach { whereExpr =>
    filterPushDownBenchmark(
      numRows,
      s"Select all int rows ($whereExpr)",
      whereExpr,
      selectExpr)
  }
}
/**
 * Runs the filter cases for a string-typed `value` column: predicates titled as selecting
 * 0 rows, 1 row, and all rows. The probe value is quoted as a SQL string literal.
 *
 * @param numRows row count of the table, passed to each benchmark
 * @param width number of filler columns `c1..c<width>` aggregated in the all-rows case
 * @param searchValue probe value, interpolated inside single quotes
 * @param colType label used in the benchmark titles (callers pass e.g. "string")
 */
private def runStringBenchmark(
    numRows: Int, width: Int, searchValue: Int, colType: String): Unit = {
  // The replace() shortens "a < value AND value < b" to "a < value < b" in titles only;
  // the predicate sent to Spark keeps the full form.
  Seq("value IS NULL", s"'$searchValue' < value AND value < '$searchValue'")
    .foreach { whereExpr =>
      val title = s"Select 0 $colType row ($whereExpr)".replace("value AND value", "value")
      filterPushDownBenchmark(numRows, title, whereExpr)
    }

  Seq(
    s"value = '$searchValue'",
    s"value <=> '$searchValue'",
    s"'$searchValue' <= value AND value <= '$searchValue'"
  ).foreach { whereExpr =>
    val title = s"Select 1 $colType row ($whereExpr)".replace("value AND value", "value")
    filterPushDownBenchmark(numRows, title, whereExpr)
  }

  // Aggregate every column so the all-rows case reads all columns.
  val selectExpr = (1 to width).map(i => s"MAX(c$i)").mkString("", ",", ", MAX(value)")

  Seq("value IS NOT NULL").foreach { whereExpr =>
    filterPushDownBenchmark(
      numRows,
      s"Select all $colType rows ($whereExpr)",
      whereExpr,
      selectExpr)
  }
}
/**
 * Entry point of the suite. Runs, in order, the benchmark groups for: many distinct values
 * (string then int), few distinct values, StringStartsWith, decimal, InSet -> InFilters,
 * byte, timestamp, and a query with many filters. Each group writes its own Parquet table
 * into a temp path and drops the `parquetTable` temp view when done.
 *
 * All string literals are kept on a single line: a plain Scala string literal (and a `//`
 * comment) may not span lines, and titles/predicates must match the published results.
 *
 * @param mainArgs command-line arguments; not read by this implementation
 */
override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
  runBenchmark("Pushdown for many distinct value case") {
    withTempPath { dir =>
      withTempTable("parquetTable") {
        Seq(true, false).foreach { useStringForValue =>
          prepareTable(dir, numRows, width, useStringForValue)
          if (useStringForValue) {
            runStringBenchmark(numRows, width, mid, "string")
          } else {
            runIntBenchmark(numRows, width, mid)
          }
        }
      }
    }
  }

  runBenchmark("Pushdown for few distinct value case (use dictionary encoding)") {
    withTempPath { dir =>
      val numDistinctValues = 200
      withTempTable("parquetTable") {
        prepareStringDictTable(dir, numRows, numDistinctValues, width)
        runStringBenchmark(numRows, width, numDistinctValues / 2, "distinct string")
      }
    }
  }

  runBenchmark("Pushdown benchmark for StringStartsWith") {
    withTempPath { dir =>
      withTempTable("parquetTable") {
        prepareTable(dir, numRows, width, true)
        Seq(
          "value like '10%'",
          "value like '1000%'",
          // Prefix made of every digit of `mid` except the last one.
          s"value like '${mid.toString.substring(0, mid.toString.length - 1)}%'"
        ).foreach { whereExpr =>
          val title = s"StringStartsWith filter: ($whereExpr)"
          filterPushDownBenchmark(numRows, title, whereExpr)
        }
      }
    }
  }

  runBenchmark(s"Pushdown benchmark for ${DecimalType.simpleString}") {
    withTempPath { dir =>
      Seq(
        s"decimal(${Decimal.MAX_INT_DIGITS}, 2)",
        s"decimal(${Decimal.MAX_LONG_DIGITS}, 2)",
        s"decimal(${DecimalType.MAX_PRECISION}, 2)"
      ).foreach { dt =>
        val columns = (1 to width).map(i => s"CAST(id AS string) c$i")
        // NOTE(review): the modulus presumably keeps ids within the integral digits of the
        // narrowest decimal type so the cast does not overflow; confirm.
        val valueCol = if (dt.equalsIgnoreCase(s"decimal(${Decimal.MAX_INT_DIGITS}, 2)")) {
          monotonically_increasing_id() % 9999999
        } else {
          monotonically_increasing_id()
        }
        val df = spark.range(numRows)
          .selectExpr(columns: _*)
          .withColumn("value", valueCol.cast(dt))
        withTempTable("parquetTable") {
          saveAsTable(df, dir)

          Seq(s"value = $mid").foreach { whereExpr =>
            val title = s"Select 1 $dt row ($whereExpr)".replace("value AND value", "value")
            filterPushDownBenchmark(numRows, title, whereExpr)
          }

          val selectExpr = (1 to width)
            .map(i => s"MAX(c$i)").mkString("", ",", ", MAX(value)")
          Seq(10, 50, 90).foreach { percent =>
            filterPushDownBenchmark(
              numRows,
              s"Select $percent% $dt rows (value < ${numRows * percent / 100})",
              s"value < ${numRows * percent / 100}",
              selectExpr
            )
          }
        }
      }
    }
  }

  runBenchmark("Pushdown benchmark for InSet -> InFilters") {
    withTempPath { dir =>
      withTempTable("parquetTable") {
        prepareTable(dir, numRows, width, false)
        Seq(5, 10, 50, 100).foreach { count =>
          Seq(10, 50, 90).foreach { distribution =>
            // `count` random probe values drawn from the first `distribution` percent of
            // the id range; unseeded, so the IN list differs between runs.
            val filter =
              Range(0, count).map(_ => Random.nextInt(numRows * distribution / 100))
            val whereExpr = s"value in(${filter.mkString(",")})"
            val title = s"InSet -> InFilters (values count: $count, distribution: $distribution)"
            filterPushDownBenchmark(numRows, title, whereExpr)
          }
        }
      }
    }
  }

  runBenchmark(s"Pushdown benchmark for ${ByteType.simpleString}") {
    withTempPath { dir =>
      val columns = (1 to width).map(i => s"CAST(id AS string) c$i")
      val df = spark.range(numRows).selectExpr(columns: _*)
        .withColumn("value", (monotonically_increasing_id() % Byte.MaxValue).cast(ByteType))
        .orderBy("value")
      withTempTable("parquetTable") {
        saveAsTable(df, dir)

        Seq(s"value = CAST(${Byte.MaxValue / 2} AS ${ByteType.simpleString})")
          .foreach { whereExpr =>
            val title = s"Select 1 ${ByteType.simpleString} row ($whereExpr)"
              .replace("value AND value", "value")
            filterPushDownBenchmark(numRows, title, whereExpr)
          }

        val selectExpr = (1 to width)
          .map(i => s"MAX(c$i)").mkString("", ",", ", MAX(value)")
        Seq(10, 50, 90).foreach { percent =>
          filterPushDownBenchmark(
            numRows,
            s"Select $percent% ${ByteType.simpleString} rows " +
              s"(value < CAST(${Byte.MaxValue * percent / 100} AS ${ByteType.simpleString}))",
            s"value < CAST(${Byte.MaxValue * percent / 100} AS ${ByteType.simpleString})",
            selectExpr
          )
        }
      }
    }
  }

  runBenchmark("Pushdown benchmark for Timestamp") {
    withTempPath { dir =>
      withSQLConf(SQLConf.PARQUET_FILTER_PUSHDOWN_TIMESTAMP_ENABLED.key -> true.toString) {
        // One pass per Parquet output timestamp type.
        ParquetOutputTimestampType.values.toSeq.map(_.toString).foreach { fileType =>
          withSQLConf(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key -> fileType) {
            val columns = (1 to width).map(i => s"CAST(id AS string) c$i")
            val df = spark.range(numRows).selectExpr(columns: _*)
              .withColumn("value", timestamp_seconds(monotonically_increasing_id()))
            withTempTable("parquetTable") {
              saveAsTable(df, dir)

              Seq(s"value = timestamp_seconds($mid)").foreach { whereExpr =>
                val title = s"Select 1 timestamp stored as $fileType row ($whereExpr)"
                  .replace("value AND value", "value")
                filterPushDownBenchmark(numRows, title, whereExpr)
              }

              val selectExpr = (1 to width)
                .map(i => s"MAX(c$i)").mkString("", ",", ", MAX(value)")
              Seq(10, 50, 90).foreach { percent =>
                filterPushDownBenchmark(
                  numRows,
                  s"Select $percent% timestamp stored as $fileType rows " +
                    s"(value < timestamp_seconds(${numRows * percent / 100}))",
                  s"value < timestamp_seconds(${numRows * percent / 100})",
                  selectExpr
                )
              }
            }
          }
        }
      }
    }
  }

  runBenchmark("Pushdown benchmark with many filters") {
    // These locals intentionally shadow the object-level `numRows` and `width`.
    val numRows = 1
    val width = 500
    withTempPath { dir =>
      val columns = (1 to width).map(i => s"id c$i")
      val df = spark.range(1).selectExpr(columns: _*)
      withTempTable("parquetTable") {
        saveAsTable(df, dir)
        Seq(1, 250, 500).foreach { numFilter =>
          val whereExpr = (1 to numFilter).map(i => s"c$i = 0").mkString(" and ")
          // Note: InferFiltersFromConstraints will add more filters to this given filters
          filterPushDownBenchmark(numRows, s"Select 1 row with $numFilter filters", whereExpr)
        }
      }
    }
  }
}
}
```
```
================================================================================================
Pushdown for many distinct value case
================================================================================================
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 0 string row (value IS NULL): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 99 111
10 158.9 6.3 1.0X
Parquet Vectorized (columnindex) 78 86
9 201.6 5.0 1.3X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 0 string row ('7864320' < value < '7864320'): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-----------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 835
848 17 18.8 53.1 1.0X
Parquet Vectorized (columnindex) 91
96 4 173.5 5.8 9.2X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 string row (value = '7864320'): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 839 846
6 18.7 53.3 1.0X
Parquet Vectorized (columnindex) 85 93
10 184.8 5.4 9.9X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 string row (value <=> '7864320'): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 838 852
16 18.8 53.3 1.0X
Parquet Vectorized (columnindex) 79 85
4 197.9 5.1 10.5X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 string row ('7864320' <= value <= '7864320'): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 872
907 45 18.0 55.4 1.0X
Parquet Vectorized (columnindex) 83
89 5 188.9 5.3 10.5X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select all string rows (value IS NOT NULL): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 16487 16727
253 1.0 1048.2 1.0X
Parquet Vectorized (columnindex) 16355 16426
62 1.0 1039.8 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 0 int row (value IS NULL): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 64 68
5 246.7 4.1 1.0X
Parquet Vectorized (columnindex) 61 66
4 258.0 3.9 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 0 int row (7864320 < value < 7864320): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 770 795
34 20.4 48.9 1.0X
Parquet Vectorized (columnindex) 78 84
5 201.1 5.0 9.8X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 int row (value = 7864320): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 774 795
21 20.3 49.2 1.0X
Parquet Vectorized (columnindex) 77 82
6 205.3 4.9 10.1X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 int row (value <=> 7864320): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 771 777
7 20.4 49.0 1.0X
Parquet Vectorized (columnindex) 69 76
5 226.8 4.4 11.1X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 int row (7864320 <= value <= 7864320): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 769 794
29 20.4 48.9 1.0X
Parquet Vectorized (columnindex) 74 82
6 213.3 4.7 10.4X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 int row (7864319 < value < 7864321): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 775 825
43 20.3 49.3 1.0X
Parquet Vectorized (columnindex) 76 81
5 206.3 4.8 10.2X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 10% int rows (value < 1572864): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 2326 2395
51 6.8 147.9 1.0X
Parquet Vectorized (columnindex) 1655 1669
13 9.5 105.2 1.4X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 50% int rows (value < 7864320): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 7898 8175
157 2.0 502.1 1.0X
Parquet Vectorized (columnindex) 7658 7731
73 2.1 486.9 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 90% int rows (value < 14155776): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 13778 13858
78 1.1 876.0 1.0X
Parquet Vectorized (columnindex) 13771 13885
105 1.1 875.5 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select all int rows (value IS NOT NULL): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 15072 15281
163 1.0 958.3 1.0X
Parquet Vectorized (columnindex) 15119 15344
194 1.0 961.3 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select all int rows (value > -1): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 15257 15378
198 1.0 970.0 1.0X
Parquet Vectorized (columnindex) 15296 15519
232 1.0 972.5 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select all int rows (value != -1): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 15262 15325
92 1.0 970.4 1.0X
Parquet Vectorized (columnindex) 15173 15255
84 1.0 964.7 1.0X
================================================================================================
Pushdown for few distinct value case (use dictionary encoding)
================================================================================================
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 0 distinct string row (value IS NULL): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 53 59
5 298.8 3.3 1.0X
Parquet Vectorized (columnindex) 52 57
6 300.2 3.3 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 0 distinct string row ('100' < value < '100'): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 890
902 7 17.7 56.6 1.0X
Parquet Vectorized (columnindex) 59
62 4 266.2 3.8 15.1X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 distinct string row (value = '100'): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 894 905
10 17.6 56.9 1.0X
Parquet Vectorized (columnindex) 125 130
6 126.2 7.9 7.2X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 distinct string row (value <=> '100'): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 901 920
26 17.5 57.3 1.0X
Parquet Vectorized (columnindex) 119 127
4 132.1 7.6 7.6X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 distinct string row ('100' <= value <= '100'): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 902
914 10 17.4 57.3 1.0X
Parquet Vectorized (columnindex) 126
132 7 124.8 8.0 7.2X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select all distinct string rows (value IS NOT NULL): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-----------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 16872
16947 97 0.9 1072.7 1.0X
Parquet Vectorized (columnindex) 16861
16970 80 0.9 1072.0 1.0X
================================================================================================
Pushdown benchmark for StringStartsWith
================================================================================================
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
StringStartsWith filter: (value like '10%'): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
---------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 1025 1038
17 15.4 65.1 1.0X
Parquet Vectorized (columnindex) 852 868
16 18.5 54.2 1.2X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
StringStartsWith filter: (value like '1000%'): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
-----------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 816 838
25 19.3 51.9 1.0X
Parquet Vectorized (columnindex) 74 79
5 213.7 4.7 11.1X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
StringStartsWith filter: (value like '786432%'): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 817
836 11 19.2 52.0 1.0X
Parquet Vectorized (columnindex) 76
82 4 207.6 4.8 10.8X
================================================================================================
Pushdown benchmark for decimal
================================================================================================
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 decimal(9, 2) row (value = 7864320): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 1125 1138
13 14.0 71.6 1.0X
Parquet Vectorized (columnindex) 50 55
5 313.1 3.2 22.4X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 10% decimal(9, 2) rows (value < 1572864): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 4800
4930 131 3.3 305.2 1.0X
Parquet Vectorized (columnindex) 2227
2274 40 7.1 141.6 2.2X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 50% decimal(9, 2) rows (value < 7864320): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 10016
10204 202 1.6 636.8 1.0X
Parquet Vectorized (columnindex) 9571
9677 63 1.6 608.5 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 90% decimal(9, 2) rows (value < 14155776): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 11161
11403 187 1.4 709.6 1.0X
Parquet Vectorized (columnindex) 11103
11283 130 1.4 705.9 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 decimal(18, 2) row (value = 7864320): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
-----------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 1150 1168
21 13.7 73.1 1.0X
Parquet Vectorized (columnindex) 45 48
5 350.0 2.9 25.6X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 10% decimal(18, 2) rows (value < 1572864): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 1946
1978 34 8.1 123.7 1.0X
Parquet Vectorized (columnindex) 1155
1189 28 13.6 73.4 1.7X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 50% decimal(18, 2) rows (value < 7864320): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 6206
6413 211 2.5 394.6 1.0X
Parquet Vectorized (columnindex) 5659
5786 96 2.8 359.8 1.1X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 90% decimal(18, 2) rows (value < 14155776): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
---------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 10375
10534 240 1.5 659.6 1.0X
Parquet Vectorized (columnindex) 10120
10334 221 1.6 643.4 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 decimal(38, 2) row (value = 7864320): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
-----------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 1694 1748
49 9.3 107.7 1.0X
Parquet Vectorized (columnindex) 46 50
4 338.6 3.0 36.5X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 10% decimal(38, 2) rows (value < 1572864): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 2712
2782 48 5.8 172.5 1.0X
Parquet Vectorized (columnindex) 1584
1611 32 9.9 100.7 1.7X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 50% decimal(38, 2) rows (value < 7864320): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 8356
8499 82 1.9 531.3 1.0X
Parquet Vectorized (columnindex) 7781
7979 123 2.0 494.7 1.1X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 90% decimal(38, 2) rows (value < 14155776): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
---------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 14128
14304 182 1.1 898.2 1.0X
Parquet Vectorized (columnindex) 13940
14004 44 1.1 886.3 1.0X
================================================================================================
Pushdown benchmark for InSet -> InFilters
================================================================================================
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 5, distribution: 10): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 773
784 11 20.4 49.1 1.0X
Parquet Vectorized (columnindex) 113
118 5 139.4 7.2 6.8X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 5, distribution: 50): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 2910
2930 13 5.4 185.0 1.0X
Parquet Vectorized (columnindex) 116
120 4 136.1 7.3 25.2X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 5, distribution: 90): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 2207
2218 8 7.1 140.3 1.0X
Parquet Vectorized (columnindex) 117
123 8 134.3 7.4 18.8X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 10, distribution: 10): Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
---------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 1493
1506 11 10.5 94.9 1.0X
Parquet Vectorized (columnindex) 159
164 6 99.2 10.1 9.4X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 10, distribution: 50): Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
---------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 3591
3635 32 4.4 228.3 1.0X
Parquet Vectorized (columnindex) 170
175 5 92.6 10.8 21.1X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 10, distribution: 90): Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
---------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 5079
5147 50 3.1 322.9 1.0X
Parquet Vectorized (columnindex) 172
180 5 91.2 11.0 29.5X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 50, distribution: 10): Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
---------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 8280
8533 245 1.9 526.4 1.0X
Parquet Vectorized (columnindex) 8341
8423 93 1.9 530.3 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 50, distribution: 50): Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
---------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 8248
8347 72 1.9 524.4 1.0X
Parquet Vectorized (columnindex) 8230
8303 66 1.9 523.2 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 50, distribution: 90): Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
---------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 8219
8285 44 1.9 522.6 1.0X
Parquet Vectorized (columnindex) 8183
8381 184 1.9 520.3 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 100, distribution: 10): Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 8354
8411 60 1.9 531.1 1.0X
Parquet Vectorized (columnindex) 8181
8256 60 1.9 520.1 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 100, distribution: 50): Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 8151
8210 38 1.9 518.3 1.0X
Parquet Vectorized (columnindex) 8169
8210 37 1.9 519.4 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 100, distribution: 90): Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 8131
8204 46 1.9 516.9 1.0X
Parquet Vectorized (columnindex) 8167
8231 65 1.9 519.3 1.0X
================================================================================================
Pushdown benchmark for tinyint
================================================================================================
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 tinyint row (value = CAST(63 AS tinyint)): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 1124
1198 68 14.0 71.4 1.0X
Parquet Vectorized (columnindex) 91
93 2 173.7 5.8 12.4X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 10% tinyint rows (value < CAST(12 AS tinyint)): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 1845
1892 37 8.5 117.3 1.0X
Parquet Vectorized (columnindex) 1113
1123 11 14.1 70.8 1.7X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 50% tinyint rows (value < CAST(63 AS tinyint)): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 5850
5911 73 2.7 371.9 1.0X
Parquet Vectorized (columnindex) 5450
5567 91 2.9 346.5 1.1X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 90% tinyint rows (value < CAST(114 AS tinyint)): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 10395
10458 62 1.5 660.9 1.0X
Parquet Vectorized (columnindex) 9928
10104 176 1.6 631.2 1.0X
================================================================================================
Pushdown benchmark for Timestamp
================================================================================================
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 timestamp stored as INT96 row (value = timestamp_seconds(7864320)):
Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-----------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized
3929 4094 107 4.0 249.8 1.0X
Parquet Vectorized (columnindex)
3991 4068 74 3.9 253.7 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 10% timestamp stored as INT96 rows (value <
timestamp_seconds(1572864)): Best Time(ms) Avg Time(ms) Stdev(ms)
Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized
4774 5003 163 3.3 303.5
1.0X
Parquet Vectorized (columnindex)
4769 4880 159 3.3 303.2
1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 50% timestamp stored as INT96 rows (value <
timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms)
Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized
7736 7884 142 2.0 491.9
1.0X
Parquet Vectorized (columnindex)
7587 7795 156 2.1 482.4
1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 90% timestamp stored as INT96 rows (value <
timestamp_seconds(14155776)): Best Time(ms) Avg Time(ms) Stdev(ms)
Rate(M/s) Per Row(ns) Relative
---------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized
10722 10785 61 1.5 681.7
1.0X
Parquet Vectorized (columnindex)
10719 10775 55 1.5 681.5
1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 timestamp stored as TIMESTAMP_MICROS row (value =
timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms)
Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized
1127 1172 47 14.0 71.6
1.0X
Parquet Vectorized (columnindex)
44 46 3 360.8 2.8
25.9X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 10% timestamp stored as TIMESTAMP_MICROS rows (value <
timestamp_seconds(1572864)): Best Time(ms) Avg Time(ms) Stdev(ms)
Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized
1861 1916 90 8.5
118.3 1.0X
Parquet Vectorized (columnindex)
1127 1160 22 14.0
71.7 1.7X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 50% timestamp stored as TIMESTAMP_MICROS rows (value <
timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms)
Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized
5809 5934 214 2.7
369.3 1.0X
Parquet Vectorized (columnindex)
5455 5523 93 2.9
346.8 1.1X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 90% timestamp stored as TIMESTAMP_MICROS rows (value <
timestamp_seconds(14155776)): Best Time(ms) Avg Time(ms) Stdev(ms)
Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized
9777 10097 244 1.6
621.6 1.0X
Parquet Vectorized (columnindex)
9808 9849 44 1.6
623.6 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 timestamp stored as TIMESTAMP_MILLIS row (value =
timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms)
Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized
1125 1163 24 14.0 71.5
1.0X
Parquet Vectorized (columnindex)
43 47 5 369.3 2.7
26.4X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 10% timestamp stored as TIMESTAMP_MILLIS rows (value <
timestamp_seconds(1572864)): Best Time(ms) Avg Time(ms) Stdev(ms)
Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized
1905 1977 80 8.3
121.1 1.0X
Parquet Vectorized (columnindex)
1137 1186 40 13.8
72.3 1.7X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 50% timestamp stored as TIMESTAMP_MILLIS rows (value <
timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms)
Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized
6018 6266 230 2.6
382.6 1.0X
Parquet Vectorized (columnindex)
5631 5703 69 2.8
358.0 1.1X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 90% timestamp stored as TIMESTAMP_MILLIS rows (value <
timestamp_seconds(14155776)): Best Time(ms) Avg Time(ms) Stdev(ms)
Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized
10132 10224 113 1.6
644.2 1.0X
Parquet Vectorized (columnindex)
9898 9992 69 1.6
629.3 1.0X
================================================================================================
Pushdown benchmark with many filters
================================================================================================
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 row with 1 filters: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 182 190
7 0.0 182396465.0 1.0X
Parquet Vectorized (columnindex) 187 192
5 0.0 187246572.0 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 row with 250 filters: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 2228 2257
19 0.0 2228318860.0 1.0X
Parquet Vectorized (columnindex) 2212 2244
24 0.0 2212486315.0 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 row with 500 filters: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 12391 12473
86 0.0 12391350628.0 1.0X
Parquet Vectorized (columnindex) 12438 12594
229 0.0 12438065459.0 1.0X
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org