wangyum commented on pull request #31393:
URL: https://github.com/apache/spark/pull/31393#issuecomment-769767724
Benchmark and benchmark result:
```scala
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.execution.benchmark
import java.io.File
import scala.util.Random
import org.apache.parquet.hadoop.ParquetInputFormat
import org.apache.spark.SparkConf
import org.apache.spark.benchmark.Benchmark
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.functions.{monotonically_increasing_id,
timestamp_seconds}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.ParquetOutputTimestampType
import org.apache.spark.sql.types.{ByteType, Decimal, DecimalType}
/**
 * Benchmark to measure read performance with Parquet column index.
 * To run this benchmark:
 * {{{
 *   1. without sbt: bin/spark-submit --class <this class> <spark sql test jar>
 *   2. build/sbt "sql/test:runMain <this class>"
 *   3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>"
 *      Results will be written to "benchmarks/ParquetColumnIndexBenchmark-results.txt".
 * }}}
 */
object ParquetColumnIndexBenchmark extends SqlBasedBenchmark {

  /**
   * Builds the session used by every case in this suite: a single local core, with
   * snappy as the default Parquet codec unless the caller already configured one.
   */
  override def getSparkSession: SparkSession = {
    val conf = new SparkConf()
      .setAppName(this.getClass.getSimpleName)
      // Since `spark.master` always exists, overrides this value
      .set("spark.master", "local[1]")
      .setIfMissing("spark.driver.memory", "3g")
      .setIfMissing("spark.executor.memory", "3g")
      // NOTE(review): nothing in this object writes ORC; kept so the session config is unchanged.
      .setIfMissing("orc.compression", "snappy")
      .setIfMissing("spark.sql.parquet.compression.codec", "snappy")

    SparkSession.builder().config(conf).getOrCreate()
  }

  // Default number of rows written for each generated table.
  private val numRows = 1024 * 1024 * 15
  // Number of payload columns (c1..cN) written next to the filtered `value` column.
  private val width = 5
  // Middle of the generated value range, used as the probe for point lookups.
  private val mid = numRows / 2

  /**
   * Runs `f` and then drops the given temporary views, whether or not `f` throws.
   *
   * @param tableNames temporary view names to drop once `f` has finished
   * @param f          the body to run while the views are expected to exist
   */
  def withTempTable(tableNames: String*)(f: => Unit): Unit = {
    try {
      f
    } finally {
      tableNames.foreach(name => spark.catalog.dropTempView(name))
    }
  }

  /** Title helper: rewrites `a < value AND value < b` as the shorter `a < value < b`. */
  private def collapseRange(title: String): String =
    title.replace("value AND value", "value")

  /** Projection `MAX(c1),...,MAX(cN), MAX(value)` used by the cases that read many rows. */
  private def maxOfAllColumns(width: Int): String =
    (1 to width).map(i => s"MAX(c$i)").mkString("", ",", ", MAX(value)")

  /**
   * Writes a table of `numRows` rows sorted by `value` and registers it as `parquetTable`.
   * Columns c1..c`width` hold random longs rendered as strings; `value` is
   * `monotonically_increasing_id()`, optionally cast to string.
   *
   * @param dir               directory under which the Parquet files are written
   * @param numRows           number of rows to generate
   * @param width             number of payload columns
   * @param useStringForValue whether `value` is stored as a string instead of a long
   */
  private def prepareTable(
      dir: File, numRows: Int, width: Int, useStringForValue: Boolean): Unit = {
    import spark.implicits._

    val payloadColumns = (1 to width).map(i => s"CAST(value AS STRING) c$i")
    val idCol = monotonically_increasing_id()
    val valueCol = if (useStringForValue) idCol.cast("string") else idCol

    val df = spark.range(numRows)
      .map(_ => Random.nextLong())
      .selectExpr(payloadColumns: _*)
      .withColumn("value", valueCol)
      .sort("value")
    saveAsTable(df, dir)
  }

  /**
   * Writes a table whose string `value` column cycles through `numDistinctValues`
   * distinct values (`id % numDistinctValues`), sorted by `value`, and registers it as
   * `parquetTable`. Columns c1..c`width` hold `rand()` rendered as strings.
   *
   * @param dir               directory under which the Parquet files are written
   * @param numRows           number of rows to generate
   * @param numDistinctValues number of distinct values in the `value` column
   * @param width             number of payload columns
   */
  private def prepareStringDictTable(
      dir: File, numRows: Int, numDistinctValues: Int, width: Int): Unit = {
    val valueExpr = s"CAST(id % $numDistinctValues AS STRING) AS value"
    val payloadExprs = (1 to width).map(i => s"CAST(rand() AS STRING) c$i")

    val df = spark.range(numRows).selectExpr(valueExpr +: payloadExprs: _*).sort("value")
    saveAsTable(df, dir, useDictionary = true)
  }

  /**
   * Writes `df` as Parquet under `dir/parquet` (overwriting any previous content) and
   * exposes the written files as the temporary view `parquetTable`.
   *
   * @param df            the data to write
   * @param dir           parent directory for the output
   * @param useDictionary currently ignored: the value is accepted but never read, so it
   *                      has no effect on how the files are written. Kept so existing
   *                      call sites continue to compile.
   */
  private def saveAsTable(df: DataFrame, dir: File, useDictionary: Boolean = false): Unit = {
    val parquetPath = dir.getCanonicalPath + "/parquet"
    df.write.mode("overwrite").parquet(parquetPath)
    spark.read.parquet(parquetPath).createOrReplaceTempView("parquetTable")
  }

  /**
   * Times `SELECT selectExpr FROM parquetTable WHERE whereExpr` twice: once with
   * `ParquetInputFormat.COLUMN_INDEX_FILTERING_ENABLED` set to false and once with it
   * set to true, then prints the comparison.
   *
   * @param values     row count handed to [[Benchmark]] for the per-row rate columns
   * @param title      heading of the result table
   * @param whereExpr  SQL predicate under test
   * @param selectExpr SQL projection; defaults to all columns
   */
  def filterPushDownBenchmark(
      values: Int,
      title: String,
      whereExpr: String,
      selectExpr: String = "*"): Unit = {
    val benchmark = new Benchmark(title, values, minNumIters = 5, output = output)
    val query = s"SELECT $selectExpr FROM parquetTable WHERE $whereExpr"

    Seq(false, true).foreach { columnIndexEnabled =>
      // The baseline label keeps its trailing space so case names stay as they were.
      val suffix = if (columnIndexEnabled) "(columnIndex)" else ""
      benchmark.addCase(s"Parquet Vectorized $suffix") { _ =>
        withSQLConf(
          ParquetInputFormat.COLUMN_INDEX_FILTERING_ENABLED -> columnIndexEnabled.toString) {
          spark.sql(query).noop()
        }
      }
    }
    benchmark.run()
  }

  /**
   * Runs the predicates for a numeric `value` column: predicates that match no row,
   * predicates built around the single value `mid`, and `value < bound` predicates
   * with the bound at 10/50/90 percent of `numRows`, followed by predicates that are
   * true for every non-negative, non-null value.
   */
  private def runIntBenchmark(numRows: Int, width: Int, mid: Int): Unit = {
    Seq("value IS NULL", s"$mid < value AND value < $mid").foreach { whereExpr =>
      val title = collapseRange(s"Select 0 int row ($whereExpr)")
      filterPushDownBenchmark(numRows, title, whereExpr)
    }

    Seq(
      s"value = $mid",
      s"value <=> $mid",
      s"$mid <= value AND value <= $mid",
      s"${mid - 1} < value AND value < ${mid + 1}"
    ).foreach { whereExpr =>
      val title = collapseRange(s"Select 1 int row ($whereExpr)")
      filterPushDownBenchmark(numRows, title, whereExpr)
    }

    val selectExpr = maxOfAllColumns(width)

    Seq(10, 50, 90).foreach { percent =>
      val bound = numRows * percent / 100
      filterPushDownBenchmark(
        numRows,
        s"Select $percent% int rows (value < $bound)",
        s"value < $bound",
        selectExpr)
    }

    Seq("value IS NOT NULL", "value > -1", "value != -1").foreach { whereExpr =>
      filterPushDownBenchmark(
        numRows,
        s"Select all int rows ($whereExpr)",
        whereExpr,
        selectExpr)
    }
  }

  /**
   * Runs the predicates for a string `value` column: predicates that match no row,
   * predicates built around the literal `'searchValue'`, and `IS NOT NULL`.
   *
   * @param colType label used in the result-table titles (e.g. "string")
   */
  private def runStringBenchmark(
      numRows: Int, width: Int, searchValue: Int, colType: String): Unit = {
    Seq("value IS NULL", s"'$searchValue' < value AND value < '$searchValue'")
      .foreach { whereExpr =>
        val title = collapseRange(s"Select 0 $colType row ($whereExpr)")
        filterPushDownBenchmark(numRows, title, whereExpr)
      }

    Seq(
      s"value = '$searchValue'",
      s"value <=> '$searchValue'",
      s"'$searchValue' <= value AND value <= '$searchValue'"
    ).foreach { whereExpr =>
      val title = collapseRange(s"Select 1 $colType row ($whereExpr)")
      filterPushDownBenchmark(numRows, title, whereExpr)
    }

    val selectExpr = maxOfAllColumns(width)

    Seq("value IS NOT NULL").foreach { whereExpr =>
      filterPushDownBenchmark(
        numRows,
        s"Select all $colType rows ($whereExpr)",
        whereExpr,
        selectExpr)
    }
  }

  /** Entry point: runs every benchmark group in a fixed order. `mainArgs` is not used. */
  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
    runManyDistinctValuesBenchmark()
    runFewDistinctValuesBenchmark()
    runStringStartsWithBenchmark()
    runDecimalBenchmark()
    runInSetBenchmark()
    runByteBenchmark()
    runTimestampBenchmark()
    runManyFiltersBenchmark()
  }

  /** String-typed and then long-typed `value`, where every row has a different value. */
  private def runManyDistinctValuesBenchmark(): Unit = {
    runBenchmark("Pushdown for many distinct value case") {
      withTempPath { dir =>
        withTempTable("parquetTable") {
          Seq(true, false).foreach { useStringForValue =>
            prepareTable(dir, numRows, width, useStringForValue)
            if (useStringForValue) {
              runStringBenchmark(numRows, width, mid, "string")
            } else {
              runIntBenchmark(numRows, width, mid)
            }
          }
        }
      }
    }
  }

  /** String `value` column limited to 200 distinct values. */
  private def runFewDistinctValuesBenchmark(): Unit = {
    runBenchmark("Pushdown for few distinct value case (use dictionary encoding)") {
      withTempPath { dir =>
        val numDistinctValues = 200
        withTempTable("parquetTable") {
          prepareStringDictTable(dir, numRows, numDistinctValues, width)
          runStringBenchmark(numRows, width, numDistinctValues / 2, "distinct string")
        }
      }
    }
  }

  /** `LIKE 'prefix%'` predicates of increasing selectivity on a string `value` column. */
  private def runStringStartsWithBenchmark(): Unit = {
    runBenchmark("Pushdown benchmark for StringStartsWith") {
      withTempPath { dir =>
        withTempTable("parquetTable") {
          prepareTable(dir, numRows, width, useStringForValue = true)
          // `mid` without its last digit, so the third pattern is the most selective prefix.
          val midPrefix = mid.toString.dropRight(1)
          Seq(
            "value like '10%'",
            "value like '1000%'",
            s"value like '$midPrefix%'"
          ).foreach { whereExpr =>
            val title = s"StringStartsWith filter: ($whereExpr)"
            filterPushDownBenchmark(numRows, title, whereExpr)
          }
        }
      }
    }
  }

  /** Point lookup and 10/50/90 percent range scans for three decimal precisions. */
  private def runDecimalBenchmark(): Unit = {
    runBenchmark(s"Pushdown benchmark for ${DecimalType.simpleString}") {
      withTempPath { dir =>
        val intBackedDecimal = s"decimal(${Decimal.MAX_INT_DIGITS}, 2)"
        Seq(
          intBackedDecimal,
          s"decimal(${Decimal.MAX_LONG_DIGITS}, 2)",
          s"decimal(${DecimalType.MAX_PRECISION}, 2)"
        ).foreach { dt =>
          val columns = (1 to width).map(i => s"CAST(id AS string) c$i")
          // Only the narrowest type takes the id modulo 9999999 before the cast.
          val valueCol = if (dt.equalsIgnoreCase(intBackedDecimal)) {
            monotonically_increasing_id() % 9999999
          } else {
            monotonically_increasing_id()
          }
          val df = spark.range(numRows)
            .selectExpr(columns: _*)
            .withColumn("value", valueCol.cast(dt))

          withTempTable("parquetTable") {
            saveAsTable(df, dir)

            val pointLookup = s"value = $mid"
            filterPushDownBenchmark(numRows, s"Select 1 $dt row ($pointLookup)", pointLookup)

            val selectExpr = maxOfAllColumns(width)
            Seq(10, 50, 90).foreach { percent =>
              val bound = numRows * percent / 100
              filterPushDownBenchmark(
                numRows,
                s"Select $percent% $dt rows (value < $bound)",
                s"value < $bound",
                selectExpr)
            }
          }
        }
      }
    }
  }

  /**
   * `IN (...)` lists of 5/10/50/100 random values drawn from the lowest 10/50/90
   * percent of the value range. The values come from an unseeded `Random`, so the
   * exact predicates differ between runs.
   */
  private def runInSetBenchmark(): Unit = {
    runBenchmark("Pushdown benchmark for InSet -> InFilters") {
      withTempPath { dir =>
        withTempTable("parquetTable") {
          prepareTable(dir, numRows, width, useStringForValue = false)
          Seq(5, 10, 50, 100).foreach { count =>
            Seq(10, 50, 90).foreach { distribution =>
              val upperBound = numRows * distribution / 100
              val filter = Range(0, count).map(_ => Random.nextInt(upperBound))
              val whereExpr = s"value in(${filter.mkString(",")})"
              val title =
                s"InSet -> InFilters (values count: $count, distribution: $distribution)"
              filterPushDownBenchmark(numRows, title, whereExpr)
            }
          }
        }
      }
    }
  }

  /** Equality and 10/50/90 percent range scans on a byte column holding `id % Byte.MaxValue`. */
  private def runByteBenchmark(): Unit = {
    val byteType = ByteType.simpleString
    runBenchmark(s"Pushdown benchmark for $byteType") {
      withTempPath { dir =>
        val columns = (1 to width).map(i => s"CAST(id AS string) c$i")
        val df = spark.range(numRows).selectExpr(columns: _*)
          .withColumn("value", (monotonically_increasing_id() % Byte.MaxValue).cast(ByteType))
          .orderBy("value")

        withTempTable("parquetTable") {
          saveAsTable(df, dir)

          val pointLookup = s"value = CAST(${Byte.MaxValue / 2} AS $byteType)"
          filterPushDownBenchmark(
            numRows, s"Select 1 $byteType row ($pointLookup)", pointLookup)

          val selectExpr = maxOfAllColumns(width)
          Seq(10, 50, 90).foreach { percent =>
            val whereExpr = s"value < CAST(${Byte.MaxValue * percent / 100} AS $byteType)"
            filterPushDownBenchmark(
              numRows,
              s"Select $percent% $byteType rows ($whereExpr)",
              whereExpr,
              selectExpr)
          }
        }
      }
    }
  }

  /**
   * Point lookup and 10/50/90 percent range scans on a timestamp column, repeated for
   * every value of `ParquetOutputTimestampType`, with timestamp filter pushdown enabled.
   */
  private def runTimestampBenchmark(): Unit = {
    runBenchmark("Pushdown benchmark for Timestamp") {
      withTempPath { dir =>
        withSQLConf(SQLConf.PARQUET_FILTER_PUSHDOWN_TIMESTAMP_ENABLED.key -> true.toString) {
          ParquetOutputTimestampType.values.toSeq.map(_.toString).foreach { fileType =>
            withSQLConf(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key -> fileType) {
              val columns = (1 to width).map(i => s"CAST(id AS string) c$i")
              val df = spark.range(numRows).selectExpr(columns: _*)
                .withColumn("value", timestamp_seconds(monotonically_increasing_id()))

              withTempTable("parquetTable") {
                saveAsTable(df, dir)

                val pointLookup = s"value = timestamp_seconds($mid)"
                filterPushDownBenchmark(
                  numRows,
                  s"Select 1 timestamp stored as $fileType row ($pointLookup)",
                  pointLookup)

                val selectExpr = maxOfAllColumns(width)
                Seq(10, 50, 90).foreach { percent =>
                  val whereExpr = s"value < timestamp_seconds(${numRows * percent / 100})"
                  filterPushDownBenchmark(
                    numRows,
                    s"Select $percent% timestamp stored as $fileType rows ($whereExpr)",
                    whereExpr,
                    selectExpr)
                }
              }
            }
          }
        }
      }
    }
  }

  /** A one-row, 500-column table queried with 1, 250 and 500 AND-ed equality filters. */
  private def runManyFiltersBenchmark(): Unit = {
    runBenchmark("Pushdown benchmark with many filters") {
      // Deliberately different from the object-level numRows/width defaults.
      val rowCount = 1
      val columnCount = 500
      withTempPath { dir =>
        val columns = (1 to columnCount).map(i => s"id c$i")
        val df = spark.range(rowCount).selectExpr(columns: _*)
        withTempTable("parquetTable") {
          saveAsTable(df, dir)
          Seq(1, 250, 500).foreach { numFilter =>
            val whereExpr = (1 to numFilter).map(i => s"c$i = 0").mkString(" and ")
            // Note: InferFiltersFromConstraints will add more filters to this given filters
            filterPushDownBenchmark(
              rowCount, s"Select 1 row with $numFilter filters", whereExpr)
          }
        }
      }
    }
  }
}
```
```
================================================================================================
Pushdown for many distinct value case
================================================================================================
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 0 string row (value IS NULL): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 92 105
9 171.7 5.8 1.0X
Parquet Vectorized (columnIndex) 70 80
8 225.3 4.4 1.3X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 0 string row ('7864320' < value < '7864320'): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-----------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 829
849 12 19.0 52.7 1.0X
Parquet Vectorized (columnIndex) 85
92 6 184.8 5.4 9.7X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 string row (value = '7864320'): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 830 845
11 19.0 52.8 1.0X
Parquet Vectorized (columnIndex) 85 94
7 185.9 5.4 9.8X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 string row (value <=> '7864320'): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 841 895
47 18.7 53.5 1.0X
Parquet Vectorized (columnIndex) 77 81
5 205.4 4.9 11.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 string row ('7864320' <= value <= '7864320'): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 826
843 13 19.0 52.5 1.0X
Parquet Vectorized (columnIndex) 79
84 5 197.9 5.1 10.4X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select all string rows (value IS NOT NULL): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 16542 16827
247 1.0 1051.7 1.0X
Parquet Vectorized (columnIndex) 16491 16571
57 1.0 1048.5 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 0 int row (value IS NULL): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 60 66
5 263.7 3.8 1.0X
Parquet Vectorized (columnIndex) 59 66
6 267.7 3.7 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 0 int row (7864320 < value < 7864320): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 793 797
5 19.8 50.4 1.0X
Parquet Vectorized (columnIndex) 79 85
5 199.6 5.0 10.1X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 int row (value = 7864320): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 792 807
11 19.9 50.3 1.0X
Parquet Vectorized (columnIndex) 72 76
4 218.7 4.6 11.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 int row (value <=> 7864320): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 792 832
37 19.8 50.4 1.0X
Parquet Vectorized (columnIndex) 77 85
9 205.0 4.9 10.3X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 int row (7864320 <= value <= 7864320): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 782 806
19 20.1 49.7 1.0X
Parquet Vectorized (columnIndex) 70 75
4 224.4 4.5 11.2X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 int row (7864319 < value < 7864321): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 772 797
19 20.4 49.1 1.0X
Parquet Vectorized (columnIndex) 73 79
6 216.1 4.6 10.6X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 10% int rows (value < 1572864): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 2313 2347
30 6.8 147.1 1.0X
Parquet Vectorized (columnIndex) 1657 1681
20 9.5 105.4 1.4X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 50% int rows (value < 7864320): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 8055 8162
69 2.0 512.1 1.0X
Parquet Vectorized (columnIndex) 7800 7861
52 2.0 495.9 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 90% int rows (value < 14155776): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 13875 14027
174 1.1 882.1 1.0X
Parquet Vectorized (columnIndex) 13954 14061
151 1.1 887.2 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select all int rows (value IS NOT NULL): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 15637 15728
70 1.0 994.2 1.0X
Parquet Vectorized (columnIndex) 15481 15634
101 1.0 984.2 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select all int rows (value > -1): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 15156 15369
144 1.0 963.6 1.0X
Parquet Vectorized (columnIndex) 15255 15409
144 1.0 969.9 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select all int rows (value != -1): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 15220 15440
143 1.0 967.7 1.0X
Parquet Vectorized (columnIndex) 15327 15399
60 1.0 974.5 1.0X
================================================================================================
Pushdown for few distinct value case (use dictionary encoding)
================================================================================================
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 0 distinct string row (value IS NULL): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 51 55
6 309.6 3.2 1.0X
Parquet Vectorized (columnIndex) 49 55
6 319.7 3.1 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 0 distinct string row ('100' < value < '100'): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 907
921 11 17.3 57.7 1.0X
Parquet Vectorized (columnIndex) 56
60 3 279.6 3.6 16.1X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 distinct string row (value = '100'): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 916 927
8 17.2 58.3 1.0X
Parquet Vectorized (columnIndex) 121 126
6 130.0 7.7 7.6X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 distinct string row (value <=> '100'): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 900 908
7 17.5 57.2 1.0X
Parquet Vectorized (columnIndex) 118 124
5 133.2 7.5 7.6X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 distinct string row ('100' <= value <= '100'): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 924
935 10 17.0 58.8 1.0X
Parquet Vectorized (columnIndex) 125
130 5 126.2 7.9 7.4X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select all distinct string rows (value IS NOT NULL): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-----------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 16896
17060 109 0.9 1074.2 1.0X
Parquet Vectorized (columnIndex) 17062
17211 114 0.9 1084.8 1.0X
================================================================================================
Pushdown benchmark for StringStartsWith
================================================================================================
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
StringStartsWith filter: (value like '10%'): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
---------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 1028 1060
29 15.3 65.3 1.0X
Parquet Vectorized (columnIndex) 849 863
12 18.5 54.0 1.2X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
StringStartsWith filter: (value like '1000%'): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
-----------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 828 835
6 19.0 52.6 1.0X
Parquet Vectorized (columnIndex) 71 77
4 220.1 4.5 11.6X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
StringStartsWith filter: (value like '786432%'): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 832
845 10 18.9 52.9 1.0X
Parquet Vectorized (columnIndex) 70
76 3 223.4 4.5 11.8X
================================================================================================
Pushdown benchmark for decimal
================================================================================================
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 decimal(9, 2) row (value = 7864320): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 1128 1159
36 13.9 71.7 1.0X
Parquet Vectorized (columnIndex) 46 49
3 345.5 2.9 24.8X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 10% decimal(9, 2) rows (value < 1572864): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 4827
4927 112 3.3 306.9 1.0X
Parquet Vectorized (columnIndex) 2238
2387 123 7.0 142.3 2.2X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 50% decimal(9, 2) rows (value < 7864320): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 9972
10135 113 1.6 634.0 1.0X
Parquet Vectorized (columnIndex) 9395
9503 102 1.7 597.3 1.1X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 90% decimal(9, 2) rows (value < 14155776): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 11283
11461 192 1.4 717.3 1.0X
Parquet Vectorized (columnIndex) 11070
11236 144 1.4 703.8 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 decimal(18, 2) row (value = 7864320): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
-----------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 1170 1181
8 13.4 74.4 1.0X
Parquet Vectorized (columnIndex) 41 43
3 380.2 2.6 28.3X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 10% decimal(18, 2) rows (value < 1572864): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 1962
2041 103 8.0 124.7 1.0X
Parquet Vectorized (columnIndex) 1166
1192 23 13.5 74.1 1.7X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 50% decimal(18, 2) rows (value < 7864320): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 6211
6276 52 2.5 394.9 1.0X
Parquet Vectorized (columnIndex) 5572
5667 60 2.8 354.3 1.1X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 90% decimal(18, 2) rows (value < 14155776): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
---------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 10231
10430 302 1.5 650.5 1.0X
Parquet Vectorized (columnIndex) 9985
10291 282 1.6 634.8 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 decimal(38, 2) row (value = 7864320): Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
-----------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 1746 1796
72 9.0 111.0 1.0X
Parquet Vectorized (columnIndex) 44 47
3 360.3 2.8 40.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 10% decimal(38, 2) rows (value < 1572864): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 2758
2861 66 5.7 175.4 1.0X
Parquet Vectorized (columnIndex) 1551
1581 20 10.1 98.6 1.8X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 50% decimal(38, 2) rows (value < 7864320): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 8324
8443 91 1.9 529.2 1.0X
Parquet Vectorized (columnIndex) 7661
7719 75 2.1 487.0 1.1X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 90% decimal(38, 2) rows (value < 14155776): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
---------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 13847
14118 224 1.1 880.4 1.0X
Parquet Vectorized (columnIndex) 13609
13806 150 1.2 865.3 1.0X
================================================================================================
Pushdown benchmark for InSet -> InFilters
================================================================================================
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 5, distribution: 10): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 758
1035 581 20.8 48.2 1.0X
Parquet Vectorized (columnIndex) 110
118 7 142.6 7.0 6.9X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 5, distribution: 50): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 2871
2928 38 5.5 182.6 1.0X
Parquet Vectorized (columnIndex) 112
117 5 140.9 7.1 25.7X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 5, distribution: 90): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 2197
2218 16 7.2 139.7 1.0X
Parquet Vectorized (columnIndex) 111
118 6 141.9 7.0 19.8X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 10, distribution: 10): Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
---------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 759
779 12 20.7 48.3 1.0X
Parquet Vectorized (columnIndex) 150
155 3 105.2 9.5 5.1X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 10, distribution: 50): Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
---------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 3633
3674 28 4.3 231.0 1.0X
Parquet Vectorized (columnIndex) 169
181 14 93.0 10.8 21.5X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 10, distribution: 90): Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
---------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 3623
3666 29 4.3 230.3 1.0X
Parquet Vectorized (columnIndex) 166
176 6 94.6 10.6 21.8X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 50, distribution: 10): Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
---------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 8220
8366 199 1.9 522.6 1.0X
Parquet Vectorized (columnIndex) 8135
8197 66 1.9 517.2 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 50, distribution: 50): Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
---------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 8164
8224 48 1.9 519.1 1.0X
Parquet Vectorized (columnIndex) 8106
8212 60 1.9 515.3 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 50, distribution: 90): Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
---------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 8179
8257 50 1.9 520.0 1.0X
Parquet Vectorized (columnIndex) 8176
8269 76 1.9 519.8 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 100, distribution: 10): Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 8199
8339 133 1.9 521.3 1.0X
Parquet Vectorized (columnIndex) 8128
8247 100 1.9 516.8 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 100, distribution: 50): Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 8075
8180 75 1.9 513.4 1.0X
Parquet Vectorized (columnIndex) 8133
8185 57 1.9 517.1 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 100, distribution: 90): Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 8121
8163 33 1.9 516.3 1.0X
Parquet Vectorized (columnIndex) 8093
8159 63 1.9 514.5 1.0X
================================================================================================
Pushdown benchmark for tinyint
================================================================================================
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 tinyint row (value = CAST(63 AS tinyint)): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 1133
1201 46 13.9 72.1 1.0X
Parquet Vectorized (columnIndex) 85
90 5 184.7 5.4 13.3X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 10% tinyint rows (value < CAST(12 AS tinyint)): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 1841
1860 19 8.5 117.1 1.0X
Parquet Vectorized (columnIndex) 1104
1115 10 14.2 70.2 1.7X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 50% tinyint rows (value < CAST(63 AS tinyint)): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 5747
5818 86 2.7 365.4 1.0X
Parquet Vectorized (columnIndex) 5411
5547 108 2.9 344.1 1.1X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 90% tinyint rows (value < CAST(114 AS tinyint)): Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 10143
10249 108 1.6 644.9 1.0X
Parquet Vectorized (columnIndex) 9730
9832 78 1.6 618.6 1.0X
================================================================================================
Pushdown benchmark for Timestamp
================================================================================================
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 timestamp stored as INT96 row (value = timestamp_seconds(7864320)):
Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-----------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized
4023 4060 35 3.9 255.8 1.0X
Parquet Vectorized (columnIndex)
3908 4044 96 4.0 248.5 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 10% timestamp stored as INT96 rows (value <
timestamp_seconds(1572864)): Best Time(ms) Avg Time(ms) Stdev(ms)
Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized
4758 4967 179 3.3 302.5
1.0X
Parquet Vectorized (columnIndex)
4750 4828 45 3.3 302.0
1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 50% timestamp stored as INT96 rows (value <
timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms)
Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized
7682 7792 108 2.0 488.4
1.0X
Parquet Vectorized (columnIndex)
7661 7753 111 2.1 487.1
1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 90% timestamp stored as INT96 rows (value <
timestamp_seconds(14155776)): Best Time(ms) Avg Time(ms) Stdev(ms)
Rate(M/s) Per Row(ns) Relative
---------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized
10538 10678 134 1.5 670.0
1.0X
Parquet Vectorized (columnIndex)
10489 10602 77 1.5 666.9
1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 timestamp stored as TIMESTAMP_MICROS row (value =
timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms)
Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized
1053 1064 7 14.9 66.9
1.0X
Parquet Vectorized (columnIndex)
41 46 5 382.0 2.6
25.6X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 10% timestamp stored as TIMESTAMP_MICROS rows (value <
timestamp_seconds(1572864)): Best Time(ms) Avg Time(ms) Stdev(ms)
Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized
1808 1883 83 8.7
114.9 1.0X
Parquet Vectorized (columnIndex)
1112 1143 19 14.1
70.7 1.6X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 50% timestamp stored as TIMESTAMP_MICROS rows (value <
timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms)
Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized
5921 6019 68 2.7
376.4 1.0X
Parquet Vectorized (columnIndex)
5411 5538 100 2.9
344.0 1.1X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 90% timestamp stored as TIMESTAMP_MICROS rows (value <
timestamp_seconds(14155776)): Best Time(ms) Avg Time(ms) Stdev(ms)
Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized
9713 9785 61 1.6
617.6 1.0X
Parquet Vectorized (columnIndex)
9670 9813 151 1.6
614.8 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 timestamp stored as TIMESTAMP_MILLIS row (value =
timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms)
Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized
1144 1152 5 13.8 72.7
1.0X
Parquet Vectorized (columnIndex)
38 43 5 413.0 2.4
30.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 10% timestamp stored as TIMESTAMP_MILLIS rows (value <
timestamp_seconds(1572864)): Best Time(ms) Avg Time(ms) Stdev(ms)
Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized
1890 1960 96 8.3
120.2 1.0X
Parquet Vectorized (columnIndex)
1125 1138 13 14.0
71.5 1.7X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 50% timestamp stored as TIMESTAMP_MILLIS rows (value <
timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms)
Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized
5895 6148 221 2.7
374.8 1.0X
Parquet Vectorized (columnIndex)
5483 5528 71 2.9
348.6 1.1X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 90% timestamp stored as TIMESTAMP_MILLIS rows (value <
timestamp_seconds(14155776)): Best Time(ms) Avg Time(ms) Stdev(ms)
Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized
9928 10021 61 1.6
631.2 1.0X
Parquet Vectorized (columnIndex)
9747 9855 85 1.6
619.7 1.0X
================================================================================================
Pushdown benchmark with many filters
================================================================================================
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 row with 1 filters: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 183 191
4 0.0 182665658.0 1.0X
Parquet Vectorized (columnIndex) 180 187
6 0.0 179942365.0 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 row with 250 filters: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 2206 2240
35 0.0 2206072129.0 1.0X
Parquet Vectorized (columnIndex) 2206 2301
76 0.0 2206015717.0 1.0X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 row with 500 filters: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized 12239 12298
53 0.0 12239406583.0 1.0X
Parquet Vectorized (columnIndex) 12276 12502
200 0.0 12275545185.0 1.0X
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]