wangyum commented on pull request #30517:
URL: https://github.com/apache/spark/pull/30517#issuecomment-743969298


   Parquet column index benchmark code and result:
   ```scala
   /*
    * Licensed to the Apache Software Foundation (ASF) under one or more
    * contributor license agreements.  See the NOTICE file distributed with
    * this work for additional information regarding copyright ownership.
    * The ASF licenses this file to You under the Apache License, Version 2.0
    * (the "License"); you may not use this file except in compliance with
    * the License.  You may obtain a copy of the License at
    *
    *    http://www.apache.org/licenses/LICENSE-2.0
    *
    * Unless required by applicable law or agreed to in writing, software
    * distributed under the License is distributed on an "AS IS" BASIS,
    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    * See the License for the specific language governing permissions and
    * limitations under the License.
    */
   
   package org.apache.spark.sql.execution.benchmark
   
   import java.io.File
   
   import scala.util.Random
   
   import org.apache.spark.SparkConf
   import org.apache.spark.benchmark.Benchmark
   import org.apache.spark.sql.{DataFrame, SparkSession}
   import org.apache.spark.sql.functions.{monotonically_increasing_id, 
timestamp_seconds}
   import org.apache.spark.sql.internal.SQLConf
   import org.apache.spark.sql.internal.SQLConf.ParquetOutputTimestampType
   import org.apache.spark.sql.types.{ByteType, Decimal, DecimalType}
   
   /**
    * Benchmark to measure read performance with Parquet column index.
    * To run this benchmark:
    * {{{
    *   1. without sbt: bin/spark-submit --class <this class> <spark sql test jar>
    *   2. build/sbt "sql/test:runMain <this class>"
    *   3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>"
    *      Results will be written to "benchmarks/ParquetFilterPushdownBenchmark-results.txt".
    * }}}
    */
   object ParquetFilterPushdownBenchmark extends SqlBasedBenchmark {
   
     /**
      * Builds the session shared by all benchmark cases: a single-threaded local master, plus
      * memory and compression settings that are applied only when not already configured.
      */
     override def getSparkSession: SparkSession = {
       val benchmarkConf = new SparkConf().setAppName(this.getClass.getSimpleName)
       // Since `spark.master` always exists, it is overridden unconditionally.
       benchmarkConf.set("spark.master", "local[1]")
       // Everything else is only a fallback for settings the caller did not provide.
       Seq(
         "spark.driver.memory" -> "3g",
         "spark.executor.memory" -> "3g",
         "orc.compression" -> "snappy",
         "spark.sql.parquet.compression.codec" -> "snappy"
       ).foreach { case (key, fallback) => benchmarkConf.setIfMissing(key, fallback) }

       SparkSession.builder().config(benchmarkConf).getOrCreate()
     }
   
     // Number of rows written to each generated table (15 * 2^20).
     private val numRows = 15 * 1024 * 1024
     // Number of string columns (c1..cN) generated next to the `value` column.
     private val width = 5
     // Midpoint of the row range, used as the probe value in point and range filters.
     private val mid = numRows / 2
   
     /** Runs `f`, then drops every temp view listed in `tableNames`, even when `f` throws. */
     def withTempTable(tableNames: String*)(f: => Unit): Unit = {
       try {
         f
       } finally {
         tableNames.foreach(viewName => spark.catalog.dropTempView(viewName))
       }
     }
   
     /**
      * Generates and saves the table used by the "many distinct values" cases.
      *
      * Each row gets `width` string columns (c1..cN), all cast from one random long, and a `value`
      * column holding `monotonically_increasing_id()`. The rows are sorted by `value` before being
      * handed to `saveAsTable`.
      *
      * @param dir directory the table is written under
      * @param numRows number of rows to generate
      * @param width number of c1..cN string columns
      * @param useStringForValue when true, `value` is cast to string; otherwise it stays numeric
      */
     private def prepareTable(
         dir: File,
         numRows: Int,
         width: Int,
         useStringForValue: Boolean): Unit = {
       import spark.implicits._
       val idCol = monotonically_increasing_id()
       val valueCol = if (useStringForValue) idCol.cast("string") else idCol
       val stringCols = (1 to width).map(i => s"CAST(value AS STRING) c$i")
       val randomLongs = spark.range(numRows).map(_ => Random.nextLong)
       val df = randomLongs
         .selectExpr(stringCols: _*)
         .withColumn("value", valueCol)
         .sort("value")

       saveAsTable(df, dir)
     }
   
     /**
      * Generates and saves the table used by the "few distinct values" cases.
      *
      * `value` is `id % numDistinctValues` cast to string, so it takes at most `numDistinctValues`
      * different values; columns c1..cN are `rand()` cast to string. Rows are sorted by `value`.
      */
     private def prepareStringDictTable(
         dir: File,
         numRows: Int,
         numDistinctValues: Int,
         width: Int): Unit = {
       // Index 0 produces the `value` column; indices 1..width produce c1..cN.
       val projections = (0 to width).map { i =>
         if (i == 0) s"CAST(id % $numDistinctValues AS STRING) AS value"
         else s"CAST(rand() AS STRING) c$i"
       }
       val sorted = spark.range(numRows).selectExpr(projections: _*).sort("value")

       saveAsTable(sorted, dir, useDictionary = true)
     }
   
     /**
      * Writes `df` as Parquet under `dir` (overwriting any earlier output), then registers the
      * written files as the `parquetTable` temp view.
      *
      * NOTE(review): `useDictionary` is accepted but never read by this method.
      */
     private def saveAsTable(df: DataFrame, dir: File, useDictionary: Boolean = false): Unit = {
       val parquetPath = s"${dir.getCanonicalPath}/parquet"
       df.write.mode("overwrite").parquet(parquetPath)
       val reloaded = spark.read.parquet(parquetPath)
       reloaded.createOrReplaceTempView("parquetTable")
     }
   
     /**
      * Runs one query against `parquetTable` as two benchmark cases: first with
      * `parquet.filter.columnindex.enabled` set to false, then with it set to true.
      *
      * @param values row count handed to the [[Benchmark]] constructor
      * @param title benchmark title shown in the result table
      * @param whereExpr SQL predicate placed after WHERE
      * @param selectExpr SQL projection list; defaults to all columns
      */
     def filterPushDownBenchmark(
         values: Int,
         title: String,
         whereExpr: String,
         selectExpr: String = "*"): Unit = {
       val benchmark = new Benchmark(title, values, minNumIters = 5, output = output)
       // The query text is identical for both cases, so it is built once and kept on one line.
       val query = s"SELECT $selectExpr FROM parquetTable WHERE $whereExpr"

       Seq(false, true).foreach { columnIndexEnabled =>
         val name = s"Parquet Vectorized ${if (columnIndexEnabled) "(columnIndex)" else ""}"
         benchmark.addCase(name) { _ =>
           withSQLConf("parquet.filter.columnindex.enabled" -> s"$columnIndexEnabled") {
             spark.sql(query).noop()
           }
         }
       }

       benchmark.run()
     }
   
     /**
      * Runs the filter cases for a numeric `value` column: predicates selecting zero rows, exactly
      * one row, roughly 10/50/90 percent of the rows, and all rows.
      *
      * @param numRows number of rows in `parquetTable`
      * @param width number of c1..cN columns aggregated in the percent and all-rows cases
      * @param mid probe value used by the zero-row and one-row predicates
      */
     private def runIntBenchmark(numRows: Int, width: Int, mid: Int): Unit = {
       Seq("value IS NULL", s"$mid < value AND value < $mid").foreach { whereExpr =>
         // Titles show range predicates in the compact "a < value < b" form.
         val title = s"Select 0 int row ($whereExpr)".replace("value AND value", "value")
         filterPushDownBenchmark(numRows, title, whereExpr)
       }

       Seq(
         s"value = $mid",
         s"value <=> $mid",
         s"$mid <= value AND value <= $mid",
         s"${mid - 1} < value AND value < ${mid + 1}"
       ).foreach { whereExpr =>
         val title = s"Select 1 int row ($whereExpr)".replace("value AND value", "value")
         filterPushDownBenchmark(numRows, title, whereExpr)
       }

       val selectExpr = (1 to width).map(i => s"MAX(c$i)").mkString("", ",", ", MAX(value)")

       Seq(10, 50, 90).foreach { percent =>
         // Computed in Long so that numRows * percent cannot overflow Int for larger tables.
         val upperBound = numRows.toLong * percent / 100
         filterPushDownBenchmark(
           numRows,
           s"Select $percent% int rows (value < $upperBound)",
           s"value < $upperBound",
           selectExpr
         )
       }

       Seq("value IS NOT NULL", "value > -1", "value != -1").foreach { whereExpr =>
         filterPushDownBenchmark(
           numRows,
           s"Select all int rows ($whereExpr)",
           whereExpr,
           selectExpr)
       }
     }
   
     /**
      * Runs the filter cases for a string `value` column: predicates selecting zero rows, the rows
      * equal to `searchValue`, and all rows.
      *
      * @param numRows number of rows in `parquetTable`
      * @param width number of c1..cN columns aggregated in the all-rows case
      * @param searchValue probe value, embedded in the predicates as a quoted string literal
      * @param colType label used in benchmark titles (for example "string")
      */
     private def runStringBenchmark(
         numRows: Int,
         width: Int,
         searchValue: Int,
         colType: String): Unit = {
       Seq(
         "value IS NULL",
         s"'$searchValue' < value AND value < '$searchValue'"
       ).foreach { whereExpr =>
         // Titles show range predicates in the compact "a < value < b" form.
         val title = s"Select 0 $colType row ($whereExpr)".replace("value AND value", "value")
         filterPushDownBenchmark(numRows, title, whereExpr)
       }

       Seq(
         s"value = '$searchValue'",
         s"value <=> '$searchValue'",
         s"'$searchValue' <= value AND value <= '$searchValue'"
       ).foreach { whereExpr =>
         val title = s"Select 1 $colType row ($whereExpr)".replace("value AND value", "value")
         filterPushDownBenchmark(numRows, title, whereExpr)
       }

       val selectExpr = (1 to width).map(i => s"MAX(c$i)").mkString("", ",", ", MAX(value)")

       Seq("value IS NOT NULL").foreach { whereExpr =>
         filterPushDownBenchmark(
           numRows,
           s"Select all $colType rows ($whereExpr)",
           whereExpr,
           selectExpr)
       }
     }
   
     /**
      * Entry point of the suite. Runs, in order, the benchmark groups for: many distinct values
      * (string and numeric), few distinct values, StringStartsWith, decimal, IN lists, byte,
      * timestamp, and a single-row table queried with many filters.
      *
      * Each group writes its data under a fresh temp path, registers it as `parquetTable`, and
      * drops that view again when the group finishes.
      *
      * @param mainArgs command-line arguments; not read by this method
      */
     override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
       runBenchmark("Pushdown for many distinct value case") {
         withTempPath { dir =>
           withTempTable("parquetTable") {
             Seq(true, false).foreach { useStringForValue =>
               prepareTable(dir, numRows, width, useStringForValue)
               if (useStringForValue) {
                 runStringBenchmark(numRows, width, mid, "string")
               } else {
                 runIntBenchmark(numRows, width, mid)
               }
             }
           }
         }
       }

       runBenchmark("Pushdown for few distinct value case (use dictionary encoding)") {
         withTempPath { dir =>
           val numDistinctValues = 200

           withTempTable("parquetTable") {
             prepareStringDictTable(dir, numRows, numDistinctValues, width)
             runStringBenchmark(numRows, width, numDistinctValues / 2, "distinct string")
           }
         }
       }

       runBenchmark("Pushdown benchmark for StringStartsWith") {
         withTempPath { dir =>
           withTempTable("parquetTable") {
             prepareTable(dir, numRows, width, useStringForValue = true)
             // The last pattern is `mid` without its final digit.
             val midPrefix = mid.toString.dropRight(1)
             Seq(
               "value like '10%'",
               "value like '1000%'",
               s"value like '$midPrefix%'"
             ).foreach { whereExpr =>
               val title = s"StringStartsWith filter: ($whereExpr)"
               filterPushDownBenchmark(numRows, title, whereExpr)
             }
           }
         }
       }

       runBenchmark(s"Pushdown benchmark for ${DecimalType.simpleString}") {
         withTempPath { dir =>
           val intDecimal = s"decimal(${Decimal.MAX_INT_DIGITS}, 2)"
           Seq(
             intDecimal,
             s"decimal(${Decimal.MAX_LONG_DIGITS}, 2)",
             s"decimal(${DecimalType.MAX_PRECISION}, 2)"
           ).foreach { dt =>
             val columns = (1 to width).map(i => s"CAST(id AS string) c$i")
             // For the narrowest decimal type the id is reduced modulo 9999999 before the cast.
             val valueCol = if (dt.equalsIgnoreCase(intDecimal)) {
               monotonically_increasing_id() % 9999999
             } else {
               monotonically_increasing_id()
             }
             val df = spark.range(numRows)
               .selectExpr(columns: _*).withColumn("value", valueCol.cast(dt))
             withTempTable("parquetTable") {
               saveAsTable(df, dir)

               Seq(s"value = $mid").foreach { whereExpr =>
                 val title = s"Select 1 $dt row ($whereExpr)".replace("value AND value", "value")
                 filterPushDownBenchmark(numRows, title, whereExpr)
               }

               val selectExpr =
                 (1 to width).map(i => s"MAX(c$i)").mkString("", ",", ", MAX(value)")
               Seq(10, 50, 90).foreach { percent =>
                 // Computed in Long so that numRows * percent cannot overflow Int.
                 val upperBound = numRows.toLong * percent / 100
                 filterPushDownBenchmark(
                   numRows,
                   s"Select $percent% $dt rows (value < $upperBound)",
                   s"value < $upperBound",
                   selectExpr
                 )
               }
             }
           }
         }
       }

       runBenchmark("Pushdown benchmark for InSet -> InFilters") {
         withTempPath { dir =>
           withTempTable("parquetTable") {
             prepareTable(dir, numRows, width, useStringForValue = false)
             Seq(5, 10, 50, 100).foreach { count =>
               Seq(10, 50, 90).foreach { distribution =>
                 // IN-list values are drawn from the first `distribution` percent of the id range.
                 val bound = (numRows.toLong * distribution / 100).toInt
                 val filter = Range(0, count).map(_ => Random.nextInt(bound))
                 val whereExpr = s"value in(${filter.mkString(",")})"
                 val title =
                   s"InSet -> InFilters (values count: $count, distribution: $distribution)"
                 filterPushDownBenchmark(numRows, title, whereExpr)
               }
             }
           }
         }
       }

       runBenchmark(s"Pushdown benchmark for ${ByteType.simpleString}") {
         withTempPath { dir =>
           val byteName = ByteType.simpleString
           val columns = (1 to width).map(i => s"CAST(id AS string) c$i")
           val df = spark.range(numRows).selectExpr(columns: _*)
             .withColumn("value", (monotonically_increasing_id() % Byte.MaxValue).cast(ByteType))
             .orderBy("value")
           withTempTable("parquetTable") {
             saveAsTable(df, dir)

             Seq(s"value = CAST(${Byte.MaxValue / 2} AS $byteName)").foreach { whereExpr =>
               val title = s"Select 1 $byteName row ($whereExpr)"
                 .replace("value AND value", "value")
               filterPushDownBenchmark(numRows, title, whereExpr)
             }

             val selectExpr =
               (1 to width).map(i => s"MAX(c$i)").mkString("", ",", ", MAX(value)")
             Seq(10, 50, 90).foreach { percent =>
               val predicate = s"value < CAST(${Byte.MaxValue * percent / 100} AS $byteName)"
               filterPushDownBenchmark(
                 numRows,
                 s"Select $percent% $byteName rows ($predicate)",
                 predicate,
                 selectExpr
               )
             }
           }
         }
       }

       runBenchmark("Pushdown benchmark for Timestamp") {
         withTempPath { dir =>
           withSQLConf(SQLConf.PARQUET_FILTER_PUSHDOWN_TIMESTAMP_ENABLED.key -> true.toString) {
             // One round per Parquet output timestamp type.
             ParquetOutputTimestampType.values.toSeq.map(_.toString).foreach { fileType =>
               withSQLConf(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key -> fileType) {
                 val columns = (1 to width).map(i => s"CAST(id AS string) c$i")
                 val df = spark.range(numRows).selectExpr(columns: _*)
                   .withColumn("value", timestamp_seconds(monotonically_increasing_id()))
                 withTempTable("parquetTable") {
                   saveAsTable(df, dir)

                   Seq(s"value = timestamp_seconds($mid)").foreach { whereExpr =>
                     val title = s"Select 1 timestamp stored as $fileType row ($whereExpr)"
                       .replace("value AND value", "value")
                     filterPushDownBenchmark(numRows, title, whereExpr)
                   }

                   val selectExpr = (1 to width)
                     .map(i => s"MAX(c$i)").mkString("", ",", ", MAX(value)")
                   Seq(10, 50, 90).foreach { percent =>
                     // Computed in Long so that numRows * percent cannot overflow Int.
                     val predicate =
                       s"value < timestamp_seconds(${numRows.toLong * percent / 100})"
                     filterPushDownBenchmark(
                       numRows,
                       s"Select $percent% timestamp stored as $fileType rows ($predicate)",
                       predicate,
                       selectExpr
                     )
                   }
                 }
               }
             }
           }
         }
       }

       runBenchmark("Pushdown benchmark with many filters") {
         // These deliberately shadow the object-level values: one row, many columns.
         val numRows = 1
         val width = 500

         withTempPath { dir =>
           val columns = (1 to width).map(i => s"id c$i")
           val df = spark.range(numRows).selectExpr(columns: _*)
           withTempTable("parquetTable") {
             saveAsTable(df, dir)
             Seq(1, 250, 500).foreach { numFilter =>
               val whereExpr = (1 to numFilter).map(i => s"c$i = 0").mkString(" and ")
               // Note: InferFiltersFromConstraints will add more filters to this given filters
               filterPushDownBenchmark(
                 numRows, s"Select 1 row with $numFilter filters", whereExpr)
             }
           }
         }
       }
     }
   }
   
   ```
   ```
   
================================================================================================
   Pushdown for many distinct value case
   
================================================================================================
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 0 string row (value IS NULL):      Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                   99            111       
   10        158.9           6.3       1.0X
   Parquet Vectorized (columnindex)                     78             86       
    9        201.6           5.0       1.3X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 0 string row ('7864320' < value < '7864320'):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
-----------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                             835           
 848          17         18.8          53.1       1.0X
   Parquet Vectorized (columnindex)                                91           
  96           4        173.5           5.8       9.2X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 string row (value = '7864320'):  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                  839            846       
    6         18.7          53.3       1.0X
   Parquet Vectorized (columnindex)                     85             93       
   10        184.8           5.4       9.9X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 string row (value <=> '7864320'):  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
-------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                   838            852      
    16         18.8          53.3       1.0X
   Parquet Vectorized (columnindex)                      79             85      
     4        197.9           5.1      10.5X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 string row ('7864320' <= value <= '7864320'):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
-------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                               872         
   907          45         18.0          55.4       1.0X
   Parquet Vectorized (columnindex)                                  83         
    89           5        188.9           5.3      10.5X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select all string rows (value IS NOT NULL):  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                  16487          16727     
    253          1.0        1048.2       1.0X
   Parquet Vectorized (columnindex)                    16355          16426     
     62          1.0        1039.8       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 0 int row (value IS NULL):         Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                   64             68       
    5        246.7           4.1       1.0X
   Parquet Vectorized (columnindex)                     61             66       
    4        258.0           3.9       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 0 int row (7864320 < value < 7864320):  Best Time(ms)   Avg Time(ms)  
 Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
----------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                      770            795   
       34         20.4          48.9       1.0X
   Parquet Vectorized (columnindex)                         78             84   
        5        201.1           5.0       9.8X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 int row (value = 7864320):       Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                  774            795       
   21         20.3          49.2       1.0X
   Parquet Vectorized (columnindex)                     77             82       
    6        205.3           4.9      10.1X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 int row (value <=> 7864320):     Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                  771            777       
    7         20.4          49.0       1.0X
   Parquet Vectorized (columnindex)                     69             76       
    5        226.8           4.4      11.1X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 int row (7864320 <= value <= 7864320):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                        769            794 
         29         20.4          48.9       1.0X
   Parquet Vectorized (columnindex)                           74             82 
          6        213.3           4.7      10.4X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 int row (7864319 < value < 7864321):  Best Time(ms)   Avg Time(ms)  
 Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
----------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                      775            825   
       43         20.3          49.3       1.0X
   Parquet Vectorized (columnindex)                         76             81   
        5        206.3           4.8      10.2X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 10% int rows (value < 1572864):    Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                 2326           2395       
   51          6.8         147.9       1.0X
   Parquet Vectorized (columnindex)                   1655           1669       
   13          9.5         105.2       1.4X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 50% int rows (value < 7864320):    Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                 7898           8175       
  157          2.0         502.1       1.0X
   Parquet Vectorized (columnindex)                   7658           7731       
   73          2.1         486.9       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 90% int rows (value < 14155776):   Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                13778          13858       
   78          1.1         876.0       1.0X
   Parquet Vectorized (columnindex)                  13771          13885       
  105          1.1         875.5       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select all int rows (value IS NOT NULL):  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                15072          15281       
  163          1.0         958.3       1.0X
   Parquet Vectorized (columnindex)                  15119          15344       
  194          1.0         961.3       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select all int rows (value > -1):         Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                15257          15378       
  198          1.0         970.0       1.0X
   Parquet Vectorized (columnindex)                  15296          15519       
  232          1.0         972.5       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select all int rows (value != -1):        Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                15262          15325       
   92          1.0         970.4       1.0X
   Parquet Vectorized (columnindex)                  15173          15255       
   84          1.0         964.7       1.0X
   
   
   
================================================================================================
   Pushdown for few distinct value case (use dictionary encoding)
   
================================================================================================
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 0 distinct string row (value IS NULL):  Best Time(ms)   Avg Time(ms)  
 Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
----------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                       53             59   
        5        298.8           3.3       1.0X
   Parquet Vectorized (columnindex)                         52             57   
        6        300.2           3.3       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 0 distinct string row ('100' < value < '100'):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                              890          
  902           7         17.7          56.6       1.0X
   Parquet Vectorized (columnindex)                                 59          
   62           4        266.2           3.8      15.1X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 distinct string row (value = '100'):  Best Time(ms)   Avg Time(ms)  
 Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
----------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                      894            905   
       10         17.6          56.9       1.0X
   Parquet Vectorized (columnindex)                        125            130   
        6        126.2           7.9       7.2X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 distinct string row (value <=> '100'):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                        901            920 
         26         17.5          57.3       1.0X
   Parquet Vectorized (columnindex)                          119            127 
          4        132.1           7.6       7.6X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 distinct string row ('100' <= value <= '100'):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                902        
    914          10         17.4          57.3       1.0X
   Parquet Vectorized (columnindex)                                  126        
    132           7        124.8           8.0       7.2X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select all distinct string rows (value IS NOT NULL):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
-----------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                           16872          
16947          97          0.9        1072.7       1.0X
   Parquet Vectorized (columnindex)                             16861          
16970          80          0.9        1072.0       1.0X
   
   
   
================================================================================================
   Pushdown benchmark for StringStartsWith
   
================================================================================================
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   StringStartsWith filter: (value like '10%'):  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
---------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                    1025           1038    
      17         15.4          65.1       1.0X
   Parquet Vectorized (columnindex)                       852            868    
      16         18.5          54.2       1.2X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   StringStartsWith filter: (value like '1000%'):  Best Time(ms)   Avg Time(ms) 
  Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
-----------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                       816            838  
        25         19.3          51.9       1.0X
   Parquet Vectorized (columnindex)                          74             79  
         5        213.7           4.7      11.1X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   StringStartsWith filter: (value like '786432%'):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
-------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                         817            
836          11         19.2          52.0       1.0X
   Parquet Vectorized (columnindex)                            76             
82           4        207.6           4.8      10.8X
   
   
   
================================================================================================
   Pushdown benchmark for decimal
   
================================================================================================
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 decimal(9, 2) row (value = 7864320):  Best Time(ms)   Avg Time(ms)  
 Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
----------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                     1125           1138   
       13         14.0          71.6       1.0X
   Parquet Vectorized (columnindex)                         50             55   
        5        313.1           3.2      22.4X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 10% decimal(9, 2) rows (value < 1572864):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
-------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                        4800           
4930         131          3.3         305.2       1.0X
   Parquet Vectorized (columnindex)                          2227           
2274          40          7.1         141.6       2.2X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 50% decimal(9, 2) rows (value < 7864320):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
-------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                       10016          
10204         202          1.6         636.8       1.0X
   Parquet Vectorized (columnindex)                          9571           
9677          63          1.6         608.5       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 90% decimal(9, 2) rows (value < 14155776):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                        11161          
11403         187          1.4         709.6       1.0X
   Parquet Vectorized (columnindex)                          11103          
11283         130          1.4         705.9       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 decimal(18, 2) row (value = 7864320):  Best Time(ms)   Avg Time(ms) 
  Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
-----------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                      1150           1168  
        21         13.7          73.1       1.0X
   Parquet Vectorized (columnindex)                          45             48  
         5        350.0           2.9      25.6X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 10% decimal(18, 2) rows (value < 1572864):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                         1946           
1978          34          8.1         123.7       1.0X
   Parquet Vectorized (columnindex)                           1155           
1189          28         13.6          73.4       1.7X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 50% decimal(18, 2) rows (value < 7864320):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                         6206           
6413         211          2.5         394.6       1.0X
   Parquet Vectorized (columnindex)                           5659           
5786          96          2.8         359.8       1.1X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 90% decimal(18, 2) rows (value < 14155776):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
---------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                         10375          
10534         240          1.5         659.6       1.0X
   Parquet Vectorized (columnindex)                           10120          
10334         221          1.6         643.4       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 decimal(38, 2) row (value = 7864320):  Best Time(ms)   Avg Time(ms) 
  Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
-----------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                      1694           1748  
        49          9.3         107.7       1.0X
   Parquet Vectorized (columnindex)                          46             50  
         4        338.6           3.0      36.5X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 10% decimal(38, 2) rows (value < 1572864):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                         2712           
2782          48          5.8         172.5       1.0X
   Parquet Vectorized (columnindex)                           1584           
1611          32          9.9         100.7       1.7X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 50% decimal(38, 2) rows (value < 7864320):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                         8356           
8499          82          1.9         531.3       1.0X
   Parquet Vectorized (columnindex)                           7781           
7979         123          2.0         494.7       1.1X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 90% decimal(38, 2) rows (value < 14155776):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
---------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                         14128          
14304         182          1.1         898.2       1.0X
   Parquet Vectorized (columnindex)                           13940          
14004          44          1.1         886.3       1.0X
   
   
   
================================================================================================
   Pushdown benchmark for InSet -> InFilters
   
================================================================================================
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   InSet -> InFilters (values count: 5, distribution: 10):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                773        
    784          11         20.4          49.1       1.0X
   Parquet Vectorized (columnindex)                                  113        
    118           5        139.4           7.2       6.8X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   InSet -> InFilters (values count: 5, distribution: 50):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                               2910        
   2930          13          5.4         185.0       1.0X
   Parquet Vectorized (columnindex)                                  116        
    120           4        136.1           7.3      25.2X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   InSet -> InFilters (values count: 5, distribution: 90):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                               2207        
   2218           8          7.1         140.3       1.0X
   Parquet Vectorized (columnindex)                                  117        
    123           8        134.3           7.4      18.8X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   InSet -> InFilters (values count: 10, distribution: 10):  Best Time(ms)   
Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
---------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                1493       
    1506          11         10.5          94.9       1.0X
   Parquet Vectorized (columnindex)                                   159       
     164           6         99.2          10.1       9.4X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   InSet -> InFilters (values count: 10, distribution: 50):  Best Time(ms)   
Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
---------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                3591       
    3635          32          4.4         228.3       1.0X
   Parquet Vectorized (columnindex)                                   170       
     175           5         92.6          10.8      21.1X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   InSet -> InFilters (values count: 10, distribution: 90):  Best Time(ms)   
Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
---------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                5079       
    5147          50          3.1         322.9       1.0X
   Parquet Vectorized (columnindex)                                   172       
     180           5         91.2          11.0      29.5X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   InSet -> InFilters (values count: 50, distribution: 10):  Best Time(ms)   
Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
---------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                8280       
    8533         245          1.9         526.4       1.0X
   Parquet Vectorized (columnindex)                                  8341       
    8423          93          1.9         530.3       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   InSet -> InFilters (values count: 50, distribution: 50):  Best Time(ms)   
Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
---------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                8248       
    8347          72          1.9         524.4       1.0X
   Parquet Vectorized (columnindex)                                  8230       
    8303          66          1.9         523.2       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   InSet -> InFilters (values count: 50, distribution: 90):  Best Time(ms)   
Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
---------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                8219       
    8285          44          1.9         522.6       1.0X
   Parquet Vectorized (columnindex)                                  8183       
    8381         184          1.9         520.3       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   InSet -> InFilters (values count: 100, distribution: 10):  Best Time(ms)   
Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
----------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                 8354      
     8411          60          1.9         531.1       1.0X
   Parquet Vectorized (columnindex)                                   8181      
     8256          60          1.9         520.1       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   InSet -> InFilters (values count: 100, distribution: 50):  Best Time(ms)   
Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
----------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                 8151      
     8210          38          1.9         518.3       1.0X
   Parquet Vectorized (columnindex)                                   8169      
     8210          37          1.9         519.4       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   InSet -> InFilters (values count: 100, distribution: 90):  Best Time(ms)   
Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
----------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                 8131      
     8204          46          1.9         516.9       1.0X
   Parquet Vectorized (columnindex)                                   8167      
     8231          65          1.9         519.3       1.0X
   
   
   
================================================================================================
   Pushdown benchmark for tinyint
   
================================================================================================
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 tinyint row (value = CAST(63 AS tinyint)):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
----------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                           1124           
1198          68         14.0          71.4       1.0X
   Parquet Vectorized (columnindex)                               91            
 93           2        173.7           5.8      12.4X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 10% tinyint rows (value < CAST(12 AS tinyint)):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
-------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                              1845         
  1892          37          8.5         117.3       1.0X
   Parquet Vectorized (columnindex)                                1113         
  1123          11         14.1          70.8       1.7X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 50% tinyint rows (value < CAST(63 AS tinyint)):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
-------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                              5850         
  5911          73          2.7         371.9       1.0X
   Parquet Vectorized (columnindex)                                5450         
  5567          91          2.9         346.5       1.1X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 90% tinyint rows (value < CAST(114 AS tinyint)):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                              10395        
  10458          62          1.5         660.9       1.0X
   Parquet Vectorized (columnindex)                                 9928        
  10104         176          1.6         631.2       1.0X
   
   
   
================================================================================================
   Pushdown benchmark for Timestamp
   
================================================================================================
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 timestamp stored as INT96 row (value = timestamp_seconds(7864320)): 
 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
-----------------------------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                           
         3929           4094         107          4.0         249.8       1.0X
   Parquet Vectorized (columnindex)                                             
         3991           4068          74          3.9         253.7       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 10% timestamp stored as INT96 rows (value < 
timestamp_seconds(1572864)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                           
            4774           5003         163          3.3         303.5       
1.0X
   Parquet Vectorized (columnindex)                                             
            4769           4880         159          3.3         303.2       
1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 50% timestamp stored as INT96 rows (value < 
timestamp_seconds(7864320)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                           
            7736           7884         142          2.0         491.9       
1.0X
   Parquet Vectorized (columnindex)                                             
            7587           7795         156          2.1         482.4       
1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 90% timestamp stored as INT96 rows (value < 
timestamp_seconds(14155776)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
   
---------------------------------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                           
            10722          10785          61          1.5         681.7       
1.0X
   Parquet Vectorized (columnindex)                                             
            10719          10775          55          1.5         681.5       
1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 timestamp stored as TIMESTAMP_MICROS row (value = 
timestamp_seconds(7864320)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
   
----------------------------------------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                           
                    1127           1172          47         14.0          71.6  
     1.0X
   Parquet Vectorized (columnindex)                                             
                      44             46           3        360.8           2.8  
    25.9X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 10% timestamp stored as TIMESTAMP_MICROS rows (value < 
timestamp_seconds(1572864)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
   
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                           
                       1861           1916          90          8.5         
118.3       1.0X
   Parquet Vectorized (columnindex)                                             
                       1127           1160          22         14.0          
71.7       1.7X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 50% timestamp stored as TIMESTAMP_MICROS rows (value < 
timestamp_seconds(7864320)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
   
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                           
                       5809           5934         214          2.7         
369.3       1.0X
   Parquet Vectorized (columnindex)                                             
                       5455           5523          93          2.9         
346.8       1.1X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 90% timestamp stored as TIMESTAMP_MICROS rows (value < 
timestamp_seconds(14155776)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                           
                        9777          10097         244          1.6         
621.6       1.0X
   Parquet Vectorized (columnindex)                                             
                        9808           9849          44          1.6         
623.6       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 timestamp stored as TIMESTAMP_MILLIS row (value = 
timestamp_seconds(7864320)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
   
----------------------------------------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                           
                    1125           1163          24         14.0          71.5  
     1.0X
   Parquet Vectorized (columnindex)                                             
                      43             47           5        369.3           2.7  
    26.4X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 10% timestamp stored as TIMESTAMP_MILLIS rows (value < 
timestamp_seconds(1572864)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
   
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                           
                       1905           1977          80          8.3         
121.1       1.0X
   Parquet Vectorized (columnindex)                                             
                       1137           1186          40         13.8          
72.3       1.7X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 50% timestamp stored as TIMESTAMP_MILLIS rows (value < 
timestamp_seconds(7864320)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
   
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                           
                       6018           6266         230          2.6         
382.6       1.0X
   Parquet Vectorized (columnindex)                                             
                       5631           5703          69          2.8         
358.0       1.1X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 90% timestamp stored as TIMESTAMP_MILLIS rows (value < 
timestamp_seconds(14155776)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                           
                       10132          10224         113          1.6         
644.2       1.0X
   Parquet Vectorized (columnindex)                                             
                        9898           9992          69          1.6         
629.3       1.0X
   
   
   
================================================================================================
   Pushdown benchmark with many filters
   
================================================================================================
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 row with 1 filters:              Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                  182            190       
    7          0.0   182396465.0       1.0X
   Parquet Vectorized (columnindex)                    187            192       
    5          0.0   187246572.0       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 row with 250 filters:            Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                 2228           2257       
   19          0.0  2228318860.0       1.0X
   Parquet Vectorized (columnindex)                   2212           2244       
   24          0.0  2212486315.0       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 row with 500 filters:            Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                12391          12473       
   86          0.0 12391350628.0       1.0X
   Parquet Vectorized (columnindex)                  12438          12594       
  229          0.0 12438065459.0       1.0X
   
   
   ```


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to