wangyum commented on pull request #31393:
URL: https://github.com/apache/spark/pull/31393#issuecomment-769767724


   Benchmark and benchmark result:
   ```scala
   /*
    * Licensed to the Apache Software Foundation (ASF) under one or more
    * contributor license agreements.  See the NOTICE file distributed with
    * this work for additional information regarding copyright ownership.
    * The ASF licenses this file to You under the Apache License, Version 2.0
    * (the "License"); you may not use this file except in compliance with
    * the License.  You may obtain a copy of the License at
    *
    *    http://www.apache.org/licenses/LICENSE-2.0
    *
    * Unless required by applicable law or agreed to in writing, software
    * distributed under the License is distributed on an "AS IS" BASIS,
    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    * See the License for the specific language governing permissions and
    * limitations under the License.
    */
   
   package org.apache.spark.sql.execution.benchmark
   
   import java.io.File
   
   import scala.util.Random
   
   import org.apache.parquet.hadoop.ParquetInputFormat
   
   import org.apache.spark.SparkConf
   import org.apache.spark.benchmark.Benchmark
   import org.apache.spark.sql.{DataFrame, SparkSession}
   import org.apache.spark.sql.functions.{monotonically_increasing_id, 
timestamp_seconds}
   import org.apache.spark.sql.internal.SQLConf
   import org.apache.spark.sql.internal.SQLConf.ParquetOutputTimestampType
   import org.apache.spark.sql.types.{ByteType, Decimal, DecimalType}
   
   /**
    * Benchmark to measure read performance with Parquet column index.
    * To run this benchmark:
    * {{{
    *   1. without sbt: bin/spark-submit --class <this class> <spark sql test 
jar>
    *   2. build/sbt "sql/test:runMain <this class>"
    *   3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt 
"sql/test:runMain <this class>"
    *      Results will be written to 
"benchmarks/ParquetColumnIndexBenchmark-results.txt".
    * }}}
    */
   object ParquetColumnIndexBenchmark extends SqlBasedBenchmark {
   
     /**
      * Creates the [[SparkSession]] shared by every case in this benchmark.
      *
      * The application name is this object's simple class name and `spark.master` is
      * unconditionally set to `local[1]`. The remaining settings are fallbacks that are
      * applied only when the key is not already present in the configuration.
      */
     override def getSparkSession: SparkSession = {
       // Applied through `setIfMissing`, so values already present in the conf win.
       // NOTE(review): `orc.compression` is kept as-is although the code visible here only
       // writes Parquet -- confirm whether it is still needed.
       val fallbackSettings = Seq(
         "spark.driver.memory" -> "3g",
         "spark.executor.memory" -> "3g",
         "orc.compression" -> "snappy",
         "spark.sql.parquet.compression.codec" -> "snappy")

       // `spark.master` always exists, so it is overridden rather than defaulted.
       val baseConf = new SparkConf()
         .setAppName(this.getClass.getSimpleName)
         .set("spark.master", "local[1]")

       val sessionConf = fallbackSettings.foldLeft(baseConf) {
         case (acc, (key, value)) => acc.setIfMissing(key, value)
       }

       SparkSession.builder().config(sessionConf).getOrCreate()
     }
   
     // Number of rows generated for the benchmark tables (15 * 2^20 = 15,728,640); also passed
     // to `filterPushDownBenchmark` as the per-benchmark value count.
     private val numRows = 1024 * 1024 * 15
     // Number of payload columns (`c1` .. `c5`) written next to the filtered `value` column.
     private val width = 5
     // Midpoint of the generated id range; used as the search key of the point-lookup filters.
     private val mid = numRows / 2
   
     /**
      * Runs `f` and afterwards drops the given temporary views, whether or not `f` threw.
      *
      * @param tableNames names of the temporary views to drop once `f` has finished
      * @param f          the body to execute
      */
     def withTempTable(tableNames: String*)(f: => Unit): Unit = {
       try {
         f
       } finally {
         for (viewName <- tableNames) {
           spark.catalog.dropTempView(viewName)
         }
       }
     }
   
     /**
      * Generates the "many distinct values" table and registers it through `saveAsTable`.
      *
      * Each row carries `width` columns `c1..cN`, all holding the same random long rendered
      * as a string, plus a `value` column taken from `monotonically_increasing_id()`. The
      * data is sorted by `value` before it is written.
      *
      * @param dir               directory handed to `saveAsTable`
      * @param numRows           number of rows to generate
      * @param width             number of `c$i` payload columns
      * @param useStringForValue when true, `value` is cast to string; otherwise it stays numeric
      */
     private def prepareTable(
         dir: File, numRows: Int, width: Int, useStringForValue: Boolean): Unit = {
       import spark.implicits._

       val rowId = monotonically_increasing_id()
       val valueCol = if (useStringForValue) rowId.cast("string") else rowId
       val payloadExprs = for (i <- 1 to width) yield s"CAST(value AS STRING) c$i"

       // The mapped Dataset[Long] exposes a single column named `value`, which the payload
       // expressions read before `withColumn` replaces it with the id-based column.
       val randomLongs = spark.range(numRows).map(_ => Random.nextLong)
       val sortedRows = randomLongs
         .selectExpr(payloadExprs: _*)
         .withColumn("value", valueCol)
         .sort("value")

       saveAsTable(sortedRows, dir)
     }
   
     /**
      * Generates the "few distinct values" table and registers it through `saveAsTable`.
      *
      * The `value` column is `id % numDistinctValues` rendered as a string; the `c1..cN`
      * columns hold `rand()` rendered as a string. Rows are sorted by `value` before saving.
      *
      * @param dir               directory handed to `saveAsTable`
      * @param numRows           number of rows to generate
      * @param numDistinctValues modulus applied to `id`
      * @param width             number of `c$i` payload columns
      */
     private def prepareStringDictTable(
         dir: File, numRows: Int, numDistinctValues: Int, width: Int): Unit = {
       // Index 0 yields the filtered `value` column, every other index a random payload column.
       val projection = (0 to width).map { i =>
         if (i == 0) {
           s"CAST(id % $numDistinctValues AS STRING) AS value"
         } else {
           s"CAST(rand() AS STRING) c$i"
         }
       }
       val sortedRows = spark.range(numRows).selectExpr(projection: _*).sort("value")

       saveAsTable(sortedRows, dir, useDictionary = true)
     }
   
     /**
      * Writes `df` as Parquet under `<dir>/parquet` (overwriting any previous contents) and
      * exposes the written files as the temporary view `parquetTable`.
      *
      * @param df            data to persist
      * @param dir           parent directory of the Parquet output
      * @param useDictionary accepted for call-site compatibility; this method never reads it
      */
     private def saveAsTable(df: DataFrame, dir: File, useDictionary: Boolean = false): Unit = {
       val parquetDir = s"${dir.getCanonicalPath}/parquet"

       val writer = df.write.mode("overwrite")
       writer.parquet(parquetDir)

       // Read the files back so the benchmark queries scan Parquet rather than `df` itself.
       val reloaded = spark.read.parquet(parquetDir)
       reloaded.createOrReplaceTempView("parquetTable")
     }
   
     /**
      * Benchmarks one query against `parquetTable` twice: first with Parquet column-index
      * filtering disabled, then with it enabled.
      *
      * @param values     passed to the `Benchmark` constructor as its value count
      *                   (presumably the number of rows per iteration -- confirm)
      * @param title      benchmark title
      * @param whereExpr  SQL predicate placed after `WHERE`
      * @param selectExpr SQL projection placed after `SELECT`; defaults to `*`
      */
     def filterPushDownBenchmark(
         values: Int,
         title: String,
         whereExpr: String,
         selectExpr: String = "*"): Unit = {
       val benchmark = new Benchmark(title, values, minNumIters = 5, output = output)

       for (columnIndexEnabled <- Seq(false, true)) {
         // The trailing space of the baseline name is part of the original label.
         val caseName =
           if (columnIndexEnabled) "Parquet Vectorized (columnIndex)" else "Parquet Vectorized "
         val columnIndexSetting =
           ParquetInputFormat.COLUMN_INDEX_FILTERING_ENABLED -> columnIndexEnabled.toString

         benchmark.addCase(caseName) { _ =>
           withSQLConf(columnIndexSetting) {
             spark.sql(s"SELECT $selectExpr FROM parquetTable WHERE $whereExpr").noop()
           }
         }
       }

       benchmark.run()
     }
   
     /**
      * Runs the filter benchmarks for a numeric `value` column: predicates matching no row,
      * predicates matching the single row `value = mid`, range predicates selecting 10/50/90
      * percent of the rows, and predicates matching every row.
      *
      * @param numRows number of rows in `parquetTable`
      * @param width   number of `c$i` payload columns aggregated by the range/all-row queries
      * @param mid     search key of the point-lookup predicates
      */
     private def runIntBenchmark(numRows: Int, width: Int, mid: Int): Unit = {
       // Runs one benchmark per predicate; "a < value AND value < b" is shown as "a < value < b".
       def runPointLookups(expectedRows: Int, predicates: Seq[String]): Unit = {
         predicates.foreach { whereExpr =>
           val title = s"Select $expectedRows int row ($whereExpr)"
             .replace("value AND value", "value")
           filterPushDownBenchmark(numRows, title, whereExpr)
         }
       }

       runPointLookups(0, Seq("value IS NULL", s"$mid < value AND value < $mid"))

       runPointLookups(1, Seq(
         s"value = $mid",
         s"value <=> $mid",
         s"$mid <= value AND value <= $mid",
         s"${mid - 1} < value AND value < ${mid + 1}"))

       val selectExpr = (1 to width).map(i => s"MAX(c$i)").mkString("", ",", ", MAX(value)")

       Seq(10, 50, 90).foreach { percent =>
         // Computed as a Long: `numRows * percent` wraps around in Int arithmetic once
         // numRows exceeds Int.MaxValue / percent (about 23.8M rows for 90%).
         val upperBound = numRows.toLong * percent / 100
         filterPushDownBenchmark(
           numRows,
           s"Select $percent% int rows (value < $upperBound)",
           s"value < $upperBound",
           selectExpr
         )
       }

       Seq("value IS NOT NULL", "value > -1", "value != -1").foreach { whereExpr =>
         filterPushDownBenchmark(
           numRows,
           s"Select all int rows ($whereExpr)",
           whereExpr,
           selectExpr)
       }
     }
   
     /**
      * Runs the filter benchmarks for a string `value` column: predicates matching no row,
      * predicates matching `value = 'searchValue'`, and a predicate matching every row.
      *
      * @param numRows     number of rows in `parquetTable`
      * @param width       number of `c$i` payload columns aggregated by the all-row query
      * @param searchValue search key; it is single-quoted before being placed in the SQL text
      * @param colType     label describing the column, used only in benchmark titles
      */
     private def runStringBenchmark(
         numRows: Int, width: Int, searchValue: Int, colType: String): Unit = {
       val literal = s"'$searchValue'"

       // Label used in the title ("Select 0 ..." / "Select 1 ...") paired with its predicates.
       val lookupGroups = Seq(
         0 -> Seq(
           "value IS NULL",
           s"$literal < value AND value < $literal"),
         1 -> Seq(
           s"value = $literal",
           s"value <=> $literal",
           s"$literal <= value AND value <= $literal"))

       for ((expectedRows, predicates) <- lookupGroups; whereExpr <- predicates) {
         // "a < value AND value < b" is displayed as "a < value < b" in the title.
         val title = s"Select $expectedRows $colType row ($whereExpr)"
           .replace("value AND value", "value")
         filterPushDownBenchmark(numRows, title, whereExpr)
       }

       val aggregates = (1 to width).map(i => s"MAX(c$i)").mkString(",") + ", MAX(value)"

       val matchAll = "value IS NOT NULL"
       filterPushDownBenchmark(
         numRows,
         s"Select all $colType rows ($matchAll)",
         matchAll,
         aggregates)
     }
   
     /**
      * Entry point of the suite: runs every column-index benchmark group, in a fixed order.
      * Each group creates its own temporary directory and `parquetTable` view.
      *
      * @param mainArgs command-line arguments; not read by this benchmark
      */
     override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
       benchmarkManyDistinctValues()
       benchmarkFewDistinctValues()
       benchmarkStringStartsWith()
       benchmarkDecimal()
       benchmarkInSet()
       benchmarkByte()
       benchmarkTimestamp()
       benchmarkManyFilters()
     }

     /** Builds `MAX(c1),...,MAX(cN), MAX(value)` for a table with `columnCount` payload columns. */
     private def maxOfEveryColumn(columnCount: Int): String = {
       (1 to columnCount).map(i => s"MAX(c$i)").mkString("", ",", ", MAX(value)")
     }

     /** Benchmarks string and then numeric `value` columns whose values are all distinct. */
     private def benchmarkManyDistinctValues(): Unit = {
       runBenchmark("Pushdown for many distinct value case") {
         withTempPath { dir =>
           withTempTable("parquetTable") {
             Seq(true, false).foreach { useStringForValue =>
               prepareTable(dir, numRows, width, useStringForValue)
               if (useStringForValue) {
                 runStringBenchmark(numRows, width, mid, "string")
               } else {
                 runIntBenchmark(numRows, width, mid)
               }
             }
           }
         }
       }
     }

     /** Benchmarks a string `value` column that only takes 200 distinct values. */
     private def benchmarkFewDistinctValues(): Unit = {
       runBenchmark("Pushdown for few distinct value case (use dictionary encoding)") {
         withTempPath { dir =>
           val numDistinctValues = 200

           withTempTable("parquetTable") {
             prepareStringDictTable(dir, numRows, numDistinctValues, width)
             runStringBenchmark(numRows, width, numDistinctValues / 2, "distinct string")
           }
         }
       }
     }

     /** Benchmarks `LIKE 'prefix%'` filters of increasing selectivity on a string column. */
     private def benchmarkStringStartsWith(): Unit = {
       runBenchmark("Pushdown benchmark for StringStartsWith") {
         withTempPath { dir =>
           withTempTable("parquetTable") {
             prepareTable(dir, numRows, width, useStringForValue = true)
             // `mid` without its last digit.
             val midPrefix = mid.toString.dropRight(1)
             Seq(
               "value like '10%'",
               "value like '1000%'",
               s"value like '$midPrefix%'"
             ).foreach { whereExpr =>
               val title = s"StringStartsWith filter: ($whereExpr)"
               filterPushDownBenchmark(numRows, title, whereExpr)
             }
           }
         }
       }
     }

     /** Benchmarks point and range filters on decimal columns of three different precisions. */
     private def benchmarkDecimal(): Unit = {
       runBenchmark(s"Pushdown benchmark for ${DecimalType.simpleString}") {
         withTempPath { dir =>
           val smallestDecimal = s"decimal(${Decimal.MAX_INT_DIGITS}, 2)"
           Seq(
             smallestDecimal,
             s"decimal(${Decimal.MAX_LONG_DIGITS}, 2)",
             s"decimal(${DecimalType.MAX_PRECISION}, 2)"
           ).foreach { dt =>
             val columns = (1 to width).map(i => s"CAST(id AS string) c$i")
             val rowId = monotonically_increasing_id()
             // NOTE(review): the modulus presumably keeps ids within the range of the
             // smallest decimal type -- confirm.
             val valueCol = if (dt.equalsIgnoreCase(smallestDecimal)) rowId % 9999999 else rowId
             val df = spark.range(numRows)
               .selectExpr(columns: _*).withColumn("value", valueCol.cast(dt))
             withTempTable("parquetTable") {
               saveAsTable(df, dir)

               val pointLookup = s"value = $mid"
               filterPushDownBenchmark(numRows, s"Select 1 $dt row ($pointLookup)", pointLookup)

               val selectExpr = maxOfEveryColumn(width)
               Seq(10, 50, 90).foreach { percent =>
                 val upperBound = numRows * percent / 100
                 filterPushDownBenchmark(
                   numRows,
                   s"Select $percent% $dt rows (value < $upperBound)",
                   s"value < $upperBound",
                   selectExpr
                 )
               }
             }
           }
         }
       }
     }

     /** Benchmarks `IN (...)` filters with varying list sizes drawn from varying id ranges. */
     private def benchmarkInSet(): Unit = {
       runBenchmark("Pushdown benchmark for InSet -> InFilters") {
         withTempPath { dir =>
           withTempTable("parquetTable") {
             prepareTable(dir, numRows, width, useStringForValue = false)
             Seq(5, 10, 50, 100).foreach { count =>
               Seq(10, 50, 90).foreach { distribution =>
                 // `count` random ids drawn from the first `distribution` percent of the rows.
                 val bound = numRows * distribution / 100
                 val filter = Seq.fill(count)(Random.nextInt(bound))
                 val whereExpr = s"value in(${filter.mkString(",")})"
                 val title =
                   s"InSet -> InFilters (values count: $count, distribution: $distribution)"
                 filterPushDownBenchmark(numRows, title, whereExpr)
               }
             }
           }
         }
       }
     }

     /** Benchmarks point and range filters on a byte column. */
     private def benchmarkByte(): Unit = {
       val byteTypeName = ByteType.simpleString
       runBenchmark(s"Pushdown benchmark for $byteTypeName") {
         withTempPath { dir =>
           val columns = (1 to width).map(i => s"CAST(id AS string) c$i")
           val df = spark.range(numRows).selectExpr(columns: _*)
             .withColumn("value", (monotonically_increasing_id() % Byte.MaxValue).cast(ByteType))
             .orderBy("value")
           withTempTable("parquetTable") {
             saveAsTable(df, dir)

             val pointLookup = s"value = CAST(${Byte.MaxValue / 2} AS $byteTypeName)"
             filterPushDownBenchmark(
               numRows, s"Select 1 $byteTypeName row ($pointLookup)", pointLookup)

             val selectExpr = maxOfEveryColumn(width)
             Seq(10, 50, 90).foreach { percent =>
               val rangeFilter = s"value < CAST(${Byte.MaxValue * percent / 100} AS $byteTypeName)"
               filterPushDownBenchmark(
                 numRows,
                 s"Select $percent% $byteTypeName rows ($rangeFilter)",
                 rangeFilter,
                 selectExpr
               )
             }
           }
         }
       }
     }

     /** Benchmarks point and range filters on timestamps, once per Parquet output timestamp type. */
     private def benchmarkTimestamp(): Unit = {
       runBenchmark(s"Pushdown benchmark for Timestamp") {
         withTempPath { dir =>
           withSQLConf(SQLConf.PARQUET_FILTER_PUSHDOWN_TIMESTAMP_ENABLED.key -> true.toString) {
             ParquetOutputTimestampType.values.toSeq.map(_.toString).foreach { fileType =>
               withSQLConf(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key -> fileType) {
                 val columns = (1 to width).map(i => s"CAST(id AS string) c$i")
                 val df = spark.range(numRows).selectExpr(columns: _*)
                   .withColumn("value", timestamp_seconds(monotonically_increasing_id()))
                 withTempTable("parquetTable") {
                   saveAsTable(df, dir)

                   val pointLookup = s"value = timestamp_seconds($mid)"
                   filterPushDownBenchmark(
                     numRows,
                     s"Select 1 timestamp stored as $fileType row ($pointLookup)",
                     pointLookup)

                   val selectExpr = maxOfEveryColumn(width)
                   Seq(10, 50, 90).foreach { percent =>
                     val rangeFilter = s"value < timestamp_seconds(${numRows * percent / 100})"
                     filterPushDownBenchmark(
                       numRows,
                       s"Select $percent% timestamp stored as $fileType rows ($rangeFilter)",
                       rangeFilter,
                       selectExpr
                     )
                   }
                 }
               }
             }
           }
         }
       }
     }

     /** Benchmarks a one-row, 500-column table filtered by 1, 250 and 500 conjuncts. */
     private def benchmarkManyFilters(): Unit = {
       runBenchmark(s"Pushdown benchmark with many filters") {
         val rowCount = 1
         val columnCount = 500

         withTempPath { dir =>
           val columns = (1 to columnCount).map(i => s"id c$i")
           val df = spark.range(rowCount).selectExpr(columns: _*)
           withTempTable("parquetTable") {
             saveAsTable(df, dir)
             Seq(1, 250, 500).foreach { numFilter =>
               val whereExpr = (1 to numFilter).map(i => s"c$i = 0").mkString(" and ")
               // Note: InferFiltersFromConstraints will add more filters to this given filters
               filterPushDownBenchmark(rowCount, s"Select 1 row with $numFilter filters", whereExpr)
             }
           }
         }
       }
     }
   }
   
   ```
   
   ```
   
================================================================================================
   Pushdown for many distinct value case
   
================================================================================================
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 0 string row (value IS NULL):      Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                   92            105       
    9        171.7           5.8       1.0X
   Parquet Vectorized (columnIndex)                     70             80       
    8        225.3           4.4       1.3X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 0 string row ('7864320' < value < '7864320'):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
-----------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                             829           
 849          12         19.0          52.7       1.0X
   Parquet Vectorized (columnIndex)                                85           
  92           6        184.8           5.4       9.7X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 string row (value = '7864320'):  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                  830            845       
   11         19.0          52.8       1.0X
   Parquet Vectorized (columnIndex)                     85             94       
    7        185.9           5.4       9.8X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 string row (value <=> '7864320'):  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
-------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                   841            895      
    47         18.7          53.5       1.0X
   Parquet Vectorized (columnIndex)                      77             81      
     5        205.4           4.9      11.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 string row ('7864320' <= value <= '7864320'):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
-------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                               826         
   843          13         19.0          52.5       1.0X
   Parquet Vectorized (columnIndex)                                  79         
    84           5        197.9           5.1      10.4X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select all string rows (value IS NOT NULL):  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                  16542          16827     
    247          1.0        1051.7       1.0X
   Parquet Vectorized (columnIndex)                    16491          16571     
     57          1.0        1048.5       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 0 int row (value IS NULL):         Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                   60             66       
    5        263.7           3.8       1.0X
   Parquet Vectorized (columnIndex)                     59             66       
    6        267.7           3.7       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 0 int row (7864320 < value < 7864320):  Best Time(ms)   Avg Time(ms)  
 Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
----------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                      793            797   
        5         19.8          50.4       1.0X
   Parquet Vectorized (columnIndex)                         79             85   
        5        199.6           5.0      10.1X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 int row (value = 7864320):       Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                  792            807       
   11         19.9          50.3       1.0X
   Parquet Vectorized (columnIndex)                     72             76       
    4        218.7           4.6      11.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 int row (value <=> 7864320):     Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                  792            832       
   37         19.8          50.4       1.0X
   Parquet Vectorized (columnIndex)                     77             85       
    9        205.0           4.9      10.3X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 int row (7864320 <= value <= 7864320):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                        782            806 
         19         20.1          49.7       1.0X
   Parquet Vectorized (columnIndex)                           70             75 
          4        224.4           4.5      11.2X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 int row (7864319 < value < 7864321):  Best Time(ms)   Avg Time(ms)  
 Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
----------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                      772            797   
       19         20.4          49.1       1.0X
   Parquet Vectorized (columnIndex)                         73             79   
        6        216.1           4.6      10.6X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 10% int rows (value < 1572864):    Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                 2313           2347       
   30          6.8         147.1       1.0X
   Parquet Vectorized (columnIndex)                   1657           1681       
   20          9.5         105.4       1.4X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 50% int rows (value < 7864320):    Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                 8055           8162       
   69          2.0         512.1       1.0X
   Parquet Vectorized (columnIndex)                   7800           7861       
   52          2.0         495.9       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 90% int rows (value < 14155776):   Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                13875          14027       
  174          1.1         882.1       1.0X
   Parquet Vectorized (columnIndex)                  13954          14061       
  151          1.1         887.2       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select all int rows (value IS NOT NULL):  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                15637          15728       
   70          1.0         994.2       1.0X
   Parquet Vectorized (columnIndex)                  15481          15634       
  101          1.0         984.2       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select all int rows (value > -1):         Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                15156          15369       
  144          1.0         963.6       1.0X
   Parquet Vectorized (columnIndex)                  15255          15409       
  144          1.0         969.9       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select all int rows (value != -1):        Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                15220          15440       
  143          1.0         967.7       1.0X
   Parquet Vectorized (columnIndex)                  15327          15399       
   60          1.0         974.5       1.0X
   
   
   
================================================================================================
   Pushdown for few distinct value case (use dictionary encoding)
   
================================================================================================
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 0 distinct string row (value IS NULL):  Best Time(ms)   Avg Time(ms)  
 Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
----------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                       51             55   
        6        309.6           3.2       1.0X
   Parquet Vectorized (columnIndex)                         49             55   
        6        319.7           3.1       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 0 distinct string row ('100' < value < '100'):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                              907          
  921          11         17.3          57.7       1.0X
   Parquet Vectorized (columnIndex)                                 56          
   60           3        279.6           3.6      16.1X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 distinct string row (value = '100'):  Best Time(ms)   Avg Time(ms)  
 Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
----------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                      916            927   
        8         17.2          58.3       1.0X
   Parquet Vectorized (columnIndex)                        121            126   
        6        130.0           7.7       7.6X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 distinct string row (value <=> '100'):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                        900            908 
          7         17.5          57.2       1.0X
   Parquet Vectorized (columnIndex)                          118            124 
          5        133.2           7.5       7.6X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 distinct string row ('100' <= value <= '100'):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                924        
    935          10         17.0          58.8       1.0X
   Parquet Vectorized (columnIndex)                                  125        
    130           5        126.2           7.9       7.4X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select all distinct string rows (value IS NOT NULL):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
-----------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                           16896          
17060         109          0.9        1074.2       1.0X
   Parquet Vectorized (columnIndex)                             17062          
17211         114          0.9        1084.8       1.0X
   
   
   
   ================================================================================================
   Pushdown benchmark for StringStartsWith
   ================================================================================================
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   StringStartsWith filter: (value like '10%'):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   ---------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                    1028           1060          29         15.3          65.3       1.0X
   Parquet Vectorized (columnIndex)                       849            863          12         18.5          54.0       1.2X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   StringStartsWith filter: (value like '1000%'):  Best Time(ms)   Avg Time(ms) 
  Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
-----------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                       828            835  
         6         19.0          52.6       1.0X
   Parquet Vectorized (columnIndex)                          71             77  
         4        220.1           4.5      11.6X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   StringStartsWith filter: (value like '786432%'):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
-------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                         832            
845          10         18.9          52.9       1.0X
   Parquet Vectorized (columnIndex)                            70             
76           3        223.4           4.5      11.8X
   
   
   
================================================================================================
   Pushdown benchmark for decimal
   
================================================================================================
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 decimal(9, 2) row (value = 7864320):  Best Time(ms)   Avg Time(ms)  
 Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
----------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                     1128           1159   
       36         13.9          71.7       1.0X
   Parquet Vectorized (columnIndex)                         46             49   
        3        345.5           2.9      24.8X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 10% decimal(9, 2) rows (value < 1572864):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
-------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                        4827           
4927         112          3.3         306.9       1.0X
   Parquet Vectorized (columnIndex)                          2238           
2387         123          7.0         142.3       2.2X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 50% decimal(9, 2) rows (value < 7864320):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
-------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                        9972          
10135         113          1.6         634.0       1.0X
   Parquet Vectorized (columnIndex)                          9395           
9503         102          1.7         597.3       1.1X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 90% decimal(9, 2) rows (value < 14155776):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                        11283          
11461         192          1.4         717.3       1.0X
   Parquet Vectorized (columnIndex)                          11070          
11236         144          1.4         703.8       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 decimal(18, 2) row (value = 7864320):  Best Time(ms)   Avg Time(ms) 
  Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
-----------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                      1170           1181  
         8         13.4          74.4       1.0X
   Parquet Vectorized (columnIndex)                          41             43  
         3        380.2           2.6      28.3X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 10% decimal(18, 2) rows (value < 1572864):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                         1962           
2041         103          8.0         124.7       1.0X
   Parquet Vectorized (columnIndex)                           1166           
1192          23         13.5          74.1       1.7X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 50% decimal(18, 2) rows (value < 7864320):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                         6211           
6276          52          2.5         394.9       1.0X
   Parquet Vectorized (columnIndex)                           5572           
5667          60          2.8         354.3       1.1X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 90% decimal(18, 2) rows (value < 14155776):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
---------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                         10231          
10430         302          1.5         650.5       1.0X
   Parquet Vectorized (columnIndex)                            9985          
10291         282          1.6         634.8       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 decimal(38, 2) row (value = 7864320):  Best Time(ms)   Avg Time(ms) 
  Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
-----------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                      1746           1796  
        72          9.0         111.0       1.0X
   Parquet Vectorized (columnIndex)                          44             47  
         3        360.3           2.8      40.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 10% decimal(38, 2) rows (value < 1572864):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                         2758           
2861          66          5.7         175.4       1.0X
   Parquet Vectorized (columnIndex)                           1551           
1581          20         10.1          98.6       1.8X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 50% decimal(38, 2) rows (value < 7864320):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                         8324           
8443          91          1.9         529.2       1.0X
   Parquet Vectorized (columnIndex)                           7661           
7719          75          2.1         487.0       1.1X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 90% decimal(38, 2) rows (value < 14155776):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
---------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                         13847          
14118         224          1.1         880.4       1.0X
   Parquet Vectorized (columnIndex)                           13609          
13806         150          1.2         865.3       1.0X
   
   
   
================================================================================================
   Pushdown benchmark for InSet -> InFilters
   
================================================================================================
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   InSet -> InFilters (values count: 5, distribution: 10):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                758        
   1035         581         20.8          48.2       1.0X
   Parquet Vectorized (columnIndex)                                  110        
    118           7        142.6           7.0       6.9X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   InSet -> InFilters (values count: 5, distribution: 50):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                               2871        
   2928          38          5.5         182.6       1.0X
   Parquet Vectorized (columnIndex)                                  112        
    117           5        140.9           7.1      25.7X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   InSet -> InFilters (values count: 5, distribution: 90):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                               2197        
   2218          16          7.2         139.7       1.0X
   Parquet Vectorized (columnIndex)                                  111        
    118           6        141.9           7.0      19.8X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   InSet -> InFilters (values count: 10, distribution: 10):  Best Time(ms)   
Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
---------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                 759       
     779          12         20.7          48.3       1.0X
   Parquet Vectorized (columnIndex)                                   150       
     155           3        105.2           9.5       5.1X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   InSet -> InFilters (values count: 10, distribution: 50):  Best Time(ms)   
Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
---------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                3633       
    3674          28          4.3         231.0       1.0X
   Parquet Vectorized (columnIndex)                                   169       
     181          14         93.0          10.8      21.5X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   InSet -> InFilters (values count: 10, distribution: 90):  Best Time(ms)   
Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
---------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                3623       
    3666          29          4.3         230.3       1.0X
   Parquet Vectorized (columnIndex)                                   166       
     176           6         94.6          10.6      21.8X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   InSet -> InFilters (values count: 50, distribution: 10):  Best Time(ms)   
Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
---------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                8220       
    8366         199          1.9         522.6       1.0X
   Parquet Vectorized (columnIndex)                                  8135       
    8197          66          1.9         517.2       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   InSet -> InFilters (values count: 50, distribution: 50):  Best Time(ms)   
Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
---------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                8164       
    8224          48          1.9         519.1       1.0X
   Parquet Vectorized (columnIndex)                                  8106       
    8212          60          1.9         515.3       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   InSet -> InFilters (values count: 50, distribution: 90):  Best Time(ms)   
Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
---------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                8179       
    8257          50          1.9         520.0       1.0X
   Parquet Vectorized (columnIndex)                                  8176       
    8269          76          1.9         519.8       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   InSet -> InFilters (values count: 100, distribution: 10):  Best Time(ms)   
Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
----------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                 8199      
     8339         133          1.9         521.3       1.0X
   Parquet Vectorized (columnIndex)                                   8128      
     8247         100          1.9         516.8       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   InSet -> InFilters (values count: 100, distribution: 50):  Best Time(ms)   
Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
----------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                 8075      
     8180          75          1.9         513.4       1.0X
   Parquet Vectorized (columnIndex)                                   8133      
     8185          57          1.9         517.1       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   InSet -> InFilters (values count: 100, distribution: 90):  Best Time(ms)   
Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
----------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                 8121      
     8163          33          1.9         516.3       1.0X
   Parquet Vectorized (columnIndex)                                   8093      
     8159          63          1.9         514.5       1.0X
   
   
   
================================================================================================
   Pushdown benchmark for tinyint
   
================================================================================================
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 tinyint row (value = CAST(63 AS tinyint)):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
----------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                           1133           
1201          46         13.9          72.1       1.0X
   Parquet Vectorized (columnIndex)                               85            
 90           5        184.7           5.4      13.3X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 10% tinyint rows (value < CAST(12 AS tinyint)):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
-------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                              1841         
  1860          19          8.5         117.1       1.0X
   Parquet Vectorized (columnIndex)                                1104         
  1115          10         14.2          70.2       1.7X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 50% tinyint rows (value < CAST(63 AS tinyint)):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
-------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                              5747         
  5818          86          2.7         365.4       1.0X
   Parquet Vectorized (columnIndex)                                5411         
  5547         108          2.9         344.1       1.1X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 90% tinyint rows (value < CAST(114 AS tinyint)):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                              10143        
  10249         108          1.6         644.9       1.0X
   Parquet Vectorized (columnIndex)                                 9730        
   9832          78          1.6         618.6       1.0X
   
   
   
================================================================================================
   Pushdown benchmark for Timestamp
   
================================================================================================
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 timestamp stored as INT96 row (value = timestamp_seconds(7864320)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   -----------------------------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                                    4023           4060          35          3.9         255.8       1.0X
   Parquet Vectorized (columnIndex)                                                      3908           4044          96          4.0         248.5       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 10% timestamp stored as INT96 rows (value < 
timestamp_seconds(1572864)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                           
            4758           4967         179          3.3         302.5       
1.0X
   Parquet Vectorized (columnIndex)                                             
            4750           4828          45          3.3         302.0       
1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 50% timestamp stored as INT96 rows (value < 
timestamp_seconds(7864320)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                           
            7682           7792         108          2.0         488.4       
1.0X
   Parquet Vectorized (columnIndex)                                             
            7661           7753         111          2.1         487.1       
1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 90% timestamp stored as INT96 rows (value < 
timestamp_seconds(14155776)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
   
---------------------------------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                           
            10538          10678         134          1.5         670.0       
1.0X
   Parquet Vectorized (columnIndex)                                             
            10489          10602          77          1.5         666.9       
1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 timestamp stored as TIMESTAMP_MICROS row (value = timestamp_seconds(7864320)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   ----------------------------------------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                                               1053           1064           7         14.9          66.9       1.0X
   Parquet Vectorized (columnIndex)                                                                   41             46           5        382.0           2.6      25.6X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 10% timestamp stored as TIMESTAMP_MICROS rows (value < 
timestamp_seconds(1572864)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
   
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                           
                       1808           1883          83          8.7         
114.9       1.0X
   Parquet Vectorized (columnIndex)                                             
                       1112           1143          19         14.1          
70.7       1.6X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 50% timestamp stored as TIMESTAMP_MICROS rows (value < 
timestamp_seconds(7864320)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
   
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                           
                       5921           6019          68          2.7         
376.4       1.0X
   Parquet Vectorized (columnIndex)                                             
                       5411           5538         100          2.9         
344.0       1.1X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 90% timestamp stored as TIMESTAMP_MICROS rows (value < 
timestamp_seconds(14155776)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                           
                        9713           9785          61          1.6         
617.6       1.0X
   Parquet Vectorized (columnIndex)                                             
                        9670           9813         151          1.6         
614.8       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 timestamp stored as TIMESTAMP_MILLIS row (value = 
timestamp_seconds(7864320)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
   
----------------------------------------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                           
                    1144           1152           5         13.8          72.7  
     1.0X
   Parquet Vectorized (columnIndex)                                             
                      38             43           5        413.0           2.4  
    30.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 10% timestamp stored as TIMESTAMP_MILLIS rows (value < 
timestamp_seconds(1572864)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
   
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                           
                       1890           1960          96          8.3         
120.2       1.0X
   Parquet Vectorized (columnIndex)                                             
                       1125           1138          13         14.0          
71.5       1.7X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 50% timestamp stored as TIMESTAMP_MILLIS rows (value < 
timestamp_seconds(7864320)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
   
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                           
                       5895           6148         221          2.7         
374.8       1.0X
   Parquet Vectorized (columnIndex)                                             
                       5483           5528          71          2.9         
348.6       1.1X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 90% timestamp stored as TIMESTAMP_MILLIS rows (value < 
timestamp_seconds(14155776)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
   
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                                           
                        9928          10021          61          1.6         
631.2       1.0X
   Parquet Vectorized (columnIndex)                                             
                        9747           9855          85          1.6         
619.7       1.0X
   
   
   
================================================================================================
   Pushdown benchmark with many filters
   
================================================================================================
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 row with 1 filters:              Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   ------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                  183            191           4          0.0   182665658.0       1.0X
   Parquet Vectorized (columnIndex)                    180            187           6          0.0   179942365.0       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 row with 250 filters:            Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   ------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                 2206           2240          35          0.0  2206072129.0       1.0X
   Parquet Vectorized (columnIndex)                   2206           2301          76          0.0  2206015717.0       1.0X
   
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 3.10.0-957.10.1.el7.x86_64
   Intel Core Processor (Broadwell, IBRS)
   Select 1 row with 500 filters:            Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   ------------------------------------------------------------------------------------------------------------------------
   Parquet Vectorized                                12239          12298          53          0.0 12239406583.0       1.0X
   Parquet Vectorized (columnIndex)                  12276          12502         200          0.0 12275545185.0       1.0X
   
   ```


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to