[ 
https://issues.apache.org/jira/browse/SPARK-26345?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17248374#comment-17248374
 ] 

Yuming Wang commented on SPARK-26345:
-------------------------------------

Benchmark and benchmark result:

{code:scala}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.execution.benchmark

import java.io.File

import scala.util.Random

import org.apache.spark.SparkConf
import org.apache.spark.benchmark.Benchmark
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.functions.{monotonically_increasing_id, 
timestamp_seconds}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.ParquetOutputTimestampType
import org.apache.spark.sql.types.{ByteType, Decimal, DecimalType}

/**
 * Benchmark to measure read performance with Parquet column index.
 * To run this benchmark:
 * {{{
 *   1. without sbt: bin/spark-submit --class <this class> <spark sql test jar>
 *   2. build/sbt "sql/test:runMain <this class>"
 *   3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>"
 *      Results will be written to "benchmarks/ParquetFilterPushdownBenchmark-results.txt".
 * }}}
 */
object ParquetFilterPushdownBenchmark extends SqlBasedBenchmark {

  /**
   * Builds the session shared by every benchmark case.
   *
   * The master is forced to `local[1]`; memory and compression settings are only
   * applied when the configuration does not already define them.
   */
  override def getSparkSession: SparkSession = {
    val conf = new SparkConf()
      .setAppName(this.getClass.getSimpleName)
      // Since `spark.master` always exists, overrides this value
      .set("spark.master", "local[1]")
      .setIfMissing("spark.driver.memory", "3g")
      .setIfMissing("spark.executor.memory", "3g")
      .setIfMissing("orc.compression", "snappy")
      .setIfMissing("spark.sql.parquet.compression.codec", "snappy")

    SparkSession.builder().config(conf).getOrCreate()
  }

  // Default table shape: row count, number of payload columns, and the probe value
  // used by the point-lookup cases.
  private val numRows = 1024 * 1024 * 15
  private val width = 5
  private val mid = numRows / 2

  /**
   * Runs `f` and afterwards drops every temp view in `tableNames`, also when `f` throws.
   */
  def withTempTable(tableNames: String*)(f: => Unit): Unit = {
    try f finally tableNames.foreach(spark.catalog.dropTempView)
  }

  /**
   * Builds the aggregate projection `MAX(c1),...,MAX(c<width>), MAX(value)` used by the
   * range-scan cases.
   */
  private def maxSelectExpr(width: Int): String =
    (1 to width).map(i => s"MAX(c$i)").mkString("", ",", ", MAX(value)")

  /**
   * Writes the "many distinct values" table and registers it as `parquetTable`.
   *
   * Columns `c1..c<width>` hold a random long cast to string; `value` holds
   * `monotonically_increasing_id()`, cast to string when `useStringForValue` is set.
   * The rows are sorted by `value` before being written.
   *
   * @param dir directory under which the Parquet files are written
   * @param numRows number of rows to generate
   * @param width number of `c<i>` payload columns
   * @param useStringForValue whether `value` is stored as a string instead of a long
   */
  private def prepareTable(
      dir: File, numRows: Int, width: Int, useStringForValue: Boolean): Unit = {
    // Needed for the Long encoder used by `map` below.
    import spark.implicits._
    val selectExpr = (1 to width).map(i => s"CAST(value AS STRING) c$i")
    val valueCol = if (useStringForValue) {
      monotonically_increasing_id().cast("string")
    } else {
      monotonically_increasing_id()
    }
    val df = spark.range(numRows).map(_ => Random.nextLong).selectExpr(selectExpr: _*)
      .withColumn("value", valueCol)
      .sort("value")

    saveAsTable(df, dir)
  }

  /**
   * Writes the "few distinct values" table and registers it as `parquetTable`.
   *
   * `value` is `id % numDistinctValues` cast to string; columns `c1..c<width>` hold
   * `rand()` cast to string. The rows are sorted by `value` before being written.
   *
   * @param dir directory under which the Parquet files are written
   * @param numRows number of rows to generate
   * @param numDistinctValues number of distinct strings stored in `value`
   * @param width number of `c<i>` payload columns
   */
  private def prepareStringDictTable(
      dir: File, numRows: Int, numDistinctValues: Int, width: Int): Unit = {
    val selectExpr = (0 to width).map {
      case 0 => s"CAST(id % $numDistinctValues AS STRING) AS value"
      case i => s"CAST(rand() AS STRING) c$i"
    }
    val df = spark.range(numRows).selectExpr(selectExpr: _*).sort("value")

    saveAsTable(df, dir, useDictionary = true)
  }

  /**
   * Overwrites `<dir>/parquet` with `df` and (re)registers the written files as the temp
   * view `parquetTable`.
   *
   * @param df data to write
   * @param dir parent directory of the Parquet output
   * @param useDictionary not read by this Parquet-only writer; kept so existing call
   *                      sites remain valid
   */
  private def saveAsTable(df: DataFrame, dir: File, useDictionary: Boolean = false): Unit = {
    val parquetPath = dir.getCanonicalPath + "/parquet"
    df.write.mode("overwrite").parquet(parquetPath)
    spark.read.parquet(parquetPath).createOrReplaceTempView("parquetTable")
  }

  /**
   * Runs one query against `parquetTable` twice: with `parquet.filter.columnindex.enabled`
   * set to `false`, then to `true`.
   *
   * @param values value count handed to [[Benchmark]] (callers pass the table row count)
   * @param title benchmark title
   * @param whereExpr SQL predicate placed after `WHERE`
   * @param selectExpr SQL projection, `*` by default
   */
  def filterPushDownBenchmark(
      values: Int,
      title: String,
      whereExpr: String,
      selectExpr: String = "*"): Unit = {
    val benchmark = new Benchmark(title, values, minNumIters = 5, output = output)

    Seq(false, true).foreach { columnIndexEnabled =>
      val suffix = if (columnIndexEnabled) "(columnIndex)" else ""
      val name = s"Parquet Vectorized $suffix"
      benchmark.addCase(name) { _ =>
        withSQLConf("parquet.filter.columnindex.enabled" -> columnIndexEnabled.toString) {
          spark.sql(s"SELECT $selectExpr FROM parquetTable WHERE $whereExpr").noop()
        }
      }
    }

    benchmark.run()
  }

  /**
   * Benchmarks predicates on a numeric `value` column: zero-row, single-row,
   * 10%/50%/90% range and all-row selections.
   *
   * @param numRows row count of the prepared table
   * @param width number of `c<i>` columns aggregated by the range/all-row cases
   * @param mid probe value used by the zero-row and single-row predicates
   */
  private def runIntBenchmark(numRows: Int, width: Int, mid: Int): Unit = {
    Seq("value IS NULL", s"$mid < value AND value < $mid").foreach { whereExpr =>
      // Collapse "a < value AND value < b" to "a < value < b" for a shorter title.
      val title = s"Select 0 int row ($whereExpr)".replace("value AND value", "value")
      filterPushDownBenchmark(numRows, title, whereExpr)
    }

    Seq(
      s"value = $mid",
      s"value <=> $mid",
      s"$mid <= value AND value <= $mid",
      s"${mid - 1} < value AND value < ${mid + 1}"
    ).foreach { whereExpr =>
      val title = s"Select 1 int row ($whereExpr)".replace("value AND value", "value")
      filterPushDownBenchmark(numRows, title, whereExpr)
    }

    val selectExpr = maxSelectExpr(width)

    Seq(10, 50, 90).foreach { percent =>
      filterPushDownBenchmark(
        numRows,
        s"Select $percent% int rows (value < ${numRows * percent / 100})",
        s"value < ${numRows * percent / 100}",
        selectExpr
      )
    }

    Seq("value IS NOT NULL", "value > -1", "value != -1").foreach { whereExpr =>
      filterPushDownBenchmark(
        numRows,
        s"Select all int rows ($whereExpr)",
        whereExpr,
        selectExpr)
    }
  }

  /**
   * Benchmarks predicates on a string `value` column: zero-row, single-row and all-row
   * selections.
   *
   * @param numRows row count of the prepared table
   * @param width number of `c<i>` columns aggregated by the all-row case
   * @param searchValue probe value, quoted as a string literal in the predicates
   * @param colType label inserted into the benchmark titles
   */
  private def runStringBenchmark(
      numRows: Int, width: Int, searchValue: Int, colType: String): Unit = {
    Seq("value IS NULL", s"'$searchValue' < value AND value < '$searchValue'")
        .foreach { whereExpr =>
      val title = s"Select 0 $colType row ($whereExpr)".replace("value AND value", "value")
      filterPushDownBenchmark(numRows, title, whereExpr)
    }

    Seq(
      s"value = '$searchValue'",
      s"value <=> '$searchValue'",
      s"'$searchValue' <= value AND value <= '$searchValue'"
    ).foreach { whereExpr =>
      val title = s"Select 1 $colType row ($whereExpr)".replace("value AND value", "value")
      filterPushDownBenchmark(numRows, title, whereExpr)
    }

    val selectExpr = maxSelectExpr(width)

    Seq("value IS NOT NULL").foreach { whereExpr =>
      filterPushDownBenchmark(
        numRows,
        s"Select all $colType rows ($whereExpr)",
        whereExpr,
        selectExpr)
    }
  }

  /**
   * Entry point: prepares a table for each scenario and runs the corresponding filter
   * benchmarks. `mainArgs` is not used.
   */
  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
    runBenchmark("Pushdown for many distinct value case") {
      withTempPath { dir =>
        withTempTable("parquetTable") {
          Seq(true, false).foreach { useStringForValue =>
            prepareTable(dir, numRows, width, useStringForValue)
            if (useStringForValue) {
              runStringBenchmark(numRows, width, mid, "string")
            } else {
              runIntBenchmark(numRows, width, mid)
            }
          }
        }
      }
    }

    runBenchmark("Pushdown for few distinct value case (use dictionary encoding)") {
      withTempPath { dir =>
        val numDistinctValues = 200

        withTempTable("parquetTable") {
          prepareStringDictTable(dir, numRows, numDistinctValues, width)
          runStringBenchmark(numRows, width, numDistinctValues / 2, "distinct string")
        }
      }
    }

    runBenchmark("Pushdown benchmark for StringStartsWith") {
      withTempPath { dir =>
        withTempTable("parquetTable") {
          prepareTable(dir, numRows, width, useStringForValue = true)
          Seq(
            "value like '10%'",
            "value like '1000%'",
            // Prefix made of every digit of `mid` except the last one.
            s"value like '${mid.toString.substring(0, mid.toString.length - 1)}%'"
          ).foreach { whereExpr =>
            val title = s"StringStartsWith filter: ($whereExpr)"
            filterPushDownBenchmark(numRows, title, whereExpr)
          }
        }
      }
    }

    runBenchmark(s"Pushdown benchmark for ${DecimalType.simpleString}") {
      withTempPath { dir =>
        Seq(
          s"decimal(${Decimal.MAX_INT_DIGITS}, 2)",
          s"decimal(${Decimal.MAX_LONG_DIGITS}, 2)",
          s"decimal(${DecimalType.MAX_PRECISION}, 2)"
        ).foreach { dt =>
          val columns = (1 to width).map(i => s"CAST(id AS string) c$i")
          // The narrowest decimal type gets its ids wrapped by a modulo before the cast.
          val valueCol = if (dt.equalsIgnoreCase(s"decimal(${Decimal.MAX_INT_DIGITS}, 2)")) {
            monotonically_increasing_id() % 9999999
          } else {
            monotonically_increasing_id()
          }
          val df = spark.range(numRows)
            .selectExpr(columns: _*).withColumn("value", valueCol.cast(dt))
          withTempTable("parquetTable") {
            saveAsTable(df, dir)

            Seq(s"value = $mid").foreach { whereExpr =>
              val title = s"Select 1 $dt row ($whereExpr)".replace("value AND value", "value")
              filterPushDownBenchmark(numRows, title, whereExpr)
            }

            val selectExpr = maxSelectExpr(width)
            Seq(10, 50, 90).foreach { percent =>
              filterPushDownBenchmark(
                numRows,
                s"Select $percent% $dt rows (value < ${numRows * percent / 100})",
                s"value < ${numRows * percent / 100}",
                selectExpr
              )
            }
          }
        }
      }
    }

    runBenchmark("Pushdown benchmark for InSet -> InFilters") {
      withTempPath { dir =>
        withTempTable("parquetTable") {
          prepareTable(dir, numRows, width, useStringForValue = false)
          Seq(5, 10, 50, 100).foreach { count =>
            Seq(10, 50, 90).foreach { distribution =>
              // `count` random values drawn from the first `distribution` percent of ids.
              val filter = Seq.fill(count)(Random.nextInt(numRows * distribution / 100))
              val whereExpr = s"value in(${filter.mkString(",")})"
              val title = s"InSet -> InFilters (values count: $count, distribution: $distribution)"
              filterPushDownBenchmark(numRows, title, whereExpr)
            }
          }
        }
      }
    }

    runBenchmark(s"Pushdown benchmark for ${ByteType.simpleString}") {
      withTempPath { dir =>
        val columns = (1 to width).map(i => s"CAST(id AS string) c$i")
        val df = spark.range(numRows).selectExpr(columns: _*)
          .withColumn("value", (monotonically_increasing_id() % Byte.MaxValue).cast(ByteType))
          .orderBy("value")
        withTempTable("parquetTable") {
          saveAsTable(df, dir)

          Seq(s"value = CAST(${Byte.MaxValue / 2} AS ${ByteType.simpleString})")
            .foreach { whereExpr =>
              val title = s"Select 1 ${ByteType.simpleString} row ($whereExpr)"
                .replace("value AND value", "value")
              filterPushDownBenchmark(numRows, title, whereExpr)
            }

          val selectExpr = maxSelectExpr(width)
          Seq(10, 50, 90).foreach { percent =>
            filterPushDownBenchmark(
              numRows,
              s"Select $percent% ${ByteType.simpleString} rows " +
                s"(value < CAST(${Byte.MaxValue * percent / 100} AS ${ByteType.simpleString}))",
              s"value < CAST(${Byte.MaxValue * percent / 100} AS ${ByteType.simpleString})",
              selectExpr
            )
          }
        }
      }
    }

    runBenchmark(s"Pushdown benchmark for Timestamp") {
      withTempPath { dir =>
        withSQLConf(SQLConf.PARQUET_FILTER_PUSHDOWN_TIMESTAMP_ENABLED.key -> true.toString) {
          // One pass per Parquet output timestamp type.
          ParquetOutputTimestampType.values.toSeq.map(_.toString).foreach { fileType =>
            withSQLConf(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key -> fileType) {
              val columns = (1 to width).map(i => s"CAST(id AS string) c$i")
              val df = spark.range(numRows).selectExpr(columns: _*)
                .withColumn("value", timestamp_seconds(monotonically_increasing_id()))
              withTempTable("parquetTable") {
                saveAsTable(df, dir)

                Seq(s"value = timestamp_seconds($mid)").foreach { whereExpr =>
                  val title = s"Select 1 timestamp stored as $fileType row ($whereExpr)"
                    .replace("value AND value", "value")
                  filterPushDownBenchmark(numRows, title, whereExpr)
                }

                val selectExpr = maxSelectExpr(width)
                Seq(10, 50, 90).foreach { percent =>
                  filterPushDownBenchmark(
                    numRows,
                    s"Select $percent% timestamp stored as $fileType rows " +
                      s"(value < timestamp_seconds(${numRows * percent / 100}))",
                    s"value < timestamp_seconds(${numRows * percent / 100})",
                    selectExpr
                  )
                }
              }
            }
          }
        }
      }
    }

    runBenchmark(s"Pushdown benchmark with many filters") {
      // Deliberately shadow the object-level defaults: one row, very wide table.
      val numRows = 1
      val width = 500

      withTempPath { dir =>
        val columns = (1 to width).map(i => s"id c$i")
        val df = spark.range(numRows).selectExpr(columns: _*)
        withTempTable("parquetTable") {
          saveAsTable(df, dir)
          Seq(1, 250, 500).foreach { numFilter =>
            val whereExpr = (1 to numFilter).map(i => s"c$i = 0").mkString(" and ")
            // Note: InferFiltersFromConstraints will add more filters to this given filters
            filterPushDownBenchmark(numRows, s"Select 1 row with $numFilter filters", whereExpr)
          }
        }
      }
    }
  }
}

{code}


{noformat}
================================================================================================
Pushdown for many distinct value case
================================================================================================

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 0 string row (value IS NULL):      Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                   99            111          
10        158.9           6.3       1.0X
Parquet Vectorized (columnindex)                     78             86          
 9        201.6           5.0       1.3X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 0 string row ('7864320' < value < '7864320'):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-----------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                             835            
848          17         18.8          53.1       1.0X
Parquet Vectorized (columnindex)                                91             
96           4        173.5           5.8       9.2X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 string row (value = '7864320'):  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                  839            846          
 6         18.7          53.3       1.0X
Parquet Vectorized (columnindex)                     85             93          
10        184.8           5.4       9.9X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 string row (value <=> '7864320'):  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                   838            852         
 16         18.8          53.3       1.0X
Parquet Vectorized (columnindex)                      79             85         
  4        197.9           5.1      10.5X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 string row ('7864320' <= value <= '7864320'):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                               872            
907          45         18.0          55.4       1.0X
Parquet Vectorized (columnindex)                                  83            
 89           5        188.9           5.3      10.5X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select all string rows (value IS NOT NULL):  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
--------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                  16487          16727        
 253          1.0        1048.2       1.0X
Parquet Vectorized (columnindex)                    16355          16426        
  62          1.0        1039.8       1.0X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 0 int row (value IS NULL):         Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                   64             68          
 5        246.7           4.1       1.0X
Parquet Vectorized (columnindex)                     61             66          
 4        258.0           3.9       1.0X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 0 int row (7864320 < value < 7864320):  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
----------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                      770            795      
    34         20.4          48.9       1.0X
Parquet Vectorized (columnindex)                         78             84      
     5        201.1           5.0       9.8X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 int row (value = 7864320):       Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                  774            795          
21         20.3          49.2       1.0X
Parquet Vectorized (columnindex)                     77             82          
 6        205.3           4.9      10.1X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 int row (value <=> 7864320):     Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                  771            777          
 7         20.4          49.0       1.0X
Parquet Vectorized (columnindex)                     69             76          
 5        226.8           4.4      11.1X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 int row (7864320 <= value <= 7864320):  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                        769            794    
      29         20.4          48.9       1.0X
Parquet Vectorized (columnindex)                           74             82    
       6        213.3           4.7      10.4X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 int row (7864319 < value < 7864321):  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
----------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                      775            825      
    43         20.3          49.3       1.0X
Parquet Vectorized (columnindex)                         76             81      
     5        206.3           4.8      10.2X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 10% int rows (value < 1572864):    Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                 2326           2395          
51          6.8         147.9       1.0X
Parquet Vectorized (columnindex)                   1655           1669          
13          9.5         105.2       1.4X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 50% int rows (value < 7864320):    Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                 7898           8175         
157          2.0         502.1       1.0X
Parquet Vectorized (columnindex)                   7658           7731          
73          2.1         486.9       1.0X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 90% int rows (value < 14155776):   Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                13778          13858          
78          1.1         876.0       1.0X
Parquet Vectorized (columnindex)                  13771          13885         
105          1.1         875.5       1.0X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select all int rows (value IS NOT NULL):  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                15072          15281         
163          1.0         958.3       1.0X
Parquet Vectorized (columnindex)                  15119          15344         
194          1.0         961.3       1.0X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select all int rows (value > -1):         Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                15257          15378         
198          1.0         970.0       1.0X
Parquet Vectorized (columnindex)                  15296          15519         
232          1.0         972.5       1.0X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select all int rows (value != -1):        Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                15262          15325          
92          1.0         970.4       1.0X
Parquet Vectorized (columnindex)                  15173          15255          
84          1.0         964.7       1.0X


================================================================================================
Pushdown for few distinct value case (use dictionary encoding)
================================================================================================

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 0 distinct string row (value IS NULL):  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
----------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                       53             59      
     5        298.8           3.3       1.0X
Parquet Vectorized (columnindex)                         52             57      
     6        300.2           3.3       1.0X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 0 distinct string row ('100' < value < '100'):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                              890            
902           7         17.7          56.6       1.0X
Parquet Vectorized (columnindex)                                 59             
62           4        266.2           3.8      15.1X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 distinct string row (value = '100'):  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
----------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                      894            905      
    10         17.6          56.9       1.0X
Parquet Vectorized (columnindex)                        125            130      
     6        126.2           7.9       7.2X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 distinct string row (value <=> '100'):  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                        901            920    
      26         17.5          57.3       1.0X
Parquet Vectorized (columnindex)                          119            127    
       4        132.1           7.6       7.6X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 distinct string row ('100' <= value <= '100'):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
--------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                                902           
 914          10         17.4          57.3       1.0X
Parquet Vectorized (columnindex)                                  126           
 132           7        124.8           8.0       7.2X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select all distinct string rows (value IS NOT NULL):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-----------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                           16872          
16947          97          0.9        1072.7       1.0X
Parquet Vectorized (columnindex)                             16861          
16970          80          0.9        1072.0       1.0X


================================================================================================
Pushdown benchmark for StringStartsWith
================================================================================================

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
StringStartsWith filter: (value like '10%'):  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
---------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                    1025           1038       
   17         15.4          65.1       1.0X
Parquet Vectorized (columnindex)                       852            868       
   16         18.5          54.2       1.2X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
StringStartsWith filter: (value like '1000%'):  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-----------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                       816            838     
     25         19.3          51.9       1.0X
Parquet Vectorized (columnindex)                          74             79     
      5        213.7           4.7      11.1X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
StringStartsWith filter: (value like '786432%'):  Best Time(ms)   Avg Time(ms)  
 Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                         817            836   
       11         19.2          52.0       1.0X
Parquet Vectorized (columnindex)                            76             82   
        4        207.6           4.8      10.8X


================================================================================================
Pushdown benchmark for decimal
================================================================================================

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 decimal(9, 2) row (value = 7864320):  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
----------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                     1125           1138      
    13         14.0          71.6       1.0X
Parquet Vectorized (columnindex)                         50             55      
     5        313.1           3.2      22.4X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 10% decimal(9, 2) rows (value < 1572864):  Best Time(ms)   Avg Time(ms)  
 Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                        4800           4930   
      131          3.3         305.2       1.0X
Parquet Vectorized (columnindex)                          2227           2274   
       40          7.1         141.6       2.2X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 50% decimal(9, 2) rows (value < 7864320):  Best Time(ms)   Avg Time(ms)  
 Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                       10016          10204   
      202          1.6         636.8       1.0X
Parquet Vectorized (columnindex)                          9571           9677   
       63          1.6         608.5       1.0X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 90% decimal(9, 2) rows (value < 14155776):  Best Time(ms)   Avg Time(ms) 
  Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
--------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                        11161          11403  
       187          1.4         709.6       1.0X
Parquet Vectorized (columnindex)                          11103          11283  
       130          1.4         705.9       1.0X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 decimal(18, 2) row (value = 7864320):  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-----------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                      1150           1168     
     21         13.7          73.1       1.0X
Parquet Vectorized (columnindex)                          45             48     
      5        350.0           2.9      25.6X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 10% decimal(18, 2) rows (value < 1572864):  Best Time(ms)   Avg Time(ms) 
  Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
--------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                         1946           1978  
        34          8.1         123.7       1.0X
Parquet Vectorized (columnindex)                           1155           1189  
        28         13.6          73.4       1.7X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 50% decimal(18, 2) rows (value < 7864320):  Best Time(ms)   Avg Time(ms) 
  Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
--------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                         6206           6413  
       211          2.5         394.6       1.0X
Parquet Vectorized (columnindex)                           5659           5786  
        96          2.8         359.8       1.1X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 90% decimal(18, 2) rows (value < 14155776):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
---------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                         10375          10534 
        240          1.5         659.6       1.0X
Parquet Vectorized (columnindex)                           10120          10334 
        221          1.6         643.4       1.0X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 decimal(38, 2) row (value = 7864320):  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-----------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                      1694           1748     
     49          9.3         107.7       1.0X
Parquet Vectorized (columnindex)                          46             50     
      4        338.6           3.0      36.5X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 10% decimal(38, 2) rows (value < 1572864):  Best Time(ms)   Avg Time(ms) 
  Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
--------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                         2712           2782  
        48          5.8         172.5       1.0X
Parquet Vectorized (columnindex)                           1584           1611  
        32          9.9         100.7       1.7X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 50% decimal(38, 2) rows (value < 7864320):  Best Time(ms)   Avg Time(ms) 
  Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
--------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                         8356           8499  
        82          1.9         531.3       1.0X
Parquet Vectorized (columnindex)                           7781           7979  
       123          2.0         494.7       1.1X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 90% decimal(38, 2) rows (value < 14155776):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
---------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                         14128          14304 
        182          1.1         898.2       1.0X
Parquet Vectorized (columnindex)                           13940          14004 
         44          1.1         886.3       1.0X


================================================================================================
Pushdown benchmark for InSet -> InFilters
================================================================================================

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 5, distribution: 10):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
--------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                                773           
 784          11         20.4          49.1       1.0X
Parquet Vectorized (columnindex)                                  113           
 118           5        139.4           7.2       6.8X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 5, distribution: 50):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
--------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                               2910           
2930          13          5.4         185.0       1.0X
Parquet Vectorized (columnindex)                                  116           
 120           4        136.1           7.3      25.2X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 5, distribution: 90):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
--------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                               2207           
2218           8          7.1         140.3       1.0X
Parquet Vectorized (columnindex)                                  117           
 123           8        134.3           7.4      18.8X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 10, distribution: 10):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
---------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                                1493          
 1506          11         10.5          94.9       1.0X
Parquet Vectorized (columnindex)                                   159          
  164           6         99.2          10.1       9.4X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 10, distribution: 50):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
---------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                                3591          
 3635          32          4.4         228.3       1.0X
Parquet Vectorized (columnindex)                                   170          
  175           5         92.6          10.8      21.1X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 10, distribution: 90):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
---------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                                5079          
 5147          50          3.1         322.9       1.0X
Parquet Vectorized (columnindex)                                   172          
  180           5         91.2          11.0      29.5X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 50, distribution: 10):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
---------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                                8280          
 8533         245          1.9         526.4       1.0X
Parquet Vectorized (columnindex)                                  8341          
 8423          93          1.9         530.3       1.0X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 50, distribution: 50):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
---------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                                8248          
 8347          72          1.9         524.4       1.0X
Parquet Vectorized (columnindex)                                  8230          
 8303          66          1.9         523.2       1.0X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 50, distribution: 90):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
---------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                                8219          
 8285          44          1.9         522.6       1.0X
Parquet Vectorized (columnindex)                                  8183          
 8381         184          1.9         520.3       1.0X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 100, distribution: 10):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
----------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                                 8354         
  8411          60          1.9         531.1       1.0X
Parquet Vectorized (columnindex)                                   8181         
  8256          60          1.9         520.1       1.0X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 100, distribution: 50):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
----------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                                 8151         
  8210          38          1.9         518.3       1.0X
Parquet Vectorized (columnindex)                                   8169         
  8210          37          1.9         519.4       1.0X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
InSet -> InFilters (values count: 100, distribution: 90):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
----------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                                 8131         
  8204          46          1.9         516.9       1.0X
Parquet Vectorized (columnindex)                                   8167         
  8231          65          1.9         519.3       1.0X


================================================================================================
Pushdown benchmark for tinyint
================================================================================================

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 tinyint row (value = CAST(63 AS tinyint)):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
----------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                           1124           
1198          68         14.0          71.4       1.0X
Parquet Vectorized (columnindex)                               91             
93           2        173.7           5.8      12.4X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 10% tinyint rows (value < CAST(12 AS tinyint)):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                              1845           
1892          37          8.5         117.3       1.0X
Parquet Vectorized (columnindex)                                1113           
1123          11         14.1          70.8       1.7X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 50% tinyint rows (value < CAST(63 AS tinyint)):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                              5850           
5911          73          2.7         371.9       1.0X
Parquet Vectorized (columnindex)                                5450           
5567          91          2.9         346.5       1.1X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 90% tinyint rows (value < CAST(114 AS tinyint)):  Best Time(ms)   Avg 
Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
--------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                              10395          
10458          62          1.5         660.9       1.0X
Parquet Vectorized (columnindex)                                 9928          
10104         176          1.6         631.2       1.0X


================================================================================================
Pushdown benchmark for Timestamp
================================================================================================

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 timestamp stored as INT96 row (value = timestamp_seconds(7864320)):  
Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-----------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                                              
      3929           4094         107          4.0         249.8       1.0X
Parquet Vectorized (columnindex)                                                
      3991           4068          74          3.9         253.7       1.0X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 10% timestamp stored as INT96 rows (value < timestamp_seconds(1572864)): 
 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
--------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                                              
         4774           5003         163          3.3         303.5       1.0X
Parquet Vectorized (columnindex)                                                
         4769           4880         159          3.3         303.2       1.0X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 50% timestamp stored as INT96 rows (value < timestamp_seconds(7864320)): 
 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
--------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                                              
         7736           7884         142          2.0         491.9       1.0X
Parquet Vectorized (columnindex)                                                
         7587           7795         156          2.1         482.4       1.0X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 90% timestamp stored as INT96 rows (value < 
timestamp_seconds(14155776)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
---------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                                              
         10722          10785          61          1.5         681.7       1.0X
Parquet Vectorized (columnindex)                                                
         10719          10775          55          1.5         681.5       1.0X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 timestamp stored as TIMESTAMP_MICROS row (value = 
timestamp_seconds(7864320)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
----------------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                                              
                 1127           1172          47         14.0          71.6     
  1.0X
Parquet Vectorized (columnindex)                                                
                   44             46           3        360.8           2.8     
 25.9X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 10% timestamp stored as TIMESTAMP_MICROS rows (value < 
timestamp_seconds(1572864)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                                              
                    1861           1916          90          8.5         118.3  
     1.0X
Parquet Vectorized (columnindex)                                                
                    1127           1160          22         14.0          71.7  
     1.7X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 50% timestamp stored as TIMESTAMP_MICROS rows (value < 
timestamp_seconds(7864320)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                                              
                    5809           5934         214          2.7         369.3  
     1.0X
Parquet Vectorized (columnindex)                                                
                    5455           5523          93          2.9         346.8  
     1.1X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 90% timestamp stored as TIMESTAMP_MICROS rows (value < 
timestamp_seconds(14155776)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                                              
                     9777          10097         244          1.6         621.6 
      1.0X
Parquet Vectorized (columnindex)                                                
                     9808           9849          44          1.6         623.6 
      1.0X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 timestamp stored as TIMESTAMP_MILLIS row (value = 
timestamp_seconds(7864320)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
----------------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                                              
                 1125           1163          24         14.0          71.5     
  1.0X
Parquet Vectorized (columnindex)                                                
                   43             47           5        369.3           2.7     
 26.4X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 10% timestamp stored as TIMESTAMP_MILLIS rows (value < 
timestamp_seconds(1572864)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                                              
                    1905           1977          80          8.3         121.1  
     1.0X
Parquet Vectorized (columnindex)                                                
                    1137           1186          40         13.8          72.3  
     1.7X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 50% timestamp stored as TIMESTAMP_MILLIS rows (value < 
timestamp_seconds(7864320)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                                              
                    6018           6266         230          2.6         382.6  
     1.0X
Parquet Vectorized (columnindex)                                                
                    5631           5703          69          2.8         358.0  
     1.1X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 90% timestamp stored as TIMESTAMP_MILLIS rows (value < 
timestamp_seconds(14155776)):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                                              
                    10132          10224         113          1.6         644.2 
      1.0X
Parquet Vectorized (columnindex)                                                
                     9898           9992          69          1.6         629.3 
      1.0X


================================================================================================
Pushdown benchmark with many filters
================================================================================================

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 row with 1 filters:              Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                  182            190          
 7          0.0   182396465.0       1.0X
Parquet Vectorized (columnindex)                    187            192          
 5          0.0   187246572.0       1.0X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 row with 250 filters:            Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                 2228           2257          
19          0.0  2228318860.0       1.0X
Parquet Vectorized (columnindex)                   2212           2244          
24          0.0  2212486315.0       1.0X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_221-b11 on Linux 
3.10.0-957.10.1.el7.x86_64
Intel Core Processor (Broadwell, IBRS)
Select 1 row with 500 filters:            Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Parquet Vectorized                                12391          12473          
86          0.0 12391350628.0       1.0X
Parquet Vectorized (columnindex)                  12438          12594         
229          0.0 12438065459.0       1.0X


{noformat}



> Parquet support Column indexes
> ------------------------------
>
>                 Key: SPARK-26345
>                 URL: https://issues.apache.org/jira/browse/SPARK-26345
>             Project: Spark
>          Issue Type: Umbrella
>          Components: SQL
>    Affects Versions: 3.1.0
>            Reporter: Yuming Wang
>            Priority: Major
>
> Parquet 1.11.0 supports column indexing. Spark can support this feature for 
> better read performance.
> More details:
> https://issues.apache.org/jira/browse/PARQUET-1201



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to