Github user dongjoon-hyun commented on a diff in the pull request:
https://github.com/apache/spark/pull/20265#discussion_r161620557
--- Diff:
sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala ---
@@ -483,6 +484,64 @@ object OrcReadBenchmark {
}
}
+ def filterPushDownBenchmark(values: Int, width: Int): Unit = {
+ val benchmark = new Benchmark(s"Filter Pushdown", values)
+
+ withTempPath { dir =>
+ withTempTable("t1", "nativeOrcTable", "hiveOrcTable") {
+ import spark.implicits._
+ val selectExpr = (1 to width).map(i => s"CAST(value AS STRING)
c$i")
+ val whereExpr = (1 to width).map(i => s"NOT c$i LIKE
'%not%exist%'").mkString(" AND ")
+ val df = spark.range(values).map(_ =>
Random.nextLong).selectExpr(selectExpr: _*)
+ .withColumn("uniqueID", monotonically_increasing_id())
+
+ df.createOrReplaceTempView("t1")
+ prepareTable(dir, spark.sql("SELECT * FROM t1"))
+
+ Seq(false, true).foreach { value =>
+ benchmark.addCase(s"Native ORC MR (Pushdown=$value)") { _ =>
+ withSQLConf(
+ SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false",
+ SQLConf.ORC_FILTER_PUSHDOWN_ENABLED.key -> s"$value") {
+ spark.sql(
+ s"""
+ |SELECT c1
+ |FROM nativeOrcTable
+ |WHERE uniqueID = 0 AND $whereExpr
+ """.stripMargin).collect()
+ }
+ }
+ }
+
+ Seq(false, true).foreach { value =>
+ benchmark.addCase(s"Native ORC Vectorized (Pushdown=$value)") {
_ =>
+ withSQLConf(SQLConf.ORC_FILTER_PUSHDOWN_ENABLED.key ->
s"$value") {
+ spark.sql(
+ s"""
+ |SELECT c1
+ |FROM nativeOrcTable
+ |WHERE uniqueID = 0 AND $whereExpr
+ """.stripMargin).collect()
+ }
+ }
+ }
+
+ /*
+ Java HotSpot(TM) 64-Bit Server VM 1.8.0_152-b16 on Mac OS X 10.13.2
+ Intel(R) Core(TM) i7-4770HQ CPU @ 2.20GHz
+
+ Filter Pushdown:                      Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+ ------------------------------------------------------------------------------------------------
+ Native ORC MR (Pushdown=false)            16169 / 16193          0.3        3084.0       1.0X
--- End diff --
Yep, I see. This focuses on predicate pushdown (PPD) with the best reader.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]