[GitHub] [spark] MaxGekk commented on a change in pull request #27366: [SPARK-30648][SQL] Support filters pushdown in JSON datasource

GitBox Mon, 22 Jun 2020 23:33:03 -0700


MaxGekk commented on a change in pull request #27366:
URL: https://github.com/apache/spark/pull/27366#discussion_r443991161




##########
File path: 
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala
##########
@@ -495,6 +496,45 @@ object JsonBenchmark extends SqlBasedBenchmark {
     }
   }
 
+  private def filtersPushdownBenchmark(rowsNum: Int, numIters: Int): Unit = {
+    val benchmark = new Benchmark(s"Filters pushdown", rowsNum, output = 
output)
+    val colsNum = 100
+    val fields = Seq.tabulate(colsNum)(i => StructField(s"col$i", 
TimestampType))
+    val schema = StructType(StructField("key", IntegerType) +: fields)
+    def columns(): Seq[Column] = {
+      val ts = Seq.tabulate(colsNum) { i =>
+        lit(Instant.ofEpochSecond(i * 12345678)).as(s"col$i")
+      }
+      ($"id" % 1000).as("key") +: ts
+    }
+    withTempPath { path =>
+      spark.range(rowsNum).select(columns(): 
_*).write.json(path.getAbsolutePath)
+      def readback = {
+        spark.read.schema(schema).json(path.getAbsolutePath)
+      }
+
+      benchmark.addCase(s"w/o filters", numIters) { _ =>
+        readback.noop()
+      }
+
+      def withFilter(configEnabled: Boolean): Unit = {
+        withSQLConf(SQLConf.JSON_FILTER_PUSHDOWN_ENABLED.key -> 
configEnabled.toString()) {
+          readback.filter($"key" === 0).noop()
+        }
+      }
+
+      benchmark.addCase(s"pushdown disabled", numIters) { _ =>

Review comment:
       I will remove it here and in other places. Thanks




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] [spark] MaxGekk commented on a change in pull request #27366: [SPARK-30648][SQL] Support filters pushdown in JSON datasource

Reply via email to