sunchao commented on a change in pull request #33695:
URL: https://github.com/apache/spark/pull/33695#discussion_r699719818
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala
##########
@@ -540,6 +548,106 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark {
}
}
+ /**
+ * Similar to [[numericScanBenchmark]] but accessed column is a struct field.
+ */
+ def nestedNumericScanBenchmark(values: Int, dataType: DataType): Unit = {
+ val sqlBenchmark = new Benchmark(
+ s"SQL Single ${dataType.sql} Column Scan in Struct",
+ values,
+ output = output)
+
+ withTempPath { dir =>
+ withTempTable("t1", "parquetTable", "orcTable") {
+ import spark.implicits._
+ spark.range(values).map(_ => Random.nextLong).createOrReplaceTempView("t1")
+
+ prepareTable(dir,
+ spark.sql(s"SELECT named_struct('f', CAST(value as ${dataType.sql})) as col FROM t1"),
+ isComplexType = true)
+
+ sqlBenchmark.addCase("SQL Parquet MR") { _ =>
+ withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") {
+ spark.sql("select sum(col.f) from parquetTable").noop()
+ }
+ }
+
+ sqlBenchmark.addCase("SQL Parquet Vectorized (Disabled Nested Column)") { _ =>
+ withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key -> "false") {
+ spark.sql("select sum(col.f) from parquetTable").noop()
+ }
+ }
+
+ sqlBenchmark.addCase("SQL Parquet Vectorized (Enabled Nested Column)") { _ =>
+ withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key -> "true") {
+ spark.sql("select sum(col.f) from parquetTable").noop()
+ }
+ }
+
+ sqlBenchmark.run()
+ }
+ }
+ }
+
+ def nestedColumnScanBenchmark(values: Int): Unit = {
Review comment:
This is borrowed from #31958 by @c21
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]