gaoyangxiaozhu commented on code in PR #6318:
URL: https://github.com/apache/incubator-gluten/pull/6318#discussion_r1672040921
##########
gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala:
##########
@@ -18,38 +18,31 @@ package org.apache.spark.sql
import org.apache.spark.sql.execution.ProjectExec
import org.apache.spark.sql.functions.{expr, input_file_name}
-import org.apache.spark.sql.types.{ArrayType, IntegerType, StringType,
StructField, StructType}
class GlutenColumnExpressionSuite extends ColumnExpressionSuite with
GlutenSQLTestsTrait {
- testGluten("input_file_name with scan is fallback") {
- withTempPath {
- dir =>
- val rawData = Seq(
- Row(1, "Alice", Seq(Row(Seq(1, 2, 3)))),
- Row(2, "Bob", Seq(Row(Seq(4, 5)))),
- Row(3, "Charlie", Seq(Row(Seq(6, 7, 8, 9))))
- )
- val schema = StructType(
- Array(
- StructField("id", IntegerType, nullable = false),
- StructField("name", StringType, nullable = false),
- StructField(
- "nested_column",
- ArrayType(
- StructType(Array(
- StructField("array_in_struct", ArrayType(IntegerType),
nullable = true)
- ))),
- nullable = true)
- ))
- val data: DataFrame =
spark.createDataFrame(sparkContext.parallelize(rawData), schema)
- data.write.parquet(dir.getCanonicalPath)
+ import testImplicits._
+ testGluten(
+ "input_file_name, input_file_block_start, input_file_block_length with
scan is fallback") {
+ withSQLConf(("spark.gluten.sql.columnar.filescan", "false")) {
+ withTempPath {
+ dir =>
+ val data = sparkContext.parallelize(0 to 10).toDF("id")
+ data.write.parquet(dir.getCanonicalPath)
- val q =
- spark.read.parquet(dir.getCanonicalPath).select(input_file_name(),
expr("nested_column"))
- val firstRow = q.head()
- assert(firstRow.getString(0).contains(dir.toURI.getPath))
- val project = q.queryExecution.executedPlan.collect { case p:
ProjectExec => p }
- assert(project.size == 1)
+ val q =
+ spark.read
+ .parquet(dir.getCanonicalPath)
+ .select(
+ input_file_name(),
+ expr("input_file_block_start()"),
Review Comment:
It should be fine as is; I don't see any pros to changing it here. The Spark UT also does it the same way -
https://github.com/apache/spark/blob/master/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala#L805
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]