This is an automated email from the ASF dual-hosted git repository.
philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
      new b08673475 [VL] Verify empty2null is offloaded when v1writer fallback (#6859)
b08673475 is described below
commit b08673475c901caf78b938bad10a14a920c7ec56
Author: Yang Zhang <[email protected]>
AuthorDate: Thu Aug 15 14:36:27 2024 +0800
[VL] Verify empty2null is offloaded when v1writer fallback (#6859)
---
.../spark/sql/sources/GlutenInsertSuite.scala | 32 +++++++++++++++++++++-
1 file changed, 31 insertions(+), 1 deletion(-)
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/sources/GlutenInsertSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/sources/GlutenInsertSuite.scala
index 5c60115c5..ca0ada39c 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/sources/GlutenInsertSuite.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/sources/GlutenInsertSuite.scala
@@ -16,7 +16,7 @@
*/
package org.apache.spark.sql.sources
-import org.apache.gluten.execution.SortExecTransformer
+import org.apache.gluten.execution.{ProjectExecTransformer, SortExecTransformer}
import org.apache.gluten.extension.GlutenPlan
import org.apache.spark.SparkConf
@@ -147,6 +147,36 @@ class GlutenInsertSuite
assert(parts == expectedPartitionNames)
}
+ testGluten("offload empty2null when v1writes fallback") {
+ withSQLConf((SQLConf.MAX_RECORDS_PER_FILE.key, "1000")) {
+ withTable("pt") {
+ spark.sql("CREATE TABLE pt (c1 int) USING PARQUET PARTITIONED BY(p string)")
+
+ val df = spark.sql(s"""
+ |INSERT OVERWRITE TABLE pt PARTITION(p)
+ |SELECT c1, c2 as p FROM source
+ |""".stripMargin)
+
+ val writeFiles = stripAQEPlan(
+ df.queryExecution.executedPlan
+ .asInstanceOf[CommandResultExec]
+ .commandPhysicalPlan).children.head
+ assert(!writeFiles.isInstanceOf[ColumnarWriteFilesExec])
+ assert(writeFiles.exists(_.isInstanceOf[ProjectExecTransformer]))
+ val projectExecTransformer = writeFiles
+ .find(_.isInstanceOf[ProjectExecTransformer])
+ .get
+ .asInstanceOf[ProjectExecTransformer]
+ projectExecTransformer.projectList.find(_.toString().contains("empty2null"))
+
+ // The partition column should never be empty
+ checkAnswer(
+ spark.sql("SELECT * FROM pt"),
+ spark.sql("SELECT c1, if(c2 = '', null, c2) FROM source"))
+ }
+ }
+ }
+
testGluten("remove v1writes sort and project") {
// Only string type has empty2null expression
withTable("pt") {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]