This is an automated email from the ASF dual-hosted git repository.
loneylee pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 49cbb3cf5e [CH] Support deletion vector optimize for mergetree(add ut) (#9762)
49cbb3cf5e is described below
commit 49cbb3cf5e343c57120fdae316ec1fc9e8a958e0
Author: Shuai li <[email protected]>
AuthorDate: Wed May 28 09:50:55 2025 +0800
[CH] Support deletion vector optimize for mergetree(add ut) (#9762)
* [CH] Support deletion vector optimize for mergetree(add ut)
---
.../GlutenDeltaMergeTreeDeletionVectorSuite.scala | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/backends-clickhouse/src-delta-33/test/scala/org/apache/spark/gluten/delta/GlutenDeltaMergeTreeDeletionVectorSuite.scala b/backends-clickhouse/src-delta-33/test/scala/org/apache/spark/gluten/delta/GlutenDeltaMergeTreeDeletionVectorSuite.scala
index 5922a9bf49..d084c3c1d2 100644
--- a/backends-clickhouse/src-delta-33/test/scala/org/apache/spark/gluten/delta/GlutenDeltaMergeTreeDeletionVectorSuite.scala
+++ b/backends-clickhouse/src-delta-33/test/scala/org/apache/spark/gluten/delta/GlutenDeltaMergeTreeDeletionVectorSuite.scala
@@ -21,7 +21,9 @@ import org.apache.gluten.config.GlutenConfig
import org.apache.gluten.execution.{CreateMergeTreeSuite, FileSourceScanExecTransformer}
import org.apache.spark.SparkConf
+import org.apache.spark.sql.delta.files.TahoeFileIndex
import org.apache.spark.sql.delta.stats.PreparedDeltaFileIndex
+import org.apache.spark.sql.execution.datasources.v2.clickhouse.metadata.AddMergeTreeParts
// Some sqls' line length exceeds 100
// scalastyle:off line.size.limit
@@ -110,6 +112,25 @@ class GlutenDeltaMergeTreeDeletionVectorSuite extends CreateMergeTreeSuite {
df1.collect().apply(0).get(0) === 1200650
)
checkFallbackOperators(df1, 0)
+
+ spark.sql(s"optimize $tableName")
+
+ val df2 = spark.sql(s"""
+ | select sum(l_linenumber) from $tableName
+ |""".stripMargin)
+ val result2 = df2.collect()
+ assert(result2.apply(0).get(0) === 1200650)
+
+ val scanExec = collect(df2.queryExecution.executedPlan) {
+ case f: FileSourceScanExecTransformer => f
+ }
+ assertResult(1)(scanExec.size)
+ val mergetreeScan = scanExec.head
+    assert(mergetreeScan.nodeName.startsWith("ScanTransformer mergetree"))
+    val fileIndex = mergetreeScan.relation.location.asInstanceOf[TahoeFileIndex]
+    val addFiles =
+      fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts])
+ assertResult(1)(addFiles.size)
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]