This is an automated email from the ASF dual-hosted git repository.
zhangzc pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new f0e2885af [GLUTEN-5062][CH] Add a UT to ensure that IN filtering can apply on CH primary key (#5072)
f0e2885af is described below
commit f0e2885af60b3eaf2ad7554a95e4bd730a4f03c8
Author: Zhichao Zhang <[email protected]>
AuthorDate: Thu Mar 21 19:27:31 2024 +0800
[GLUTEN-5062][CH] Add a UT to ensure that IN filtering can apply on CH primary key (#5072)
Add a UT to ensure that IN filtering can apply on CH primary key
Close #5062.
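
For context, here is a minimal spark-shell sketch of the behaviour this UT
pins down, assuming a Gluten ClickHouse session and the
lineitem_mergetree_5062 table created in the diff below (the metric names
are the ones the test reads from the scan node):

    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().getOrCreate()

    // An IN predicate on l_shipdate, part of the declared primary key
    // (l_returnflag, l_shipdate), should let the CH backend prune index
    // marks instead of scanning every granule.
    val df = spark.sql(
      """SELECT sum(l_extendedprice)
        |FROM lineitem_mergetree_5062
        |WHERE l_shipdate IN (date'1998-08-15', date'1993-12-05',
        |                     date'1993-03-01')
        |""".stripMargin)
    df.collect()

    // After execution the mergetree scan exposes totalMarksPk and
    // selectedMarksPk; pruning worked if selectedMarksPk is well below
    // totalMarksPk, which the test asserts with exact counts.

Note that the test compares sum(marks) - partCount against totalMarksPk,
which appears to account for each part carrying one trailing final mark.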
---
.../GlutenClickHouseMergeTreeWriteSuite.scala | 91 +++++++++++++++++++++-
1 file changed, 90 insertions(+), 1 deletion(-)
diff --git a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseMergeTreeWriteSuite.scala b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseMergeTreeWriteSuite.scala
index 457c88e34..6750e251c 100644
--- a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseMergeTreeWriteSuite.scala
+++ b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseMergeTreeWriteSuite.scala
@@ -17,7 +17,7 @@
package io.glutenproject.execution
import org.apache.spark.{SPARK_VERSION_SHORT, SparkConf}
-import org.apache.spark.sql.SaveMode
+import org.apache.spark.sql.{DataFrame, SaveMode}
import org.apache.spark.sql.delta.catalog.ClickHouseTableV2
import org.apache.spark.sql.delta.files.TahoeFileIndex
import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
@@ -1452,5 +1452,94 @@ class GlutenClickHouseMergeTreeWriteSuite
assert(addFiles(0).rows == 10)
})
}
+
+ test("GLUTEN-5062: Add a UT to ensure that IN filtering can apply on CH
primary key") {
+ spark.sql(s"""
+ |DROP TABLE IF EXISTS lineitem_mergetree_5062;
+ |""".stripMargin)
+
+ spark.sql(s"""
+ |CREATE TABLE IF NOT EXISTS lineitem_mergetree_5062
+ |(
+ | l_orderkey bigint,
+ | l_partkey bigint,
+ | l_suppkey bigint,
+ | l_linenumber bigint,
+ | l_quantity double,
+ | l_extendedprice double,
+ | l_discount double,
+ | l_tax double,
+ | l_returnflag string,
+ | l_linestatus string,
+ | l_shipdate date,
+ | l_commitdate date,
+ | l_receiptdate date,
+ | l_shipinstruct string,
+ | l_shipmode string,
+ | l_comment string
+ |)
+ |USING clickhouse
+ |TBLPROPERTIES (orderByKey='l_returnflag,l_shipdate',
+ | primaryKey='l_returnflag,l_shipdate')
+ |LOCATION '$basePath/lineitem_mergetree_5062'
+ |""".stripMargin)
+
+ spark.sql(s"""
+ | insert into table lineitem_mergetree_5062
+ | select * from lineitem
+ |""".stripMargin)
+
+ def checkSelectedMarksCnt(df: DataFrame, expectedCnt: Long): Unit = {
+ val scanExec = collect(df.queryExecution.executedPlan) {
+ case f: FileSourceScanExecTransformer => f
+ }
+ assert(scanExec.size == 1)
+
+ val mergetreeScan = scanExec(0)
+ assert(mergetreeScan.nodeName.startsWith("Scan mergetree"))
+
+ val fileIndex = mergetreeScan.relation.location.asInstanceOf[TahoeFileIndex]
+ val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts])
+ assert(
+ (addFiles.map(_.marks).sum - addFiles.size) == mergetreeScan.metrics("totalMarksPk").value)
+ assert(mergetreeScan.metrics("selectedMarksPk").value == expectedCnt)
+ }
+
+ val sqlStr1 =
+ s"""
+ |SELECT
+ | sum(l_extendedprice)
+ |FROM
+ | lineitem_mergetree_5062
+ |WHERE
+ | l_shipdate in (date'1998-08-15', date'1993-12-05', date'1993-03-01')
+ |""".stripMargin
+ runSql(sqlStr1)(
+ df => {
+ val result = df.collect()
+ assert(result.size == 1)
+ assert(result(0).getDouble(0).toString.substring(0, 6).equals("2.6480"))
+
+ checkSelectedMarksCnt(df, 34)
+ })
+
+ val sqlStr2 =
+ s"""
+ |SELECT
+ | sum(l_extendedprice)
+ |FROM
+ | lineitem_mergetree_5062
+ |WHERE
+ | l_returnflag not in ('N', 'A')
+ |""".stripMargin
+ runSql(sqlStr2)(
+ df => {
+ val result = df.collect()
+ assert(result.size == 1)
+ assert(result(0).getDouble(0).toString.substring(0, 6).equals("5.3379"))
+
+ checkSelectedMarksCnt(df, 29)
+ })
+ }
}
// scalastyle:off line.size.limit
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]