This is an automated email from the ASF dual-hosted git repository.
changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 1a34445ff0 [GLUTEN-1632][CH]Daily Update Clickhouse Version (20241030)
(#7720)
1a34445ff0 is described below
commit 1a34445ff071dddab119ec735f06afa9bc536d4e
Author: Kyligence Git <[email protected]>
AuthorDate: Wed Oct 30 08:04:19 2024 -0500
[GLUTEN-1632][CH]Daily Update Clickhouse Version (20241030) (#7720)
* [GLUTEN-1632][CH]Daily Update Clickhouse Version (20241030)
* Fix Build due to https://github.com/ClickHouse/ClickHouse/pull/71053
* Add a case for https://github.com/apache/incubator-gluten/issues/7726
* Try to fix the occasionally failing test case "read data from orc file format":
we no longer generate data on the fly; instead, we use pre-generated data.
---------
Co-authored-by: kyligence-git <[email protected]>
Co-authored-by: Chang Chen <[email protected]>
---
...l_data_types_with_non_primitive_type.snappy.orc | Bin 0 -> 6157 bytes
.../resources/queries/tpch-schema-related/7726.sql | 135 +++++++++++++++++++++
.../GlutenClickHouseFileFormatSuite.scala | 8 +-
...lutenClickHouseWholeStageTransformerSuite.scala | 1 +
...ckHouseTPCHColumnarShuffleParquetAQESuite.scala | 14 ++-
cpp-ch/clickhouse.version | 4 +-
.../Storages/MergeTree/SparkStorageMergeTree.cpp | 1 +
7 files changed, 156 insertions(+), 7 deletions(-)
diff --git
a/backends-clickhouse/src/test/resources/orc-data/all_data_types_with_non_primitive_type.snappy.orc
b/backends-clickhouse/src/test/resources/orc-data/all_data_types_with_non_primitive_type.snappy.orc
new file mode 100644
index 0000000000..a299cbe01d
Binary files /dev/null and
b/backends-clickhouse/src/test/resources/orc-data/all_data_types_with_non_primitive_type.snappy.orc
differ
diff --git
a/backends-clickhouse/src/test/resources/queries/tpch-schema-related/7726.sql
b/backends-clickhouse/src/test/resources/queries/tpch-schema-related/7726.sql
new file mode 100644
index 0000000000..a613c6a966
--- /dev/null
+++
b/backends-clickhouse/src/test/resources/queries/tpch-schema-related/7726.sql
@@ -0,0 +1,135 @@
+select LINEITEM.L_DISCOUNT,
+ PART.P_TYPE,
+ LINEITEM.L_COMMENT,
+ LINEITEM.L_SUPPKEY,
+ PART.P_PARTKEY,
+ PART.P_SIZE,
+ LINEITEM.L_RETURNFLAG,
+ LINEITEM.L_RECEIPTDATE,
+ PART.P_NAME,
+ SUPPLIER.S_COMMENT,
+ LINEITEM.L_ORDERKEY,
+ PART.P_MFGR,
+ SUPPLIER.S_ACCTBAL,
+ SUPPLIER.S_SUPPKEY,
+ LINEITEM.L_SHIPMODE,
+ SUPPLIER.S_NATIONKEY,
+ LINEITEM.L_SHIPDATE,
+ LINEITEM.L_COMMITDATE,
+ SUPPLIER.S_NAME,
+ PART.P_COMMENT,
+ LINEITEM.L_TAX,
+ LINEITEM.L_QUANTITY,
+ LINEITEM.L_PARTKEY,
+ PART.P_CONTAINER,
+ MIN(LINEITEM.L_EXTENDEDPRICE),
+ COUNT(LINEITEM.L_QUANTITY),
+ COUNT(DISTINCT LINEITEM.L_PARTKEY),
+ MIN(LINEITEM.L_TAX),
+ MIN(ORDERS.O_TOTALPRICE),
+ COUNT(LINEITEM.L_EXTENDEDPRICE),
+ COUNT(ORDERS.O_SHIPPRIORITY),
+ COUNT(1),
+ MAX(LINEITEM.L_DISCOUNT)
+from LINEITEM
+ INNER JOIN SUPPLIER AS SUPPLIER ON LINEITEM.L_SUPPKEY =
SUPPLIER.S_SUPPKEY
+ INNER JOIN PART AS PART ON LINEITEM.L_PARTKEY = PART.P_PARTKEY
+ INNER JOIN ORDERS AS ORDERS ON LINEITEM.L_ORDERKEY = ORDERS.O_ORDERKEY
+where (not (((P_RETAILPRICE is not null or
+ ((S_NATIONKEY is not null and P_MFGR like '%Manufacturer#1') or
P_BRAND not like 'Brand#11')) or
+ ((S_SUPPKEY not in
+ (1206, 1496, 1191, 2445, 491, 1407, 1969, 261, 1418, 310, 2099,
1343, 327, 261, 707, 37, 753, 696, 1363,
+ 628, 1158, 2239, 26, 1180, 2448, 1698, 2099, 1326, 1247, 1203,
161, 1698, 310, 692, 491, 1920, 28, 370,
+ 370, 261, 2258, 1146, 983, 683, 24, 1611, 5161, 3141, 2258,
1287, 683, 1720, 1887, 310, 707, 1836, 1287,
+ 2065, 1859, 1203, 1611, 1835, 2099, 701, 2314, 692, 1418,
2367, 425, 1720, 8285, 1969, 1804, 310, 2258,
+ 1418, 463, 2048, 368, 1253, 549, 2258, 327, 1973, 817) and
1300 > S_SUPPKEY) or (S_PHONE not in
+
('10-246-381-9259',
+
'10-211-466-9198',
+
'10-509-209-3829',
+
'10-741-929-4244',
+
'10-393-500-3856',
+
'10-495-104-1252',
+
'10-983-665-2259',
+
'10-295-590-8708',
+
'10-983-665-2259',
+
'10-745-572-7198',
+
'10-384-209-1825',
+
'10-734-420-5738',
+
'10-845-970-4551',
+
'10-630-928-4130',
+
'10-325-193-7475',
+
'%10-475-868-5521',
+
'10-903-990-3612',
+
'10-352-443-2162%',
+
'10-842-403-7954',
+
'10-789-325-3069',
+
'10-996-906-4890',
+
'10-404-519-2270',
+
'10-848-716-8078',
+
'10-246-381-9259',
+
'10-262-377-2302',
+
'10-361-729-1693',
+
'10-745-572-7198',
+
'10-384-209-1825',
+
'10-262-132-6639',
+
'10-361-729-1693',
+
'10-746-144-5600',
+
'10-409-763-8909',
+
'10-123-465-1292',
+
'10-745-572-7198%',
+
'10-599-740-9848',
+
'10-453-843-1585',
+
'10-191-563-6127',
+
'10-848-716-8078',
+
'10-763-945-1271',
+
'10-393-500-3856') and
+
(not (P_NAME not like 'light dark lemon lace medium%' and
P_NAME is null))))) or
+ ((((S_ADDRESS is null or P_CONTAINER in
+ ('LG JAR', 'JUMBO CASE', 'JUMBO CASE',
'MED BOX', 'WRAP BAG', 'SM CASE',
+ 'WRAP JAR', 'JUMBO PKG', 'SM CAN', 'SM
BOX', 'JUMBO CASE', 'MED BOX', 'LG JAR',
+ 'JUMBO CASE', 'MED DRUM', 'JUMBO PKG',
'SM CAN', 'WRAP JAR', 'LG CASE', 'LG BAG',
+ 'SM PACK', 'JUMBO DRUM', 'WRAP BOX',
'JUMBO CAN', 'LG PKG', 'WRAP CAN',
+ 'MED PACK', 'SM BOX', 'SM DRUM', 'SM
PACK', 'MED DRUM', 'MED PACK', 'MED BOX',
+ 'MED CAN%', 'SM JAR', 'SM CAN', 'JUMBO
BOX', 'JUMBO BAG', 'LG BAG', 'LG PKG',
+ 'LG PACK', 'LG BAG', 'JUMBO BOX', 'SM
BOX', 'JUMBO CAN', 'JUMBO PKG', 'LG BAG',
+ 'MED BOX', 'JUMBO CASE', 'MED BOX', 'LG
BAG', 'LG PACK', 'MED BOX', 'LG PKG',
+ 'SM BOX', 'WRAP BOX', 'LG CASE', 'MED
PACK', 'LG PKG', '%LG CASE', 'LG JAR',
+ 'LG BAG', 'LG BOX', 'SM CAN', 'WRAP
CAN', 'WRAP PACK', 'JUMBO CASE', 'SM BOX',
+ 'SM PACK', 'WRAP PKG', 'MED CAN', 'SM
BOX', 'LG CASE', 'JUMBO CAN', 'LG JAR',
+ 'SM DRUM', 'MED PKG', 'JUMBO BAG', 'SM
CASE', 'MED BAG', 'SM PACK',
+ 'SM PACK')) and S_SUPPKEY is not null)
and (P_PARTKEY not in
+
(1358682, 1592117, 1114403, 839396,
+
1114617, 959268, 1114713, 1358631,
+
806397, 959018, 1114926, 812800,
+
1568237, 959088, 839340, 959419,
+
1115053, 1358740, 1114282) and
+
(S_SUPPKEY between 463 and 1887 or
+
S_SUPPKEY not in
+
(1287, 1422, 1878, 1191, 1804, 476,
+
1097, 1326, 1597, 1158, 261, 1689,
+
1493, 2314, 817, 1097, 2239, 327,
+
1887, 118, 1547, 476, 2131, 1247,
+
1496, 1698, 1717, 454, 1692, 1920,
+
1973, 2010, 1804, 774, 1611, 425,
+
28, 1611, 183, 983, 800, 5915, 1311,
+
24, 2298, 118, 183, 784, 1592, 1549,
+
983, 1283, 1418, 291, 118, 1407,
+
2072, 291, 1180, 1404, 1097, 1724,
+
1611, 692, 491, 316, 161, 2314,
+
1404, 696, 2072, 2072, 491, 1692,
+
764, 742, 118, 425)))) and
+ (P_CONTAINER in
+ ('SM PKG', 'LG PKG', 'LG CASE', 'MED PKG', 'WRAP JAR', 'LG BAG',
'SM BOX', 'JUMBO BOX', 'SM PKG',
+ 'SM PKG', 'JUMBO BOX', 'MED BOX', 'JUMBO PKG', 'WRAP CAN', 'MED
DRUM', 'MED JAR', 'SM BAG', 'MED CAN',
+ 'SM PACK', 'SM CASE', 'MED BAG', 'JUMBO PKG', 'LG CASE', 'SM
PKG', 'MED BOX', 'LG CASE', 'JUMBO DRUM',
+ 'MED BAG', 'JUMBO CASE', 'SM BOX', 'JUMBO PACK', 'WRAP BOX',
'%JUMBO BOX', 'JUMBO BOX', 'JUMBO CASE',
+ 'SM CAN', 'JUMBO BOX', 'SM CAN', 'LG CASE') and P_BRAND is
null))))
+ or (((P_BRAND is not null or (P_SIZE not in
+ (25, 11, 48, 15, 48, 16, 3, 45, 37, 42, 47,
42, 42, 16, 97, 16, 48, 12, 87, 13, 27, 22,
+ 42, 37, 50, 9, 34) and S_NATIONKEY >= 0)) or
955.65 = S_ACCTBAL) or
+ (P_TYPE not like 'MEDIUM POLISHED STEEL%' or (not S_ACCTBAL is null)))
+group by LINEITEM.L_DISCOUNT, PART.P_TYPE, LINEITEM.L_COMMENT,
LINEITEM.L_SUPPKEY, PART.P_PARTKEY, PART.P_SIZE,
+ LINEITEM.L_RETURNFLAG, LINEITEM.L_RECEIPTDATE, PART.P_NAME,
SUPPLIER.S_COMMENT, LINEITEM.L_ORDERKEY,
+ PART.P_MFGR, SUPPLIER.S_ACCTBAL, SUPPLIER.S_SUPPKEY,
LINEITEM.L_SHIPMODE, SUPPLIER.S_NATIONKEY,
+ LINEITEM.L_SHIPDATE, LINEITEM.L_COMMITDATE, SUPPLIER.S_NAME,
PART.P_COMMENT, LINEITEM.L_TAX,
+ LINEITEM.L_QUANTITY, LINEITEM.L_PARTKEY, PART.P_CONTAINER
\ No newline at end of file
diff --git
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala
index 4beb8fab5b..e8ddbd12f1 100644
---
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala
+++
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala
@@ -1038,8 +1038,7 @@ class GlutenClickHouseFileFormatSuite
}
test("read data from orc file format") {
- val filePath = basePath + "/orc_test.orc"
- // val filePath = "/data2/case_insensitive_column_matching.orc"
+ val filePath =
s"$orcDataPath/all_data_types_with_non_primitive_type.snappy.orc"
val orcFileFormat = "orc"
val sql =
s"""
@@ -1047,7 +1046,7 @@ class GlutenClickHouseFileFormatSuite
| from $orcFileFormat.`$filePath`
| where long_field > 30
|""".stripMargin
- testFileFormatBase(filePath, orcFileFormat, sql, df => {})
+ compareResultsAgainstVanillaSpark(sql, compareResult = true, df => {},
noFallBack = true)
}
// TODO: Fix: if the field names has upper case form, it will return null
value
@@ -1085,8 +1084,9 @@ class GlutenClickHouseFileFormatSuite
customCheck: DataFrame => Unit,
noFallBack: Boolean = true
): Unit = {
+ val data = genTestData()
spark
- .createDataFrame(genTestData())
+ .createDataFrame(data)
.write
.mode("overwrite")
.format(fileFormat)
diff --git
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseWholeStageTransformerSuite.scala
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseWholeStageTransformerSuite.scala
index 3c77bcba17..b3e1bd21e9 100644
---
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseWholeStageTransformerSuite.scala
+++
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseWholeStageTransformerSuite.scala
@@ -157,6 +157,7 @@ class GlutenClickHouseWholeStageTransformerSuite extends
WholeStageTransformerSu
}
final protected val rootPath: String = this.getClass.getResource("/").getPath
+ final protected val queryPath: String = s"${rootPath}queries"
final protected val basePath: String =
if (UTSystemParameters.diskOutputDataPath.equals("/")) rootPath +
"tests-working-home"
else UTSystemParameters.diskOutputDataPath + "/" + rootPath +
"tests-working-home"
diff --git
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHColumnarShuffleParquetAQESuite.scala
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHColumnarShuffleParquetAQESuite.scala
index 4ff6cb0d02..885e50b046 100644
---
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHColumnarShuffleParquetAQESuite.scala
+++
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHColumnarShuffleParquetAQESuite.scala
@@ -19,12 +19,17 @@ package org.apache.gluten.execution.tpch
import org.apache.gluten.GlutenConfig
import org.apache.gluten.execution._
import org.apache.gluten.extension.GlutenPlan
+import org.apache.gluten.utils.Arm
import org.apache.spark.SparkConf
import org.apache.spark.sql.catalyst.optimizer.BuildLeft
import org.apache.spark.sql.execution.InputIteratorTransformer
import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec,
AdaptiveSparkPlanHelper}
+import java.io.File
+
+import scala.io.Source
+
class GlutenClickHouseTPCHColumnarShuffleParquetAQESuite
extends GlutenClickHouseTPCHAbstractSuite
with AdaptiveSparkPlanHelper {
@@ -32,7 +37,7 @@ class GlutenClickHouseTPCHColumnarShuffleParquetAQESuite
override protected val needCopyParquetToTablePath = true
override protected val tablesPath: String = basePath + "/tpch-data"
- override protected val tpchQueries: String = rootPath +
"queries/tpch-queries-ch"
+ override protected val tpchQueries: String = s"$queryPath/tpch-queries-ch"
override protected val queriesResults: String = rootPath + "queries-output"
/** Run Gluten + ClickHouse Backend with SortShuffleManager */
@@ -349,4 +354,11 @@ class GlutenClickHouseTPCHColumnarShuffleParquetAQESuite
runQueryAndCompare(sql) { df => }
}
}
+
+ ignore("https://github.com/apache/incubator-gluten/issues/7726") {
+ runQueryAndCompare(Arm.withResource(
+ Source.fromFile(new File(s"$queryPath/tpch-schema-related/7726.sql"),
"UTF-8"))(_.mkString)) {
+ df =>
+ }
+ }
}
diff --git a/cpp-ch/clickhouse.version b/cpp-ch/clickhouse.version
index 507ba25388..6bdb05c332 100644
--- a/cpp-ch/clickhouse.version
+++ b/cpp-ch/clickhouse.version
@@ -1,3 +1,3 @@
CH_ORG=Kyligence
-CH_BRANCH=rebase_ch/20241026
-CH_COMMIT=3691d19817d
\ No newline at end of file
+CH_BRANCH=rebase_ch/20241030
+CH_COMMIT=847cfa6237c
\ No newline at end of file
diff --git a/cpp-ch/local-engine/Storages/MergeTree/SparkStorageMergeTree.cpp
b/cpp-ch/local-engine/Storages/MergeTree/SparkStorageMergeTree.cpp
index 67253ac827..9c90c67f69 100644
--- a/cpp-ch/local-engine/Storages/MergeTree/SparkStorageMergeTree.cpp
+++ b/cpp-ch/local-engine/Storages/MergeTree/SparkStorageMergeTree.cpp
@@ -473,6 +473,7 @@ MergeTreeDataWriter::TemporaryPart
SparkMergeTreeDataWriter::writeTempPart(
txn ? txn->tid : Tx::PrehistoricTID,
false,
false,
+ false,
context->getWriteSettings());
out->writeWithPermutation(block, perm_ptr);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]