This is an automated email from the ASF dual-hosted git repository.

changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 1a34445ff0 [GLUTEN-1632][CH]Daily Update Clickhouse Version (20241030) 
(#7720)
1a34445ff0 is described below

commit 1a34445ff071dddab119ec735f06afa9bc536d4e
Author: Kyligence Git <[email protected]>
AuthorDate: Wed Oct 30 08:04:19 2024 -0500

    [GLUTEN-1632][CH]Daily Update Clickhouse Version (20241030) (#7720)
    
    * [GLUTEN-1632][CH]Daily Update Clickhouse Version (20241030)
    
    * Fix Build due to https://github.com/ClickHouse/ClickHouse/pull/71053
    * Add a case for https://github.com/apache/incubator-gluten/issues/7726
    
    * Try to fix occasional failed case "read data from orc file format", now, 
we don't generate data on the fly, instead we use pre-generated data.
    
    ---------
    
    Co-authored-by: kyligence-git <[email protected]>
    Co-authored-by: Chang Chen <[email protected]>
---
 ...l_data_types_with_non_primitive_type.snappy.orc | Bin 0 -> 6157 bytes
 .../resources/queries/tpch-schema-related/7726.sql | 135 +++++++++++++++++++++
 .../GlutenClickHouseFileFormatSuite.scala          |   8 +-
 ...lutenClickHouseWholeStageTransformerSuite.scala |   1 +
 ...ckHouseTPCHColumnarShuffleParquetAQESuite.scala |  14 ++-
 cpp-ch/clickhouse.version                          |   4 +-
 .../Storages/MergeTree/SparkStorageMergeTree.cpp   |   1 +
 7 files changed, 156 insertions(+), 7 deletions(-)

diff --git 
a/backends-clickhouse/src/test/resources/orc-data/all_data_types_with_non_primitive_type.snappy.orc
 
b/backends-clickhouse/src/test/resources/orc-data/all_data_types_with_non_primitive_type.snappy.orc
new file mode 100644
index 0000000000..a299cbe01d
Binary files /dev/null and 
b/backends-clickhouse/src/test/resources/orc-data/all_data_types_with_non_primitive_type.snappy.orc
 differ
diff --git 
a/backends-clickhouse/src/test/resources/queries/tpch-schema-related/7726.sql 
b/backends-clickhouse/src/test/resources/queries/tpch-schema-related/7726.sql
new file mode 100644
index 0000000000..a613c6a966
--- /dev/null
+++ 
b/backends-clickhouse/src/test/resources/queries/tpch-schema-related/7726.sql
@@ -0,0 +1,135 @@
+select LINEITEM.L_DISCOUNT,
+       PART.P_TYPE,
+       LINEITEM.L_COMMENT,
+       LINEITEM.L_SUPPKEY,
+       PART.P_PARTKEY,
+       PART.P_SIZE,
+       LINEITEM.L_RETURNFLAG,
+       LINEITEM.L_RECEIPTDATE,
+       PART.P_NAME,
+       SUPPLIER.S_COMMENT,
+       LINEITEM.L_ORDERKEY,
+       PART.P_MFGR,
+       SUPPLIER.S_ACCTBAL,
+       SUPPLIER.S_SUPPKEY,
+       LINEITEM.L_SHIPMODE,
+       SUPPLIER.S_NATIONKEY,
+       LINEITEM.L_SHIPDATE,
+       LINEITEM.L_COMMITDATE,
+       SUPPLIER.S_NAME,
+       PART.P_COMMENT,
+       LINEITEM.L_TAX,
+       LINEITEM.L_QUANTITY,
+       LINEITEM.L_PARTKEY,
+       PART.P_CONTAINER,
+       MIN(LINEITEM.L_EXTENDEDPRICE),
+       COUNT(LINEITEM.L_QUANTITY),
+       COUNT(DISTINCT LINEITEM.L_PARTKEY),
+       MIN(LINEITEM.L_TAX),
+       MIN(ORDERS.O_TOTALPRICE),
+       COUNT(LINEITEM.L_EXTENDEDPRICE),
+       COUNT(ORDERS.O_SHIPPRIORITY),
+       COUNT(1),
+       MAX(LINEITEM.L_DISCOUNT)
+from LINEITEM
+         INNER JOIN SUPPLIER AS SUPPLIER ON LINEITEM.L_SUPPKEY = 
SUPPLIER.S_SUPPKEY
+         INNER JOIN PART AS PART ON LINEITEM.L_PARTKEY = PART.P_PARTKEY
+         INNER JOIN ORDERS AS ORDERS ON LINEITEM.L_ORDERKEY = ORDERS.O_ORDERKEY
+where (not (((P_RETAILPRICE is not null or
+              ((S_NATIONKEY is not null and P_MFGR like '%Manufacturer#1') or 
P_BRAND not like 'Brand#11')) or
+             ((S_SUPPKEY not in
+               (1206, 1496, 1191, 2445, 491, 1407, 1969, 261, 1418, 310, 2099, 
1343, 327, 261, 707, 37, 753, 696, 1363,
+                628, 1158, 2239, 26, 1180, 2448, 1698, 2099, 1326, 1247, 1203, 
161, 1698, 310, 692, 491, 1920, 28, 370,
+                370, 261, 2258, 1146, 983, 683, 24, 1611, 5161, 3141, 2258, 
1287, 683, 1720, 1887, 310, 707, 1836, 1287,
+                2065, 1859, 1203, 1611, 1835, 2099, 701, 2314, 692, 1418, 
2367, 425, 1720, 8285, 1969, 1804, 310, 2258,
+                1418, 463, 2048, 368, 1253, 549, 2258, 327, 1973, 817) and 
1300 > S_SUPPKEY) or (S_PHONE not in
+                                                                               
                  ('10-246-381-9259',
+                                                                               
                   '10-211-466-9198',
+                                                                               
                   '10-509-209-3829',
+                                                                               
                   '10-741-929-4244',
+                                                                               
                   '10-393-500-3856',
+                                                                               
                   '10-495-104-1252',
+                                                                               
                   '10-983-665-2259',
+                                                                               
                   '10-295-590-8708',
+                                                                               
                   '10-983-665-2259',
+                                                                               
                   '10-745-572-7198',
+                                                                               
                   '10-384-209-1825',
+                                                                               
                   '10-734-420-5738',
+                                                                               
                   '10-845-970-4551',
+                                                                               
                   '10-630-928-4130',
+                                                                               
                   '10-325-193-7475',
+                                                                               
                   '%10-475-868-5521',
+                                                                               
                   '10-903-990-3612',
+                                                                               
                   '10-352-443-2162%',
+                                                                               
                   '10-842-403-7954',
+                                                                               
                   '10-789-325-3069',
+                                                                               
                   '10-996-906-4890',
+                                                                               
                   '10-404-519-2270',
+                                                                               
                   '10-848-716-8078',
+                                                                               
                   '10-246-381-9259',
+                                                                               
                   '10-262-377-2302',
+                                                                               
                   '10-361-729-1693',
+                                                                               
                   '10-745-572-7198',
+                                                                               
                   '10-384-209-1825',
+                                                                               
                   '10-262-132-6639',
+                                                                               
                   '10-361-729-1693',
+                                                                               
                   '10-746-144-5600',
+                                                                               
                   '10-409-763-8909',
+                                                                               
                   '10-123-465-1292',
+                                                                               
                   '10-745-572-7198%',
+                                                                               
                   '10-599-740-9848',
+                                                                               
                   '10-453-843-1585',
+                                                                               
                   '10-191-563-6127',
+                                                                               
                   '10-848-716-8078',
+                                                                               
                   '10-763-945-1271',
+                                                                               
                   '10-393-500-3856') and
+                                                                               
                  (not (P_NAME not like 'light dark lemon lace medium%' and 
P_NAME is null))))) or
+            ((((S_ADDRESS is null or P_CONTAINER in
+                                     ('LG JAR', 'JUMBO CASE', 'JUMBO CASE', 
'MED BOX', 'WRAP BAG', 'SM CASE',
+                                      'WRAP JAR', 'JUMBO PKG', 'SM CAN', 'SM 
BOX', 'JUMBO CASE', 'MED BOX', 'LG JAR',
+                                      'JUMBO CASE', 'MED DRUM', 'JUMBO PKG', 
'SM CAN', 'WRAP JAR', 'LG CASE', 'LG BAG',
+                                      'SM PACK', 'JUMBO DRUM', 'WRAP BOX', 
'JUMBO CAN', 'LG PKG', 'WRAP CAN',
+                                      'MED PACK', 'SM BOX', 'SM DRUM', 'SM 
PACK', 'MED DRUM', 'MED PACK', 'MED BOX',
+                                      'MED CAN%', 'SM JAR', 'SM CAN', 'JUMBO 
BOX', 'JUMBO BAG', 'LG BAG', 'LG PKG',
+                                      'LG PACK', 'LG BAG', 'JUMBO BOX', 'SM 
BOX', 'JUMBO CAN', 'JUMBO PKG', 'LG BAG',
+                                      'MED BOX', 'JUMBO CASE', 'MED BOX', 'LG 
BAG', 'LG PACK', 'MED BOX', 'LG PKG',
+                                      'SM BOX', 'WRAP BOX', 'LG CASE', 'MED 
PACK', 'LG PKG', '%LG CASE', 'LG JAR',
+                                      'LG BAG', 'LG BOX', 'SM CAN', 'WRAP 
CAN', 'WRAP PACK', 'JUMBO CASE', 'SM BOX',
+                                      'SM PACK', 'WRAP PKG', 'MED CAN', 'SM 
BOX', 'LG CASE', 'JUMBO CAN', 'LG JAR',
+                                      'SM DRUM', 'MED PKG', 'JUMBO BAG', 'SM 
CASE', 'MED BAG', 'SM PACK',
+                                      'SM PACK')) and S_SUPPKEY is not null) 
and (P_PARTKEY not in
+                                                                               
   (1358682, 1592117, 1114403, 839396,
+                                                                               
    1114617, 959268, 1114713, 1358631,
+                                                                               
    806397, 959018, 1114926, 812800,
+                                                                               
    1568237, 959088, 839340, 959419,
+                                                                               
    1115053, 1358740, 1114282) and
+                                                                               
   (S_SUPPKEY between 463 and 1887 or
+                                                                               
    S_SUPPKEY not in
+                                                                               
    (1287, 1422, 1878, 1191, 1804, 476,
+                                                                               
     1097, 1326, 1597, 1158, 261, 1689,
+                                                                               
     1493, 2314, 817, 1097, 2239, 327,
+                                                                               
     1887, 118, 1547, 476, 2131, 1247,
+                                                                               
     1496, 1698, 1717, 454, 1692, 1920,
+                                                                               
     1973, 2010, 1804, 774, 1611, 425,
+                                                                               
     28, 1611, 183, 983, 800, 5915, 1311,
+                                                                               
     24, 2298, 118, 183, 784, 1592, 1549,
+                                                                               
     983, 1283, 1418, 291, 118, 1407,
+                                                                               
     2072, 291, 1180, 1404, 1097, 1724,
+                                                                               
     1611, 692, 491, 316, 161, 2314,
+                                                                               
     1404, 696, 2072, 2072, 491, 1692,
+                                                                               
     764, 742, 118, 425)))) and
+             (P_CONTAINER in
+              ('SM PKG', 'LG PKG', 'LG CASE', 'MED PKG', 'WRAP JAR', 'LG BAG', 
'SM BOX', 'JUMBO BOX', 'SM PKG',
+               'SM PKG', 'JUMBO BOX', 'MED BOX', 'JUMBO PKG', 'WRAP CAN', 'MED 
DRUM', 'MED JAR', 'SM BAG', 'MED CAN',
+               'SM PACK', 'SM CASE', 'MED BAG', 'JUMBO PKG', 'LG CASE', 'SM 
PKG', 'MED BOX', 'LG CASE', 'JUMBO DRUM',
+               'MED BAG', 'JUMBO CASE', 'SM BOX', 'JUMBO PACK', 'WRAP BOX', 
'%JUMBO BOX', 'JUMBO BOX', 'JUMBO CASE',
+               'SM CAN', 'JUMBO BOX', 'SM CAN', 'LG CASE') and P_BRAND is 
null))))
+   or (((P_BRAND is not null or (P_SIZE not in
+                                 (25, 11, 48, 15, 48, 16, 3, 45, 37, 42, 47, 
42, 42, 16, 97, 16, 48, 12, 87, 13, 27, 22,
+                                  42, 37, 50, 9, 34) and S_NATIONKEY >= 0)) or 
955.65 = S_ACCTBAL) or
+       (P_TYPE not like 'MEDIUM POLISHED STEEL%' or (not S_ACCTBAL is null)))
+group by LINEITEM.L_DISCOUNT, PART.P_TYPE, LINEITEM.L_COMMENT, 
LINEITEM.L_SUPPKEY, PART.P_PARTKEY, PART.P_SIZE,
+         LINEITEM.L_RETURNFLAG, LINEITEM.L_RECEIPTDATE, PART.P_NAME, 
SUPPLIER.S_COMMENT, LINEITEM.L_ORDERKEY,
+         PART.P_MFGR, SUPPLIER.S_ACCTBAL, SUPPLIER.S_SUPPKEY, 
LINEITEM.L_SHIPMODE, SUPPLIER.S_NATIONKEY,
+         LINEITEM.L_SHIPDATE, LINEITEM.L_COMMITDATE, SUPPLIER.S_NAME, 
PART.P_COMMENT, LINEITEM.L_TAX,
+         LINEITEM.L_QUANTITY, LINEITEM.L_PARTKEY, PART.P_CONTAINER
\ No newline at end of file
diff --git 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala
 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala
index 4beb8fab5b..e8ddbd12f1 100644
--- 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala
+++ 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala
@@ -1038,8 +1038,7 @@ class GlutenClickHouseFileFormatSuite
   }
 
   test("read data from orc file format") {
-    val filePath = basePath + "/orc_test.orc"
-    // val filePath = "/data2/case_insensitive_column_matching.orc"
+    val filePath = 
s"$orcDataPath/all_data_types_with_non_primitive_type.snappy.orc"
     val orcFileFormat = "orc"
     val sql =
       s"""
@@ -1047,7 +1046,7 @@ class GlutenClickHouseFileFormatSuite
          | from $orcFileFormat.`$filePath`
          | where long_field > 30
          |""".stripMargin
-    testFileFormatBase(filePath, orcFileFormat, sql, df => {})
+    compareResultsAgainstVanillaSpark(sql, compareResult = true, df => {}, 
noFallBack = true)
   }
 
   // TODO: Fix: if the field names has upper case form, it will return null 
value
@@ -1085,8 +1084,9 @@ class GlutenClickHouseFileFormatSuite
       customCheck: DataFrame => Unit,
       noFallBack: Boolean = true
   ): Unit = {
+    val data = genTestData()
     spark
-      .createDataFrame(genTestData())
+      .createDataFrame(data)
       .write
       .mode("overwrite")
       .format(fileFormat)
diff --git 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseWholeStageTransformerSuite.scala
 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseWholeStageTransformerSuite.scala
index 3c77bcba17..b3e1bd21e9 100644
--- 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseWholeStageTransformerSuite.scala
+++ 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseWholeStageTransformerSuite.scala
@@ -157,6 +157,7 @@ class GlutenClickHouseWholeStageTransformerSuite extends 
WholeStageTransformerSu
   }
 
   final protected val rootPath: String = this.getClass.getResource("/").getPath
+  final protected val queryPath: String = s"${rootPath}queries"
   final protected val basePath: String =
     if (UTSystemParameters.diskOutputDataPath.equals("/")) rootPath + 
"tests-working-home"
     else UTSystemParameters.diskOutputDataPath + "/" + rootPath + 
"tests-working-home"
diff --git 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHColumnarShuffleParquetAQESuite.scala
 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHColumnarShuffleParquetAQESuite.scala
index 4ff6cb0d02..885e50b046 100644
--- 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHColumnarShuffleParquetAQESuite.scala
+++ 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHColumnarShuffleParquetAQESuite.scala
@@ -19,12 +19,17 @@ package org.apache.gluten.execution.tpch
 import org.apache.gluten.GlutenConfig
 import org.apache.gluten.execution._
 import org.apache.gluten.extension.GlutenPlan
+import org.apache.gluten.utils.Arm
 
 import org.apache.spark.SparkConf
 import org.apache.spark.sql.catalyst.optimizer.BuildLeft
 import org.apache.spark.sql.execution.InputIteratorTransformer
 import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, 
AdaptiveSparkPlanHelper}
 
+import java.io.File
+
+import scala.io.Source
+
 class GlutenClickHouseTPCHColumnarShuffleParquetAQESuite
   extends GlutenClickHouseTPCHAbstractSuite
   with AdaptiveSparkPlanHelper {
@@ -32,7 +37,7 @@ class GlutenClickHouseTPCHColumnarShuffleParquetAQESuite
   override protected val needCopyParquetToTablePath = true
 
   override protected val tablesPath: String = basePath + "/tpch-data"
-  override protected val tpchQueries: String = rootPath + 
"queries/tpch-queries-ch"
+  override protected val tpchQueries: String = s"$queryPath/tpch-queries-ch"
   override protected val queriesResults: String = rootPath + "queries-output"
 
   /** Run Gluten + ClickHouse Backend with SortShuffleManager */
@@ -349,4 +354,11 @@ class GlutenClickHouseTPCHColumnarShuffleParquetAQESuite
       runQueryAndCompare(sql) { df => }
     }
   }
+
+  ignore("https://github.com/apache/incubator-gluten/issues/7726") {
+    runQueryAndCompare(Arm.withResource(
+      Source.fromFile(new File(s"$queryPath/tpch-schema-related/7726.sql"), 
"UTF-8"))(_.mkString)) {
+      df =>
+    }
+  }
 }
diff --git a/cpp-ch/clickhouse.version b/cpp-ch/clickhouse.version
index 507ba25388..6bdb05c332 100644
--- a/cpp-ch/clickhouse.version
+++ b/cpp-ch/clickhouse.version
@@ -1,3 +1,3 @@
 CH_ORG=Kyligence
-CH_BRANCH=rebase_ch/20241026
-CH_COMMIT=3691d19817d
\ No newline at end of file
+CH_BRANCH=rebase_ch/20241030
+CH_COMMIT=847cfa6237c
\ No newline at end of file
diff --git a/cpp-ch/local-engine/Storages/MergeTree/SparkStorageMergeTree.cpp 
b/cpp-ch/local-engine/Storages/MergeTree/SparkStorageMergeTree.cpp
index 67253ac827..9c90c67f69 100644
--- a/cpp-ch/local-engine/Storages/MergeTree/SparkStorageMergeTree.cpp
+++ b/cpp-ch/local-engine/Storages/MergeTree/SparkStorageMergeTree.cpp
@@ -473,6 +473,7 @@ MergeTreeDataWriter::TemporaryPart 
SparkMergeTreeDataWriter::writeTempPart(
         txn ? txn->tid : Tx::PrehistoricTID,
         false,
         false,
+        false,
         context->getWriteSettings());
 
     out->writeWithPermutation(block, perm_ptr);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to