This is an automated email from the ASF dual-hosted git repository.
taiyangli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 1ec37bd04 [GLUTEN-6681] [CH]fix array(decimal32) in CH columnar to row
(#6722)
1ec37bd04 is described below
commit 1ec37bd04709db5fc09001aef9bcc5099b4a225f
Author: loudongfeng <[email protected]>
AuthorDate: Thu Aug 8 14:15:54 2024 +0800
[GLUTEN-6681] [CH]fix array(decimal32) in CH columnar to row (#6722)
* [GLUTEN-6681] [CH]fix array(decimal32) in CH columnar to row
map function issue: Unscaled value too large for precision
* fix UT: do not check fallback
* add a test for spark row to CH column
* fix style
* fix checkstyle
* more test cases
---
.../execution/GlutenClickhouseFunctionSuite.scala | 28 ++++++++++++++++++++++
cpp-ch/local-engine/Parser/CHColumnToSparkRow.cpp | 11 ++++++---
2 files changed, 36 insertions(+), 3 deletions(-)
diff --git
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala
index 8853dfc77..1d4d1b6f8 100644
---
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala
+++
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala
@@ -226,4 +226,32 @@ class GlutenClickhouseFunctionSuite extends
GlutenClickHouseTPCHAbstractSuite {
spark.sql("drop table t2")
}
+ test("array decimal32 CH column to row") {
+ compareResultsAgainstVanillaSpark("SELECT array(1.0, 2.0)", true, { _ =>
}, false)
+ compareResultsAgainstVanillaSpark("SELECT map(1.0, '2', 3.0, '4')", true,
{ _ => }, false)
+ }
+
+ test("array decimal32 spark row to CH column") {
+ withTable("test_array_decimal") {
+ sql("""
+ |create table test_array_decimal(val array<decimal(5,1)>)
+ |using parquet
+ |""".stripMargin)
+ sql("""
+ |insert into test_array_decimal
+ |values array(1.0, 2.0), array(3.0, 4.0),
+ |array(5.0, 6.0), array(7.0, 8.0), array(7.0, 7.0)
+ |""".stripMargin)
+      // disable native scan so we will get a Spark row to CH column conversion
+ withSQLConf(GlutenConfig.COLUMNAR_FILESCAN_ENABLED.key -> "false") {
+ val q = "SELECT max(val) from test_array_decimal"
+ compareResultsAgainstVanillaSpark(q, true, { _ => }, false)
+ val q2 = "SELECT max(val[0]) from test_array_decimal"
+ compareResultsAgainstVanillaSpark(q2, true, { _ => }, false)
+ val q3 = "SELECT max(val[1]) from test_array_decimal"
+ compareResultsAgainstVanillaSpark(q3, true, { _ => }, false)
+ }
+ }
+ }
+
}
diff --git a/cpp-ch/local-engine/Parser/CHColumnToSparkRow.cpp
b/cpp-ch/local-engine/Parser/CHColumnToSparkRow.cpp
index 5bb66e4b3..3d5a7731b 100644
--- a/cpp-ch/local-engine/Parser/CHColumnToSparkRow.cpp
+++ b/cpp-ch/local-engine/Parser/CHColumnToSparkRow.cpp
@@ -586,12 +586,11 @@ int64_t
BackingDataLengthCalculator::getArrayElementSize(const DataTypePtr & nes
else if (nested_which.isUInt16() || nested_which.isInt16() ||
nested_which.isDate())
return 2;
else if (
- nested_which.isUInt32() || nested_which.isInt32() ||
nested_which.isFloat32() || nested_which.isDate32()
- || nested_which.isDecimal32())
+ nested_which.isUInt32() || nested_which.isInt32() ||
nested_which.isFloat32() || nested_which.isDate32())
return 4;
else if (
nested_which.isUInt64() || nested_which.isInt64() ||
nested_which.isFloat64() || nested_which.isDateTime64()
- || nested_which.isDecimal64())
+ || nested_which.isDecimal32() || nested_which.isDecimal64())
return 8;
else
return 8;
@@ -702,6 +701,12 @@ int64_t VariableLengthDataWriter::writeArray(size_t
row_idx, const DB::Array & a
auto v = elem.get<Float64>();
writer.unsafeWrite(reinterpret_cast<const char *>(&v),
buffer_address + offset + start + 8 + len_null_bitmap + i * elem_size);
}
+ else if (writer.getWhichDataType().isDecimal32())
+ {
+            // We cannot use get<char>() directly here to process a
Decimal32 field,
+            // because it would read only 4 bytes of data, but Decimal32 occupies 8 bytes
in Spark, which would cause an erroneous conversion.
+ writer.write(elem, buffer_address + offset + start + 8 +
len_null_bitmap + i * elem_size);
+ }
else
writer.unsafeWrite(
reinterpret_cast<const char *>(&elem.get<char>()),
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]