This is an automated email from the ASF dual-hosted git repository.

zhangzc pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new c87029f52 [GLUTEN-5921][CH] Function trim of trim_character support 
value from column (#5922)
c87029f52 is described below

commit c87029f521489c805bbf718c6e02a0ecc7d4266b
Author: Shuai li <[email protected]>
AuthorDate: Fri May 31 09:45:48 2024 +0800

    [GLUTEN-5921][CH] Function trim of trim_character support value from column 
(#5922)
    
    [CH] Function trim of trim_character support value from column
---
 .../execution/GlutenFunctionValidateSuite.scala    | 18 ++++-
 .../local-engine/Functions/SparkFunctionTrim.cpp   | 76 ++++++++++++++++++----
 2 files changed, 80 insertions(+), 14 deletions(-)

diff --git 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
index 5a1ca6799..cfe8ea95a 100644
--- 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
+++ 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
@@ -708,6 +708,23 @@ class GlutenFunctionValidateSuite extends 
GlutenClickHouseWholeStageTransformerS
 
   }
 
+  test("GLUTEN-5821: trim_character support value from column.") {
+    withTable("trim") {
+      sql("create table trim(a String, b String) using parquet")
+      sql("""
+            |insert into trim values ('aba', 'a'),('bba', 'b'),('abcdef', 
'abcd')
+            |""".stripMargin)
+
+      val sql_str =
+        s"""select
+           |    trim(both b from a)
+           |  from trim
+          """.stripMargin
+
+      runQueryAndCompare(sql_str) { _ => }
+    }
+  }
+
   test("GLUTEN-5897: fix regexp_extract with bracket") {
     withTable("regexp_extract_bracket") {
       sql("create table regexp_extract_bracket(a String) using parquet")
@@ -727,5 +744,4 @@ class GlutenFunctionValidateSuite extends 
GlutenClickHouseWholeStageTransformerS
       runQueryAndCompare(sql_str) { _ => }
     }
   }
-
 }
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionTrim.cpp 
b/cpp-ch/local-engine/Functions/SparkFunctionTrim.cpp
index d8f6be1bf..88ed3f635 100644
--- a/cpp-ch/local-engine/Functions/SparkFunctionTrim.cpp
+++ b/cpp-ch/local-engine/Functions/SparkFunctionTrim.cpp
@@ -77,8 +77,6 @@ namespace
 
         bool isSuitableForShortCircuitArgumentsExecution(const 
DataTypesWithConstInfo & /*arguments*/) const override { return true; }
 
-        ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { 
return {1}; }
-
         DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const 
override
         {
             if (arguments.size() != 2)
@@ -112,19 +110,34 @@ namespace
             if (!src_str_col)
                 throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument of 
function {} must be String", getName());
 
-            const ColumnConst * trim_str_col = 
checkAndGetColumnConst<ColumnString>(arguments[1].column.get());
-            if (!trim_str_col)
-                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument 
of function {} must be Const String", getName());
-
-            String trim_str = trim_str_col->getValue<String>();
-            if (trim_str.empty())
-                return src_str_col->cloneResized(input_rows_count);
 
-            auto res_col = ColumnString::create();
-            res_col->reserve(input_rows_count);
+            if (const auto * trim_const_str_col = 
checkAndGetColumnConst<ColumnString>(arguments[1].column.get()))
+            {
+                String trim_str = trim_const_str_col->getValue<String>();
+                if (trim_str.empty())
+                    return src_str_col->cloneResized(input_rows_count);
+
+                auto res_col = ColumnString::create();
+                res_col->reserve(input_rows_count);
+                executeVector(src_str_col->getChars(), 
src_str_col->getOffsets(), res_col->getChars(), res_col->getOffsets(), 
trim_str);
+                return std::move(res_col);
+            }
+            else if (const auto * trim_str_col = 
checkAndGetColumn<ColumnString>(arguments[1].column.get()))
+            {
+                auto res_col = ColumnString::create();
+                res_col->reserve(input_rows_count);
+
+                executeVector(
+                    src_str_col->getChars(),
+                    src_str_col->getOffsets(),
+                    res_col->getChars(),
+                    res_col->getOffsets(),
+                    trim_str_col->getChars(),
+                    trim_str_col->getOffsets());
+                return std::move(res_col);
+            }
 
-            executeVector(src_str_col->getChars(), src_str_col->getOffsets(), 
res_col->getChars(), res_col->getOffsets(), trim_str);
-            return std::move(res_col);
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument of 
function {} must be String or Const String", getName());
         }
 
     private:
@@ -159,6 +172,43 @@ namespace
             }
         }
 
+        void executeVector(
+            const ColumnString::Chars & data,
+            const ColumnString::Offsets & offsets,
+            ColumnString::Chars & res_data,
+            ColumnString::Offsets & res_offsets,
+            const ColumnString::Chars & trim_data,
+            const ColumnString::Offsets & trim_offsets) const
+        {
+            res_data.reserve_exact(data.size());
+
+            size_t rows = offsets.size();
+            res_offsets.resize_exact(rows);
+
+            size_t prev_offset = 0;
+            size_t prev_trim_str_offset = 0;
+            size_t res_offset = 0;
+
+            const UInt8 * start;
+            size_t length;
+
+            for (size_t i = 0; i < rows; ++i)
+            {
+                std::unordered_set<char> trim_set(
+                    &trim_data[prev_trim_str_offset], 
&trim_data[prev_trim_str_offset] + trim_offsets[i] - prev_trim_str_offset - 1);
+
+                trim(reinterpret_cast<const UInt8 *>(&data[prev_offset]), 
offsets[i] - prev_offset - 1, start, length, trim_set);
+                res_data.resize_exact(res_data.size() + length + 1);
+                memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], 
start, length);
+                res_offset += length + 1;
+                res_data[res_offset - 1] = '\0';
+
+                res_offsets[i] = res_offset;
+                prev_offset = offsets[i];
+                prev_trim_str_offset = trim_offsets[i];
+            }
+        }
+
         void
         trim(const UInt8 * data, size_t size, const UInt8 *& res_data, size_t 
& res_size, const std::unordered_set<char> & trim_set) const
         {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to