taiyang-li commented on code in PR #6992:
URL: https://github.com/apache/incubator-gluten/pull/6992#discussion_r1731068967


##########
cpp-ch/local-engine/Functions/SparkFunctionTrim.cpp:
##########
@@ -106,125 +106,114 @@ namespace
         ColumnPtr
         executeImpl(const ColumnsWithTypeAndName & arguments, const 
DataTypePtr & /*result_type*/, size_t input_rows_count) const override
         {
-            const ColumnString * src_str_col = 
checkAndGetColumn<ColumnString>(arguments[0].column.get());
-            if (!src_str_col)
-                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument of 
function {} must be String", getName());
+            const ColumnString * src_col = 
checkAndGetColumn<ColumnString>(arguments[0].column.get());
+            const ColumnConst * src_const_col = 
checkAndGetColumnConst<ColumnString>(arguments[0].column.get());
+            const ColumnString * trim_col = 
checkAndGetColumn<ColumnString>(arguments[1].column.get());
+            const ColumnConst * trim_const_col = 
checkAndGetColumnConst<ColumnString>(arguments[1].column.get());
+
+            String src_const_str;
+            String trim_const_str;
+            if (src_const_col)
+                src_const_str = src_const_col->getValue<String>();
+            if (trim_const_col)
+                trim_const_str = trim_const_col->getValue<String>();
+            if (trim_const_col && trim_const_str.empty()) {
+                return arguments[0].column;
+            }
 
+            // If both arguments are constants, it will be simplified to a 
constant. Skipped here.
 
-            if (const auto * trim_const_str_col = 
checkAndGetColumnConst<ColumnString>(arguments[1].column.get()))
-            {
-                String trim_str = trim_const_str_col->getValue<String>();
-                if (trim_str.empty())
-                    return src_str_col->cloneResized(input_rows_count);
+            auto res_col = ColumnString::create();
+            ColumnString::Chars & res_data = res_col->getChars();
+            ColumnString::Offsets & res_offsets = res_col->getOffsets();
+            res_offsets.resize_exact(input_rows_count);
 
-                auto res_col = ColumnString::create();
-                res_col->reserve(input_rows_count);
-                executeVector(src_str_col->getChars(), 
src_str_col->getOffsets(), res_col->getChars(), res_col->getOffsets(), 
trim_str);
+            // Source column is constant and trim column is not constant
+            if (src_const_col)
+            {
+                res_data.reserve_exact(src_const_str.size() * 
input_rows_count);
+                for (size_t row = 0; row < input_rows_count; ++row)
+                {
+                    StringRef trim_str_ref = trim_col->getDataAt(row);
+                    std::unique_ptr<std::bitset<256>> trim_set = 
buildTrimSet(trim_str_ref.toString());
+                    executeRow(src_const_str.c_str(), src_const_str.size(), 
res_data, res_offsets, row, trim_set);
+                }
                 return std::move(res_col);
             }
-            else if (const auto * trim_str_col = 
checkAndGetColumn<ColumnString>(arguments[1].column.get()))
+
+            // Source column is not constant and trim column is constant
+            if (trim_const_col)
             {
-                auto res_col = ColumnString::create();
-                res_col->reserve(input_rows_count);
-
-                executeVector(
-                    src_str_col->getChars(),
-                    src_str_col->getOffsets(),
-                    res_col->getChars(),
-                    res_col->getOffsets(),
-                    trim_str_col->getChars(),
-                    trim_str_col->getOffsets());
+                res_data.reserve_exact(src_col->getChars().size());
+                std::unique_ptr<std::bitset<256>> trim_set = 
buildTrimSet(trim_const_str);
+                for (size_t row = 0; row < input_rows_count; ++row)
+                {
+                    StringRef src_str_ref = src_col->getDataAt(row);
+                    executeRow(src_str_ref.data, src_str_ref.size, res_data, 
res_offsets, row, trim_set);
+                }
                 return std::move(res_col);
             }
 
-            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument of 
function {} must be String or Const String", getName());
+            // Both columns are not constant
+            res_data.reserve(src_col->getChars().size());
+            for (size_t row = 0; row < input_rows_count; ++row)
+            {
+                StringRef src_str_ref = src_col->getDataAt(row);
+                StringRef trim_str_ref = trim_col->getDataAt(row);
+                std::unique_ptr<std::bitset<256>> trim_set = 
buildTrimSet(trim_str_ref.toString());
+                executeRow(src_str_ref.data, src_str_ref.size, res_data, 
res_offsets, row, trim_set);
+            }
+            return std::move(res_col);
         }
 
     private:
-        void executeVector(
-            const ColumnString::Chars & data,
-            const ColumnString::Offsets & offsets,
+        void executeRow(
+            const char * src,
+            size_t src_size,
             ColumnString::Chars & res_data,
             ColumnString::Offsets & res_offsets,
-            const String & trim_str) const
+            size_t & row,

Review Comment:
   > It looks like adding a 'const' here would be more secure.
   
   I think we just need to remove the reference. Unless we need to modify its 
value, never use a reference for primitive types like size_t.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to