lwz9103 commented on code in PR #6992:
URL: https://github.com/apache/incubator-gluten/pull/6992#discussion_r1730631562
##########
cpp-ch/local-engine/Functions/SparkFunctionTrim.cpp:
##########
@@ -104,127 +104,107 @@ namespace
}
ColumnPtr
- executeImpl(const ColumnsWithTypeAndName & arguments, const
DataTypePtr & /*result_type*/, size_t input_rows_count) const override
+ executeImpl(const ColumnsWithTypeAndName & arguments, const
DataTypePtr & result_type, size_t input_rows_count) const override
{
- const ColumnString * src_str_col =
checkAndGetColumn<ColumnString>(arguments[0].column.get());
- if (!src_str_col)
- throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument of
function {} must be String", getName());
-
+ const ColumnString * src_col =
checkAndGetColumn<ColumnString>(arguments[0].column.get());
+ const ColumnConst * src_const_col =
checkAndGetColumnConst<ColumnString>(arguments[0].column.get());
+ const ColumnString * trim_col =
checkAndGetColumn<ColumnString>(arguments[1].column.get());
+ const ColumnConst * trim_const_col =
checkAndGetColumnConst<ColumnString>(arguments[1].column.get());
+
+ String src_const_str;
+ String trim_const_str;
+ if (src_const_col)
+ src_const_str = src_const_col->getValue<String>();
+ if (trim_const_col)
+ trim_const_str = trim_const_col->getValue<String>();
+ if (trim_const_col && trim_const_str.empty()) {
+ return arguments[0].column->cloneResized(input_rows_count);
+ }
- if (const auto * trim_const_str_col =
checkAndGetColumnConst<ColumnString>(arguments[1].column.get()))
+ if (src_const_col && trim_const_col)
{
- String trim_str = trim_const_str_col->getValue<String>();
- if (trim_str.empty())
- return src_str_col->cloneResized(input_rows_count);
+ const char * dst;
+ size_t dst_size;
+ std::unordered_set<char> trim_set(trim_const_str.begin(),
trim_const_str.end());
+ trim(src_const_str.c_str(), src_const_str.size(), dst,
dst_size, trim_set);
+ return result_type->createColumnConst(input_rows_count,
String(dst, dst_size));
+ }
- auto res_col = ColumnString::create();
- res_col->reserve(input_rows_count);
- executeVector(src_str_col->getChars(),
src_str_col->getOffsets(), res_col->getChars(), res_col->getOffsets(),
trim_str);
+ auto res_col = ColumnString::create();
+ ColumnString::Chars & res_data = res_col->getChars();
+ ColumnString::Offsets & res_offsets = res_col->getOffsets();
+ res_offsets.resize_exact(input_rows_count);
+
+ if (src_const_col)
+ {
+ res_data.reserve_exact(src_const_str.size() *
input_rows_count);
+ for (size_t row = 0; row < input_rows_count; ++row)
+ {
+ StringRef trim_str_ref = trim_col->getDataAt(row);
+ std::unordered_set<char> trim_set(trim_str_ref.data,
trim_str_ref.data + trim_str_ref.size);
Review Comment:
Thanks, these are very useful tips.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]