This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 090a3423401 [Improvement](function) optimization for substr with ascii
string (#29799)
090a3423401 is described below
commit 090a34234012eeb751cf84d71818e6139f94e356
Author: Pxl <[email protected]>
AuthorDate: Thu Jan 11 11:31:35 2024 +0800
[Improvement](function) optimization for substr with ascii string (#29799)
---
be/src/util/simd/vstring_function.h | 57 ++++++++-
be/src/vec/common/pod_array.h | 8 ++
be/src/vec/functions/function_string.h | 226 ++++++++++++++++-----------------
3 files changed, 173 insertions(+), 118 deletions(-)
diff --git a/be/src/util/simd/vstring_function.h
b/be/src/util/simd/vstring_function.h
index 306a18a6d66..dac964b1b94 100644
--- a/be/src/util/simd/vstring_function.h
+++ b/be/src/util/simd/vstring_function.h
@@ -44,6 +44,54 @@ inline uint8_t get_utf8_byte_length(uint8_t character) {
return UTF8_BYTE_LENGTH[character];
}
+// copy from
https://github.com/lemire/fastvalidate-utf-8/blob/master/include/simdasciicheck.h
+// The function returns true (1) if all chars passed in src are
+// 7-bit values (0x00..0x7F). Otherwise, it returns false (0).
+inline bool validate_ascii_fast(const char* src, size_t len) {
+ size_t i = 0;
+ __m128i has_error = _mm_setzero_si128();
+ if (len >= 16) {
+ for (; i <= len - 16; i += 16) {
+ __m128i current_bytes = _mm_loadu_si128((const __m128i*)(src + i));
+ has_error = _mm_or_si128(has_error, current_bytes);
+ }
+ }
+ int error_mask = _mm_movemask_epi8(has_error);
+
+ char tail_has_error = 0;
+ for (; i < len; i++) {
+ tail_has_error |= src[i];
+ }
+ error_mask |= (tail_has_error & 0x80);
+
+ return !error_mask;
+}
+
+#ifdef __AVX2__
+#include <x86intrin.h>
+// The function returns true (1) if all chars passed in src are
+// 7-bit values (0x00..0x7F). Otherwise, it returns false (0).
+inline bool validate_ascii_fast_avx(const char* src, size_t len) {
+ size_t i = 0;
+ __m256i has_error = _mm256_setzero_si256();
+ if (len >= 32) {
+ for (; i <= len - 32; i += 32) {
+ __m256i current_bytes = _mm256_loadu_si256((const __m256i*)(src +
i));
+ has_error = _mm256_or_si256(has_error, current_bytes);
+ }
+ }
+ int error_mask = _mm256_movemask_epi8(has_error);
+
+ char tail_has_error = 0;
+ for (; i < len; i++) {
+ tail_has_error |= src[i];
+ }
+ error_mask |= (tail_has_error & 0x80);
+
+ return !error_mask;
+}
+#endif
+
namespace simd {
class VStringFunctions {
@@ -219,11 +267,10 @@ public:
// Gcc will do auto simd in this function
static bool is_ascii(const StringRef& str) {
- char or_code = 0;
- for (size_t i = 0; i < str.size; i++) {
- or_code |= str.data[i];
- }
- return !(or_code & 0x80);
+#ifdef __AVX2__
+ return validate_ascii_fast_avx(str.data, str.size);
+#endif
+ return validate_ascii_fast(str.data, str.size);
}
static void reverse(const StringRef& str, StringRef dst) {
diff --git a/be/src/vec/common/pod_array.h b/be/src/vec/common/pod_array.h
index 91642aebd47..0d7cde6503d 100644
--- a/be/src/vec/common/pod_array.h
+++ b/be/src/vec/common/pod_array.h
@@ -480,6 +480,14 @@ public:
this->c_end += bytes_to_copy;
}
+ template <typename It1, typename It2>
+ void insert_assume_reserved_and_allow_overflow(It1 from_begin, It2
from_end) {
+ size_t bytes_to_copy = this->byte_size(from_end - from_begin);
+ memcpy_small_allow_read_write_overflow15(
+ this->c_end, reinterpret_cast<const void*>(&*from_begin),
bytes_to_copy);
+ this->c_end += bytes_to_copy;
+ }
+
void swap(PODArray& rhs) {
#ifndef NDEBUG
this->unprotect();
diff --git a/be/src/vec/functions/function_string.h
b/be/src/vec/functions/function_string.h
index 2b1d5891a66..4363d040cd5 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -17,7 +17,6 @@
#pragma once
-#include <glog/logging.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
@@ -52,6 +51,7 @@
#include "vec/common/memcpy_small.h"
#include "vec/common/pod_array.h"
#include "vec/common/pod_array_fwd.h"
+#include "vec/common/string_utils/string_utils.h"
#include "vec/common/typeid_cast.h"
#include "vec/core/block.h"
#include "vec/core/column_numbers.h"
@@ -120,6 +120,14 @@ struct StringOP {
chars.insert(string_value.data(), string_value.data() +
string_value.size());
offsets[index] = chars.size();
}
+
+ static void push_value_string_reserved_and_allow_overflow(const
std::string_view& string_value,
+ int index,
ColumnString::Chars& chars,
+
ColumnString::Offsets& offsets) {
+ chars.insert_assume_reserved_and_allow_overflow(string_value.data(),
+ string_value.data() +
string_value.size());
+ offsets[index] = chars.size();
+ }
};
struct SubstringUtil {
@@ -147,30 +155,37 @@ struct SubstringUtil {
check_set_nullable(argument_columns[i], null_map, col_const[i]);
}
- auto specific_str_column = assert_cast<const
ColumnString*>(argument_columns[0].get());
- auto specific_start_column =
+ const auto* specific_str_column =
+ assert_cast<const ColumnString*>(argument_columns[0].get());
+ const auto* specific_start_column =
assert_cast<const
ColumnVector<Int32>*>(argument_columns[1].get());
- auto specific_len_column =
+ const auto* specific_len_column =
assert_cast<const
ColumnVector<Int32>*>(argument_columns[2].get());
- if (col_const[1] && col_const[2]) {
- vectors<true>(specific_str_column->get_chars(),
specific_str_column->get_offsets(),
- specific_start_column->get_data(),
specific_len_column->get_data(),
- null_map->get_data(), res->get_chars(),
res->get_offsets());
- } else {
- vectors<false>(specific_str_column->get_chars(),
specific_str_column->get_offsets(),
- specific_start_column->get_data(),
specific_len_column->get_data(),
- null_map->get_data(), res->get_chars(),
res->get_offsets());
+
+ auto vectors = vectors_utf8<false>;
+ bool is_ascii = simd::VStringFunctions::is_ascii(
+ {specific_str_column->get_chars().data(),
specific_str_column->get_chars().size()});
+ if (col_const[1] && col_const[2] && is_ascii) {
+ vectors = vectors_ascii<true>;
+ } else if (col_const[1] && col_const[2]) {
+ vectors = vectors_utf8<true>;
+ } else if (is_ascii) {
+ vectors = vectors_ascii<false>;
}
+ vectors(specific_str_column->get_chars(),
specific_str_column->get_offsets(),
+ specific_start_column->get_data(),
specific_len_column->get_data(),
+ null_map->get_data(), res->get_chars(), res->get_offsets());
+
block.get_by_position(result).column =
ColumnNullable::create(std::move(res), std::move(null_map));
}
private:
- template <bool Const>
- static void vectors(const ColumnString::Chars& chars, const
ColumnString::Offsets& offsets,
- const PaddedPODArray<Int32>& start, const
PaddedPODArray<Int32>& len,
- NullMap& null_map, ColumnString::Chars& res_chars,
- ColumnString::Offsets& res_offsets) {
+ template <bool is_const>
+ static void vectors_utf8(const ColumnString::Chars& chars, const
ColumnString::Offsets& offsets,
+ const PaddedPODArray<Int32>& start, const
PaddedPODArray<Int32>& len,
+ NullMap& null_map, ColumnString::Chars& res_chars,
+ ColumnString::Offsets& res_offsets) {
size_t size = offsets.size();
res_offsets.resize(size);
res_chars.reserve(chars.size());
@@ -179,119 +194,104 @@ private:
PMR::monotonic_buffer_resource pool {buf.data(), buf.size()};
PMR::vector<size_t> index {&pool};
- auto* __restrict data_ptr = chars.data();
- auto* __restrict offset_ptr = offsets.data();
-
- if constexpr (Const) {
- const auto start_value = start[0];
- const auto len_value = len[0];
- if (start_value == 0 || len_value <= 0) {
+ if constexpr (is_const) {
+ if (start[0] == 0 || len[0] <= 0) {
for (size_t i = 0; i < size; ++i) {
StringOP::push_empty_string(i, res_chars, res_offsets);
}
- } else {
- for (size_t i = 0; i < size; ++i) {
- const int str_size = offset_ptr[i] - offset_ptr[i - 1];
- const uint8_t* raw_str = data_ptr + offset_ptr[i - 1];
- // return empty string if start > src.length
- if (start_value > str_size || start_value < -str_size ||
str_size == 0) {
- StringOP::push_empty_string(i, res_chars, res_offsets);
- continue;
- }
- // reference to string_function.cpp: substring
- size_t byte_pos = 0;
- index.clear();
- for (size_t j = 0, char_size = 0;
- j < str_size &&
- (start_value <= 0 || index.size() <= start_value +
len_value);
- j += char_size) {
- char_size = UTF8_BYTE_LENGTH[(unsigned
char)(raw_str)[j]];
- index.push_back(j);
- }
+ return;
+ }
+ }
- int fixed_pos = start_value;
- if (fixed_pos < 0) {
- fixed_pos = str_size + fixed_pos + 1;
- } else if (fixed_pos > index.size()) {
- StringOP::push_null_string(i, res_chars, res_offsets,
null_map);
- continue;
- }
+ for (size_t i = 0; i < size; ++i) {
+ int str_size = offsets[i] - offsets[i - 1];
+ const char* str_data = (char*)chars.data() + offsets[i - 1];
+ int start_value = is_const ? start[0] : start[i];
+ int len_value = is_const ? len[0] : len[i];
- byte_pos = index[fixed_pos - 1];
- int fixed_len = str_size - byte_pos;
- if (fixed_pos + len_value <= index.size()) {
- fixed_len = index[fixed_pos + len_value - 1] -
byte_pos;
- }
+ // return empty string if start > src.length
+ if (start_value > str_size || str_size == 0 || start_value == 0 ||
len_value <= 0) {
+ StringOP::push_empty_string(i, res_chars, res_offsets);
+ continue;
+ }
- if (byte_pos <= str_size && fixed_len > 0) {
- // return StringRef(str.data + byte_pos, fixed_len);
- StringOP::push_value_string(
- std::string_view {reinterpret_cast<const
char*>(raw_str + byte_pos),
- (size_t)fixed_len},
- i, res_chars, res_offsets);
- } else {
- StringOP::push_empty_string(i, res_chars, res_offsets);
- }
+ size_t byte_pos = 0;
+ index.clear();
+ for (size_t j = 0, char_size = 0; j < str_size; j += char_size) {
+ char_size = get_utf8_byte_length(str_data[j]);
+ index.push_back(j);
+ if (start_value > 0 && index.size() > start_value + len_value)
{
+ break;
}
}
- } else {
- PMR::vector<std::pair<const unsigned char*, int>> strs(&pool);
- strs.resize(size);
- for (int i = 0; i < size; ++i) {
- strs[i].first = data_ptr + offset_ptr[i - 1];
- strs[i].second = offset_ptr[i] - offset_ptr[i - 1];
+
+ int fixed_pos = start_value;
+ if (fixed_pos < -(int)index.size()) {
+ StringOP::push_empty_string(i, res_chars, res_offsets);
+ continue;
+ }
+ if (fixed_pos < 0) {
+ fixed_pos = index.size() + fixed_pos + 1;
+ }
+ if (fixed_pos > index.size()) {
+ StringOP::push_null_string(i, res_chars, res_offsets,
null_map);
+ continue;
}
- for (size_t i = 0; i < size; ++i) {
- auto [raw_str, str_size] = strs[i];
- const auto& start_value = start[i];
- const auto& len_value = len[i];
+ byte_pos = index[fixed_pos - 1];
+ size_t fixed_len = str_size - byte_pos;
+ if (fixed_pos + len_value <= index.size()) {
+ fixed_len = index[fixed_pos + len_value - 1] - byte_pos;
+ }
- // return empty string if start > src.length
- if (start_value > str_size || str_size == 0 || start_value ==
0 || len_value <= 0) {
- StringOP::push_empty_string(i, res_chars, res_offsets);
- continue;
- }
- // reference to string_function.cpp: substring
- size_t byte_pos = 0;
- index.clear();
- for (size_t j = 0, char_size = 0; j < str_size; j +=
char_size) {
- char_size = UTF8_BYTE_LENGTH[(unsigned char)(raw_str)[j]];
- index.push_back(j);
- if (start_value > 0 && index.size() > start_value +
len_value) {
- break;
- }
- }
+ if (byte_pos <= str_size && fixed_len > 0) {
+ StringOP::push_value_string_reserved_and_allow_overflow(
+ {str_data + byte_pos, fixed_len}, i, res_chars,
res_offsets);
+ } else {
+ StringOP::push_empty_string(i, res_chars, res_offsets);
+ }
+ }
+ }
- int fixed_pos = start_value;
- if (fixed_pos < -(int)index.size()) {
+ template <bool is_const>
+ static void vectors_ascii(const ColumnString::Chars& chars,
+ const ColumnString::Offsets& offsets,
+ const PaddedPODArray<Int32>& start, const
PaddedPODArray<Int32>& len,
+ NullMap& null_map, ColumnString::Chars&
res_chars,
+ ColumnString::Offsets& res_offsets) {
+ size_t size = offsets.size();
+ res_offsets.resize(size);
+
+ if constexpr (is_const) {
+ if (start[0] == 0 || len[0] <= 0) {
+ for (size_t i = 0; i < size; ++i) {
StringOP::push_empty_string(i, res_chars, res_offsets);
- continue;
- }
- if (fixed_pos < 0) {
- fixed_pos = index.size() + fixed_pos + 1;
- }
- if (fixed_pos > index.size()) {
- StringOP::push_null_string(i, res_chars, res_offsets,
null_map);
- continue;
}
+ return;
+ }
+ res_chars.reserve(std::min(chars.size(), len[0] * size));
+ } else {
+ res_chars.reserve(chars.size());
+ }
- byte_pos = index[fixed_pos - 1];
- int fixed_len = str_size - byte_pos;
- if (fixed_pos + len_value <= index.size()) {
- fixed_len = index[fixed_pos + len_value - 1] - byte_pos;
- }
+ for (size_t i = 0; i < size; ++i) {
+ int str_size = offsets[i] - offsets[i - 1];
+ const char* str_data = (char*)chars.data() + offsets[i - 1];
- if (byte_pos <= str_size && fixed_len > 0) {
- // return StringRef(str.data + byte_pos, fixed_len);
- StringOP::push_value_string(
- std::string_view {reinterpret_cast<const
char*>(raw_str + byte_pos),
- (size_t)fixed_len},
- i, res_chars, res_offsets);
- } else {
- StringOP::push_empty_string(i, res_chars, res_offsets);
- }
+ int start_value = is_const ? start[0] : start[i];
+ int len_value = is_const ? len[0] : len[i];
+
+ if (start_value > str_size || start_value < -str_size || str_size
== 0) {
+ StringOP::push_empty_string(i, res_chars, res_offsets);
+ continue;
+ }
+ int fixed_pos = start_value - 1;
+ if (fixed_pos < 0) {
+ fixed_pos = str_size + fixed_pos + 1;
}
+ size_t fixed_len = std::min(str_size - fixed_pos, len_value);
+ StringOP::push_value_string_reserved_and_allow_overflow(
+ {str_data + fixed_pos, fixed_len}, i, res_chars,
res_offsets);
}
}
};
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]