This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new ad44f2817f2 [opt](exec) change string ser der way to improve performance (#30693)
ad44f2817f2 is described below
commit ad44f2817f26c1f12382f883aeb6119e1fe40ced
Author: HappenLee <[email protected]>
AuthorDate: Thu Feb 1 23:07:07 2024 +0800
[opt](exec) change string ser der way to improve performance (#30693)
---
be/src/vec/data_types/data_type_string.cpp | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/be/src/vec/data_types/data_type_string.cpp b/be/src/vec/data_types/data_type_string.cpp
index 16440675b24..644bfac4c31 100644
--- a/be/src/vec/data_types/data_type_string.cpp
+++ b/be/src/vec/data_types/data_type_string.cpp
@@ -94,8 +94,7 @@ int64_t DataTypeString::get_uncompressed_serialized_bytes(const IColumn& column,
if (auto bytes = data_column.get_chars().size(); bytes <=
SERIALIZED_MEM_SIZE_LIMIT) {
size += bytes;
} else {
- size += sizeof(size_t) +
- std::max(bytes,
streamvbyte_max_compressedbytes(upper_int32(bytes)));
+ size += sizeof(size_t) + std::max(bytes,
(size_t)LZ4_compressBound(bytes));
}
return size;
} else {
@@ -142,9 +141,9 @@ char* DataTypeString::serialize(const IColumn& column, char* buf, int be_exec_ve
buf += value_len;
return buf;
}
- auto encode_size = streamvbyte_encode(
- reinterpret_cast<const
uint32_t*>(data_column.get_chars().data()),
- upper_int32(value_len), (uint8_t*)(buf + sizeof(size_t)));
+ auto encode_size =
+ LZ4_compress_fast(data_column.get_chars().raw_data(), (buf +
sizeof(size_t)),
+ value_len, LZ4_compressBound(value_len), 1);
*reinterpret_cast<size_t*>(buf) = encode_size;
buf += (sizeof(size_t) + encode_size);
return buf;
@@ -195,15 +194,14 @@ const char* DataTypeString::deserialize(const char* buf, IColumn* column,
buf += sizeof(uint64_t);
data.resize(value_len);
- // offsets
+ // values
if (value_len <= SERIALIZED_MEM_SIZE_LIMIT) {
memcpy(data.data(), buf, value_len);
buf += value_len;
} else {
size_t encode_size = *reinterpret_cast<const size_t*>(buf);
buf += sizeof(size_t);
- streamvbyte_decode((const uint8_t*)buf, (uint32_t*)(data.data()),
- upper_int32(value_len));
+ LZ4_decompress_safe(buf, reinterpret_cast<char*>(data.data()),
encode_size, value_len);
buf += encode_size;
}
return buf;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]