This is an automated email from the ASF dual-hosted git repository.
gabriellee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new d913ca5731 [Opt](vectorized) Speed up bucket shuffle join hash compute
(#12407)
d913ca5731 is described below
commit d913ca573116139b4cb95f6f649ce2a4b3870788
Author: HappenLee <[email protected]>
AuthorDate: Tue Sep 13 20:19:22 2022 +0800
[Opt](vectorized) Speed up bucket shuffle join hash compute (#12407)
* [Opt](vectorized) Speed up bucket shuffle join hash compute
---
be/src/exec/tablet_info.cpp | 12 ++---
be/src/runtime/data_stream_sender.cpp | 5 +-
be/src/runtime/define_primitive_type.h | 58 ++++++++++++++++++++++
be/src/runtime/primitive_type.h | 38 +-------------
be/src/runtime/raw_value.h | 27 ++--------
be/src/util/hash_util.hpp | 7 +++
be/src/vec/columns/column.h | 21 ++++++++
be/src/vec/columns/column_const.cpp | 18 +++++++
be/src/vec/columns/column_const.h | 3 ++
be/src/vec/columns/column_decimal.cpp | 33 ++++++++++++
be/src/vec/columns/column_decimal.h | 2 +
be/src/vec/columns/column_nullable.cpp | 19 +++++++
be/src/vec/columns/column_nullable.h | 2 +
be/src/vec/columns/column_string.cpp | 20 ++++++++
be/src/vec/columns/column_string.h | 3 ++
be/src/vec/columns/column_vector.cpp | 33 +++++++++++-
be/src/vec/columns/column_vector.h | 3 ++
be/src/vec/sink/vdata_stream_sender.cpp | 18 ++-----
.../org/apache/doris/analysis/DateLiteral.java | 28 ++++++++---
.../org/apache/doris/analysis/DecimalLiteral.java | 16 ++++--
.../org/apache/doris/analysis/LargeIntLiteral.java | 6 +++
21 files changed, 274 insertions(+), 98 deletions(-)
diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp
index c89e03f0be..08e77d9565 100644
--- a/be/src/exec/tablet_info.cpp
+++ b/be/src/exec/tablet_info.cpp
@@ -220,10 +220,7 @@ Status OlapTablePartitionParam::init() {
if (slot != nullptr) {
hash_val = RawValue::zlib_crc32(slot, slot_desc->type(),
hash_val);
} else {
- //nullptr is treat as 0 when hash
- static const int INT_VALUE = 0;
- static const TypeDescriptor INT_TYPE(TYPE_INT);
- hash_val = RawValue::zlib_crc32(&INT_VALUE, INT_TYPE,
hash_val);
+ hash_val = HashUtil::zlib_crc_hash_null(hash_val);
}
}
return hash_val % num_buckets;
@@ -492,16 +489,13 @@ Status VOlapTablePartitionParam::init() {
uint32_t hash_val = 0;
for (int i = 0; i < _distributed_slot_locs.size(); ++i) {
auto slot_desc = _slots[_distributed_slot_locs[i]];
- auto column =
key->first->get_by_position(_distributed_slot_locs[i]).column;
+ auto& column =
key->first->get_by_position(_distributed_slot_locs[i]).column;
auto val = column->get_data_at(key->second);
if (val.data != nullptr) {
hash_val = RawValue::zlib_crc32(val.data, val.size,
slot_desc->type().type,
hash_val);
} else {
- // NULL is treat as 0 when hash
- static const int INT_VALUE = 0;
- static const TypeDescriptor INT_TYPE(TYPE_INT);
- hash_val = RawValue::zlib_crc32(&INT_VALUE, INT_TYPE,
hash_val);
+ hash_val = HashUtil::zlib_crc_hash_null(hash_val);
}
}
return hash_val % num_buckets;
diff --git a/be/src/runtime/data_stream_sender.cpp
b/be/src/runtime/data_stream_sender.cpp
index 13b2308d7b..38d5aa1d51 100644
--- a/be/src/runtime/data_stream_sender.cpp
+++ b/be/src/runtime/data_stream_sender.cpp
@@ -626,10 +626,7 @@ Status DataStreamSender::process_distribute(RuntimeState*
state, TupleRow* row,
if (partition_val != nullptr) {
hash_val = RawValue::zlib_crc32(partition_val,
ctx->root()->type(), hash_val);
} else {
- //nullptr is treat as 0 when hash
- static const int INT_VALUE = 0;
- static const TypeDescriptor INT_TYPE(TYPE_INT);
- hash_val = RawValue::zlib_crc32(&INT_VALUE, INT_TYPE, hash_val);
+ hash_val = HashUtil::zlib_crc_hash_null(hash_val);
}
}
hash_val %= part->distributed_bucket();
diff --git a/be/src/runtime/define_primitive_type.h
b/be/src/runtime/define_primitive_type.h
new file mode 100644
index 0000000000..aa5e140a6e
--- /dev/null
+++ b/be/src/runtime/define_primitive_type.h
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+namespace doris {
+enum PrimitiveType {
+ INVALID_TYPE = 0,
+ TYPE_NULL, /* 1 */
+ TYPE_BOOLEAN, /* 2 */
+ TYPE_TINYINT, /* 3 */
+ TYPE_SMALLINT, /* 4 */
+ TYPE_INT, /* 5 */
+ TYPE_BIGINT, /* 6 */
+ TYPE_LARGEINT, /* 7 */
+ TYPE_FLOAT, /* 8 */
+ TYPE_DOUBLE, /* 9 */
+ TYPE_VARCHAR, /* 10 */
+ TYPE_DATE, /* 11 */
+ TYPE_DATETIME, /* 12 */
+ TYPE_BINARY,
+ /* 13 */ // Not implemented
+ TYPE_DECIMAL [[deprecated]], /* 14 */
+ TYPE_CHAR, /* 15 */
+
+ TYPE_STRUCT, /* 16 */
+ TYPE_ARRAY, /* 17 */
+ TYPE_MAP, /* 18 */
+ TYPE_HLL, /* 19 */
+ TYPE_DECIMALV2, /* 20 */
+
+ TYPE_TIME, /* 21 */
+ TYPE_OBJECT, /* 22 */
+ TYPE_STRING, /* 23 */
+ TYPE_QUANTILE_STATE, /* 24 */
+ TYPE_DATEV2, /* 25 */
+ TYPE_DATETIMEV2, /* 26 */
+ TYPE_TIMEV2, /* 27 */
+ TYPE_DECIMAL32, /* 28 */
+ TYPE_DECIMAL64, /* 29 */
+ TYPE_DECIMAL128, /* 30 */
+};
+
+}
diff --git a/be/src/runtime/primitive_type.h b/be/src/runtime/primitive_type.h
index 7fc6a728de..45efe0336c 100644
--- a/be/src/runtime/primitive_type.h
+++ b/be/src/runtime/primitive_type.h
@@ -19,6 +19,7 @@
#include <string>
+#include "runtime/define_primitive_type.h"
#include "vec/columns/column_decimal.h"
#include "vec/columns/columns_number.h"
#include "vec/core/types.h"
@@ -33,43 +34,6 @@ class DateTimeValue;
class DecimalV2Value;
struct StringValue;
-enum PrimitiveType {
- INVALID_TYPE = 0,
- TYPE_NULL, /* 1 */
- TYPE_BOOLEAN, /* 2 */
- TYPE_TINYINT, /* 3 */
- TYPE_SMALLINT, /* 4 */
- TYPE_INT, /* 5 */
- TYPE_BIGINT, /* 6 */
- TYPE_LARGEINT, /* 7 */
- TYPE_FLOAT, /* 8 */
- TYPE_DOUBLE, /* 9 */
- TYPE_VARCHAR, /* 10 */
- TYPE_DATE, /* 11 */
- TYPE_DATETIME, /* 12 */
- TYPE_BINARY,
- /* 13 */ // Not implemented
- TYPE_DECIMAL [[deprecated]], /* 14 */
- TYPE_CHAR, /* 15 */
-
- TYPE_STRUCT, /* 16 */
- TYPE_ARRAY, /* 17 */
- TYPE_MAP, /* 18 */
- TYPE_HLL, /* 19 */
- TYPE_DECIMALV2, /* 20 */
-
- TYPE_TIME, /* 21 */
- TYPE_OBJECT, /* 22 */
- TYPE_STRING, /* 23 */
- TYPE_QUANTILE_STATE, /* 24 */
- TYPE_DATEV2, /* 25 */
- TYPE_DATETIMEV2, /* 26 */
- TYPE_TIMEV2, /* 27 */
- TYPE_DECIMAL32, /* 28 */
- TYPE_DECIMAL64, /* 29 */
- TYPE_DECIMAL128, /* 30 */
-};
-
PrimitiveType convert_type_to_primitive(FunctionContext::Type type);
bool is_enumeration_type(PrimitiveType type);
diff --git a/be/src/runtime/raw_value.h b/be/src/runtime/raw_value.h
index 7c5f990061..990e8b9015 100644
--- a/be/src/runtime/raw_value.h
+++ b/be/src/runtime/raw_value.h
@@ -442,10 +442,15 @@ inline uint32_t RawValue::zlib_crc32(const void* v, const
TypeDescriptor& type,
case TYPE_SMALLINT:
return HashUtil::zlib_crc_hash(v, 2, seed);
case TYPE_INT:
+ case TYPE_DATEV2:
+ case TYPE_DECIMAL32:
return HashUtil::zlib_crc_hash(v, 4, seed);
case TYPE_BIGINT:
+ case TYPE_DATETIMEV2:
+ case TYPE_DECIMAL64:
return HashUtil::zlib_crc_hash(v, 8, seed);
case TYPE_LARGEINT:
+ case TYPE_DECIMAL128:
return HashUtil::zlib_crc_hash(v, 16, seed);
case TYPE_FLOAT:
return HashUtil::zlib_crc_hash(v, 4, seed);
@@ -458,21 +463,6 @@ inline uint32_t RawValue::zlib_crc32(const void* v, const
TypeDescriptor& type,
int len = date_val->to_buffer(buf);
return HashUtil::zlib_crc_hash(buf, len, seed);
}
- case TYPE_DATEV2: {
- const vectorized::DateV2Value<doris::vectorized::DateV2ValueType>*
date_v2_val =
- (const
vectorized::DateV2Value<doris::vectorized::DateV2ValueType>*)v;
- char buf[64];
- int len = date_v2_val->to_buffer(buf);
- return HashUtil::zlib_crc_hash(buf, len, seed);
- }
-
- case TYPE_DATETIMEV2: {
- const vectorized::DateV2Value<doris::vectorized::DateTimeV2ValueType>*
date_v2_val =
- (const
vectorized::DateV2Value<doris::vectorized::DateTimeV2ValueType>*)v;
- char buf[64];
- int len = date_v2_val->to_buffer(buf);
- return HashUtil::zlib_crc_hash(buf, len, seed);
- }
case TYPE_DECIMALV2: {
const DecimalV2Value* dec_val = (const DecimalV2Value*)v;
@@ -481,13 +471,6 @@ inline uint32_t RawValue::zlib_crc32(const void* v, const
TypeDescriptor& type,
seed = HashUtil::zlib_crc_hash(&int_val, sizeof(int_val), seed);
return HashUtil::zlib_crc_hash(&frac_val, sizeof(frac_val), seed);
}
-
- case TYPE_DECIMAL32:
- return HashUtil::zlib_crc_hash(v, 4, seed);
- case TYPE_DECIMAL64:
- return HashUtil::zlib_crc_hash(v, 8, seed);
- case TYPE_DECIMAL128:
- return HashUtil::zlib_crc_hash(v, 16, seed);
default:
DCHECK(false) << "invalid type: " << type;
return 0;
diff --git a/be/src/util/hash_util.hpp b/be/src/util/hash_util.hpp
index 3a7ac11704..0ebcc6e11b 100644
--- a/be/src/util/hash_util.hpp
+++ b/be/src/util/hash_util.hpp
@@ -47,6 +47,13 @@ public:
static uint32_t zlib_crc_hash(const void* data, int32_t bytes, uint32_t
hash) {
return crc32(hash, (const unsigned char*)data, bytes);
}
+
+ static uint32_t zlib_crc_hash_null(uint32_t hash) {
+ // NULL is treated as a 4-byte integer 0 when hashing
+ static const int INT_VALUE = 0;
+ return crc32(hash, (const unsigned char*)(&INT_VALUE), 4);
+ }
+
#if defined(__SSE4_2__) || defined(__aarch64__)
// Compute the Crc32 hash for data using SSE4 instructions. The input
hash parameter is
// the current hash/seed value.
diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h
index 71d737274c..6d085fa7c7 100644
--- a/be/src/vec/columns/column.h
+++ b/be/src/vec/columns/column.h
@@ -20,6 +20,7 @@
#pragma once
+#include "runtime/define_primitive_type.h"
#include "vec/common/cow.h"
#include "vec/common/exception.h"
#include "vec/common/pod_array_fwd.h"
@@ -43,6 +44,18 @@ class SipHash;
} \
}
+#define DO_CRC_HASHES_FUNCTION_COLUMN_IMPL()
\
+ if (null_data == nullptr) {
\
+ for (size_t i = 0; i < s; i++) {
\
+ hashes[i] = HashUtil::zlib_crc_hash(&data[i], sizeof(T),
hashes[i]); \
+ }
\
+ } else {
\
+ for (size_t i = 0; i < s; i++) {
\
+ if (null_data[i] == 0)
\
+ hashes[i] = HashUtil::zlib_crc_hash(&data[i], sizeof(T),
hashes[i]); \
+ }
\
+ }
+
namespace doris::vectorized {
class Arena;
@@ -322,6 +335,14 @@ public:
LOG(FATAL) << "update_hashes_with_value not supported";
};
+ /// Update the crc32 hash state with the values of n elements at once, to avoid
the per-row virtual function call.
+ /// null_data marks which elements need a hash computed: null_data == nullptr
+ /// means every element is hashed; otherwise only elements with null_data[i] == 0
are hashed.
+ virtual void update_crcs_with_value(std::vector<uint32_t>& hash,
PrimitiveType type,
+ const uint8_t* __restrict null_data =
nullptr) const {
+ LOG(FATAL) << "update_crcs_with_value not supported";
+ };
+
/** Removes elements that don't match the filter.
* Is used in WHERE and HAVING operations.
* If result_size_hint > 0, then makes advance reserve(result_size_hint)
for the result column;
diff --git a/be/src/vec/columns/column_const.cpp
b/be/src/vec/columns/column_const.cpp
index 946074cf61..4bd72ccf8d 100644
--- a/be/src/vec/columns/column_const.cpp
+++ b/be/src/vec/columns/column_const.cpp
@@ -20,6 +20,7 @@
#include "vec/columns/column_const.h"
+#include "runtime/raw_value.h"
#include "vec/columns/columns_common.h"
#include "vec/common/pod_array.h"
#include "vec/common/sip_hash.h"
@@ -103,6 +104,23 @@ void
ColumnConst::update_hashes_with_value(std::vector<SipHash>& hashes,
}
}
+void ColumnConst::update_crcs_with_value(std::vector<uint32_t>& hashes,
doris::PrimitiveType type,
+ const uint8_t* __restrict null_data)
const {
+ DCHECK(null_data == nullptr);
+ DCHECK(hashes.size() == size());
+ auto real_data = data->get_data_at(0);
+ if (real_data.data == nullptr) {
+ for (int i = 0; i < hashes.size(); ++i) {
+ hashes[i] = HashUtil::zlib_crc_hash_null(hashes[i]);
+ }
+ } else {
+ for (int i = 0; i < hashes.size(); ++i) {
+ hashes[i] = RawValue::zlib_crc32(real_data.data, real_data.size,
TypeDescriptor {type},
+ hashes[i]);
+ }
+ }
+}
+
MutableColumns ColumnConst::scatter(ColumnIndex num_columns, const Selector&
selector) const {
if (s != selector.size()) {
LOG(FATAL) << fmt::format("Size of selector ({}) doesn't match size of
column ({})",
diff --git a/be/src/vec/columns/column_const.h
b/be/src/vec/columns/column_const.h
index be7f56ab23..422316075b 100644
--- a/be/src/vec/columns/column_const.h
+++ b/be/src/vec/columns/column_const.h
@@ -135,6 +135,9 @@ public:
void update_hashes_with_value(std::vector<SipHash>& hashes,
const uint8_t* __restrict null_data) const
override;
+ void update_crcs_with_value(std::vector<uint32_t>& hashes, PrimitiveType
type,
+ const uint8_t* __restrict null_data) const
override;
+
ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const
override;
ColumnPtr replicate(const Offsets& offsets) const override;
void replicate(const uint32_t* counts, size_t target_size, IColumn&
column) const override;
diff --git a/be/src/vec/columns/column_decimal.cpp
b/be/src/vec/columns/column_decimal.cpp
index ea904c8c30..9b6470a371 100644
--- a/be/src/vec/columns/column_decimal.cpp
+++ b/be/src/vec/columns/column_decimal.cpp
@@ -127,6 +127,39 @@ void
ColumnDecimal<T>::update_hashes_with_value(std::vector<SipHash>& hashes,
SIP_HASHES_FUNCTION_COLUMN_IMPL();
}
+template <typename T>
+void ColumnDecimal<T>::update_crcs_with_value(std::vector<uint32_t>& hashes,
PrimitiveType type,
+ const uint8_t* __restrict
null_data) const {
+ auto s = hashes.size();
+ DCHECK(s == size());
+
+ if constexpr (!std::is_same_v<T, Decimal128>) {
+ DO_CRC_HASHES_FUNCTION_COLUMN_IMPL()
+ } else {
+ if (type == TYPE_DECIMALV2) {
+ auto decimalv2_do_crc = [&](size_t i) {
+ const DecimalV2Value& dec_val = (const DecimalV2Value&)data[i];
+ int64_t int_val = dec_val.int_value();
+ int32_t frac_val = dec_val.frac_value();
+ hashes[i] = HashUtil::zlib_crc_hash(&int_val, sizeof(int_val),
hashes[i]);
+ hashes[i] = HashUtil::zlib_crc_hash(&frac_val,
sizeof(frac_val), hashes[i]);
+ };
+
+ if (null_data == nullptr) {
+ for (size_t i = 0; i < s; i++) {
+ decimalv2_do_crc(i);
+ }
+ } else {
+ for (size_t i = 0; i < s; i++) {
+ if (null_data[i] == 0) decimalv2_do_crc(i);
+ }
+ }
+ } else {
+ DO_CRC_HASHES_FUNCTION_COLUMN_IMPL()
+ }
+ }
+}
+
template <typename T>
void ColumnDecimal<T>::get_permutation(bool reverse, size_t limit, int,
IColumn::Permutation& res) const {
diff --git a/be/src/vec/columns/column_decimal.h
b/be/src/vec/columns/column_decimal.h
index 81e037278c..41dba1827d 100644
--- a/be/src/vec/columns/column_decimal.h
+++ b/be/src/vec/columns/column_decimal.h
@@ -156,6 +156,8 @@ public:
void update_hash_with_value(size_t n, SipHash& hash) const override;
void update_hashes_with_value(std::vector<SipHash>& hash,
const uint8_t* __restrict null_data) const
override;
+ void update_crcs_with_value(std::vector<uint32_t>& hashes, PrimitiveType
type,
+ const uint8_t* __restrict null_data) const
override;
int compare_at(size_t n, size_t m, const IColumn& rhs_, int
nan_direction_hint) const override;
void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
diff --git a/be/src/vec/columns/column_nullable.cpp
b/be/src/vec/columns/column_nullable.cpp
index 4f18163479..29b1887421 100644
--- a/be/src/vec/columns/column_nullable.cpp
+++ b/be/src/vec/columns/column_nullable.cpp
@@ -68,6 +68,25 @@ void
ColumnNullable::update_hashes_with_value(std::vector<SipHash>& hashes,
}
}
+void ColumnNullable::update_crcs_with_value(std::vector<uint32_t>& hashes,
+ doris::PrimitiveType type,
+ const uint8_t* __restrict
null_data) const {
+ DCHECK(null_data == nullptr);
+ auto s = hashes.size();
+ DCHECK(s == size());
+ auto* __restrict real_null_data = assert_cast<const
ColumnUInt8&>(*null_map).get_data().data();
+ if (!has_null()) {
+ nested_column->update_crcs_with_value(hashes, type, nullptr);
+ } else {
+ for (int i = 0; i < s; ++i) {
+ if (real_null_data[i] != 0) {
+ hashes[i] = HashUtil::zlib_crc_hash_null(hashes[i]);
+ }
+ }
+ nested_column->update_crcs_with_value(hashes, type, real_null_data);
+ }
+}
+
MutableColumnPtr ColumnNullable::clone_resized(size_t new_size) const {
MutableColumnPtr new_nested_col =
get_nested_column().clone_resized(new_size);
auto new_null_map = ColumnUInt8::create();
diff --git a/be/src/vec/columns/column_nullable.h
b/be/src/vec/columns/column_nullable.h
index 2a8bf52dd2..523ff337ae 100644
--- a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -161,6 +161,8 @@ public:
void update_hash_with_value(size_t n, SipHash& hash) const override;
void update_hashes_with_value(std::vector<SipHash>& hashes,
const uint8_t* __restrict null_data) const
override;
+ void update_crcs_with_value(std::vector<uint32_t>& hash, PrimitiveType
type,
+ const uint8_t* __restrict null_data) const
override;
void get_extremes(Field& min, Field& max) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector& selector)
const override {
diff --git a/be/src/vec/columns/column_string.cpp
b/be/src/vec/columns/column_string.cpp
index 18f0d74d23..20f0d3c534 100644
--- a/be/src/vec/columns/column_string.cpp
+++ b/be/src/vec/columns/column_string.cpp
@@ -104,6 +104,26 @@ void ColumnString::insert_indices_from(const IColumn& src,
const int* indices_be
}
}
+void ColumnString::update_crcs_with_value(std::vector<uint32_t>& hashes,
doris::PrimitiveType type,
+ const uint8_t* __restrict null_data)
const {
+ auto s = hashes.size();
+ DCHECK(s == size());
+
+ if (null_data == nullptr) {
+ for (size_t i = 0; i < s; i++) {
+ auto data_ref = get_data_at(i);
+ hashes[i] = HashUtil::zlib_crc_hash(data_ref.data, data_ref.size,
hashes[i]);
+ }
+ } else {
+ for (size_t i = 0; i < s; i++) {
+ if (null_data[i] == 0) {
+ auto data_ref = get_data_at(i);
+ hashes[i] = HashUtil::zlib_crc_hash(data_ref.data,
data_ref.size, hashes[i]);
+ }
+ }
+ }
+}
+
ColumnPtr ColumnString::filter(const Filter& filt, ssize_t result_size_hint)
const {
if (offsets.size() == 0) return ColumnString::create();
diff --git a/be/src/vec/columns/column_string.h
b/be/src/vec/columns/column_string.h
index 8c504c6d08..ee8e6cfdd3 100644
--- a/be/src/vec/columns/column_string.h
+++ b/be/src/vec/columns/column_string.h
@@ -256,6 +256,9 @@ public:
SIP_HASHES_FUNCTION_COLUMN_IMPL();
}
+ void update_crcs_with_value(std::vector<uint32_t>& hashes, PrimitiveType
type,
+ const uint8_t* __restrict null_data) const
override;
+
void insert_range_from(const IColumn& src, size_t start, size_t length)
override;
void insert_indices_from(const IColumn& src, const int* indices_begin,
diff --git a/be/src/vec/columns/column_vector.cpp
b/be/src/vec/columns/column_vector.cpp
index d60f8816a1..1678e5d7f4 100644
--- a/be/src/vec/columns/column_vector.cpp
+++ b/be/src/vec/columns/column_vector.cpp
@@ -26,7 +26,6 @@
#include <cmath>
#include <cstring>
-#include "runtime/datetime_value.h"
#include "util/simd/bits.h"
#include "vec/common/arena.h"
#include "vec/common/assert_cast.h"
@@ -119,6 +118,38 @@ void ColumnVector<T>::sort_column(const ColumnSorter*
sorter, EqualFlags& flags,
sorter->template sort_column(static_cast<const Self&>(*this), flags,
perms, range, last_column);
}
+template <typename T>
+void ColumnVector<T>::update_crcs_with_value(std::vector<uint32_t>& hashes,
PrimitiveType type,
+ const uint8_t* __restrict
null_data) const {
+ auto s = hashes.size();
+ DCHECK(s == size());
+
+ if constexpr (!std::is_same_v<T, Int64>) {
+ DO_CRC_HASHES_FUNCTION_COLUMN_IMPL()
+ } else {
+ if (type == TYPE_DATE || type == TYPE_DATETIME) {
+ char buf[64];
+ auto date_convert_do_crc = [&](size_t i) {
+ const DateTimeValue& date_val = (const DateTimeValue&)data[i];
+ auto len = date_val.to_buffer(buf);
+ hashes[i] = HashUtil::zlib_crc_hash(buf, len, hashes[i]);
+ };
+
+ if (null_data == nullptr) {
+ for (size_t i = 0; i < s; i++) {
+ date_convert_do_crc(i);
+ }
+ } else {
+ for (size_t i = 0; i < s; i++) {
+ if (null_data[i] == 0) date_convert_do_crc(i);
+ }
+ }
+ } else {
+ DO_CRC_HASHES_FUNCTION_COLUMN_IMPL()
+ }
+ }
+}
+
template <typename T>
struct ColumnVector<T>::less {
const Self& parent;
diff --git a/be/src/vec/columns/column_vector.h
b/be/src/vec/columns/column_vector.h
index 3a8ec82382..447886d08f 100644
--- a/be/src/vec/columns/column_vector.h
+++ b/be/src/vec/columns/column_vector.h
@@ -250,6 +250,9 @@ public:
void update_hashes_with_value(std::vector<SipHash>& hashes,
const uint8_t* __restrict null_data) const
override;
+ void update_crcs_with_value(std::vector<uint32_t>& hashes, PrimitiveType
type,
+ const uint8_t* __restrict null_data) const
override;
+
size_t byte_size() const override { return data.size() * sizeof(data[0]); }
size_t allocated_bytes() const override { return data.allocated_bytes(); }
diff --git a/be/src/vec/sink/vdata_stream_sender.cpp
b/be/src/vec/sink/vdata_stream_sender.cpp
index 679890a60c..03482ed3b6 100644
--- a/be/src/vec/sink/vdata_stream_sender.cpp
+++ b/be/src/vec/sink/vdata_stream_sender.cpp
@@ -536,24 +536,12 @@ Status VDataStreamSender::send(RuntimeState* state,
Block* block) {
// vectorized calculate hash val
int rows = block->rows();
// for each row, we have a hash_val
- std::vector<size_t> hash_vals(rows);
+ std::vector<uint32_t> hash_vals(rows);
// result[j] means column index, i means rows index
for (int j = 0; j < result_size; ++j) {
- auto& column = block->get_by_position(result[j]).column;
- for (int i = 0; i < rows; ++i) {
- auto val = column->get_data_at(i);
- if (val.data == nullptr) {
- // nullptr is treat as 0 when hash
- static const int INT_VALUE = 0;
- static const TypeDescriptor INT_TYPE(TYPE_INT);
- hash_vals[i] = RawValue::zlib_crc32(&INT_VALUE, INT_TYPE,
hash_vals[i]);
- } else {
- hash_vals[i] = RawValue::zlib_crc32(val.data, val.size,
-
_partition_expr_ctxs[j]->root()->type(),
- hash_vals[i]);
- }
- }
+ block->get_by_position(result[j]).column->update_crcs_with_value(
+ hash_vals, _partition_expr_ctxs[j]->root()->type().type);
}
Block::erase_useless_column(block, column_to_keep);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java
index 91a17cd20c..2bdb3e43ac 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java
@@ -40,6 +40,7 @@ import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
import java.sql.Timestamp;
import java.time.LocalDateTime;
import java.time.Year;
@@ -495,14 +496,27 @@ public class DateLiteral extends LiteralExpr {
// Date column and Datetime column's hash value is not same.
@Override
public ByteBuffer getHashValue(PrimitiveType type) {
- // This hash value should be computed using new String since precision
is introduced to datetime.
- // But it is hard to keep compatibility. So I don't change this
function here.
- String value = convertToString(type);
ByteBuffer buffer;
- try {
- buffer = ByteBuffer.wrap(value.getBytes("UTF-8"));
- } catch (Exception e) {
- throw new RuntimeException(e);
+ if (type == PrimitiveType.DATEV2) {
+ int value = (int) ((year << 9) | (month << 5) | day);
+ buffer = ByteBuffer.allocate(4);
+ buffer.order(ByteOrder.LITTLE_ENDIAN);
+ buffer.putInt(value);
+ } else if (type == PrimitiveType.DATETIMEV2) {
+ long value = (year << 50) | (month << 46) | (day << 41) | (hour
<< 36)
+ | (minute << 30) | (second << 24) | microsecond;
+ buffer = ByteBuffer.allocate(8);
+ buffer.order(ByteOrder.LITTLE_ENDIAN);
+ buffer.putLong(value);
+ } else {
+ // This hash value should be computed using new String since
precision is introduced to datetime.
+ // But it is hard to keep compatibility. So I don't change this
function here.
+ String value = convertToString(type);
+ try {
+ buffer = ByteBuffer.wrap(value.getBytes("UTF-8"));
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
}
return buffer;
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/DecimalLiteral.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/DecimalLiteral.java
index 1c013cf5f1..53ca2ba9ca 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DecimalLiteral.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DecimalLiteral.java
@@ -159,9 +159,6 @@ public class DecimalLiteral extends LiteralExpr {
buffer.putLong(value.longValue());
break;
case DECIMALV2:
- case DECIMAL32:
- case DECIMAL64:
- case DECIMAL128:
buffer = ByteBuffer.allocate(12);
buffer.order(ByteOrder.LITTLE_ENDIAN);
@@ -170,6 +167,19 @@ public class DecimalLiteral extends LiteralExpr {
buffer.putLong(integerValue);
buffer.putInt(fracValue);
break;
+ case DECIMAL32:
+ buffer = ByteBuffer.allocate(4);
+ buffer.order(ByteOrder.LITTLE_ENDIAN);
+ buffer.putInt(value.unscaledValue().intValue());
+ break;
+ case DECIMAL64:
+ buffer = ByteBuffer.allocate(8);
+ buffer.order(ByteOrder.LITTLE_ENDIAN);
+ buffer.putLong(value.unscaledValue().longValue());
+ break;
+ case DECIMAL128:
+ LargeIntLiteral tmp = new
LargeIntLiteral(value.unscaledValue());
+ return tmp.getHashValue(type);
default:
return super.getHashValue(type);
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/LargeIntLiteral.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/LargeIntLiteral.java
index 7cb87669a4..fde708b05b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/LargeIntLiteral.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/LargeIntLiteral.java
@@ -62,6 +62,12 @@ public class LargeIntLiteral extends LiteralExpr {
analysisDone();
}
+ public LargeIntLiteral(BigInteger v) {
+ super();
+ type = Type.LARGEINT;
+ value = v;
+ }
+
public LargeIntLiteral(String value) throws AnalysisException {
super();
BigInteger bigInt;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]