This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 9171769144b [Chore](build) add some compile check (#53637)
9171769144b is described below
commit 9171769144b219e5f002f6be625037585a7f7915
Author: Pxl <[email protected]>
AuthorDate: Tue Jul 22 19:52:38 2025 +0800
[Chore](build) add some compile check (#53637)
add some compile check
---
be/src/olap/comparison_predicate.h | 21 +++++-----
be/src/olap/hll.cpp | 9 ++--
be/src/olap/hll.h | 8 ++--
be/src/olap/in_list_predicate.h | 8 ++--
be/src/olap/like_column_predicate.h | 5 +--
be/src/olap/predicate_creator.h | 9 ++--
be/src/olap/utils.cpp | 17 ++++----
be/src/pipeline/dependency.cpp | 2 +-
be/src/pipeline/exec/aggregation_sink_operator.cpp | 16 ++++----
be/src/pipeline/exec/aggregation_sink_operator.h | 6 +--
.../pipeline/exec/aggregation_source_operator.cpp | 8 ++--
be/src/pipeline/exec/aggregation_source_operator.h | 2 +-
.../distinct_streaming_aggregation_operator.cpp | 4 +-
.../exec/distinct_streaming_aggregation_operator.h | 2 +-
be/src/pipeline/exec/hashjoin_build_sink.cpp | 8 ++--
be/src/pipeline/exec/hashjoin_build_sink.h | 4 +-
.../pipeline/exec/join/process_hash_table_probe.h | 4 +-
.../exec/join/process_hash_table_probe_impl.h | 2 +-
.../pipeline/exec/partition_sort_sink_operator.cpp | 2 +-
.../exec/partitioned_aggregation_sink_operator.h | 2 +-
.../exec/streaming_aggregation_operator.cpp | 6 +--
.../pipeline/exec/streaming_aggregation_operator.h | 2 +-
be/src/util/hash_util.hpp | 19 +++++----
be/src/util/murmur_hash3.cpp | 8 ++--
be/src/util/parse_util.cpp | 14 +++----
be/src/util/perf_counters.cpp | 18 ++++----
be/src/util/perf_counters.h | 4 +-
be/src/util/quantile_state.cpp | 22 +++++-----
be/src/vec/common/hash_table/hash_map_context.h | 48 +++++++++++-----------
be/src/vec/common/hash_table/hash_table.h | 11 +++--
be/src/vec/common/hash_table/join_hash_table.h | 12 +++---
31 files changed, 156 insertions(+), 147 deletions(-)
diff --git a/be/src/olap/comparison_predicate.h
b/be/src/olap/comparison_predicate.h
index 2b7f32ba1c2..f93c8844432 100644
--- a/be/src/olap/comparison_predicate.h
+++ b/be/src/olap/comparison_predicate.h
@@ -28,7 +28,7 @@
#include "vec/columns/column_dictionary.h"
namespace doris {
-
+#include "common/compile_check_begin.h"
template <PrimitiveType Type, PredicateType PT>
class ComparisonPredicateBase : public ColumnPredicate {
public:
@@ -215,7 +215,7 @@ public:
sizeof(decimal12_t));
// Datev1 using uint24_t in bloom filter
} else if constexpr (Type == PrimitiveType::TYPE_DATE) {
- uint24_t date_value(_value.to_olap_date());
+ uint24_t date_value(uint32_t(_value.to_olap_date()));
return bf->test_bytes(
const_cast<char*>(reinterpret_cast<const
char*>(&date_value)),
sizeof(uint24_t));
@@ -259,8 +259,8 @@ public:
}
template <bool is_and>
- __attribute__((flatten)) void _evaluate_vec_internal(const
vectorized::IColumn& column,
- uint16_t size, bool*
flags) const {
+ void __attribute__((flatten))
+ _evaluate_vec_internal(const vectorized::IColumn& column, uint16_t size,
bool* flags) const {
uint16_t current_evaluated_rows = 0;
uint16_t current_passed_rows = 0;
if (_can_ignore()) {
@@ -487,10 +487,9 @@ private:
}
template <bool is_nullable, bool is_and, typename TArray, typename TValue>
- __attribute__((flatten)) void _base_loop_vec(uint16_t size, bool*
__restrict bflags,
- const uint8_t* __restrict
null_map,
- const TArray* __restrict
data_array,
- const TValue& value) const {
+ void __attribute__((flatten))
+ _base_loop_vec(uint16_t size, bool* __restrict bflags, const uint8_t*
__restrict null_map,
+ const TArray* __restrict data_array, const TValue& value)
const {
//uint8_t helps compiler to generate vectorized code
auto* flags = reinterpret_cast<uint8_t*>(bflags);
if constexpr (is_and) {
@@ -605,8 +604,8 @@ private:
}
}
- __attribute__((flatten)) int32_t _find_code_from_dictionary_column(
- const vectorized::ColumnDictI32& column) const {
+ int32_t __attribute__((flatten))
+ _find_code_from_dictionary_column(const vectorized::ColumnDictI32& column)
const {
int32_t code = 0;
if (_segment_id_to_cached_code.if_contains(
column.get_rowset_segment_id(),
@@ -644,5 +643,5 @@ private:
_segment_id_to_cached_code;
T _value;
};
-
+#include "common/compile_check_end.h"
} //namespace doris
diff --git a/be/src/olap/hll.cpp b/be/src/olap/hll.cpp
index 2b5d213c952..9f8e38b839b 100644
--- a/be/src/olap/hll.cpp
+++ b/be/src/olap/hll.cpp
@@ -29,7 +29,7 @@ using std::string;
using std::stringstream;
namespace doris {
-
+#include "common/compile_check_begin.h"
HyperLogLog::HyperLogLog(const Slice& src) {
// When deserialize return false, we make this object a empty
if (!deserialize(src)) {
@@ -194,7 +194,7 @@ size_t HyperLogLog::serialize(uint8_t* dst) const {
encode_fixed32_le(ptr, num_non_zero_registers);
ptr += 4;
- for (uint32_t i = 0; i < HLL_REGISTERS_COUNT; ++i) {
+ for (uint16_t i = 0; i < HLL_REGISTERS_COUNT; ++i) {
if (_registers[i] == 0) {
continue;
}
@@ -354,7 +354,8 @@ int64_t HyperLogLog::estimate_cardinality() const {
if (estimate <= num_streams * 2.5 && num_zero_registers != 0) {
// Estimated cardinality is too low. Hll is too inaccurate here,
instead use
// linear counting.
- estimate = num_streams * log(static_cast<float>(num_streams) /
num_zero_registers);
+ estimate = num_streams *
+ log(static_cast<double>(num_streams) /
static_cast<double>(num_zero_registers));
} else if (num_streams == 16384 && estimate < 72000) {
// when Linear Couint change to HyperLogLog according to HyperLogLog
Correction,
// there are relatively large fluctuations, we fixed the problem refer
to redis.
@@ -366,5 +367,5 @@ int64_t HyperLogLog::estimate_cardinality() const {
}
return (int64_t)(estimate + 0.5);
}
-
+#include "common/compile_check_end.h"
} // namespace doris
diff --git a/be/src/olap/hll.h b/be/src/olap/hll.h
index fbf8d62a6cc..776289c7588 100644
--- a/be/src/olap/hll.h
+++ b/be/src/olap/hll.h
@@ -30,14 +30,14 @@
#include "vec/common/hash_table/phmap_fwd_decl.h"
namespace doris {
-
+#include "common/compile_check_begin.h"
struct Slice;
inline const int HLL_COLUMN_PRECISION = 14;
inline const int HLL_ZERO_COUNT_BITS = (64 - HLL_COLUMN_PRECISION);
inline const int HLL_EXPLICIT_INT64_NUM = 160;
inline const int HLL_SPARSE_THRESHOLD = 4096;
-inline const int HLL_REGISTERS_COUNT = 16 * 1024;
+inline const uint16_t HLL_REGISTERS_COUNT = 16 * 1024;
// maximum size in byte of serialized HLL: type(1) + registers (2^14)
inline const int HLL_COLUMN_DEFAULT_LEN = HLL_REGISTERS_COUNT + 1;
@@ -270,7 +270,7 @@ private:
hash_value >>= HLL_COLUMN_PRECISION;
// make sure max first_one_bit is HLL_ZERO_COUNT_BITS + 1
hash_value |= ((uint64_t)1 << HLL_ZERO_COUNT_BITS);
- uint8_t first_one_bit = __builtin_ctzl(hash_value) + 1;
+ auto first_one_bit = uint8_t(__builtin_ctzl(hash_value) + 1);
_registers[idx] = (_registers[idx] < first_one_bit ? first_one_bit :
_registers[idx]);
}
@@ -302,5 +302,5 @@ private:
// it only when it is really needed.
uint8_t* _registers = nullptr;
};
-
+#include "common/compile_check_end.h"
} // namespace doris
diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index a146a64272f..d18d2a71fce 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -53,7 +53,7 @@ struct std::equal_to<doris::uint24_t> {
};
namespace doris {
-
+#include "common/compile_check_begin.h"
/**
* Use HybridSetType can avoid virtual function call in the loop.
* @tparam Type
@@ -326,7 +326,7 @@ public:
}
} else if constexpr (Type == PrimitiveType::TYPE_DATE) {
const T* value = (const T*)(iter->get_value());
- uint24_t date_value(value->to_olap_date());
+ uint24_t date_value(uint32_t(value->to_olap_date()));
if (bf->test_bytes(
const_cast<char*>(reinterpret_cast<const
char*>(&date_value)),
sizeof(uint24_t))) {
@@ -367,7 +367,7 @@ public:
private:
uint16_t _evaluate_inner(const vectorized::IColumn& column, uint16_t* sel,
uint16_t size) const override {
- int64_t new_size = 0;
+ int16_t new_size = 0;
if (column.is_nullable()) {
const auto* nullable_col =
@@ -676,5 +676,5 @@ ColumnPredicate* create_in_list_predicate(uint32_t
column_id,
return _create_in_list_predicate<Type, PT>(column_id, hybrid_set,
char_length);
}
}
-
+#include "common/compile_check_end.h"
} //namespace doris
diff --git a/be/src/olap/like_column_predicate.h
b/be/src/olap/like_column_predicate.h
index 58498c9bfc3..7f58118b0bd 100644
--- a/be/src/olap/like_column_predicate.h
+++ b/be/src/olap/like_column_predicate.h
@@ -144,9 +144,8 @@ private:
}
}
}
-
- __attribute__((flatten)) std::vector<bool>
_find_code_from_dictionary_column(
- const vectorized::ColumnDictI32& column) const {
+ std::vector<bool> __attribute__((flatten))
+ _find_code_from_dictionary_column(const vectorized::ColumnDictI32& column)
const {
std::vector<bool> res;
if (_segment_id_to_cached_res_flags.if_contains(
column.get_rowset_segment_id(),
diff --git a/be/src/olap/predicate_creator.h b/be/src/olap/predicate_creator.h
index f6fd8441f44..ae3783e3de6 100644
--- a/be/src/olap/predicate_creator.h
+++ b/be/src/olap/predicate_creator.h
@@ -35,7 +35,7 @@
#include "util/string_util.h"
namespace doris {
-
+#include "common/compile_check_begin.h"
template <typename ConditionType>
class PredicateCreator {
public:
@@ -95,8 +95,9 @@ private:
static CppType convert(const TabletColumn& column, const std::string&
condition) {
StringParser::ParseResult result =
StringParser::ParseResult::PARSE_SUCCESS;
// return CppType value cast from int128_t
- return CppType(StringParser::string_to_decimal<Type>(
- condition.data(), condition.size(), column.precision(),
column.frac(), &result));
+ return CppType(
+ StringParser::string_to_decimal<Type>(condition.data(),
(int)condition.size(),
+ column.precision(),
column.frac(), &result));
}
};
@@ -324,5 +325,5 @@ inline ColumnPredicate* parse_to_predicate(const
TabletColumn& column, uint32_t
}
return create(column, index, condition.condition_values[0], opposite,
arena);
}
-
+#include "common/compile_check_end.h"
} //namespace doris
diff --git a/be/src/olap/utils.cpp b/be/src/olap/utils.cpp
index 52d05133379..c32851dce35 100644
--- a/be/src/olap/utils.cpp
+++ b/be/src/olap/utils.cpp
@@ -46,14 +46,15 @@
#include "vec/runtime/ipv6_value.h"
namespace doris {
+#include "common/compile_check_begin.h"
using namespace ErrorCode;
uint32_t olap_adler32_init() {
- return adler32(0L, Z_NULL, 0);
+ return (uint32_t)adler32(0, Z_NULL, 0);
}
uint32_t olap_adler32(uint32_t adler, const char* buf, size_t len) {
- return adler32(adler, reinterpret_cast<const Bytef*>(buf), len);
+ return (uint32_t)adler32(adler, reinterpret_cast<const Bytef*>(buf),
(uint32_t)len);
}
Status gen_timestamp_string(std::string* out_string) {
@@ -240,32 +241,32 @@ bool valid_datetime(const std::string& value_str, const
uint32_t scale) {
return false;
}
- int month = strtol(what[2].str().c_str(), nullptr, 10);
+ int64_t month = strtol(what[2].str().c_str(), nullptr, 10);
if (month < 1 || month > 12) {
LOG(WARNING) << "invalid month. [month=" << month << "]";
return false;
}
- int day = strtol(what[3].str().c_str(), nullptr, 10);
+ int64_t day = strtol(what[3].str().c_str(), nullptr, 10);
if (day < 1 || day > 31) {
LOG(WARNING) << "invalid day. [day=" << day << "]";
return false;
}
if (what[4].length()) {
- int hour = strtol(what[5].str().c_str(), nullptr, 10);
+ int64_t hour = strtol(what[5].str().c_str(), nullptr, 10);
if (hour < 0 || hour > 23) {
LOG(WARNING) << "invalid hour. [hour=" << hour << "]";
return false;
}
- int minute = strtol(what[6].str().c_str(), nullptr, 10);
+ int64_t minute = strtol(what[6].str().c_str(), nullptr, 10);
if (minute < 0 || minute > 59) {
LOG(WARNING) << "invalid minute. [minute=" << minute << "]";
return false;
}
- int second = strtol(what[7].str().c_str(), nullptr, 10);
+ int64_t second = strtol(what[7].str().c_str(), nullptr, 10);
if (second < 0 || second > 59) {
LOG(WARNING) << "invalid second. [second=" << second << "]";
return false;
@@ -309,5 +310,5 @@ bool valid_ipv4(const std::string& value_str) {
bool valid_ipv6(const std::string& value_str) {
return IPv6Value::is_valid_string(value_str.c_str(), value_str.size());
}
-
+#include "common/compile_check_end.h"
} // namespace doris
diff --git a/be/src/pipeline/dependency.cpp b/be/src/pipeline/dependency.cpp
index 035178763fb..8b9c6346bab 100644
--- a/be/src/pipeline/dependency.cpp
+++ b/be/src/pipeline/dependency.cpp
@@ -223,7 +223,7 @@ vectorized::MutableColumns
AggSharedState::_get_keys_hash_table() {
using KeyType =
std::decay_t<decltype(agg_method)>::Key;
std::vector<KeyType> keys(size);
- size_t num_rows = 0;
+ uint32_t num_rows = 0;
auto iter = aggregate_data_container->begin();
{
while (iter != aggregate_data_container->end()) {
diff --git a/be/src/pipeline/exec/aggregation_sink_operator.cpp
b/be/src/pipeline/exec/aggregation_sink_operator.cpp
index 00241ffdafc..df1c0a67fca 100644
--- a/be/src/pipeline/exec/aggregation_sink_operator.cpp
+++ b/be/src/pipeline/exec/aggregation_sink_operator.cpp
@@ -285,7 +285,7 @@ Status
AggSinkLocalState::_merge_with_serialized_key_helper(vectorized::Block* b
}
if (limit && !_shared_state->do_sort_limit) {
- _find_in_hash_table(_places.data(), key_columns, rows);
+ _find_in_hash_table(_places.data(), key_columns, (uint32_t)rows);
for (int i = 0; i < Base::_shared_state->aggregate_evaluators.size();
++i) {
if (Base::_shared_state->aggregate_evaluators[i]->is_merge()) {
@@ -327,10 +327,10 @@ Status
AggSinkLocalState::_merge_with_serialized_key_helper(vectorized::Block* b
bool need_do_agg = true;
if (limit) {
need_do_agg = _emplace_into_hash_table_limit(_places.data(),
block, key_locs,
- key_columns, rows);
+ key_columns,
(uint32_t)rows);
rows = block->rows();
} else {
- _emplace_into_hash_table(_places.data(), key_columns, rows);
+ _emplace_into_hash_table(_places.data(), key_columns,
(uint32_t)rows);
}
if (need_do_agg) {
@@ -462,7 +462,7 @@ Status
AggSinkLocalState::_execute_with_serialized_key_helper(vectorized::Block*
}
}
- size_t rows = block->rows();
+ auto rows = (uint32_t)block->rows();
if (_places.size() < rows) {
_places.resize(rows);
}
@@ -526,7 +526,7 @@ size_t AggSinkLocalState::_get_hash_table_size() const {
void AggSinkLocalState::_emplace_into_hash_table(vectorized::AggregateDataPtr*
places,
vectorized::ColumnRawPtrs&
key_columns,
- size_t num_rows) {
+ uint32_t num_rows) {
std::visit(vectorized::Overload {
[&](std::monostate& arg) -> void {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"uninited hash table");
@@ -578,7 +578,7 @@ bool
AggSinkLocalState::_emplace_into_hash_table_limit(vectorized::AggregateData
vectorized::Block*
block,
const std::vector<int>&
key_locs,
vectorized::ColumnRawPtrs& key_columns,
- size_t num_rows) {
+ uint32_t num_rows) {
return std::visit(
vectorized::Overload {
[&](std::monostate& arg) {
@@ -607,7 +607,7 @@ bool
AggSinkLocalState::_emplace_into_hash_table_limit(vectorized::AggregateData
key_columns[i] =
block->get_by_position(key_locs[i]).column.get();
}
- num_rows = block->rows();
+ num_rows = (uint32_t)block->rows();
}
AggState state(key_columns);
@@ -663,7 +663,7 @@ bool
AggSinkLocalState::_emplace_into_hash_table_limit(vectorized::AggregateData
void AggSinkLocalState::_find_in_hash_table(vectorized::AggregateDataPtr*
places,
vectorized::ColumnRawPtrs&
key_columns,
- size_t num_rows) {
+ uint32_t num_rows) {
std::visit(vectorized::Overload {[&](std::monostate& arg) -> void {
throw
doris::Exception(ErrorCode::INTERNAL_ERROR,
"uninited hash
table");
diff --git a/be/src/pipeline/exec/aggregation_sink_operator.h
b/be/src/pipeline/exec/aggregation_sink_operator.h
index 3ce04ec6e0a..ab6e0953c82 100644
--- a/be/src/pipeline/exec/aggregation_sink_operator.h
+++ b/be/src/pipeline/exec/aggregation_sink_operator.h
@@ -82,12 +82,12 @@ protected:
template <bool limit>
Status _execute_with_serialized_key_helper(vectorized::Block* block);
void _find_in_hash_table(vectorized::AggregateDataPtr* places,
- vectorized::ColumnRawPtrs& key_columns, size_t
num_rows);
+ vectorized::ColumnRawPtrs& key_columns, uint32_t
num_rows);
void _emplace_into_hash_table(vectorized::AggregateDataPtr* places,
- vectorized::ColumnRawPtrs& key_columns,
size_t num_rows);
+ vectorized::ColumnRawPtrs& key_columns,
uint32_t num_rows);
bool _emplace_into_hash_table_limit(vectorized::AggregateDataPtr* places,
vectorized::Block* block, const
std::vector<int>& key_locs,
- vectorized::ColumnRawPtrs&
key_columns, size_t num_rows);
+ vectorized::ColumnRawPtrs&
key_columns, uint32_t num_rows);
size_t _get_hash_table_size() const;
template <bool limit, bool for_spill = false>
diff --git a/be/src/pipeline/exec/aggregation_source_operator.cpp
b/be/src/pipeline/exec/aggregation_source_operator.cpp
index 14da686448f..24e13d5c0ec 100644
--- a/be/src/pipeline/exec/aggregation_source_operator.cpp
+++ b/be/src/pipeline/exec/aggregation_source_operator.cpp
@@ -139,7 +139,7 @@ Status
AggLocalState::_get_results_with_serialized_key(RuntimeState* state,
shared_state.values.resize(size + 1);
}
- size_t num_rows = 0;
+ uint32_t num_rows = 0;
shared_state.aggregate_data_container->init_once();
auto& iter =
shared_state.aggregate_data_container->iterator;
@@ -263,7 +263,7 @@ Status
AggLocalState::_get_with_serialized_key_result(RuntimeState* state, vecto
shared_state.values.resize(size);
}
- size_t num_rows = 0;
+ uint32_t num_rows = 0;
shared_state.aggregate_data_container->init_once();
auto& iter =
shared_state.aggregate_data_container->iterator;
@@ -490,7 +490,7 @@ Status
AggLocalState::merge_with_serialized_key_helper(vectorized::Block* block)
key_columns[i] = block->get_by_position(i).column.get();
}
- size_t rows = block->rows();
+ uint32_t rows = (uint32_t)block->rows();
if (_places.size() < rows) {
_places.resize(rows);
}
@@ -544,7 +544,7 @@ size_t
AggSourceOperatorX::get_estimated_memory_size_for_merging(RuntimeState* s
void AggLocalState::_emplace_into_hash_table(vectorized::AggregateDataPtr*
places,
vectorized::ColumnRawPtrs&
key_columns,
- size_t num_rows) {
+ uint32_t num_rows) {
std::visit(
vectorized::Overload {
[&](std::monostate& arg) -> void {
diff --git a/be/src/pipeline/exec/aggregation_source_operator.h
b/be/src/pipeline/exec/aggregation_source_operator.h
index 64d15a1d9df..d2cff32246a 100644
--- a/be/src/pipeline/exec/aggregation_source_operator.h
+++ b/be/src/pipeline/exec/aggregation_source_operator.h
@@ -65,7 +65,7 @@ protected:
}
void _emplace_into_hash_table(vectorized::AggregateDataPtr* places,
- vectorized::ColumnRawPtrs& key_columns,
size_t num_rows);
+ vectorized::ColumnRawPtrs& key_columns,
uint32_t num_rows);
vectorized::PODArray<vectorized::AggregateDataPtr> _places;
vectorized::Arena _agg_arena_pool;
diff --git a/be/src/pipeline/exec/distinct_streaming_aggregation_operator.cpp
b/be/src/pipeline/exec/distinct_streaming_aggregation_operator.cpp
index ebedcaf288e..7d2159d1b89 100644
--- a/be/src/pipeline/exec/distinct_streaming_aggregation_operator.cpp
+++ b/be/src/pipeline/exec/distinct_streaming_aggregation_operator.cpp
@@ -188,7 +188,7 @@ Status
DistinctStreamingAggLocalState::_distinct_pre_agg_with_serialized_key(
}
}
- const size_t rows = in_block->rows();
+ const uint32_t rows = (uint32_t)in_block->rows();
_distinct_row.clear();
if (_parent->cast<DistinctStreamingAggOperatorX>()._is_streaming_preagg &&
low_memory_mode()) {
@@ -282,7 +282,7 @@ void
DistinctStreamingAggLocalState::_make_nullable_output_key(vectorized::Block
void DistinctStreamingAggLocalState::_emplace_into_hash_table_to_distinct(
vectorized::IColumn::Selector& distinct_row,
vectorized::ColumnRawPtrs& key_columns,
- const size_t num_rows) {
+ const uint32_t num_rows) {
std::visit(
vectorized::Overload {
[&](std::monostate& arg) -> void {
diff --git a/be/src/pipeline/exec/distinct_streaming_aggregation_operator.h
b/be/src/pipeline/exec/distinct_streaming_aggregation_operator.h
index bf2be9d850b..3a2ecf5ac4f 100644
--- a/be/src/pipeline/exec/distinct_streaming_aggregation_operator.h
+++ b/be/src/pipeline/exec/distinct_streaming_aggregation_operator.h
@@ -56,7 +56,7 @@ private:
Status _init_hash_method(const vectorized::VExprContextSPtrs& probe_exprs);
void _emplace_into_hash_table_to_distinct(vectorized::IColumn::Selector&
distinct_row,
vectorized::ColumnRawPtrs&
key_columns,
- const size_t num_rows);
+ const uint32_t num_rows);
void _make_nullable_output_key(vectorized::Block* block);
bool _should_expand_preagg_hash_tables();
diff --git a/be/src/pipeline/exec/hashjoin_build_sink.cpp
b/be/src/pipeline/exec/hashjoin_build_sink.cpp
index f0fc3cee4ae..f123f173cc8 100644
--- a/be/src/pipeline/exec/hashjoin_build_sink.cpp
+++ b/be/src/pipeline/exec/hashjoin_build_sink.cpp
@@ -190,8 +190,8 @@ size_t
HashJoinBuildSinkLocalState::get_reserve_mem_size(RuntimeState* state, bo
},
[&](auto&& hash_map_context) {
size_to_reserve +=
hash_map_context.estimated_size(
- raw_ptrs,
block.rows(), true, true,
- bucket_size);
+ raw_ptrs,
(uint32_t)block.rows(), true,
+ true, bucket_size);
}},
_shared_state->hash_table_variant_vector.front()->method_variant);
}
@@ -355,7 +355,7 @@ Status
HashJoinBuildSinkLocalState::process_build_block(RuntimeState* state,
DCHECK(_should_build_hash_table);
auto& p = _parent->cast<HashJoinBuildSinkOperatorX>();
SCOPED_TIMER(_build_table_timer);
- size_t rows = block.rows();
+ auto rows = (uint32_t)block.rows();
if (UNLIKELY(rows == 0)) {
return Status::OK();
}
@@ -389,7 +389,7 @@ Status
HashJoinBuildSinkLocalState::process_build_block(RuntimeState* state,
_set_build_side_has_external_nullmap(block, _build_col_ids);
if (_build_side_has_external_nullmap) {
null_map_val = vectorized::ColumnUInt8::create();
- null_map_val->get_data().assign(rows, (uint8_t)0);
+ null_map_val->get_data().assign((size_t)rows, (uint8_t)0);
}
// Get the key column that needs to be built
diff --git a/be/src/pipeline/exec/hashjoin_build_sink.h
b/be/src/pipeline/exec/hashjoin_build_sink.h
index ff477cd1105..aec6adf084c 100644
--- a/be/src/pipeline/exec/hashjoin_build_sink.h
+++ b/be/src/pipeline/exec/hashjoin_build_sink.h
@@ -179,7 +179,7 @@ private:
template <class HashTableContext>
struct ProcessHashTableBuild {
- ProcessHashTableBuild(size_t rows, vectorized::ColumnRawPtrs&
build_raw_ptrs,
+ ProcessHashTableBuild(uint32_t rows, vectorized::ColumnRawPtrs&
build_raw_ptrs,
HashJoinBuildSinkLocalState* parent, int batch_size,
RuntimeState* state)
: _rows(rows),
_build_raw_ptrs(build_raw_ptrs),
@@ -246,7 +246,7 @@ struct ProcessHashTableBuild {
}
private:
- const size_t _rows;
+ const uint32_t _rows;
vectorized::ColumnRawPtrs& _build_raw_ptrs;
HashJoinBuildSinkLocalState* _parent = nullptr;
int _batch_size;
diff --git a/be/src/pipeline/exec/join/process_hash_table_probe.h
b/be/src/pipeline/exec/join/process_hash_table_probe.h
index 558aaaadf15..68cbf4e4819 100644
--- a/be/src/pipeline/exec/join/process_hash_table_probe.h
+++ b/be/src/pipeline/exec/join/process_hash_table_probe.h
@@ -70,8 +70,8 @@ struct ProcessHashTableProbe {
size_t column_to_keep);
template <typename HashTableType>
- typename HashTableType::State _init_probe_side(HashTableType&
hash_table_ctx, size_t probe_rows,
- const uint8_t* null_map);
+ typename HashTableType::State _init_probe_side(HashTableType&
hash_table_ctx,
+ uint32_t probe_rows, const
uint8_t* null_map);
// Process full outer join/ right join / right semi/anti join to output
the join result
// in hash table
diff --git a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
index 7e181d13db3..0735d1133a5 100644
--- a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
+++ b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
@@ -161,7 +161,7 @@ void
ProcessHashTableProbe<JoinOpType>::probe_side_output_column(vectorized::Mut
template <int JoinOpType>
template <typename HashTableType>
typename HashTableType::State
ProcessHashTableProbe<JoinOpType>::_init_probe_side(
- HashTableType& hash_table_ctx, size_t probe_rows, const uint8_t*
null_map) {
+ HashTableType& hash_table_ctx, uint32_t probe_rows, const uint8_t*
null_map) {
// may over batch size 1 for some outer join case
_probe_indexs.resize(_batch_size + 1);
_build_indexs.resize(_batch_size + 1);
diff --git a/be/src/pipeline/exec/partition_sort_sink_operator.cpp
b/be/src/pipeline/exec/partition_sort_sink_operator.cpp
index c7aa11548b6..ea3b94d4320 100644
--- a/be/src/pipeline/exec/partition_sort_sink_operator.cpp
+++ b/be/src/pipeline/exec/partition_sort_sink_operator.cpp
@@ -218,7 +218,7 @@ Status PartitionSortSinkOperatorX::_emplace_into_hash_table(
using AggState = typename HashMethodType::State;
AggState state(key_columns);
- size_t num_rows = input_block->rows();
+ uint32_t num_rows = (uint32_t)input_block->rows();
agg_method.init_serialized_keys(key_columns, num_rows);
auto creator = [&](const auto& ctor, auto& key, auto&
origin) {
diff --git a/be/src/pipeline/exec/partitioned_aggregation_sink_operator.h
b/be/src/pipeline/exec/partitioned_aggregation_sink_operator.h
index be1e8fa1665..92af2c272d2 100644
--- a/be/src/pipeline/exec/partitioned_aggregation_sink_operator.h
+++ b/be/src/pipeline/exec/partitioned_aggregation_sink_operator.h
@@ -178,7 +178,7 @@ public:
std::vector<vectorized::AggregateDataPtr>& values,
const vectorized::AggregateDataPtr null_key_data) {
SCOPED_TIMER(_spill_serialize_hash_table_timer);
- context.insert_keys_into_columns(keys, key_columns_, keys.size());
+ context.insert_keys_into_columns(keys, key_columns_,
(uint32_t)keys.size());
if (null_key_data) {
// only one key of group by support wrap null key
diff --git a/be/src/pipeline/exec/streaming_aggregation_operator.cpp
b/be/src/pipeline/exec/streaming_aggregation_operator.cpp
index 6e44b95c97f..ed3ea60c009 100644
--- a/be/src/pipeline/exec/streaming_aggregation_operator.cpp
+++ b/be/src/pipeline/exec/streaming_aggregation_operator.cpp
@@ -358,7 +358,7 @@ Status
StreamingAggLocalState::_pre_agg_with_serialized_key(doris::vectorized::B
}
}
- size_t rows = in_block->rows();
+ uint32_t rows = (uint32_t)in_block->rows();
_places.resize(rows);
if (_should_not_do_pre_agg(rows)) {
@@ -467,7 +467,7 @@ Status
StreamingAggLocalState::_get_results_with_serialized_key(RuntimeState* st
_values.resize(size + 1);
}
- size_t num_rows = 0;
+ uint32_t num_rows = 0;
_aggregate_data_container->init_once();
auto& iter = _aggregate_data_container->iterator;
@@ -562,7 +562,7 @@ void
StreamingAggLocalState::_destroy_agg_status(vectorized::AggregateDataPtr da
void
StreamingAggLocalState::_emplace_into_hash_table(vectorized::AggregateDataPtr*
places,
vectorized::ColumnRawPtrs& key_columns,
- const size_t num_rows) {
+ const uint32_t num_rows)
{
std::visit(vectorized::Overload {
[&](std::monostate& arg) -> void {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"uninited hash table");
diff --git a/be/src/pipeline/exec/streaming_aggregation_operator.h
b/be/src/pipeline/exec/streaming_aggregation_operator.h
index 4882db35f20..663431d26b0 100644
--- a/be/src/pipeline/exec/streaming_aggregation_operator.h
+++ b/be/src/pipeline/exec/streaming_aggregation_operator.h
@@ -67,7 +67,7 @@ private:
Status _get_results_with_serialized_key(RuntimeState* state,
vectorized::Block* block,
bool* eos);
void _emplace_into_hash_table(vectorized::AggregateDataPtr* places,
- vectorized::ColumnRawPtrs& key_columns,
const size_t num_rows);
+ vectorized::ColumnRawPtrs& key_columns,
const uint32_t num_rows);
Status _create_agg_status(vectorized::AggregateDataPtr data);
size_t _get_hash_table_size();
diff --git a/be/src/util/hash_util.hpp b/be/src/util/hash_util.hpp
index ba47e51a4bc..ef6c98c4618 100644
--- a/be/src/util/hash_util.hpp
+++ b/be/src/util/hash_util.hpp
@@ -36,18 +36,18 @@
#include "util/sse_util.hpp"
namespace doris {
-
+#include "common/compile_check_begin.h"
// Utility class to compute hash values.
class HashUtil {
public:
static uint32_t zlib_crc_hash(const void* data, uint32_t bytes, uint32_t
hash) {
- return crc32(hash, (const unsigned char*)data, bytes);
+ return (uint32_t)crc32(hash, (const unsigned char*)data, bytes);
}
static uint32_t zlib_crc_hash_null(uint32_t hash) {
// null is treat as 0 when hash
static const int INT_VALUE = 0;
- return crc32(hash, (const unsigned char*)(&INT_VALUE), 4);
+ return (uint32_t)crc32(hash, (const unsigned char*)(&INT_VALUE), 4);
}
#if defined(__SSE4_2__) || defined(__aarch64__)
@@ -362,8 +362,8 @@ public:
template <>
struct std::hash<doris::TUniqueId> {
- std::size_t operator()(const doris::TUniqueId& id) const {
- std::size_t seed = 0;
+ size_t operator()(const doris::TUniqueId& id) const {
+ uint32_t seed = 0;
seed = doris::HashUtil::hash(&id.lo, sizeof(id.lo), seed);
seed = doris::HashUtil::hash(&id.hi, sizeof(id.hi), seed);
return seed;
@@ -373,8 +373,9 @@ struct std::hash<doris::TUniqueId> {
template <>
struct std::hash<doris::TNetworkAddress> {
size_t operator()(const doris::TNetworkAddress& address) const {
- std::size_t seed = 0;
- seed = doris::HashUtil::hash(address.hostname.data(),
address.hostname.size(), seed);
+ uint32_t seed = 0;
+ seed = doris::HashUtil::hash(address.hostname.data(),
(uint32_t)address.hostname.size(),
+ seed);
seed = doris::HashUtil::hash(&address.port, 4, seed);
return seed;
}
@@ -383,7 +384,7 @@ struct std::hash<doris::TNetworkAddress> {
template <>
struct std::hash<std::pair<doris::TUniqueId, int64_t>> {
size_t operator()(const std::pair<doris::TUniqueId, int64_t>& pair) const {
- size_t seed = 0;
+ uint32_t seed = 0;
seed = doris::HashUtil::hash(&pair.first.lo, sizeof(pair.first.lo),
seed);
seed = doris::HashUtil::hash(&pair.first.hi, sizeof(pair.first.hi),
seed);
seed = doris::HashUtil::hash(&pair.second, sizeof(pair.second), seed);
@@ -399,3 +400,5 @@ struct std::hash<std::pair<First, Second>> {
return util_hash::HashLen16(h1, h2);
}
};
+
+#include "common/compile_check_end.h"
diff --git a/be/src/util/murmur_hash3.cpp b/be/src/util/murmur_hash3.cpp
index 299ccebc18e..0cab7ee3047 100644
--- a/be/src/util/murmur_hash3.cpp
+++ b/be/src/util/murmur_hash3.cpp
@@ -10,12 +10,12 @@
#include "murmur_hash3.h"
#include "vec/common/unaligned.h"
-
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
// Microsoft Visual Studio
+#include "common/compile_check_begin.h"
#if defined(_MSC_VER)
#define FORCE_INLINE __forceinline
@@ -89,7 +89,7 @@ FORCE_INLINE uint64_t fmix64(uint64_t k) {
void murmur_hash3_x86_32(const void* key, int64_t len, uint32_t seed, void*
out) {
const uint8_t* data = (const uint8_t*)key;
- const int nblocks = len / 4;
+ const int nblocks = (int)len / 4;
uint32_t h1 = seed;
@@ -439,7 +439,7 @@ void murmur_hash3_x64_128(const void* key, const int len,
const uint32_t seed, v
void murmur_hash3_x64_64(const void* key, const int64_t len, const uint64_t
seed, void* out) {
const uint8_t* data = (const uint8_t*)key;
- const int nblocks = len / 8;
+ const int nblocks = (int)len / 8;
uint64_t h1 = seed;
const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
@@ -503,5 +503,5 @@ void murmur_hash3_x64_64(const void* key, const int64_t
len, const uint64_t seed
((uint64_t*)out)[0] = h1;
}
-
+#include "common/compile_check_end.h"
//-----------------------------------------------------------------------------
diff --git a/be/src/util/parse_util.cpp b/be/src/util/parse_util.cpp
index e1bd796e974..f531170d7b4 100644
--- a/be/src/util/parse_util.cpp
+++ b/be/src/util/parse_util.cpp
@@ -23,7 +23,7 @@
#include "util/string_parser.hpp"
namespace doris {
-
+#include "common/compile_check_begin.h"
int64_t ParseUtil::parse_mem_spec(const std::string& mem_spec_str, int64_t
parent_limit,
int64_t physical_mem, bool* is_percent) {
if (mem_spec_str.empty()) {
@@ -31,7 +31,7 @@ int64_t ParseUtil::parse_mem_spec(const std::string&
mem_spec_str, int64_t paren
}
// Assume last character indicates unit or percent.
- int32_t number_str_len = mem_spec_str.size() - 1;
+ auto number_str_len = mem_spec_str.size() - 1;
*is_percent = false;
int64_t multiplier = -1;
@@ -65,7 +65,7 @@ int64_t ParseUtil::parse_mem_spec(const std::string&
mem_spec_str, int64_t paren
break;
default:
// No unit was given. Default to bytes.
- number_str_len = mem_spec_str.size();
+ number_str_len = (int)mem_spec_str.size();
break;
}
@@ -82,12 +82,12 @@ int64_t ParseUtil::parse_mem_spec(const std::string&
mem_spec_str, int64_t paren
}
if (multiplier != -1) {
- bytes = int64_t(multiplier * limit_val);
+ bytes = int64_t((double)multiplier * limit_val);
} else if (*is_percent) {
if (parent_limit == -1) {
- bytes = int64_t(static_cast<double>(limit_val) / 100.0 *
physical_mem);
+ bytes = int64_t(limit_val / 100.0 * (double)physical_mem);
} else {
- bytes = int64_t(static_cast<double>(limit_val) / 100.0 *
parent_limit);
+ bytes = int64_t(limit_val / 100.0 * (double)parent_limit);
}
}
} else {
@@ -101,7 +101,7 @@ int64_t ParseUtil::parse_mem_spec(const std::string&
mem_spec_str, int64_t paren
auto limit_val_double =
StringParser::string_to_float<double>(mem_spec_str.data(),
number_str_len, &result);
- if (result == StringParser::PARSE_SUCCESS && limit_val_double !=
limit_val) {
+ if (result == StringParser::PARSE_SUCCESS && limit_val_double !=
(double)limit_val) {
return -1; // mem_spec_str is double.
}
diff --git a/be/src/util/perf_counters.cpp b/be/src/util/perf_counters.cpp
index 5c79cfd0da0..5a98eaf0088 100644
--- a/be/src/util/perf_counters.cpp
+++ b/be/src/util/perf_counters.cpp
@@ -39,7 +39,7 @@
#include "util/string_util.h"
namespace doris {
-
+#include "common/compile_check_begin.h"
#define COUNTER_SIZE (sizeof(void*))
#define PRETTY_PRINT_WIDTH 13
@@ -65,8 +65,8 @@ enum PERF_IO_IDX {
// Wrapper around sys call. This syscall is hard to use and this is how it is
recommended
// to be used.
-static inline int sys_perf_event_open(struct perf_event_attr* attr, pid_t pid,
int cpu,
- int group_fd, unsigned long flags) {
+static inline auto sys_perf_event_open(struct perf_event_attr* attr, pid_t
pid, int cpu,
+ int64_t group_fd, unsigned long flags) {
attr->size = sizeof(*attr);
return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}
@@ -210,7 +210,7 @@ bool PerfCounters::init_sys_counter(Counter counter) {
return false;
}
- int fd = sys_perf_event_open(&attr, getpid(), -1, _group_fd, 0);
+ auto fd = sys_perf_event_open(&attr, getpid(), -1, _group_fd, 0);
if (fd < 0) {
return false;
@@ -293,7 +293,7 @@ bool PerfCounters::init_proc_self_status_counter(Counter
counter) {
bool PerfCounters::get_sys_counters(std::vector<int64_t>& buffer) {
for (int i = 0; i < _counters.size(); i++) {
if (_counters[i].source == SYS_PERF_COUNTER) {
- int num_bytes = read(_counters[i].fd, &buffer[i], COUNTER_SIZE);
+ auto num_bytes = read((int)_counters[i].fd, &buffer[i],
COUNTER_SIZE);
if (num_bytes != COUNTER_SIZE) {
return false;
@@ -396,7 +396,7 @@ PerfCounters::PerfCounters() : _group_fd(-1) {}
PerfCounters::~PerfCounters() {
for (int i = 0; i < _counters.size(); ++i) {
if (_counters[i].source == SYS_PERF_COUNTER) {
- close(_counters[i].fd);
+ close((int)_counters[i].fd);
}
}
}
@@ -511,9 +511,9 @@ void PerfCounters::pretty_print(std::ostream* s) const {
stream << std::endl;
- for (int s = 0; s < _snapshots.size(); s++) {
- stream << std::setw(8) << _snapshot_names[s];
- const std::vector<int64_t>& snapshot = _snapshots[s];
+ for (int ss = 0; ss < _snapshots.size(); ss++) {
+ stream << std::setw(8) << _snapshot_names[ss];
+ const std::vector<int64_t>& snapshot = _snapshots[ss];
for (int i = 0; i < snapshot.size(); ++i) {
stream << std::setw(PRETTY_PRINT_WIDTH)
diff --git a/be/src/util/perf_counters.h b/be/src/util/perf_counters.h
index 52fe46acb67..01af39f65b8 100644
--- a/be/src/util/perf_counters.h
+++ b/be/src/util/perf_counters.h
@@ -150,7 +150,7 @@ private:
// DataSource specific data. This is used to pull the counter values.
union {
// For SYS_PERF_COUNTER. File descriptor where the counter value
is stored.
- int fd;
+ int64_t fd;
// For PROC_SELF_IO. Line number from /proc/self/io file with
this counter's value
int proc_io_line_number;
};
@@ -164,7 +164,7 @@ private:
std::vector<std::vector<int64_t>> _snapshots;
// System perf counters can be grouped together. The OS will update all
grouped counters
// at the same time. This is useful to better correlate counter values.
- int _group_fd;
+ int64_t _group_fd;
static int64_t _vm_rss;
static std::string _vm_rss_str;
diff --git a/be/src/util/quantile_state.cpp b/be/src/util/quantile_state.cpp
index e0be15454f9..c42a0b964ba 100644
--- a/be/src/util/quantile_state.cpp
+++ b/be/src/util/quantile_state.cpp
@@ -29,7 +29,7 @@
#include "vec/common/unaligned.h"
namespace doris {
-
+#include "common/compile_check_begin.h"
QuantileState::QuantileState() : _type(EMPTY),
_compression(QUANTILE_STATE_COMPRESSION_MIN) {}
QuantileState::QuantileState(float compression) : _type(EMPTY),
_compression(compression) {}
@@ -113,11 +113,11 @@ bool QuantileState::is_valid(const Slice& slice) {
double QuantileState::get_explicit_value_by_percentile(float percentile) const
{
DCHECK(_type == EXPLICIT);
- int n = _explicit_data.size();
+ size_t n = _explicit_data.size();
std::vector<double> sorted_data(_explicit_data.begin(),
_explicit_data.end());
std::sort(sorted_data.begin(), sorted_data.end());
- double index = (n - 1) * percentile;
+ double index = double(n - 1) * percentile;
int intIdx = (int)index;
if (intIdx == n - 1) {
return sorted_data[intIdx];
@@ -216,7 +216,7 @@ size_t QuantileState::serialize(uint8_t* dst) const {
}
case EXPLICIT: {
*ptr++ = EXPLICIT;
- uint16_t size = _explicit_data.size();
+ auto size = (uint16_t)_explicit_data.size();
unaligned_store<uint16_t>(ptr, size);
ptr += sizeof(uint16_t);
memcpy(ptr, &_explicit_data[0], size * sizeof(double));
@@ -259,10 +259,10 @@ void QuantileState::merge(const QuantileState& other) {
_type = TDIGEST;
_tdigest_ptr = std::make_shared<TDigest>(_compression);
for (int i = 0; i < _explicit_data.size(); i++) {
- _tdigest_ptr->add(_explicit_data[i]);
+ _tdigest_ptr->add((float)_explicit_data[i]);
}
for (int i = 0; i < other._explicit_data.size(); i++) {
- _tdigest_ptr->add(other._explicit_data[i]);
+ _tdigest_ptr->add((float)other._explicit_data[i]);
}
} else {
_explicit_data.insert(_explicit_data.end(),
other._explicit_data.begin(),
@@ -271,7 +271,7 @@ void QuantileState::merge(const QuantileState& other) {
break;
case TDIGEST:
for (int i = 0; i < other._explicit_data.size(); i++) {
- _tdigest_ptr->add(other._explicit_data[i]);
+ _tdigest_ptr->add((float)other._explicit_data[i]);
}
break;
default:
@@ -288,13 +288,13 @@ void QuantileState::merge(const QuantileState& other) {
case SINGLE:
_type = TDIGEST;
_tdigest_ptr = other._tdigest_ptr;
- _tdigest_ptr->add(_single_data);
+ _tdigest_ptr->add((float)_single_data);
break;
case EXPLICIT:
_type = TDIGEST;
_tdigest_ptr = other._tdigest_ptr;
for (int i = 0; i < _explicit_data.size(); i++) {
- _tdigest_ptr->add(_explicit_data[i]);
+ _tdigest_ptr->add((float)_explicit_data[i]);
}
break;
case TDIGEST:
@@ -325,7 +325,7 @@ void QuantileState::add_value(const double& value) {
if (_explicit_data.size() == QUANTILE_STATE_EXPLICIT_NUM) {
_tdigest_ptr = std::make_shared<TDigest>(_compression);
for (int i = 0; i < _explicit_data.size(); i++) {
- _tdigest_ptr->add(_explicit_data[i]);
+ _tdigest_ptr->add((float)_explicit_data[i]);
}
_explicit_data.clear();
_explicit_data.shrink_to_fit();
@@ -336,7 +336,7 @@ void QuantileState::add_value(const double& value) {
}
break;
case TDIGEST:
- _tdigest_ptr->add(value);
+ _tdigest_ptr->add((float)value);
break;
}
}
diff --git a/be/src/vec/common/hash_table/hash_map_context.h
b/be/src/vec/common/hash_table/hash_map_context.h
index c362efa0597..519e326268d 100644
--- a/be/src/vec/common/hash_table/hash_map_context.h
+++ b/be/src/vec/common/hash_table/hash_map_context.h
@@ -32,7 +32,7 @@
#include "vec/core/types.h"
namespace doris::vectorized {
-
+#include "common/compile_check_begin.h"
constexpr auto BITSIZE = 8;
template <typename Base>
@@ -56,11 +56,11 @@ struct MethodBaseInner {
MethodBaseInner() { hash_table.reset(new HashMap()); }
virtual ~MethodBaseInner() = default;
- virtual void init_serialized_keys(const ColumnRawPtrs& key_columns, size_t
num_rows,
+ virtual void init_serialized_keys(const ColumnRawPtrs& key_columns,
uint32_t num_rows,
const uint8_t* null_map = nullptr, bool
is_join = false,
bool is_build = false, uint32_t
bucket_size = 0) = 0;
- [[nodiscard]] virtual size_t estimated_size(const ColumnRawPtrs&
key_columns, size_t num_rows,
+ [[nodiscard]] virtual size_t estimated_size(const ColumnRawPtrs&
key_columns, uint32_t num_rows,
bool is_join = false, bool
is_build = false,
uint32_t bucket_size = 0) = 0;
@@ -85,7 +85,7 @@ struct MethodBaseInner {
}
}
- void init_hash_values(size_t num_rows, const uint8_t* null_map) {
+ void init_hash_values(uint32_t num_rows, const uint8_t* null_map) {
if (null_map == nullptr) {
init_hash_values(num_rows);
return;
@@ -100,7 +100,7 @@ struct MethodBaseInner {
}
}
- void init_hash_values(size_t num_rows) {
+ void init_hash_values(uint32_t num_rows) {
hash_values.resize(num_rows);
for (size_t k = 0; k < num_rows; ++k) {
hash_values[k] = hash_table->hash(keys[k]);
@@ -156,7 +156,7 @@ struct MethodBaseInner {
}
virtual void insert_keys_into_columns(std::vector<Key>& keys,
MutableColumns& key_columns,
- size_t num_rows) = 0;
+ uint32_t num_rows) = 0;
};
template <typename T>
@@ -208,7 +208,7 @@ struct MethodSerialized : public MethodBase<TData> {
return {begin, sum_size};
}
- size_t estimated_size(const ColumnRawPtrs& key_columns, size_t num_rows,
bool is_join,
+ size_t estimated_size(const ColumnRawPtrs& key_columns, uint32_t num_rows,
bool is_join,
bool is_build, uint32_t bucket_size) override {
size_t size = 0;
for (const auto& column : key_columns) {
@@ -224,7 +224,7 @@ struct MethodSerialized : public MethodBase<TData> {
return size;
}
- void init_serialized_keys_impl(const ColumnRawPtrs& key_columns, size_t
num_rows,
+ void init_serialized_keys_impl(const ColumnRawPtrs& key_columns, uint32_t
num_rows,
DorisVector<StringRef>& input_keys, Arena&
input_arena) {
input_arena.clear();
input_keys.resize(num_rows);
@@ -266,7 +266,7 @@ struct MethodSerialized : public MethodBase<TData> {
}
}
- void init_serialized_keys(const ColumnRawPtrs& key_columns, size_t
num_rows,
+ void init_serialized_keys(const ColumnRawPtrs& key_columns, uint32_t
num_rows,
const uint8_t* null_map = nullptr, bool is_join
= false,
bool is_build = false, uint32_t bucket_size = 0)
override {
init_serialized_keys_impl(key_columns, num_rows, is_build ?
build_stored_keys : stored_keys,
@@ -279,7 +279,7 @@ struct MethodSerialized : public MethodBase<TData> {
}
void insert_keys_into_columns(std::vector<StringRef>& input_keys,
MutableColumns& key_columns,
- const size_t num_rows) override {
+ const uint32_t num_rows) override {
for (auto& column : key_columns) {
column->deserialize_vec(input_keys.data(), num_rows);
}
@@ -308,7 +308,7 @@ struct MethodStringNoCache : public MethodBase<TData> {
: (_stored_keys.size() * sizeof(StringRef));
}
- size_t estimated_size(const ColumnRawPtrs& key_columns, size_t num_rows,
bool is_join,
+ size_t estimated_size(const ColumnRawPtrs& key_columns, uint32_t num_rows,
bool is_join,
bool is_build, uint32_t bucket_size) override {
size_t size = 0;
size += sizeof(StringRef) * num_rows; // stored_keys
@@ -320,7 +320,7 @@ struct MethodStringNoCache : public MethodBase<TData> {
return size;
}
- void init_serialized_keys_impl(const ColumnRawPtrs& key_columns, size_t
num_rows,
+ void init_serialized_keys_impl(const ColumnRawPtrs& key_columns, uint32_t
num_rows,
DorisVector<StringRef>& stored_keys) {
const IColumn& column = *key_columns[0];
const auto& nested_column =
@@ -346,7 +346,7 @@ struct MethodStringNoCache : public MethodBase<TData> {
Base::keys = stored_keys.data();
}
- void init_serialized_keys(const ColumnRawPtrs& key_columns, size_t
num_rows,
+ void init_serialized_keys(const ColumnRawPtrs& key_columns, uint32_t
num_rows,
const uint8_t* null_map = nullptr, bool is_join
= false,
bool is_build = false, uint32_t bucket_size = 0)
override {
init_serialized_keys_impl(key_columns, num_rows,
@@ -359,7 +359,7 @@ struct MethodStringNoCache : public MethodBase<TData> {
}
void insert_keys_into_columns(std::vector<StringRef>& input_keys,
MutableColumns& key_columns,
- const size_t num_rows) override {
+ const uint32_t num_rows) override {
key_columns[0]->reserve(num_rows);
key_columns[0]->insert_many_strings(input_keys.data(), num_rows);
}
@@ -375,7 +375,7 @@ struct MethodOneNumber : public MethodBase<TData> {
using State = ColumnsHashing::HashMethodOneNumber<typename Base::Value,
typename Base::Mapped,
FieldType>;
- size_t estimated_size(const ColumnRawPtrs& key_columns, size_t num_rows,
bool is_join,
+ size_t estimated_size(const ColumnRawPtrs& key_columns, uint32_t num_rows,
bool is_join,
bool is_build, uint32_t bucket_size) override {
size_t size = 0;
if (is_join) {
@@ -386,7 +386,7 @@ struct MethodOneNumber : public MethodBase<TData> {
return size;
}
- void init_serialized_keys(const ColumnRawPtrs& key_columns, size_t
num_rows,
+ void init_serialized_keys(const ColumnRawPtrs& key_columns, uint32_t
num_rows,
const uint8_t* null_map = nullptr, bool is_join
= false,
bool is_build = false, uint32_t bucket_size = 0)
override {
Base::keys = (FieldType*)(key_columns[0]->is_nullable()
@@ -403,7 +403,7 @@ struct MethodOneNumber : public MethodBase<TData> {
}
void insert_keys_into_columns(std::vector<typename Base::Key>& input_keys,
- MutableColumns& key_columns, const size_t
num_rows) override {
+ MutableColumns& key_columns, const uint32_t
num_rows) override {
if (!input_keys.empty()) {
// If size() is 0, data() may or may not return a null pointer.
key_columns[0]->insert_many_raw_data((char*)input_keys.data(),
num_rows);
@@ -444,11 +444,11 @@ struct MethodKeysFixed : public MethodBase<TData> {
continue;
}
size_t bucket = j / BITSIZE;
- size_t offset = j % BITSIZE;
+ size_t local_offset = j % BITSIZE;
const auto& data =
assert_cast<const
ColumnUInt8&>(*nullmap_columns[j]).get_data().data();
for (size_t i = 0; i < row_numbers; ++i) {
- *((char*)(&result[i]) + bucket) |= data[i] << offset;
+ *((char*)(&result[i]) + bucket) |= data[i] << local_offset;
}
}
offset += bitmap_size;
@@ -499,7 +499,7 @@ struct MethodKeysFixed : public MethodBase<TData> {
sizeof(typename Base::Key);
}
- size_t estimated_size(const ColumnRawPtrs& key_columns, size_t num_rows,
bool is_join,
+ size_t estimated_size(const ColumnRawPtrs& key_columns, uint32_t num_rows,
bool is_join,
bool is_build, uint32_t bucket_size) override {
size_t size = 0;
size += sizeof(StringRef) * num_rows; // stored_keys
@@ -511,7 +511,7 @@ struct MethodKeysFixed : public MethodBase<TData> {
return size;
}
- void init_serialized_keys(const ColumnRawPtrs& key_columns, size_t
num_rows,
+ void init_serialized_keys(const ColumnRawPtrs& key_columns, uint32_t
num_rows,
const uint8_t* null_map = nullptr, bool is_join
= false,
bool is_build = false, uint32_t bucket_size = 0)
override {
ColumnRawPtrs actual_columns;
@@ -550,7 +550,7 @@ struct MethodKeysFixed : public MethodBase<TData> {
}
void insert_keys_into_columns(std::vector<typename Base::Key>& input_keys,
- MutableColumns& key_columns, const size_t
num_rows) override {
+ MutableColumns& key_columns, const uint32_t
num_rows) override {
// In any hash key value, column values to be read start just after
the bitmap, if it exists.
size_t pos = 0;
for (size_t i = 0; i < key_columns.size(); ++i) {
@@ -702,7 +702,7 @@ struct MethodSingleNullableColumn : public
SingleColumnMethod {
using State = ColumnsHashing::HashMethodSingleLowNullableColumn<typename
Base::State,
typename
Base::Mapped>;
void insert_keys_into_columns(std::vector<typename Base::Key>& input_keys,
- MutableColumns& key_columns, const size_t
num_rows) override {
+ MutableColumns& key_columns, const uint32_t
num_rows) override {
auto* col = key_columns[0].get();
col->reserve(num_rows);
if (input_keys.empty()) {
@@ -716,5 +716,5 @@ struct MethodSingleNullableColumn : public
SingleColumnMethod {
}
}
};
-
+#include "common/compile_check_end.h"
} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/common/hash_table/hash_table.h
b/be/src/vec/common/hash_table/hash_table.h
index e00986704d7..34e02e5798a 100644
--- a/be/src/vec/common/hash_table/hash_table.h
+++ b/be/src/vec/common/hash_table/hash_table.h
@@ -45,6 +45,7 @@
* Another example: for an approximate calculation of the number of unique
visitors, there is a hash table for UniquesHashSet.
* It has the concept of "degree". At each overflow, cells with keys that do
not divide by the corresponding power of the two are deleted.
*/
+#include "common/compile_check_begin.h"
struct HashTableNoState {
/// Serialization, in binary and text form.
void write(doris::vectorized::BufferWritable&) const {}
@@ -339,14 +340,15 @@ public:
? fill_capacity
: fill_capacity + 1);
- size_degree_ = num_elems <= 1 ? initial_size_degree
- : (initial_size_degree > fill_capacity ?
initial_size_degree
- :
fill_capacity);
+ size_degree_ =
+ uint8_t(num_elems <= 1 ? initial_size_degree
+ : (initial_size_degree > fill_capacity
? initial_size_degree
+
: fill_capacity));
increase_size_degree(0);
}
void set_buf_size(size_t buf_size_) {
- size_degree_ = static_cast<size_t>(log2(buf_size_ - 1) + 1);
+ size_degree_ = static_cast<uint8_t>(log2(buf_size_ - 1) + 1);
increase_size_degree(0);
}
};
@@ -1076,3 +1078,4 @@ private:
}
}
};
+#include "common/compile_check_end.h"
\ No newline at end of file
diff --git a/be/src/vec/common/hash_table/join_hash_table.h
b/be/src/vec/common/hash_table/join_hash_table.h
index 57e1b8b1dfc..c6227591545 100644
--- a/be/src/vec/common/hash_table/join_hash_table.h
+++ b/be/src/vec/common/hash_table/join_hash_table.h
@@ -28,6 +28,7 @@
#include "vec/common/hash_table/hash.h"
namespace doris {
+#include "common/compile_check_begin.h"
template <typename Key, typename Hash = DefaultHash<Key>>
class JoinHashTable {
public:
@@ -38,8 +39,8 @@ public:
static uint32_t calc_bucket_size(size_t num_elem) {
size_t expect_bucket_size = num_elem + (num_elem - 1) / 7;
- return std::min(phmap::priv::NormalizeCapacity(expect_bucket_size) + 1,
-
static_cast<size_t>(std::numeric_limits<int32_t>::max()) + 1);
+ return
(uint32_t)std::min(phmap::priv::NormalizeCapacity(expect_bucket_size) + 1,
+
static_cast<size_t>(std::numeric_limits<int32_t>::max()) + 1);
}
size_t get_byte_size() const {
@@ -74,10 +75,10 @@ public:
bool empty_build_side() const { return _empty_build_side; }
- void build(const Key* __restrict keys, const uint32_t* __restrict
bucket_nums, size_t num_elem,
- bool keep_null_key) {
+ void build(const Key* __restrict keys, const uint32_t* __restrict
bucket_nums,
+ uint32_t num_elem, bool keep_null_key) {
build_keys = keys;
- for (size_t i = 1; i < num_elem; i++) {
+ for (uint32_t i = 1; i < num_elem; i++) {
uint32_t bucket_num = bucket_nums[i];
next[i] = first[bucket_num];
first[bucket_num] = i;
@@ -477,4 +478,5 @@ private:
template <typename Key, typename Hash = DefaultHash<Key>>
using JoinHashMap = JoinHashTable<Key, Hash>;
+#include "common/compile_check_end.h"
} // namespace doris
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]