This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 7697f02b890 [fix](ub) undefined behavior in FixedContainer (#39191)
7697f02b890 is described below
commit 7697f02b890976523d851cfb5dd3a93adb0d2826
Author: Jerry Hu <[email protected]>
AuthorDate: Sun Aug 11 06:32:23 2024 +0800
[fix](ub) undefined behavior in FixedContainer (#39191)
## Proposed changes
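Undefined behavior occurs when the value list of an IN predicate on a
string column contains a NULL literal. The string set was sized with
`get_num_args() - 1` rather than `get_size_with_out_null()`, so the
FixedContainer size did not match the number of values actually stored,
and `FixedContainer::find` ended up passing a null pointer to a
non-null-declared argument (see the UBSan report below).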
```
/root/doris/be/src/vec/common/string_ref.h:271:54: runtime error: null
pointer passed as argument 2, which is declared to never be null
/var/local/ldb-toolchain/bin/../usr/include/string.h:64:33: note: nonnull
attribute specified here
#0 0x5616d072245d in doris::StringRef::eq(doris::StringRef const&) const
/root/doris/be/src/vec/common/string_ref.h:271:41
#1 0x5616d072245d in doris::StringRef::operator==(doris::StringRef const&)
const /root/doris/be/src/vec/common/string_ref.h:274:60
#2 0x5616d072245d in doris::FixedContainer::find(doris::StringRef const&)
const /root/doris/be/src/exprs/hybrid_set.h:76:36
#3 0x5616d072245d in void
doris::StringValueSet>::_find_batch(doris::vectorized::IColumn const&, unsigned
long, doris::vectorized::PODArray, 16ul, 15ul> const*,
doris::vectorized::PODArray, 16ul, 15ul>&)
/root/doris/be/src/exprs/hybrid_set.h:688:63
#4 0x5616d0747857 in
doris::vectorized::FunctionIn::execute_impl(doris::FunctionContext*,
doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long)
const /root/doris/be/src/vec/functions/in.h:170:21
#5 0x5616c741fa3a in
doris::vectorized::DefaultExecutable::execute_impl(doris::FunctionContext*,
doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long)
const /root/doris/be/src/vec/functions/function.h:462:26
#6 0x5616cbb5b650 in
doris::vectorized::PreparedFunctionImpl::_execute_skipped_constant_deal(doris::FunctionContext*,
doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long,
bool) const /root/doris/be/src/vec/functions/function.cpp
#7 0x5616cbb4e14e in
doris::vectorized::PreparedFunctionImpl::execute_without_low_cardinality_columns(doris::FunctionContext*,
doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long,
bool) const /root/doris/be/src/vec/functions/function.cpp:244:12
#8 0x5616cbb4e3c2 in
doris::vectorized::PreparedFunctionImpl::execute(doris::FunctionContext*,
doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long,
bool) const /root/doris/be/src/vec/functions/function.cpp:250:12
#9 0x5616c741cd68 in
doris::vectorized::IFunctionBase::execute(doris::FunctionContext*,
doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long,
bool) const /root/doris/be/src/vec/functions/function.h:190:19
#10 0x5616c74cf712 in
doris::vectorized::VInPredicate::execute(doris::vectorized::VExprContext*,
doris::vectorized::Block*, int*)
/root/doris/be/src/vec/exprs/vin_predicate.cpp:130:5
#11 0x5616c740d5c0 in
doris::vectorized::VectorizedFnCall::_do_execute(doris::vectorized::VExprContext*,
doris::vectorized::Block*, int*, std::vector>&)
/root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:183:9
#12 0x5616c740ecf5 in
doris::vectorized::VectorizedFnCall::execute(doris::vectorized::VExprContext*,
doris::vectorized::Block*, int*)
/root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:215:12
#13 0x5616c7462e24 in
doris::vectorized::VCompoundPred::execute(doris::vectorized::VExprContext*,
doris::vectorized::Block*, int*)
/root/doris/be/src/vec/exprs/vcompound_pred.h:127:38
#14 0x5616c74bccec in
doris::vectorized::VExprContext::execute(doris::vectorized::Block*, int*)
/root/doris/be/src/vec/exprs/vexpr_context.cpp:54:5
#15 0x5616c74c1dcc in
doris::vectorized::VExprContext::execute_conjuncts(std::vector,
std::allocator>> const&, std::vector, 16ul, 15ul>, std::allocator, 16ul,
15ul>>> const*, bool, doris::vectorized::Block*, doris::vectorized::PODArray,
16ul, 15ul>, bool) /root/doris/be/src/vec/exprs/vexpr_context.cpp:169:9
#16 0x5616c74c5108 in
doris::vectorized::VExprContext::execute_conjuncts_and_filter_block(std::vector,
std::allocator>> const&, doris::vectorized::Block*, std::vector>&, int,
doris::vectorized::PODArray, 16ul, 15ul>&)
/root/doris/be/src/vec/exprs/vexpr_context.cpp:322:5
#17 0x5616ad8a7f1a in
doris::segment_v2::SegmentIterator::_execute_common_expr(unsigned short*,
unsigned short&, doris::vectorized::Block*)
/root/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2680:5
#18 0x5616ad89e86e in
doris::segment_v2::SegmentIterator::_next_batch_internal(doris::vectorized::Block*)
/root/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2582:25
#19 0x5616ad892f5c in
doris::segment_v2::SegmentIterator::next_batch(doris::vectorized::Block*)::$_0::operator()()
const /root/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2315:9
#20 0x5616ad892f5c in
doris::segment_v2::SegmentIterator::next_batch(doris::vectorized::Block*)
/root/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2314:19
#21 0x5616ad6dd9cc in
doris::segment_v2::LazyInitSegmentIterator::next_batch(doris::vectorized::Block*)
/root/doris/be/src/olap/rowset/segment_v2/lazy_init_segment_iterator.h:44:33
#22 0x5616ad269d67 in
doris::BetaRowsetReader::next_block(doris::vectorized::Block*)
/root/doris/be/src/olap/rowset/beta_rowset_reader.cpp:380:29
#23 0x5616de6de110 in
doris::vectorized::VCollectIterator::Level0Iterator::_refresh()
/root/doris/be/src/vec/olap/vcollect_iterator.h
#24 0x5616de6c967f in
doris::vectorized::VCollectIterator::Level0Iterator::refresh_current_row()
/root/doris/be/src/vec/olap/vcollect_iterator.cpp:514:24
#25 0x5616de6ca8a6 in
doris::vectorized::VCollectIterator::Level0Iterator::ensure_first_row_ref()
/root/doris/be/src/vec/olap/vcollect_iterator.cpp:493:14
#26 0x5616de6d7008 in
doris::vectorized::VCollectIterator::Level1Iterator::ensure_first_row_ref()
/root/doris/be/src/vec/olap/vcollect_iterator.cpp:692:27
#27 0x5616de6bd200 in
doris::vectorized::VCollectIterator::build_heap(std::vector, std::allocator>>&)
/root/doris/be/src/vec/olap/vcollect_iterator.cpp:186:9
#28 0x5616de651b6c in
doris::vectorized::BlockReader::_init_collect_iter(doris::TabletReader::ReaderParams
const&) /root/doris/be/src/vec/olap/block_reader.cpp:157:5
#29 0x5616de65526f in
doris::vectorized::BlockReader::init(doris::TabletReader::ReaderParams const&)
/root/doris/be/src/vec/olap/block_reader.cpp:229:19
#30 0x5616e175a0f9 in
doris::vectorized::NewOlapScanner::open(doris::RuntimeState*)
/root/doris/be/src/vec/exec/scan/new_olap_scanner.cpp:237:32
#31 0x5616c736ad34 in
doris::vectorized::ScannerScheduler::_scanner_scan(std::shared_ptr,
std::shared_ptr) /root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:236:5
#32 0x5616c736f05e in
doris::vectorized::ScannerScheduler::submit(std::shared_ptr,
std::shared_ptr)::$_1::operator()() const::'lambda'()::operator()()
const::'lambda'()::operator()() const
/root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:176:21
#33 0x5616c736f05e in
doris::vectorized::ScannerScheduler::submit(std::shared_ptr,
std::shared_ptr)::$_1::operator()() const::'lambda'()::operator()() const
/root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:175:31
#34 0x5616c736f05e in void std::_invoke_impl,
std::shared_ptr)::$_1::operator()() const::'lambda'()&>(std::_invoke_other,
doris::vectorized::ScannerScheduler::submit(std::shared_ptr,
std::shared_ptr)::$_1::operator()() const::'lambda'()&)
/var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:61:14
#35 0x5616c736f05e in std::enable_if, std::shared_ptr)::$1::operator()()
const::'lambda'()&>, void>::type std::_invoke_r,
std::shared_ptr)::$_1::operator()()
const::'lambda'()&>(doris::vectorized::ScannerScheduler::submit(std::shared_ptr,
std::shared_ptr)::$_1::operator()() const::'lambda'()&)
/var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:111:2
#36 0x5616c736f05e in std::_Function_handler,
std::shared_ptr)::$_1::operator()()
const::'lambda'()>::_M_invoke(std::_Any_data const&)
/var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:291:9
#37 0x5616aeed6a3b in doris::ThreadPool::dispatch_thread()
/root/doris/be/src/util/threadpool.cpp:543:24
#38 0x5616aeeae4f7 in doris::Thread::supervise_thread(void*)
/root/doris/be/src/util/thread.cpp:498:5
#39 0x7f7e663e3ac2 in start_thread nptl/pthread_create.c:442:8
#40 0x7f7e6647584f misc/../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior
/root/doris/be/src/vec/common/string_ref.h:271:54 in
```
---
be/src/exprs/hybrid_set.h | 35 ++++++++++++++++++++++
be/src/vec/functions/in.h | 2 +-
.../data/nereids_syntax_p0/inpredicate.out | 9 ++++++
.../suites/nereids_syntax_p0/inpredicate.groovy | 16 ++++++++++
4 files changed, 61 insertions(+), 1 deletion(-)
diff --git a/be/src/exprs/hybrid_set.h b/be/src/exprs/hybrid_set.h
index b75cc81ebf1..f0977a652b1 100644
--- a/be/src/exprs/hybrid_set.h
+++ b/be/src/exprs/hybrid_set.h
@@ -17,7 +17,13 @@
#pragma once
+#include <glog/logging.h>
+
+#include <type_traits>
+
+#include "common/exception.h"
#include "common/object_pool.h"
+#include "common/status.h"
#include "exprs/runtime_filter.h"
#include "runtime/decimalv2_value.h"
#include "runtime/define_primitive_type.h"
@@ -60,8 +66,16 @@ public:
}
}
+ void check_size() {
+ if (N != _size) {
+ throw doris::Exception(ErrorCode::INTERNAL_ERROR,
+ "invalid size of FixedContainer<{}>: {}",
N, _size);
+ }
+ }
+
// Use '|' instead of '||' has better performance by test.
ALWAYS_INLINE bool find(const T& value) const {
+ DCHECK_EQ(N, _size);
if constexpr (N == 0) {
return false;
}
@@ -144,6 +158,12 @@ private:
size_t _size {};
};
+template <typename T>
+struct IsFixedContainer : std::false_type {};
+
+template <typename T, size_t N>
+struct IsFixedContainer<FixedContainer<T, N>> : std::true_type {};
+
/**
* Dynamic Container uses phmap::flat_hash_set.
* @tparam T Element Type
@@ -354,6 +374,11 @@ public:
if constexpr (is_nullable) {
null_map_data = null_map->data();
}
+
+ if constexpr (IsFixedContainer<ContainerType>::value) {
+ _set.check_size();
+ }
+
auto* __restrict result_data = results.data();
for (size_t i = 0; i < rows; ++i) {
if constexpr (!is_nullable && !is_negative) {
@@ -507,6 +532,11 @@ public:
if constexpr (is_nullable) {
null_map_data = null_map->data();
}
+
+ if constexpr (IsFixedContainer<ContainerType>::value) {
+ _set.check_size();
+ }
+
auto* __restrict result_data = results.data();
for (size_t i = 0; i < rows; ++i) {
const auto& string_data = col.get_data_at(i).to_string();
@@ -675,6 +705,11 @@ public:
if constexpr (is_nullable) {
null_map_data = null_map->data();
}
+
+ if constexpr (IsFixedContainer<ContainerType>::value) {
+ _set.check_size();
+ }
+
auto* __restrict result_data = results.data();
for (size_t i = 0; i < rows; ++i) {
uint32_t len = offset[i] - offset[i - 1];
diff --git a/be/src/vec/functions/in.h b/be/src/vec/functions/in.h
index b25ad8eeb67..9b5c5bb023a 100644
--- a/be/src/vec/functions/in.h
+++ b/be/src/vec/functions/in.h
@@ -114,7 +114,7 @@ public:
context->get_arg_type(0)->type ==
PrimitiveType::TYPE_VARCHAR ||
context->get_arg_type(0)->type ==
PrimitiveType::TYPE_STRING) {
// the StringValue's memory is held by FunctionContext, so we can
use StringValueSet here directly
-
state->hybrid_set.reset(create_string_value_set((size_t)(context->get_num_args()
- 1)));
+
state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context)));
} else {
state->hybrid_set.reset(
create_set(context->get_arg_type(0)->type,
get_size_with_out_null(context)));
diff --git a/regression-test/data/nereids_syntax_p0/inpredicate.out
b/regression-test/data/nereids_syntax_p0/inpredicate.out
index cee03178b5c..ac6219c69ce 100644
--- a/regression-test/data/nereids_syntax_p0/inpredicate.out
+++ b/regression-test/data/nereids_syntax_p0/inpredicate.out
@@ -31,3 +31,12 @@
29 Supplier#000000029 VVSymB3fbwaN ARGENTINA4 ARGENTINA
AMERICA 11-773-203-7342
9 Supplier#000000009 ,gJ6K2MKveYxQT IRAN 6 IRAN MIDDLE
EAST 20-338-906-3675
+-- !in_predicate_11 --
+15 Supplier#000000015 DF35PepL5saAK INDIA 0 INDIA ASIA
18-687-542-7601
+
+-- !in_predicate_12 --
+
+-- !in_predicate_13 --
+
+-- !in_predicate_14 --
+
diff --git a/regression-test/suites/nereids_syntax_p0/inpredicate.groovy
b/regression-test/suites/nereids_syntax_p0/inpredicate.groovy
index 3cdf096519c..bf4ec9787f9 100644
--- a/regression-test/suites/nereids_syntax_p0/inpredicate.groovy
+++ b/regression-test/suites/nereids_syntax_p0/inpredicate.groovy
@@ -61,5 +61,21 @@ suite("inpredicate") {
order_qt_in_predicate_10 """
SELECT * FROM supplier WHERE s_suppkey not in (15);
"""
+
+ order_qt_in_predicate_11 """
+ SELECT * FROM supplier WHERE s_suppkey in (15, null);
+ """
+
+ order_qt_in_predicate_12 """
+ SELECT * FROM supplier WHERE s_suppkey not in (15, null);
+ """
+
+ order_qt_in_predicate_13 """
+ SELECT * FROM supplier WHERE s_nation in ('PERU', 'ETHIOPIA', null);
+ """
+
+ order_qt_in_predicate_14 """
+ SELECT * FROM supplier WHERE s_nation not in ('PERU', 'ETHIOPIA',
null);
+ """
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]