This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 420a91d0e1c [fix](merge-on-write) incorrect result caused by key range filter with pk (#31456)
420a91d0e1c is described below
commit 420a91d0e1c1f8177a152b08e56e9b225a293515
Author: Xin Liao <[email protected]>
AuthorDate: Wed Feb 28 16:34:24 2024 +0800
[fix](merge-on-write) incorrect result caused by key range filter with pk (#31456)
---
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 4 +--
be/src/util/key_util.h | 17 ++++-----
.../test_primary_key_simple_case.out | 22 ++++++++++++
.../test_primary_key_simple_case.groovy | 42 ++++++++++++++++++++++
4 files changed, 74 insertions(+), 11 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index f1cb1f2c61c..327a002e529 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1373,10 +1373,8 @@ Status
SegmentIterator::_lookup_ordinal_from_pk_index(const RowCursor& key, bool
DCHECK(pk_index_reader != nullptr);
std::string index_key;
- // when is_include is false, we shoudle append KEY_NORMAL_MARKER to the
- // encode key. Otherwise, we will get an incorrect upper bound.
encode_key_with_padding<RowCursor, true>(
- &index_key, key, _segment->_tablet_schema->num_key_columns(),
is_include, true);
+ &index_key, key, _segment->_tablet_schema->num_key_columns(),
is_include);
if (index_key < _segment->min_key()) {
*rowid = 0;
return Status::OK();
diff --git a/be/src/util/key_util.h b/be/src/util/key_util.h
index 0dbaa397101..fd57566fa4f 100644
--- a/be/src/util/key_util.h
+++ b/be/src/util/key_util.h
@@ -50,6 +50,8 @@ constexpr uint8_t KEY_NULL_FIRST_MARKER = 0x01;
constexpr uint8_t KEY_NORMAL_MARKER = 0x02;
// Used to represent maximal value for that field
constexpr uint8_t KEY_MAXIMAL_MARKER = 0xFF;
+// Used to represent a value greater than the normal marker by 1, used by MoW
+constexpr uint8_t KEY_NORMAL_NEXT_MARKER = 0x03;
// Encode one row into binary according given num_keys.
// A cell will be encoded in the format of a marker and encoded content.
@@ -57,21 +59,20 @@ constexpr uint8_t KEY_MAXIMAL_MARKER = 0xFF;
// fill a marker and return. If padding_minimal is true, KEY_MINIMAL_MARKER will
// be added, if padding_minimal is false, KEY_MAXIMAL_MARKER will be added.
// If all num_keys are found in row, no marker will be added.
-// if padding_minimal is false and padding_normal_marker is true,
-// KEY_NORMAL_MARKER will be added.
-template <typename RowType, bool full_encode = false>
+template <typename RowType, bool is_mow = false>
void encode_key_with_padding(std::string* buf, const RowType& row, size_t
num_keys,
- bool padding_minimal, bool padding_normal_marker
= false) {
+ bool padding_minimal) {
for (auto cid = 0; cid < num_keys; cid++) {
auto field = row.schema()->column(cid);
if (field == nullptr) {
if (padding_minimal) {
buf->push_back(KEY_MINIMAL_MARKER);
} else {
- if (padding_normal_marker) {
- buf->push_back(KEY_NORMAL_MARKER);
+ if (is_mow) {
+ buf->push_back(KEY_NORMAL_NEXT_MARKER);
+ } else {
+ buf->push_back(KEY_MAXIMAL_MARKER);
}
- buf->push_back(KEY_MAXIMAL_MARKER);
}
break;
}
@@ -82,7 +83,7 @@ void encode_key_with_padding(std::string* buf, const RowType&
row, size_t num_ke
continue;
}
buf->push_back(KEY_NORMAL_MARKER);
- if (full_encode) {
+ if (is_mow) {
field->full_encode_ascending(cell.cell_ptr(), buf);
} else {
field->encode_ascending(cell.cell_ptr(), buf);
diff --git
a/regression-test/data/unique_with_mow_p0/test_primary_key_simple_case.out
b/regression-test/data/unique_with_mow_p0/test_primary_key_simple_case.out
new file mode 100644
index 00000000000..d82fe80fdea
--- /dev/null
+++ b/regression-test/data/unique_with_mow_p0/test_primary_key_simple_case.out
@@ -0,0 +1,22 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !pk_key_range --
+2024-02-18 \N -4
+2024-02-18 \N 9
+2024-02-18 -10 -10
+2024-02-18 -10 -10
+2024-02-18 -10 -4
+2024-02-18 -10 5
+2024-02-18 -10 9
+2024-02-18 -4 -10
+2024-02-18 -4 -10
+2024-02-18 0 4
+2024-02-18 0 5
+2024-02-18 0 6
+2024-02-18 1 6
+2024-02-18 2 9
+2024-02-18 3 9
+2024-02-18 5 4
+2024-02-18 8 2
+2024-02-18 9 1
+2024-02-18 9 9
+
diff --git
a/regression-test/suites/unique_with_mow_p0/test_primary_key_simple_case.groovy
b/regression-test/suites/unique_with_mow_p0/test_primary_key_simple_case.groovy
index fafd2de3be6..a2e351372e3 100644
---
a/regression-test/suites/unique_with_mow_p0/test_primary_key_simple_case.groovy
+++
b/regression-test/suites/unique_with_mow_p0/test_primary_key_simple_case.groovy
@@ -108,4 +108,46 @@ suite("test_primary_key_simple_case") {
result = sql """ SELECT * FROM ${tableName} t ORDER BY user_id; """
assertTrue(result.size() == 7)
assertTrue(result[6][10] == 25)
+
+ sql """ DROP TABLE IF EXISTS test_unique_key_range_tbl """
+ sql """
+ create table test_unique_key_range_tbl (
+ k1 date not null,
+ k2 bigint not null,
+ v1 int null,
+ v2 int not null
+ ) UNIQUE KEY(`k1`, `k2`)
+ DISTRIBUTED BY HASH(`k2`) BUCKETS 30
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "enable_unique_key_merge_on_write" = "true"
+ );
+ """
+
+ sql """
+ insert into test_unique_key_range_tbl values
+ ( '2024-02-18' , -7822995176885966013 , -10 , -4 ),
+ ( '2024-02-18' , -5987215688096912139 , 8 , 2 ),
+ ( '2024-02-18' , -5889932400568797810 , -10 , -10 ),
+ ( '2024-02-18' , -5051784705055344649 , 1 , 6 ),
+ ( '2024-02-18' , -4635608137995832373 , 3 , 9 ),
+ ( '2024-02-18' , -3836821172182966892 , -10 , -10 ),
+ ( '2024-02-18' , -3675645188438967877 , NULL , -4 ),
+ ( '2024-02-18' , -3363157164254363034 , 5 , 4 ),
+ ( '2024-02-18' , -849169574767655353 , -4 , -10 ),
+ ( '2024-02-18' , -293023807696575395 , NULL , 9 ),
+ ( '2024-02-18' , 1167104788249072527 , 0 , 4 ),
+ ( '2024-02-18' , 1660707941299238025 , 9 , 9 ),
+ ( '2024-02-18' , 2852819493813807984 , 0 , 6 ),
+ ( '2024-02-18' , 5444305694667795860 , 9 , 1 ),
+ ( '2024-02-18' , 6136152292926889790 , 2 , 9 ),
+ ( '2024-02-18' , 6538123407677174537 , -4 , -10 ),
+ ( '2024-02-18' , 7958269158967938474 , -10 , 9 ),
+ ( '2024-02-18' , 9019386549208004184 , -10 , 5 ),
+ ( '2024-02-18' , 9208781524087970597 , 0 , 5 );
+ """
+
+ qt_pk_key_range """
+ select k1, v1, v2 from test_unique_key_range_tbl where k1 =
'2024-02-18' order by 1, 2, 3;
+ """
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]