This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new d6ee2ff70d8 [Fix](merge-on-write) Fix duplicate key problem after
adding sequence column for merge-on-write table #39958 (#40015)
d6ee2ff70d8 is described below
commit d6ee2ff70d8ba0f83620bd9b4e66ec7ed4cac787
Author: bobhan1 <[email protected]>
AuthorDate: Thu Aug 29 19:33:34 2024 +0800
[Fix](merge-on-write) Fix duplicate key problem after adding sequence
column for merge-on-write table #39958 (#40015)
---
be/src/olap/rowset/segment_v2/segment.cpp | 16 +++--
be/src/olap/rowset/segment_v2/segment.h | 3 +-
be/src/olap/rowset/segment_v2/segment_writer.cpp | 5 +-
be/src/olap/tablet.cpp | 17 +++---
be/src/olap/tablet.h | 2 +-
be/src/service/point_query_executor.cpp | 6 +-
.../test_mow_enable_sequence_col.out | 16 +++++
.../test_mow_enable_sequence_col.groovy | 71 ++++++++++++++++++++++
8 files changed, 117 insertions(+), 19 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp
b/be/src/olap/rowset/segment_v2/segment.cpp
index 9d12c772cd3..90ab6b45cf6 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -424,12 +424,13 @@ Status Segment::new_inverted_index_iterator(const
TabletColumn& tablet_column,
return Status::OK();
}
-Status Segment::lookup_row_key(const Slice& key, bool with_seq_col,
RowLocation* row_location) {
+Status Segment::lookup_row_key(const Slice& key, const TabletSchema*
latest_schema,
+ bool with_seq_col, RowLocation* row_location) {
RETURN_IF_ERROR(load_pk_index_and_bf());
- bool has_seq_col = _tablet_schema->has_sequence_col();
+ bool has_seq_col = latest_schema->has_sequence_col();
size_t seq_col_length = 0;
if (has_seq_col) {
- seq_col_length =
_tablet_schema->column(_tablet_schema->sequence_col_idx()).length() + 1;
+ seq_col_length =
latest_schema->column(latest_schema->sequence_col_idx()).length() + 1;
}
Slice key_without_seq =
@@ -464,15 +465,20 @@ Status Segment::lookup_row_key(const Slice& key, bool
with_seq_col, RowLocation*
Slice sought_key =
Slice(index_column->get_data_at(0).data,
index_column->get_data_at(0).size);
+ // user may use "ALTER TABLE tbl ENABLE FEATURE "SEQUENCE_LOAD" WITH
..." to add a hidden sequence column
+ // for a merge-on-write table which doesn't have sequence column, so
`has_seq_col == true` doesn't mean
+ // data in segment has sequence column value
+ bool segment_has_seq_col = _tablet_schema->has_sequence_col();
Slice sought_key_without_seq =
- Slice(sought_key.get_data(), sought_key.get_size() -
seq_col_length);
+ Slice(sought_key.get_data(),
+ sought_key.get_size() - (segment_has_seq_col ?
seq_col_length : 0));
// compare key
if (key_without_seq.compare(sought_key_without_seq) != 0) {
return Status::Error<ErrorCode::KEY_NOT_FOUND>("Can't find key in
the segment");
}
- if (!with_seq_col) {
+ if (!with_seq_col || !segment_has_seq_col) {
return Status::OK();
}
diff --git a/be/src/olap/rowset/segment_v2/segment.h
b/be/src/olap/rowset/segment_v2/segment.h
index 34b41843aa1..6223c5f81f2 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -108,7 +108,8 @@ public:
return _pk_index_reader.get();
}
- Status lookup_row_key(const Slice& key, bool with_seq_col, RowLocation*
row_location);
+ Status lookup_row_key(const Slice& key, const TabletSchema* latest_schema,
bool with_seq_col,
+ RowLocation* row_location);
Status read_key_by_rowid(uint32_t row_id, std::string* key);
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index 64e91be3e9a..a4fa7940918 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -449,8 +449,9 @@ Status
SegmentWriter::append_block_with_partial_content(const vectorized::Block*
RowLocation loc;
// save rowset shared ptr so this rowset wouldn't delete
RowsetSharedPtr rowset;
- auto st = _tablet->lookup_row_key(key, have_input_seq_column,
specified_rowsets, &loc,
- _mow_context->max_version,
segment_caches, &rowset);
+ auto st = _tablet->lookup_row_key(key, _tablet_schema.get(),
have_input_seq_column,
+ specified_rowsets, &loc,
_mow_context->max_version,
+ segment_caches, &rowset);
if (st.is<KEY_NOT_FOUND>()) {
if (_opts.rowset_ctx->partial_update_info->is_strict_mode) {
++num_rows_filtered;
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 8a4a510f4c6..b2d5f9c114d 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -3041,15 +3041,18 @@ Status Tablet::lookup_row_data(const Slice&
encoded_key, const RowLocation& row_
return Status::OK();
}
-Status Tablet::lookup_row_key(const Slice& encoded_key, bool with_seq_col,
+Status Tablet::lookup_row_key(const Slice& encoded_key, TabletSchema*
latest_schema,
+ bool with_seq_col,
const std::vector<RowsetSharedPtr>&
specified_rowsets,
RowLocation* row_location, uint32_t version,
std::vector<std::unique_ptr<SegmentCacheHandle>>& segment_caches,
RowsetSharedPtr* rowset) {
SCOPED_BVAR_LATENCY(g_tablet_lookup_rowkey_latency);
size_t seq_col_length = 0;
- if (_schema->has_sequence_col() && with_seq_col) {
- seq_col_length = _schema->column(_schema->sequence_col_idx()).length()
+ 1;
+ // use the latest tablet schema to decide if the tablet has sequence
column currently
+ const TabletSchema* schema = (latest_schema == nullptr ? _schema.get() :
latest_schema);
+ if (schema->has_sequence_col() && with_seq_col) {
+ seq_col_length = schema->column(schema->sequence_col_idx()).length() +
1;
}
Slice key_without_seq = Slice(encoded_key.get_data(),
encoded_key.get_size() - seq_col_length);
RowLocation loc;
@@ -3080,7 +3083,7 @@ Status Tablet::lookup_row_key(const Slice& encoded_key,
bool with_seq_col,
DCHECK_EQ(segments.size(), num_segments);
for (auto id : picked_segments) {
- Status s = segments[id]->lookup_row_key(encoded_key, with_seq_col,
&loc);
+ Status s = segments[id]->lookup_row_key(encoded_key, schema,
with_seq_col, &loc);
if (s.is<KEY_NOT_FOUND>()) {
continue;
}
@@ -3091,7 +3094,7 @@ Status Tablet::lookup_row_key(const Slice& encoded_key,
bool with_seq_col,
{loc.rowset_id, loc.segment_id, version},
loc.row_id)) {
// if has sequence col, we continue to compare the sequence_id
of
// all rowsets, util we find an existing key.
- if (_schema->has_sequence_col()) {
+ if (schema->has_sequence_col()) {
continue;
}
// The key is deleted, we don't need to search for it any more.
@@ -3231,8 +3234,8 @@ Status Tablet::calc_segment_delete_bitmap(RowsetSharedPtr
rowset,
}
RowsetSharedPtr rowset_find;
- auto st = lookup_row_key(key, true, specified_rowsets, &loc,
dummy_version.first - 1,
- segment_caches, &rowset_find);
+ auto st = lookup_row_key(key, rowset_schema.get(), true,
specified_rowsets, &loc,
+ dummy_version.first - 1, segment_caches,
&rowset_find);
bool expected_st = st.ok() || st.is<KEY_NOT_FOUND>() ||
st.is<KEY_ALREADY_EXISTS>();
// It's a defensive DCHECK, we need to exclude some common errors
to avoid core-dump
// while stress test
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index 20e9ae890cc..1fbc8254910 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -433,7 +433,7 @@ public:
// Lookup the row location of `encoded_key`, the function sets
`row_location` on success.
// NOTE: the method only works in unique key model with primary key index,
you will got a
// not supported error in other data model.
- Status lookup_row_key(const Slice& encoded_key, bool with_seq_col,
+ Status lookup_row_key(const Slice& encoded_key, TabletSchema*
latest_schema, bool with_seq_col,
const std::vector<RowsetSharedPtr>&
specified_rowsets,
RowLocation* row_location, uint32_t version,
std::vector<std::unique_ptr<SegmentCacheHandle>>&
segment_caches,
diff --git a/be/src/service/point_query_executor.cpp
b/be/src/service/point_query_executor.cpp
index 9017b10aef8..6402909f5cf 100644
--- a/be/src/service/point_query_executor.cpp
+++ b/be/src/service/point_query_executor.cpp
@@ -298,9 +298,9 @@ Status PointQueryExecutor::_lookup_row_key() {
}
// Get rowlocation and rowset, ctx._rowset_ptr will acquire wrap this
ptr
auto rowset_ptr = std::make_unique<RowsetSharedPtr>();
- st = (_tablet->lookup_row_key(_row_read_ctxs[i]._primary_key, false,
specified_rowsets,
- &location, INT32_MAX /*rethink?*/,
segment_caches,
- rowset_ptr.get()));
+ st = (_tablet->lookup_row_key(_row_read_ctxs[i]._primary_key, nullptr,
false,
+ specified_rowsets, &location, INT32_MAX
/*rethink?*/,
+ segment_caches, rowset_ptr.get()));
if (st.is<ErrorCode::KEY_NOT_FOUND>()) {
continue;
}
diff --git
a/regression-test/data/unique_with_mow_p0/test_mow_enable_sequence_col.out
b/regression-test/data/unique_with_mow_p0/test_mow_enable_sequence_col.out
new file mode 100644
index 00000000000..d99510cfbcb
--- /dev/null
+++ b/regression-test/data/unique_with_mow_p0/test_mow_enable_sequence_col.out
@@ -0,0 +1,16 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql --
+111 aaa bbb 11
+222 bbb bbb 11
+333 ccc ddd 11
+
+-- !sql --
+111 aaa bbb 11 \N 0 2
+222 bbb bbb 11 \N 0 3
+333 ccc ddd 11 \N 0 4
+
+-- !sql --
+111 zzz yyy 100 99 0 5
+222 xxx www 400 99 0 8
+333 ccc ddd 11 \N 0 4
+
diff --git
a/regression-test/suites/unique_with_mow_p0/test_mow_enable_sequence_col.groovy
b/regression-test/suites/unique_with_mow_p0/test_mow_enable_sequence_col.groovy
new file mode 100644
index 00000000000..ada3a55f042
--- /dev/null
+++
b/regression-test/suites/unique_with_mow_p0/test_mow_enable_sequence_col.groovy
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_mow_enable_sequence_col") {
+
+ def tableName = "test_mow_enable_sequence_col"
+ sql """ DROP TABLE IF EXISTS ${tableName} force;"""
+ sql """CREATE TABLE IF NOT EXISTS ${tableName}
+ (`user_id` BIGINT NOT NULL,
+ `username` VARCHAR(50) NOT NULL,
+ `city` VARCHAR(20),
+ `age` SMALLINT)
+ UNIQUE KEY(`user_id`)
+ DISTRIBUTED BY HASH(`user_id`) BUCKETS 1
+ PROPERTIES (
+ "disable_auto_compaction" = "true",
+ "replication_allocation" = "tag.location.default: 1",
+ "enable_unique_key_merge_on_write" = "true");"""
+
+ sql """insert into ${tableName}(`user_id`,`username`,`city`,`age`)
VALUES(111,'aaa','bbb',11);"""
+ sql """insert into ${tableName}(`user_id`,`username`,`city`,`age`)
VALUES(222,'bbb','bbb',11);"""
+ sql """insert into ${tableName}(`user_id`,`username`,`city`,`age`)
VALUES(333,'ccc','ddd',11);"""
+ order_qt_sql "select * from ${tableName};"
+
+ sql "set show_hidden_columns = true;"
+ sql "sync;"
+ def res = sql "desc ${tableName} all;"
+ assertTrue(!res.toString().contains("__DORIS_SEQUENCE_COL__"))
+ sql "set show_hidden_columns = false;"
+ sql "sync;"
+
+ def doSchemaChange = { cmd ->
+ sql cmd
+ waitForSchemaChangeDone {
+ sql """SHOW ALTER TABLE COLUMN WHERE IndexName='${tableName}'
ORDER BY createtime DESC LIMIT 1"""
+ time 2000
+ }
+ }
+ doSchemaChange """ALTER TABLE ${tableName} ENABLE FEATURE "SEQUENCE_LOAD"
WITH PROPERTIES ("function_column.sequence_type" = "bigint");"""
+
+ sql "set show_hidden_columns = true;"
+ sql "sync;"
+ res = sql "desc ${tableName} all;"
+ assertTrue(res.toString().contains("__DORIS_SEQUENCE_COL__"))
+ order_qt_sql "select * from ${tableName};"
+ sql "set show_hidden_columns = false;"
+ sql "sync;"
+
+ sql """insert into ${tableName}(`user_id`,`username`,`city`,`age`,
`__DORIS_SEQUENCE_COL__`) VALUES(111,'zzz','yyy',100,99);"""
+ sql """insert into ${tableName}(`user_id`,`username`,`city`,`age`,
`__DORIS_SEQUENCE_COL__`) VALUES(111,'hhh','mmm',200,88);"""
+ sql """insert into ${tableName}(`user_id`,`username`,`city`,`age`,
`__DORIS_SEQUENCE_COL__`) VALUES(222,'qqq','ppp',300,77);"""
+ sql """insert into ${tableName}(`user_id`,`username`,`city`,`age`,
`__DORIS_SEQUENCE_COL__`) VALUES(222,'xxx','www',400,99);"""
+
+ sql "set show_hidden_columns = true;"
+ sql "sync;"
+ order_qt_sql "select * from ${tableName};"
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]