This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new dc49270319c branch-3.0: [Fix](mow) Should use rowset's schema when
calculate delete bitmaps between segments (#54351) (#54368)
dc49270319c is described below
commit dc49270319c8c1d4260a64057204820eb64632b0
Author: bobhan1 <[email protected]>
AuthorDate: Tue Aug 12 10:23:14 2025 +0800
branch-3.0: [Fix](mow) Should use rowset's schema when calculate delete
bitmaps between segments (#54351) (#54368)
pick https://github.com/apache/doris/pull/54351
---
.../cloud/cloud_engine_calc_delete_bitmap_task.cpp | 4 +-
be/src/olap/base_tablet.cpp | 16 ++--
be/src/olap/base_tablet.h | 3 +-
be/src/olap/calc_delete_bitmap_executor.cpp | 6 +-
be/src/olap/calc_delete_bitmap_executor.h | 2 +-
be/src/olap/rowset_builder.cpp | 8 +-
be/src/olap/txn_manager.cpp | 3 +-
be/src/service/backend_service.cpp | 4 +-
.../test_mow_alter_seq_multi_segments.out | Bin 0 -> 154 bytes
.../test_mow_alter_seq_multi_segments.groovy | 94 +++++++++++++++++++++
10 files changed, 119 insertions(+), 21 deletions(-)
diff --git a/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp
b/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp
index dc8ecaf26b4..67dac3f7052 100644
--- a/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp
+++ b/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp
@@ -272,8 +272,8 @@ Status CloudTabletCalcDeleteBitmapTask::handle() const {
LOG_INFO("inject error when
CloudTabletCalcDeleteBitmapTask::_handle_rowset");
return Status::MemoryLimitExceeded("injected
MemoryLimitExceeded error");
});
-
RETURN_IF_ERROR(tablet->calc_delete_bitmap_between_segments(rowset->rowset_id(),
-
segments, delete_bitmap));
+ RETURN_IF_ERROR(tablet->calc_delete_bitmap_between_segments(
+ rowset->tablet_schema(), rowset->rowset_id(), segments,
delete_bitmap));
}
status = CloudTablet::update_delete_bitmap(tablet, &txn_info,
_transaction_id,
txn_expiration);
diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp
index 6738a9c41c2..49d1f2851f0 100644
--- a/be/src/olap/base_tablet.cpp
+++ b/be/src/olap/base_tablet.cpp
@@ -368,8 +368,8 @@ void
BaseTablet::generate_tablet_meta_copy_unlocked(TabletMeta& new_tablet_meta)
}
Status BaseTablet::calc_delete_bitmap_between_segments(
- const RowsetId& rowset_id, const
std::vector<segment_v2::SegmentSharedPtr>& segments,
- DeleteBitmapPtr delete_bitmap) {
+ TabletSchemaSPtr schema, const RowsetId& rowset_id,
+ const std::vector<segment_v2::SegmentSharedPtr>& segments,
DeleteBitmapPtr delete_bitmap) {
size_t const num_segments = segments.size();
if (num_segments < 2) {
return Status::OK();
@@ -377,12 +377,12 @@ Status BaseTablet::calc_delete_bitmap_between_segments(
OlapStopWatch watch;
size_t seq_col_length = 0;
- if (_tablet_meta->tablet_schema()->has_sequence_col()) {
- auto seq_col_idx = _tablet_meta->tablet_schema()->sequence_col_idx();
- seq_col_length =
_tablet_meta->tablet_schema()->column(seq_col_idx).length() + 1;
+ if (schema->has_sequence_col()) {
+ auto seq_col_idx = schema->sequence_col_idx();
+ seq_col_length = schema->column(seq_col_idx).length() + 1;
}
size_t rowid_length = 0;
- if (!_tablet_meta->tablet_schema()->cluster_key_idxes().empty()) {
+ if (!schema->cluster_key_idxes().empty()) {
rowid_length = PrimaryKeyIndexReader::ROW_ID_LENGTH;
}
@@ -1499,8 +1499,8 @@ Status BaseTablet::update_delete_bitmap_without_lock(
// calculate delete bitmap between segments if necessary.
DeleteBitmapPtr delete_bitmap =
std::make_shared<DeleteBitmap>(self->tablet_id());
-
RETURN_IF_ERROR(self->calc_delete_bitmap_between_segments(rowset->rowset_id(),
segments,
- delete_bitmap));
+ RETURN_IF_ERROR(self->calc_delete_bitmap_between_segments(
+ rowset->tablet_schema(), rowset->rowset_id(), segments,
delete_bitmap));
// get all base rowsets to calculate on
std::vector<RowsetSharedPtr> specified_rowsets;
diff --git a/be/src/olap/base_tablet.h b/be/src/olap/base_tablet.h
index 6c797e0478b..4779600364d 100644
--- a/be/src/olap/base_tablet.h
+++ b/be/src/olap/base_tablet.h
@@ -186,7 +186,8 @@ public:
RowsetWriter* rowset_writer);
Status calc_delete_bitmap_between_segments(
- const RowsetId& rowset_id, const
std::vector<segment_v2::SegmentSharedPtr>& segments,
+ TabletSchemaSPtr schema, const RowsetId& rowset_id,
+ const std::vector<segment_v2::SegmentSharedPtr>& segments,
DeleteBitmapPtr delete_bitmap);
static Status commit_phase_update_delete_bitmap(
diff --git a/be/src/olap/calc_delete_bitmap_executor.cpp
b/be/src/olap/calc_delete_bitmap_executor.cpp
index 89e668c4c94..0cb6b5c0078 100644
--- a/be/src/olap/calc_delete_bitmap_executor.cpp
+++ b/be/src/olap/calc_delete_bitmap_executor.cpp
@@ -58,7 +58,8 @@ Status CalcDeleteBitmapToken::submit(BaseTabletSPtr tablet,
RowsetSharedPtr cur_
});
}
-Status CalcDeleteBitmapToken::submit(BaseTabletSPtr tablet, RowsetId rowset_id,
+Status CalcDeleteBitmapToken::submit(BaseTabletSPtr tablet, TabletSchemaSPtr
schema,
+ RowsetId rowset_id,
const
std::vector<segment_v2::SegmentSharedPtr>& segments,
DeleteBitmapPtr delete_bitmap) {
{
@@ -68,7 +69,8 @@ Status CalcDeleteBitmapToken::submit(BaseTabletSPtr tablet,
RowsetId rowset_id,
}
return _thread_token->submit_func([=, this]() {
SCOPED_ATTACH_TASK(_query_thread_context);
- auto st = tablet->calc_delete_bitmap_between_segments(rowset_id,
segments, delete_bitmap);
+ auto st = tablet->calc_delete_bitmap_between_segments(schema,
rowset_id, segments,
+ delete_bitmap);
if (!st.ok()) {
LOG(WARNING) << "failed to calc delete bitmap between segments,
tablet_id: "
<< tablet->tablet_id() << " rowset: " << rowset_id
diff --git a/be/src/olap/calc_delete_bitmap_executor.h
b/be/src/olap/calc_delete_bitmap_executor.h
index 5d471e2587e..4dccdf00143 100644
--- a/be/src/olap/calc_delete_bitmap_executor.h
+++ b/be/src/olap/calc_delete_bitmap_executor.h
@@ -56,7 +56,7 @@ public:
DeleteBitmapPtr delete_bitmap, RowsetWriter* rowset_writer);
// calculate delete bitmap between `segments`
- Status submit(BaseTabletSPtr tablet, RowsetId rowset_id,
+ Status submit(BaseTabletSPtr tablet, TabletSchemaSPtr schema, RowsetId
rowset_id,
const std::vector<segment_v2::SegmentSharedPtr>& segments,
DeleteBitmapPtr delete_bitmap);
diff --git a/be/src/olap/rowset_builder.cpp b/be/src/olap/rowset_builder.cpp
index e9b518eaae0..dfe4da0d55f 100644
--- a/be/src/olap/rowset_builder.cpp
+++ b/be/src/olap/rowset_builder.cpp
@@ -271,11 +271,11 @@ Status
BaseRowsetBuilder::submit_calc_delete_bitmap_task() {
if (segments.size() > 1) {
// calculate delete bitmap between segments
if (config::enable_calc_delete_bitmap_between_segments_concurrently) {
- RETURN_IF_ERROR(_calc_delete_bitmap_token->submit(_tablet,
_rowset->rowset_id(),
- segments,
_delete_bitmap));
+ RETURN_IF_ERROR(_calc_delete_bitmap_token->submit(
+ _tablet, _tablet_schema, _rowset->rowset_id(), segments,
_delete_bitmap));
} else {
-
RETURN_IF_ERROR(_tablet->calc_delete_bitmap_between_segments(_rowset->rowset_id(),
-
segments, _delete_bitmap));
+ RETURN_IF_ERROR(_tablet->calc_delete_bitmap_between_segments(
+ _tablet_schema, _rowset->rowset_id(), segments,
_delete_bitmap));
}
}
diff --git a/be/src/olap/txn_manager.cpp b/be/src/olap/txn_manager.cpp
index f47bd776fac..718df6c0c09 100644
--- a/be/src/olap/txn_manager.cpp
+++ b/be/src/olap/txn_manager.cpp
@@ -549,7 +549,8 @@ Status TxnManager::publish_txn(OlapMeta* meta, TPartitionId
partition_id,
std::vector<segment_v2::SegmentSharedPtr> segments;
RETURN_IF_ERROR(std::static_pointer_cast<BetaRowset>(rowset)->load_segments(&segments));
RETURN_IF_ERROR(tablet->calc_delete_bitmap_between_segments(
- rowset->rowset_id(), segments,
tablet_txn_info->delete_bitmap));
+ rowset->tablet_schema(), rowset->rowset_id(), segments,
+ tablet_txn_info->delete_bitmap));
}
RETURN_IF_ERROR(
diff --git a/be/src/service/backend_service.cpp
b/be/src/service/backend_service.cpp
index 116d4951723..7be7642e753 100644
--- a/be/src/service/backend_service.cpp
+++ b/be/src/service/backend_service.cpp
@@ -591,8 +591,8 @@ void _ingest_binlog(StorageEngine& engine, IngestBinlogArg*
arg) {
}
if (segments.size() > 1) {
// calculate delete bitmap between segments
- status =
local_tablet->calc_delete_bitmap_between_segments(rowset->rowset_id(),
-
segments, delete_bitmap);
+ status = local_tablet->calc_delete_bitmap_between_segments(
+ rowset->tablet_schema(), rowset->rowset_id(), segments,
delete_bitmap);
if (!status) {
LOG(WARNING) << "failed to calculate delete bitmap"
<< ". tablet_id: " << local_tablet->tablet_id()
diff --git
a/regression-test/data/fault_injection_p0/test_mow_alter_seq_multi_segments.out
b/regression-test/data/fault_injection_p0/test_mow_alter_seq_multi_segments.out
new file mode 100644
index 00000000000..aa44268bf31
Binary files /dev/null and
b/regression-test/data/fault_injection_p0/test_mow_alter_seq_multi_segments.out
differ
diff --git
a/regression-test/suites/fault_injection_p0/test_mow_alter_seq_multi_segments.groovy
b/regression-test/suites/fault_injection_p0/test_mow_alter_seq_multi_segments.groovy
new file mode 100644
index 00000000000..d6b17bd5c73
--- /dev/null
+++
b/regression-test/suites/fault_injection_p0/test_mow_alter_seq_multi_segments.groovy
@@ -0,0 +1,94 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_mow_alter_seq_multi_segments", "nonConcurrent") {
+ def table1 = "test_mow_alter_seq_multi_segments"
+ sql "DROP TABLE IF EXISTS ${table1} FORCE;"
+ sql """ CREATE TABLE IF NOT EXISTS ${table1} (
+ `k1` int NOT NULL,
+ `c1` int,
+ `c2` int
+ )UNIQUE KEY(k1)
+ DISTRIBUTED BY HASH(k1) BUCKETS 1
+ PROPERTIES (
+ "enable_unique_key_merge_on_write" = "true",
+ "disable_auto_compaction" = "true",
+ "replication_num" = "1"); """
+
+ sql """insert into ${table1} values(1,1,1);"""
+ qt_sql "select * from ${table1} order by k1;"
+ sql """alter table ${table1} ENABLE FEATURE "SEQUENCE_LOAD" WITH
PROPERTIES ("function_column.sequence_type"="int");"""
+
+ // to cause multi segments and segment compaction
+ def customBeConfig = [
+ doris_scanner_row_bytes : 1
+ ]
+
+ setBeConfigTemporary(customBeConfig) {
+ try {
+ GetDebugPoint().clearDebugPointsForAllBEs()
+ GetDebugPoint().clearDebugPointsForAllFEs()
+ // batch_size is 4164 in csv_reader.cpp
+ // _batch_size is 8192 in vtablet_writer.cpp
+ // to cause multi segments
+ GetDebugPoint().enableDebugPointForAllBEs("MemTable.need_flush")
+
+ Thread.sleep(1000)
+
+ int rows = 4064
+            // load data that will have multi segments and there are duplicate keys between segments
+ String content = ""
+ (1..rows).each {
+ int x = it
+ content += "${x},${x},${x},1\n"
+ }
+ (1..rows).each {
+ int x = it
+ content += "${x},${x},${x},2\n"
+ }
+ def t1 = Thread.start {
+ streamLoad {
+ table "${table1}"
+ set 'column_separator', ','
+ set 'columns', 'k1,c1,c2,seq'
+ set 'function_column.sequence_col', 'seq'
+ inputStream new ByteArrayInputStream(content.getBytes())
+ time 30000
+
+ check { result, exception, startTime, endTime ->
+ if (exception != null) {
+ throw exception
+ }
+ def json = parseJson(result)
+ assert "success" == json.Status.toLowerCase()
+ }
+ }
+ }
+ t1.join()
+ qt_sql "select count() from ${table1};"
+            // qt_sql "select *,__DORIS_VERSION_COL__ as ver, __DORIS_DELETE_SIGN__ as del,__DORIS_SEQUENCE_COL__ as seq from ${table1} where k1<=10 order by k1,__DORIS_VERSION_COL__;"
+ sql "set disable_nereids_rules='ELIMINATE_GROUP_BY';"
+ qt_dup_key_count "select count() from (select k1,count() as cnt
from ${table1} group by k1 having cnt > 1) A;"
+ } catch(Exception e) {
+ logger.info(e.getMessage())
+ throw e
+ } finally {
+ GetDebugPoint().clearDebugPointsForAllBEs()
+ GetDebugPoint().clearDebugPointsForAllFEs()
+ }
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]