This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new eff1564cbc6 branch-3.0: [Opt](cloud) Add some injection points for mow (#47712) (#47849)
eff1564cbc6 is described below
commit eff1564cbc6867375e55f59ef10eb956200e4c5d
Author: bobhan1 <[email protected]>
AuthorDate: Tue Feb 18 16:28:26 2025 +0800
branch-3.0: [Opt](cloud) Add some injection points for mow (#47712) (#47849)
pick https://github.com/apache/doris/pull/47712
---
.../cloud/cloud_engine_calc_delete_bitmap_task.cpp | 14 +++
be/src/olap/base_tablet.cpp | 14 +++
.../cloud/test_cloud_mow_correctness_inject.out | Bin 0 -> 185 bytes
.../cloud/test_cloud_mow_correctness_inject.groovy | 94 +++++++++++++++++++++
.../test_cloud_mow_partial_update_retry.groovy | 2 +-
..._mow_stale_resp_load_compaction_conflict.groovy | 2 +-
..._cloud_mow_stale_resp_load_load_conflict.groovy | 2 +-
7 files changed, 125 insertions(+), 3 deletions(-)
diff --git a/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp b/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp
index 9de00993117..e85b160cf2f 100644
--- a/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp
+++ b/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp
@@ -20,6 +20,8 @@
#include <fmt/format.h>
#include <memory>
+#include <random>
+#include <thread>
#include "cloud/cloud_meta_mgr.h"
#include "cloud/cloud_tablet.h"
@@ -208,6 +210,18 @@ Status CloudTabletCalcDeleteBitmapTask::handle() const {
}
int64_t t3 = MonotonicMicros();
+ DBUG_EXECUTE_IF("CloudEngineCalcDeleteBitmapTask.handle.inject_sleep", {
+ auto p = dp->param("percent", 0.01);
+ // 100s > Config.calculate_delete_bitmap_task_timeout_seconds = 60s
+ auto sleep_time = dp->param("sleep", 100);
+ std::mt19937 gen {std::random_device {}()};
+ std::bernoulli_distribution inject_fault {p};
+ if (inject_fault(gen)) {
+ LOG_INFO("injection sleep for {} seconds, txn={}, tablet_id={}", sleep_time,
+ _transaction_id, _tablet_id);
+ std::this_thread::sleep_for(std::chrono::seconds(sleep_time));
+ }
+ });
rowset->set_version(Version(_version, _version));
TabletTxnInfo txn_info;
txn_info.rowset = rowset;
diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp
index ff01a898a31..25398e84346 100644
--- a/be/src/olap/base_tablet.cpp
+++ b/be/src/olap/base_tablet.cpp
@@ -20,6 +20,8 @@
#include <fmt/format.h>
#include <rapidjson/prettywriter.h>
+#include <random>
+
#include "common/status.h"
#include "olap/calc_delete_bitmap_executor.h"
#include "olap/delete_bitmap_calculator.h"
@@ -661,6 +663,18 @@ Status BaseTablet::calc_segment_delete_bitmap(RowsetSharedPtr rowset,
continue;
}
+ DBUG_EXECUTE_IF("BaseTablet::calc_segment_delete_bitmap.inject_err", {
+ auto p = dp->param("percent", 0.01);
+ std::mt19937 gen {std::random_device {}()};
+ std::bernoulli_distribution inject_fault {p};
+ if (inject_fault(gen)) {
+ return Status::InternalError(
+ "injection error in calc_segment_delete_bitmap, "
+ "tablet_id={}, rowset_id={}",
+ tablet_id(), rowset_id.to_string());
+ }
+ });
+
RowsetSharedPtr rowset_find;
auto st = lookup_row_key(key, rowset_schema.get(), true, specified_rowsets, &loc,
dummy_version.first - 1, segment_caches, &rowset_find);
diff --git a/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.out b/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.out
new file mode 100644
index 00000000000..79839efff32
Binary files /dev/null and b/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.out differ
diff --git a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.groovy b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.groovy
new file mode 100644
index 00000000000..3c6ce3e8294
--- /dev/null
+++ b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.groovy
@@ -0,0 +1,94 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.junit.Assert
+import java.util.concurrent.TimeUnit
+import org.awaitility.Awaitility
+
+// test cases to ensure that inject points for mow correctness work as expected
+suite("test_cloud_mow_correctness_inject", "nonConcurrent") {
+ if (!isCloudMode()) {
+ return
+ }
+
+ GetDebugPoint().clearDebugPointsForAllFEs()
+ GetDebugPoint().clearDebugPointsForAllBEs()
+
+ def table1 = "test_cloud_mow_correctness_inject"
+ sql "DROP TABLE IF EXISTS ${table1} FORCE;"
+ sql """ CREATE TABLE IF NOT EXISTS ${table1} (
+ `k1` int NOT NULL,
+ `c1` int,
+ `c2` int
+ )UNIQUE KEY(k1)
+ DISTRIBUTED BY HASH(k1) BUCKETS 1
+ PROPERTIES (
+ "enable_mow_light_delete" = "false",
+ "enable_unique_key_merge_on_write" = "true",
+ "disable_auto_compaction" = "true",
+ "replication_num" = "1"); """
+
+ sql "insert into ${table1} values(1,1,1);"
+ sql "insert into ${table1} values(2,2,2);"
+ sql "insert into ${table1} values(3,3,3);"
+ sql "sync;"
+ qt_sql "select * from ${table1} order by k1;"
+
+ def customFeConfig = [
+ delete_bitmap_lock_expiration_seconds : 10,
+ calculate_delete_bitmap_task_timeout_seconds : 2,
+ mow_calculate_delete_bitmap_retry_times : 3
+ ]
+
+ setFeConfigTemporary(customFeConfig) {
+ try {
+ // 3 * 2s < 10s
+ GetDebugPoint().enableDebugPointForAllBEs("CloudEngineCalcDeleteBitmapTask.handle.inject_sleep", [percent: "1.0", sleep: "10"])
+
+ test {
+ sql "insert into ${table1} values(4,4,4);"
+ exception "Failed to calculate delete bitmap. Timeout."
+ }
+
+ qt_sql "select * from ${table1} order by k1;"
+
+ } catch(Exception e) {
+ logger.info(e.getMessage())
+ throw e
+ } finally {
+ GetDebugPoint().clearDebugPointsForAllBEs()
+ }
+
+
+ try {
+ GetDebugPoint().enableDebugPointForAllBEs("BaseTablet::calc_segment_delete_bitmap.inject_err", [percent: "1.0"])
+
+ test {
+ sql "insert into ${table1} values(5,5,5);"
+ exception "injection error"
+ }
+
+ qt_sql "select * from ${table1} order by k1;"
+ } catch(Exception e) {
+ logger.info(e.getMessage())
+ throw e
+ } finally {
+ GetDebugPoint().clearDebugPointsForAllBEs()
+ }
+
+ }
+}
diff --git
a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_partial_update_retry.groovy
b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_partial_update_retry.groovy
index 13abaf1ffca..4f091bef8ea 100644
---
a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_partial_update_retry.groovy
+++
b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_partial_update_retry.groovy
@@ -56,7 +56,7 @@ suite("test_cloud_mow_partial_update_retry", "nonConcurrent") {
GetDebugPoint().enableDebugPointForAllBEs("BaseTablet::update_delete_bitmap.block",
[wait_token: "token1"])
// the first load
- t1 = Thread.start {
+ def t1 = Thread.start {
sql "set enable_unique_key_partial_update=true;"
sql "sync;"
sql "insert into ${table1}(k1,c1) values(1,999),(2,666);"
diff --git
a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_compaction_conflict.groovy
b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_compaction_conflict.groovy
index 7b867088d1b..b380567bf54 100644
---
a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_compaction_conflict.groovy
+++
b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_compaction_conflict.groovy
@@ -74,7 +74,7 @@ suite("test_cloud_mow_stale_resp_load_compaction_conflict", "nonConcurrent") {
GetDebugPoint().enableDebugPointForAllBEs("BaseTablet::update_delete_bitmap.block",
[wait_token: "token1"])
// the first load
- t1 = Thread.start {
+ def t1 = Thread.start {
sql "insert into ${table1} values(1,999,999),(2,888,888);"
}
diff --git
a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_load_conflict.groovy
b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_load_conflict.groovy
index 377ff70cf21..faafb6b8482 100644
---
a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_load_conflict.groovy
+++
b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_load_conflict.groovy
@@ -55,7 +55,7 @@ suite("test_cloud_mow_stale_resp_load_load_conflict", "nonConcurrent") {
GetDebugPoint().enableDebugPointForAllBEs("BaseTablet::update_delete_bitmap.block",
[wait_token: "token1"])
// the first load
- t1 = Thread.start {
+ def t1 = Thread.start {
sql "insert into ${table1} values(1,999,999),(2,888,888);"
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]