This is an automated email from the ASF dual-hosted git repository.
gavinchou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 01a6423da01 [enhance](test) Add injection points to block FE/BE (#59165)
01a6423da01 is described below
commit 01a6423da01f201c51d9f404efd86856453977cd
Author: Luwei <[email protected]>
AuthorDate: Mon Jan 12 14:28:49 2026 +0800
[enhance](test) Add injection points to block FE/BE (#59165)
---
be/src/cloud/cloud_stream_load_executor.cpp | 5 +++++
be/src/cloud/cloud_tablet_mgr.cpp | 2 ++
be/src/cloud/injection_point_action.cpp | 16 ++++++++++++++
.../main/java/org/apache/doris/catalog/Env.java | 25 +++++++++++++++++++++-
.../transaction/CloudGlobalTransactionMgr.java | 1 +
.../apache/doris/httpv2/rest/MetricsAction.java | 2 ++
.../doris/transaction/DatabaseTransactionMgr.java | 1 +
7 files changed, 51 insertions(+), 1 deletion(-)
diff --git a/be/src/cloud/cloud_stream_load_executor.cpp b/be/src/cloud/cloud_stream_load_executor.cpp
index e982acea923..760a4f46cf3 100644
--- a/be/src/cloud/cloud_stream_load_executor.cpp
+++ b/be/src/cloud/cloud_stream_load_executor.cpp
@@ -104,6 +104,11 @@ Status CloudStreamLoadExecutor::operate_txn_2pc(StreamLoadContext* ctx) {
Status CloudStreamLoadExecutor::commit_txn(StreamLoadContext* ctx) {
DBUG_EXECUTE_IF("StreamLoadExecutor.commit_txn.block", DBUG_BLOCK);
+ DBUG_EXECUTE_IF("StreamLoadExecutor.commit_txn.crash", {
+ LOG(INFO) << "debug point " << DP_NAME << " trigger crash";
+ volatile int* p = nullptr;
+ *p = 1;
+ });
// forward to fe to excute commit transaction for MoW table
if (ctx->is_mow_table() || !config::enable_stream_load_commit_txn_on_be ||
ctx->load_type == TLoadType::ROUTINE_LOAD) {
diff --git a/be/src/cloud/cloud_tablet_mgr.cpp b/be/src/cloud/cloud_tablet_mgr.cpp
index 854bb29315f..a3b6855ae92 100644
--- a/be/src/cloud/cloud_tablet_mgr.cpp
+++ b/be/src/cloud/cloud_tablet_mgr.cpp
@@ -29,6 +29,7 @@
#include "common/status.h"
#include "olap/lru_cache.h"
#include "runtime/memory/cache_policy.h"
+#include "util/debug_points.h"
#include "util/stack_util.h"
namespace doris {
@@ -165,6 +166,7 @@ Result<std::shared_ptr<CloudTablet>> CloudTabletMgr::get_tablet(int64_t tablet_i
SyncRowsetStats* sync_stats,
bool force_use_only_cached,
bool cache_on_miss) {
+ DBUG_EXECUTE_IF("CloudTabletMgr::get_tablet.block", DBUG_BLOCK);
// LRU value type. `Value`'s lifetime MUST NOT be longer than `CloudTabletMgr`
class Value : public LRUCacheValueBase {
public:
diff --git a/be/src/cloud/injection_point_action.cpp b/be/src/cloud/injection_point_action.cpp
index 521ff52ffc3..0987a084efe 100644
--- a/be/src/cloud/injection_point_action.cpp
+++ b/be/src/cloud/injection_point_action.cpp
@@ -279,6 +279,17 @@ void set_return_error(const std::string& point, HttpRequest* req) {
HttpChannel::send_reply(req, HttpStatus::OK, "OK");
}
+void set_segfault(const std::string& point, HttpRequest* req) {
+ auto sp = SyncPoint::get_instance();
+ sp->set_call_back(point, [point](auto&&) {
+ LOG(INFO) << "injection point hit, point=" << point << " trigger segfault";
+ // Intentional null dereference to crash the BE for testing.
+ volatile int* p = nullptr;
+ *p = 1;
+ });
+ HttpChannel::send_reply(req, HttpStatus::OK, "OK");
+}
+
void handle_set(HttpRequest* req) {
auto& point = req->param("name");
if (point.empty()) {
@@ -302,6 +313,9 @@ void handle_set(HttpRequest* req) {
} else if (behavior == "return_error") {
set_return_error(point, req);
return;
+ } else if (behavior == "segfault") {
+ set_segfault(point, req);
+ return;
}
HttpChannel::send_reply(req, HttpStatus::BAD_REQUEST, "unknown behavior: " + behavior);
}
@@ -377,6 +391,7 @@ InjectionPointAction::InjectionPointAction() = default;
// which is an int, valid values can be found in status.h, e.g. -235 or -230,
// if `code` is not present return Status::InternalError. Optional `probability`
// determines the percentage of times to inject the error (default 100).
+// * segfault: dereference a null pointer to crash BE intentionally
// ```
// curl "be_ip:http_port/api/injection_point/set?name=${injection_point_name}&behavior=sleep&duration=${x_millsec}" # sleep x millisecs
// curl "be_ip:http_port/api/injection_point/set?name=${injection_point_name}&behavior=return" # return void
@@ -384,6 +399,7 @@ InjectionPointAction::InjectionPointAction() = default;
// curl "be_ip:http_port/api/injection_point/set?name=${injection_point_name}&behavior=return_error" # internal error
// curl "be_ip:http_port/api/injection_point/set?name=${injection_point_name}&behavior=return_error&code=${code}" # -235
// curl "be_ip:http_port/api/injection_point/set?name=${injection_point_name}&behavior=return_error&code=${code}&probability=50" # inject with 50% probability
+// curl "be_ip:http_port/api/injection_point/set?name=${injection_point_name}&behavior=segfault" # crash BE
// ```
void InjectionPointAction::handle(HttpRequest* req) {
LOG(INFO) << "handle InjectionPointAction " << req->debug_string();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
index bbd5d34e2af..8c2f5d81a6c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
@@ -68,6 +68,7 @@ import org.apache.doris.common.publish.TopicPublisher;
import org.apache.doris.common.publish.TopicPublisherThread;
import org.apache.doris.common.publish.WorkloadGroupPublisher;
import org.apache.doris.common.util.Daemon;
+import org.apache.doris.common.util.DebugPointUtil;
import org.apache.doris.common.util.DynamicPartitionUtil;
import org.apache.doris.common.util.HttpURLUtil;
import org.apache.doris.common.util.MasterDaemon;
@@ -1093,6 +1094,29 @@ public class Env {
}
}
+ // Block the caller while holding the global env lock when the given debug point is enabled.
+ // Used to simulate a stuck import path that drags other operations waiting on the same lock.
+ public void debugBlockAllOnGlobalLock(String debugPointName) {
+ if (!DebugPointUtil.isEnable(debugPointName)) {
+ return;
+ }
+ try {
+ lock.lock();
+ LOG.info("debug point {} enabled, block and hold env lock", debugPointName);
+ while (DebugPointUtil.isEnable(debugPointName)) {
+ Thread.sleep(1000);
+ }
+ LOG.info("debug point {} cleared, release env lock", debugPointName);
+ } catch (InterruptedException e) {
+ LOG.warn("debug point {} interrupted while blocking env lock", debugPointName);
+ Thread.currentThread().interrupt();
+ } finally {
+ if (lock.isHeldByCurrentThread()) {
+ lock.unlock();
+ }
+ }
+ }
+
public String getBdbDir() {
return bdbDir;
}
@@ -7389,4 +7413,3 @@ public class Env {
protected void cloneClusterSnapshot() throws Exception {}
}
-
diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java
index c791ccdd27e..37262c49ec1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java
@@ -597,6 +597,7 @@ public class CloudGlobalTransactionMgr implements GlobalTransactionMgrIface {
throw new TransactionCommitFailedException(
"disable_load_job is set to true, all load jobs are not allowed");
}
+ Env.getCurrentEnv().debugBlockAllOnGlobalLock("FE.BLOCK_IMPORT_LOCK");
if (!mowTableList.isEmpty()) {
List<Long> mowTableIds = mowTableList.stream().map(Table::getId).collect(Collectors.toList());
diff --git a/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/MetricsAction.java b/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/MetricsAction.java
index 80eb824fcdf..52066b25fcf 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/MetricsAction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/MetricsAction.java
@@ -17,6 +17,7 @@
package org.apache.doris.httpv2.rest;
+import org.apache.doris.catalog.Env;
import org.apache.doris.common.Config;
import org.apache.doris.metric.JsonMetricVisitor;
import org.apache.doris.metric.MetricRepo;
@@ -46,6 +47,7 @@ public class MetricsAction extends RestBaseController {
if (Config.enable_all_http_auth) {
executeCheckPassword(request, response);
}
+ Env.getCurrentEnv().debugBlockAllOnGlobalLock("FE.BLOCK_IMPORT_LOCK");
String type = request.getParameter(TYPE_PARAM);
MetricVisitor visitor = null;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java b/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java
index 3497e568c5d..ade04e95b15 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java
@@ -774,6 +774,7 @@ public class DatabaseTransactionMgr {
public void commitTransaction(List<Table> tableList, long transactionId, List<TabletCommitInfo> tabletCommitInfos,
TxnCommitAttachment txnCommitAttachment, Boolean is2PC)
throws UserException {
+ env.debugBlockAllOnGlobalLock("FE.BLOCK_IMPORT_LOCK");
// check status
// the caller method already own tables' write lock
Database db = env.getInternalCatalog().getDbOrMetaException(dbId);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]