This is an automated email from the ASF dual-hosted git repository.

gavinchou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 01a6423da01 [enhance](test) Add injection points to block FE/BE (#59165)
01a6423da01 is described below

commit 01a6423da01f201c51d9f404efd86856453977cd
Author: Luwei <[email protected]>
AuthorDate: Mon Jan 12 14:28:49 2026 +0800

    [enhance](test) Add injection points to block FE/BE (#59165)
---
 be/src/cloud/cloud_stream_load_executor.cpp        |  5 +++++
 be/src/cloud/cloud_tablet_mgr.cpp                  |  2 ++
 be/src/cloud/injection_point_action.cpp            | 16 ++++++++++++++
 .../main/java/org/apache/doris/catalog/Env.java    | 25 +++++++++++++++++++++-
 .../transaction/CloudGlobalTransactionMgr.java     |  1 +
 .../apache/doris/httpv2/rest/MetricsAction.java    |  2 ++
 .../doris/transaction/DatabaseTransactionMgr.java  |  1 +
 7 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/be/src/cloud/cloud_stream_load_executor.cpp 
b/be/src/cloud/cloud_stream_load_executor.cpp
index e982acea923..760a4f46cf3 100644
--- a/be/src/cloud/cloud_stream_load_executor.cpp
+++ b/be/src/cloud/cloud_stream_load_executor.cpp
@@ -104,6 +104,11 @@ Status 
CloudStreamLoadExecutor::operate_txn_2pc(StreamLoadContext* ctx) {
 
 Status CloudStreamLoadExecutor::commit_txn(StreamLoadContext* ctx) {
     DBUG_EXECUTE_IF("StreamLoadExecutor.commit_txn.block", DBUG_BLOCK);
+    DBUG_EXECUTE_IF("StreamLoadExecutor.commit_txn.crash", {
+        LOG(INFO) << "debug point " << DP_NAME << " trigger crash";
+        volatile int* p = nullptr;
+        *p = 1;
+    });
     // forward to fe to excute commit transaction for MoW table
     if (ctx->is_mow_table() || !config::enable_stream_load_commit_txn_on_be ||
         ctx->load_type == TLoadType::ROUTINE_LOAD) {
diff --git a/be/src/cloud/cloud_tablet_mgr.cpp 
b/be/src/cloud/cloud_tablet_mgr.cpp
index 854bb29315f..a3b6855ae92 100644
--- a/be/src/cloud/cloud_tablet_mgr.cpp
+++ b/be/src/cloud/cloud_tablet_mgr.cpp
@@ -29,6 +29,7 @@
 #include "common/status.h"
 #include "olap/lru_cache.h"
 #include "runtime/memory/cache_policy.h"
+#include "util/debug_points.h"
 #include "util/stack_util.h"
 
 namespace doris {
@@ -165,6 +166,7 @@ Result<std::shared_ptr<CloudTablet>> 
CloudTabletMgr::get_tablet(int64_t tablet_i
                                                                 
SyncRowsetStats* sync_stats,
                                                                 bool 
force_use_only_cached,
                                                                 bool 
cache_on_miss) {
+    DBUG_EXECUTE_IF("CloudTabletMgr::get_tablet.block", DBUG_BLOCK);
     // LRU value type. `Value`'s lifetime MUST NOT be longer than 
`CloudTabletMgr`
     class Value : public LRUCacheValueBase {
     public:
diff --git a/be/src/cloud/injection_point_action.cpp 
b/be/src/cloud/injection_point_action.cpp
index 521ff52ffc3..0987a084efe 100644
--- a/be/src/cloud/injection_point_action.cpp
+++ b/be/src/cloud/injection_point_action.cpp
@@ -279,6 +279,17 @@ void set_return_error(const std::string& point, 
HttpRequest* req) {
     HttpChannel::send_reply(req, HttpStatus::OK, "OK");
 }
 
+void set_segfault(const std::string& point, HttpRequest* req) {
+    auto sp = SyncPoint::get_instance();
+    sp->set_call_back(point, [point](auto&&) {
+        LOG(INFO) << "injection point hit, point=" << point << " trigger segfault";
+        // Intentional null dereference to crash the BE for testing.
+        volatile int* p = nullptr;
+        *p = 1;
+    });
+    HttpChannel::send_reply(req, HttpStatus::OK, "OK");
+}
+
 void handle_set(HttpRequest* req) {
     auto& point = req->param("name");
     if (point.empty()) {
@@ -302,6 +313,9 @@ void handle_set(HttpRequest* req) {
     } else if (behavior == "return_error") {
         set_return_error(point, req);
         return;
+    } else if (behavior == "segfault") {
+        set_segfault(point, req);
+        return;
     }
     HttpChannel::send_reply(req, HttpStatus::BAD_REQUEST, "unknown behavior: " 
+ behavior);
 }
@@ -377,6 +391,7 @@ InjectionPointAction::InjectionPointAction() = default;
 //                 which is an int, valid values can be found in status.h, 
e.g. -235 or -230,
 //                 if `code` is not present return Status::InternalError. 
Optional `probability`
 //                 determines the percentage of times to inject the error 
(default 100).
+// * segfault: dereference a null pointer to crash BE intentionally
 // ```
 // curl 
"be_ip:http_port/api/injection_point/set?name=${injection_point_name}&behavior=sleep&duration=${x_millsec}"
 # sleep x millisecs
 // curl 
"be_ip:http_port/api/injection_point/set?name=${injection_point_name}&behavior=return"
 # return void
@@ -384,6 +399,7 @@ InjectionPointAction::InjectionPointAction() = default;
 // curl 
"be_ip:http_port/api/injection_point/set?name=${injection_point_name}&behavior=return_error"
 # internal error
 // curl 
"be_ip:http_port/api/injection_point/set?name=${injection_point_name}&behavior=return_error&code=${code}"
 # -235
 // curl 
"be_ip:http_port/api/injection_point/set?name=${injection_point_name}&behavior=return_error&code=${code}&probability=50"
 # inject with 50% probability
+// curl 
"be_ip:http_port/api/injection_point/set?name=${injection_point_name}&behavior=segfault"
 # crash BE
 // ```
 void InjectionPointAction::handle(HttpRequest* req) {
     LOG(INFO) << "handle InjectionPointAction " << req->debug_string();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
index bbd5d34e2af..8c2f5d81a6c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
@@ -68,6 +68,7 @@ import org.apache.doris.common.publish.TopicPublisher;
 import org.apache.doris.common.publish.TopicPublisherThread;
 import org.apache.doris.common.publish.WorkloadGroupPublisher;
 import org.apache.doris.common.util.Daemon;
+import org.apache.doris.common.util.DebugPointUtil;
 import org.apache.doris.common.util.DynamicPartitionUtil;
 import org.apache.doris.common.util.HttpURLUtil;
 import org.apache.doris.common.util.MasterDaemon;
@@ -1093,6 +1094,29 @@ public class Env {
         }
     }
 
+    // Block the caller while holding the global env lock when the given debug point is enabled.
+    // Used to simulate a stuck import path that drags other operations waiting on the same lock.
+    public void debugBlockAllOnGlobalLock(String debugPointName) {
+        if (!DebugPointUtil.isEnable(debugPointName)) {
+            return;
+        }
+        try {
+            lock.lock();
+            LOG.info("debug point {} enabled, block and hold env lock", 
debugPointName);
+            while (DebugPointUtil.isEnable(debugPointName)) {
+                Thread.sleep(1000);
+            }
+            LOG.info("debug point {} cleared, release env lock", 
debugPointName);
+        } catch (InterruptedException e) {
+            LOG.warn("debug point {} interrupted while blocking env lock", 
debugPointName);
+            Thread.currentThread().interrupt();
+        } finally {
+            if (lock.isHeldByCurrentThread()) {
+                lock.unlock();
+            }
+        }
+    }
+
     public String getBdbDir() {
         return bdbDir;
     }
@@ -7389,4 +7413,3 @@ public class Env {
 
     protected void cloneClusterSnapshot() throws Exception {}
 }
-
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java
 
b/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java
index c791ccdd27e..37262c49ec1 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java
@@ -597,6 +597,7 @@ public class CloudGlobalTransactionMgr implements 
GlobalTransactionMgrIface {
             throw new TransactionCommitFailedException(
                     "disable_load_job is set to true, all load jobs are not 
allowed");
         }
+        Env.getCurrentEnv().debugBlockAllOnGlobalLock("FE.BLOCK_IMPORT_LOCK");
 
         if (!mowTableList.isEmpty()) {
             List<Long> mowTableIds = 
mowTableList.stream().map(Table::getId).collect(Collectors.toList());
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/MetricsAction.java 
b/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/MetricsAction.java
index 80eb824fcdf..52066b25fcf 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/MetricsAction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/MetricsAction.java
@@ -17,6 +17,7 @@
 
 package org.apache.doris.httpv2.rest;
 
+import org.apache.doris.catalog.Env;
 import org.apache.doris.common.Config;
 import org.apache.doris.metric.JsonMetricVisitor;
 import org.apache.doris.metric.MetricRepo;
@@ -46,6 +47,7 @@ public class MetricsAction extends RestBaseController {
         if (Config.enable_all_http_auth) {
             executeCheckPassword(request, response);
         }
+        Env.getCurrentEnv().debugBlockAllOnGlobalLock("FE.BLOCK_IMPORT_LOCK");
 
         String type = request.getParameter(TYPE_PARAM);
         MetricVisitor visitor = null;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java
 
b/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java
index 3497e568c5d..ade04e95b15 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java
@@ -774,6 +774,7 @@ public class DatabaseTransactionMgr {
     public void commitTransaction(List<Table> tableList, long transactionId, 
List<TabletCommitInfo> tabletCommitInfos,
                                   TxnCommitAttachment txnCommitAttachment, 
Boolean is2PC)
             throws UserException {
+        env.debugBlockAllOnGlobalLock("FE.BLOCK_IMPORT_LOCK");
         // check status
         // the caller method already own tables' write lock
         Database db = env.getInternalCatalog().getDbOrMetaException(dbId);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to