This is an automated email from the ASF dual-hosted git repository.

gavinchou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new ed6fb629e4c [fix](cloud) Fix unhandled error when get_partition_boundaries (#56935)
ed6fb629e4c is described below

commit ed6fb629e4cae07e2bc5820e07aca01d14a782b3
Author: Gavin Chou <[email protected]>
AuthorDate: Tue Oct 14 21:33:19 2025 +0800

    [fix](cloud) Fix unhandled error when get_partition_boundaries (#56935)
    
    ```
    (gdb) bt
    #0  doris::cloud::FdbTxnKv::get_partition_boundaries (this=0x7f11ac6e3950, boundaries=0x7f1130017f20)
        at /home/zcp/repo_center/doris_branch-4.0/doris/cloud/src/meta-store/txn_kv.cpp:212
    #1  0x000055e494de9276 in doris::cloud::export_fdb_kv_ranges_details (kv=0x7f11ac6e3950) at /home/zcp/repo_center/doris_branch-4.0/doris/cloud/src/common/metric.cpp:358
    #2  doris::cloud::FdbMetricExporter::export_fdb_metrics (txn_kv=0x7f11ac6e3950) at /home/zcp/repo_center/doris_branch-4.0/doris/cloud/src/common/metric.cpp:393
    #3  0x000055e494dec77d in doris::cloud::FdbMetricExporter::start()::$_0::operator()() const (this=<optimized out>)
        at /home/zcp/repo_center/doris_branch-4.0/doris/cloud/src/common/metric.cpp:415
    #4  std::__invoke_impl<void, doris::cloud::FdbMetricExporter::start()::$_0>(std::__invoke_other, doris::cloud::FdbMetricExporter::start()::$_0&&) (__f=...)
        at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/invoke.h:63
    #5  std::__invoke<doris::cloud::FdbMetricExporter::start()::$_0>(doris::cloud::FdbMetricExporter::start()::$_0&&) (__fn=...)
        at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/invoke.h:98
    #6  std::thread::_Invoker<std::tuple<doris::cloud::FdbMetricExporter::start()::$_0> >::_M_invoke<0ul>(std::_Index_tuple<0ul>) (this=<optimized out>)
        at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/std_thread.h:303
    #7  std::thread::_Invoker<std::tuple<doris::cloud::FdbMetricExporter::start()::$_0> >::operator()() (this=<optimized out>)
        at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/std_thread.h:310
    #8  std::thread::_State_impl<std::thread::_Invoker<std::tuple<doris::cloud::FdbMetricExporter::start()::$_0> > >::_M_run() (this=<optimized out>)
        at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/std_thread.h:255
    #9  0x000055e495e547b0 in execute_native_thread_routine ()
    #10 0x00007f11b1dfeac3 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
    #11 0x00007f11b1e90850 in __closefrom_fallback (from=-1266337808, dirfd_fallback=<optimized out>) at ../sysdeps/unix/sysv/linux/closefrom_fallback.c:45
    #12 0x0000000000000000 in ?? ()
    ```
---
 cloud/src/meta-store/txn_kv.cpp | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/cloud/src/meta-store/txn_kv.cpp b/cloud/src/meta-store/txn_kv.cpp
index 3ef0c3206f1..10e9275c44f 100644
--- a/cloud/src/meta-store/txn_kv.cpp
+++ b/cloud/src/meta-store/txn_kv.cpp
@@ -187,29 +187,36 @@ TxnErrorCode FdbTxnKv::get_partition_boundaries(std::vector<std::string>* bounda
     RangeGetOptions opts;
     opts.snapshot = true;
     std::unique_ptr<RangeGetIterator> iter;
-    do {
+    int num_iterations = 0;
+    int num_kvs = 0;
+    while (iter == nullptr /* may be not init */ || iter->more()) {
         code = txn->get(begin_key, end_key, &iter, opts);
         if (code != TxnErrorCode::TXN_OK) {
+            LOG_WARNING("failed to get fdb boundaries")
+                    .tag("code", code)
+                    .tag("begin_key", hex(begin_key))
+                    .tag("end_key", hex(end_key))
+                    .tag("num_iterations", num_iterations)
+                    .tag("num_kvs", num_kvs);
             if (code == TxnErrorCode::TXN_TOO_OLD) {
                 code = create_txn_with_system_access(&txn);
                 if (code == TxnErrorCode::TXN_OK) {
                     continue;
                 }
             }
-            LOG_WARNING("failed to get fdb boundaries")
-                    .tag("code", code)
-                    .tag("begin_key", hex(begin_key))
-                    .tag("end_key", hex(end_key));
+            LOG_WARNING("failed to recreate txn when get fdb boundaries").tag("code", code);
             return code;
         }
 
         while (iter->has_next()) {
             auto&& [key, value] = iter->next();
             boundaries->emplace_back(key);
+            ++num_kvs;
         }
 
         begin_key = iter->next_begin_key();
-    } while (iter->more());
+        ++num_iterations;
+    }
 
     return TxnErrorCode::TXN_OK;
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to