This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch branch-1.18.x
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/branch-1.18.x by this push:
     new 2800e5fdf KUDU-3545 fix race condition when (un)registring EH frames
2800e5fdf is described below

commit 2800e5fdf788c7739f432d1c20cdcf4d3efdd4d3
Author: Alexey Serbin <[email protected]>
AuthorDate: Fri Dec 12 21:48:21 2025 -0800

    KUDU-3545 fix race condition when (un)registring EH frames
    
    This patch addresses race condition in libgcc which is triggered by
    concurrent registering/unregistering of EH frames by LLVM's
    RTDyldMemoryManager.  Also, since libgcc expects a null-terminated
    list of FDEs, this patch adds extra 4 zero bytes in the end of each
    section allocated for EH frames.
    
    I added a new test scenario to reproduce the race, and it was triggering
    TSAN warnings every time if rolling back the fix (see below).  For more
    robust and more reliable race reproduction, I added a new flag
    (tagged as 'hidden', default value is 1) to control the maximum number
    of threads in the codegen compiler thread pool:
      --codegen_compiler_manager_pool_max_threads_num
    We might start using it later to allow for parallel compilation of row
    projection functions in the presence of concurrent scan requests.
    
      WARNING: ThreadSanitizer: data race (pid=3406438)
        Write of size 8 at 0x7b4000008228 by thread T6 (mutexes: write M1879):
          #0 memmove sanitizer_common/sanitizer_common_interceptors.inc:791:3 
(codegen-test+0x2edb86)
          #1 <null> <null> (libgcc_s.so.1+0x1345d)
          #2 llvm::RuntimeDyldELF::registerEHFrames()
          #3 llvm::RuntimeDyld::registerEHFrames()
          #4 llvm::MCJIT::finalizeLoadedModules()
          #5 llvm::MCJIT::finalizeObject()
          #6 kudu::codegen::ModuleBuilder::Compile()
          ...
    
        Previous write of size 8 at 0x7b4000008228 by thread T5 (mutexes: write 
M1869):
          #0 malloc 
/root/Projects/kudu/thirdparty/src/llvm-11.0.0.src/projects/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp:652:5
 (codegen-test+0x2b2c34)
          #1 <null> <null> (libgcc_s.so.1+0x1318a)
          #2 llvm::RuntimeDyldELF::registerEHFrames()
          #3 llvm::RuntimeDyld::registerEHFrames()
          #4 llvm::MCJIT::finalizeLoadedModules()
          #5 llvm::MCJIT::finalizeObject()
          #6 kudu::codegen::ModuleBuilder::Compile()
          #7 kudu::codegen::RowProjectorFunctions::Create()
          #8 kudu::codegen::CodeGenerator::CompileRowProjector()
          ...
    
      SUMMARY: ThreadSanitizer: data race (/lib64/libgcc_s.so.1+0x1345d)
    
    Change-Id: Ia91ada71f663451e11149a190def6a8ccd0b4ef5
    Reviewed-on: http://gerrit.cloudera.org:8080/23785
    Reviewed-by: Zoltan Martonka <[email protected]>
    Reviewed-by: Attila Bukor <[email protected]>
    Tested-by: Attila Bukor <[email protected]>
    (cherry picked from commit dc741732bb6a40213e11a13869fad06a49a209a3)
    Reviewed-on: http://gerrit.cloudera.org:8080/23786
    Reviewed-by: Abhishek Chennaka <[email protected]>
    Tested-by: Alexey Serbin <[email protected]>
---
 src/kudu/codegen/CMakeLists.txt         |  1 +
 src/kudu/codegen/codegen-test.cc        | 61 +++++++++++++++++++++++
 src/kudu/codegen/compilation_manager.cc |  9 ++--
 src/kudu/codegen/jit_frame_manager.cc   | 85 +++++++++++++++++++++++++++++++++
 src/kudu/codegen/jit_frame_manager.h    | 57 ++++++++++++++++++++++
 src/kudu/codegen/module_builder.cc      |  3 ++
 6 files changed, 213 insertions(+), 3 deletions(-)

diff --git a/src/kudu/codegen/CMakeLists.txt b/src/kudu/codegen/CMakeLists.txt
index 6ffa499e8..bb1053514 100644
--- a/src/kudu/codegen/CMakeLists.txt
+++ b/src/kudu/codegen/CMakeLists.txt
@@ -203,6 +203,7 @@ add_library(codegen
   code_cache.cc
   code_generator.cc
   compilation_manager.cc
+  jit_frame_manager.cc
   jit_wrapper.cc
   module_builder.cc
   row_projector.cc
diff --git a/src/kudu/codegen/codegen-test.cc b/src/kudu/codegen/codegen-test.cc
index 0caa04128..c7438c24c 100644
--- a/src/kudu/codegen/codegen-test.cc
+++ b/src/kudu/codegen/codegen-test.cc
@@ -21,6 +21,7 @@
 #include <memory>
 #include <ostream>
 #include <string>
+#include <thread>
 #include <vector>
 
 // IWYU pragma: no_include "testing/base/public/gunit.h"
@@ -40,6 +41,7 @@
 #include "kudu/common/schema.h"
 #include "kudu/gutil/ref_counted.h"
 #include "kudu/gutil/singleton.h"
+#include "kudu/util/countdown_latch.h"
 #include "kudu/util/logging_test_util.h"
 #include "kudu/util/memory/arena.h"
 #include "kudu/util/random.h"
@@ -51,10 +53,12 @@
 
 using std::string;
 using std::unique_ptr;
+using std::thread;
 using std::vector;
 
 DECLARE_bool(codegen_dump_mc);
 DECLARE_int32(codegen_cache_capacity);
+DECLARE_int32(codegen_compiler_manager_pool_max_threads_num);
 
 namespace kudu {
 
@@ -433,4 +437,61 @@ TEST_F(CodegenTest, TestCodeCache) {
   }
 }
 
+// Test scenario to reproduce the race condition that leads to the behavior
+// described by KUDU-3545 before it was addressed. This works accross different
+// compilers/toolchains, while KUDU-3545 originally stipulated that the issue
+// is specific to GCC13 and the related toolchain.
+TEST_F(CodegenTest, CodegenEHFrameRace) {
+  constexpr const size_t kNumThreads = 2;
+
+  // For easier reproduction of the race, allow for multiple threads in the
+  // codegen compile thread pool. It results in a race condition in libgcc
+  // when concurrently registering EH frames.
+  FLAGS_codegen_compiler_manager_pool_max_threads_num = 3;
+
+  // A smaller codegen cache might help to create situations when a race
+  // condition happens when just a single codegen compiler thread is active.
+  // Upon adding a new entry into the cache, an entry would be evicted
+  // if it's not enough space, and its EH frames would unregistered,
+  // while another compiler request might be running at the only active thread
+  // in the compiler thread pool, registering correspoding EH frames.
+  // It creates a race condition in libgcc, and that's the original issue
+  // behind KUDU-3545.
+  FLAGS_codegen_cache_capacity = 10;
+  Singleton<CompilationManager>::UnsafeReset();
+  CompilationManager* cm = CompilationManager::GetSingleton();
+
+  CountDownLatch start(kNumThreads);
+  vector<thread> threads;
+  threads.reserve(kNumThreads);
+  for (size_t idx = 0; idx < kNumThreads; ++idx) {
+    threads.emplace_back([&]() {
+      start.Wait();
+      for (auto pass = 0; pass < 10; ++pass) {
+        // Generate all permutations of the first 4 columns (24 permutations).
+        // For each such permutation, reate a projection and request code
+        // generation.
+        vector<size_t> perm = { 0, 1, 2, 4 };
+        do {
+          SCOPED_TRACE(perm);
+          Schema projection;
+          const auto s = CreatePartialSchema(perm, &projection);
+          if (!s.ok()) {
+            LOG(WARNING) << s.ToString();
+            return;
+          }
+
+          unique_ptr<CodegenRP> projector;
+          cm->RequestRowProjector(&base_, &projection, &projector);
+        } while (std::next_permutation(perm.begin(), perm.end()));
+      }
+    });
+  }
+  start.CountDown(kNumThreads);
+
+  for (auto& t : threads) {
+    t.join();
+  }
+}
+
 } // namespace kudu
diff --git a/src/kudu/codegen/compilation_manager.cc 
b/src/kudu/codegen/compilation_manager.cc
index 9dc2d2b05..de16d4a82 100644
--- a/src/kudu/codegen/compilation_manager.cc
+++ b/src/kudu/codegen/compilation_manager.cc
@@ -62,6 +62,10 @@ DEFINE_int32(codegen_queue_capacity, 100, "Number of tasks 
which may be put in t
              "generation task queue.");
 TAG_FLAG(codegen_queue_capacity, experimental);
 
+DEFINE_int32(codegen_compiler_manager_pool_max_threads_num, 1,
+             "Maximum number of threads in the compiler manager pool");
+TAG_FLAG(codegen_compiler_manager_pool_max_threads_num, hidden);
+
 METRIC_DEFINE_gauge_uint64(server, code_cache_hits, "Codegen Cache Hits",
                            kudu::MetricUnit::kCacheHits,
                            "Number of codegen cache hits since start",
@@ -118,8 +122,7 @@ class CompilationTask {
       RETURN_NOT_OK(generator_->CompileRowProjector(base_, proj_, &functions));
     }
 
-    RETURN_NOT_OK(cache_->AddEntry(functions));
-    return Status::OK();
+    return cache_->AddEntry(functions);
   }
 
   Schema base_;
@@ -138,7 +141,7 @@ CompilationManager::CompilationManager()
     query_counter_(0) {
   CHECK_OK(ThreadPoolBuilder("compiler_manager_pool")
            .set_min_threads(0)
-           .set_max_threads(1)
+           
.set_max_threads(FLAGS_codegen_compiler_manager_pool_max_threads_num)
            .set_max_queue_size(FLAGS_codegen_queue_capacity)
            .set_idle_timeout(MonoDelta::FromMilliseconds(100))
            .Build(&pool_));
diff --git a/src/kudu/codegen/jit_frame_manager.cc 
b/src/kudu/codegen/jit_frame_manager.cc
new file mode 100644
index 000000000..5adefa258
--- /dev/null
+++ b/src/kudu/codegen/jit_frame_manager.cc
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/codegen/jit_frame_manager.h"
+
+#include <cstdint>
+#include <iterator>
+
+#include <llvm/ExecutionEngine/SectionMemoryManager.h>
+
+// External symbols from libgcc/libunwind.
+extern "C" void __register_frame(void*);  // 
NOLINT(bugprone-reserved-identifier)
+extern "C" void __deregister_frame(void*);// 
NOLINT(bugprone-reserved-identifier)
+
+using llvm::SectionMemoryManager;
+using llvm::StringRef;
+using std::lock_guard;
+
+namespace kudu {
+namespace codegen {
+
+// Initialize the static mutex
+std::mutex JITFrameManager::kRegistrationMutex;
+
+JITFrameManager::~JITFrameManager() {
+  // Be explicit about avoiding the virtual dispatch: invoke
+  // deregisterEHFramesImpl() instead of deregisterEHFrames().
+  deregisterEHFramesImpl();
+}
+
+uint8_t* JITFrameManager::allocateCodeSection(uintptr_t size,
+                                              unsigned alignment,
+                                              unsigned section_id,
+                                              StringRef section_name) {
+    // Add extra padding for EH frame section: it's zeroed out later upon
+    // registerEHFrames() calls.
+    if (section_name == ".eh_frame") {
+      size += 4;
+    }
+    return SectionMemoryManager::allocateCodeSection(
+        size, alignment, section_id, section_name);
+  }
+
+void JITFrameManager::registerEHFrames(uint8_t* addr,
+                                       uint64_t /*load_addr*/,
+                                       size_t size) {
+  lock_guard guard(kRegistrationMutex);
+
+  // libgcc expects a null-terminated list of FDEs: write 4 zero bytes in the
+  // end of the allocated section.
+  auto* terminator = reinterpret_cast<uint32_t*>(addr + size);
+  *terminator = 0;
+
+  __register_frame(addr);
+  registered_frames_.push_back(addr);
+}
+
+void JITFrameManager::deregisterEHFrames() {
+  return deregisterEHFramesImpl();
+}
+
+void JITFrameManager::deregisterEHFramesImpl() {
+  lock_guard guard(kRegistrationMutex);
+  for (auto it = registered_frames_.rbegin(); it != registered_frames_.rend(); 
++it) {
+    __deregister_frame(*it);
+  }
+  registered_frames_.clear();
+}
+
+} // namespace codegen
+} // namespace kudu
diff --git a/src/kudu/codegen/jit_frame_manager.h 
b/src/kudu/codegen/jit_frame_manager.h
new file mode 100644
index 000000000..7d1099e20
--- /dev/null
+++ b/src/kudu/codegen/jit_frame_manager.h
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <deque>
+#include <mutex>
+
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
+#include <llvm/ADT/StringRef.h>
+
+namespace kudu {
+namespace codegen {
+
+class JITFrameManager : public llvm::SectionMemoryManager {
+ public:
+  JITFrameManager() = default;
+  ~JITFrameManager() override;
+
+  // Override to add space for the 4-byte null terminator.
+  uint8_t* allocateCodeSection(uintptr_t size,
+                               unsigned alignment,
+                               unsigned section_id,
+                               llvm::StringRef section_name) override;
+
+  void registerEHFrames(uint8_t* addr, uint64_t load_addr, size_t size) 
override;
+  void deregisterEHFrames() override;
+
+ private:
+  void deregisterEHFramesImpl();
+
+  // Mutex to prevent races in libgcc/libunwind. Since it should work across
+  // multiple instances, it's a static one.
+  static std::mutex kRegistrationMutex;
+
+  // Container to keep track of registered frames: this information is 
necessary
+  // for unregistring all of them.
+  std::deque<uint8_t*> registered_frames_;
+};
+
+} // namespace codegen
+} // namespace kudu
diff --git a/src/kudu/codegen/module_builder.cc 
b/src/kudu/codegen/module_builder.cc
index cd5d4c022..3e33840b2 100644
--- a/src/kudu/codegen/module_builder.cc
+++ b/src/kudu/codegen/module_builder.cc
@@ -35,6 +35,7 @@
 #include <llvm/ADT/iterator.h>
 #include <llvm/ExecutionEngine/ExecutionEngine.h>
 #include <llvm/ExecutionEngine/MCJIT.h> // IWYU pragma: keep
+#include <llvm/ExecutionEngine/RTDyldMemoryManager.h>
 #include <llvm/IR/Attributes.h>
 #include <llvm/IR/Constant.h>
 #include <llvm/IR/Constants.h>
@@ -58,6 +59,7 @@
 #include <llvm/Transforms/IPO/AlwaysInliner.h>
 #include <llvm/Transforms/IPO/PassManagerBuilder.h>
 
+#include "kudu/codegen/jit_frame_manager.h"
 #include "kudu/codegen/precompiled.ll.h"
 #include "kudu/gutil/basictypes.h"
 #include "kudu/gutil/map-util.h"
@@ -331,6 +333,7 @@ Status ModuleBuilder::Compile(unique_ptr<ExecutionEngine>* 
out) {
 #endif
   Module* module = module_.get();
   EngineBuilder ebuilder(std::move(module_));
+  ebuilder.setMCJITMemoryManager(std::make_unique<JITFrameManager>());
   ebuilder.setErrorStr(&str);
   ebuilder.setOptLevel(opt_level);
   ebuilder.setMCPU(llvm::sys::getHostCPUName());

Reply via email to