This is an automated email from the ASF dual-hosted git repository.
abukor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new dc741732b KUDU-3545 fix race condition when (un)registring EH frames
dc741732b is described below
commit dc741732bb6a40213e11a13869fad06a49a209a3
Author: Alexey Serbin <[email protected]>
AuthorDate: Fri Dec 12 21:48:21 2025 -0800
KUDU-3545 fix race condition when (un)registring EH frames
This patch addresses race condition in libgcc which is triggered by
concurrent registering/unregistering of EH frames by LLVM's
RTDyldMemoryManager. Also, since libgcc expects a null-terminated
list of FDEs, this patch adds extra 4 zero bytes in the end of each
section allocated for EH frames.
I added a new test scenario to reproduce the race, and it was triggering
TSAN warnings every time if rolling back the fix (see below). For more
robust and more reliable race reproduction, I added a new flag
(tagged as 'hidden', default value is 1) to control the maximum number
of threads in the codegen compiler thread pool:
--codegen_compiler_manager_pool_max_threads_num
We might start using it later to allow for parallel compilation of row
projection functions in the presence of concurrent scan requests.
WARNING: ThreadSanitizer: data race (pid=3406438)
Write of size 8 at 0x7b4000008228 by thread T6 (mutexes: write M1879):
#0 memmove sanitizer_common/sanitizer_common_interceptors.inc:791:3
(codegen-test+0x2edb86)
#1 <null> <null> (libgcc_s.so.1+0x1345d)
#2 llvm::RuntimeDyldELF::registerEHFrames()
#3 llvm::RuntimeDyld::registerEHFrames()
#4 llvm::MCJIT::finalizeLoadedModules()
#5 llvm::MCJIT::finalizeObject()
#6 kudu::codegen::ModuleBuilder::Compile()
...
Previous write of size 8 at 0x7b4000008228 by thread T5 (mutexes: write
M1869):
#0 malloc
/root/Projects/kudu/thirdparty/src/llvm-11.0.0.src/projects/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp:652:5
(codegen-test+0x2b2c34)
#1 <null> <null> (libgcc_s.so.1+0x1318a)
#2 llvm::RuntimeDyldELF::registerEHFrames()
#3 llvm::RuntimeDyld::registerEHFrames()
#4 llvm::MCJIT::finalizeLoadedModules()
#5 llvm::MCJIT::finalizeObject()
#6 kudu::codegen::ModuleBuilder::Compile()
#7 kudu::codegen::RowProjectorFunctions::Create()
#8 kudu::codegen::CodeGenerator::CompileRowProjector()
...
SUMMARY: ThreadSanitizer: data race (/lib64/libgcc_s.so.1+0x1345d)
Change-Id: Ia91ada71f663451e11149a190def6a8ccd0b4ef5
Reviewed-on: http://gerrit.cloudera.org:8080/23785
Reviewed-by: Zoltan Martonka <[email protected]>
Reviewed-by: Attila Bukor <[email protected]>
Tested-by: Attila Bukor <[email protected]>
---
src/kudu/codegen/CMakeLists.txt | 1 +
src/kudu/codegen/codegen-test.cc | 61 +++++++++++++++++++++++
src/kudu/codegen/compilation_manager.cc | 9 ++--
src/kudu/codegen/jit_frame_manager.cc | 85 +++++++++++++++++++++++++++++++++
src/kudu/codegen/jit_frame_manager.h | 57 ++++++++++++++++++++++
src/kudu/codegen/module_builder.cc | 3 ++
6 files changed, 213 insertions(+), 3 deletions(-)
diff --git a/src/kudu/codegen/CMakeLists.txt b/src/kudu/codegen/CMakeLists.txt
index 6ffa499e8..bb1053514 100644
--- a/src/kudu/codegen/CMakeLists.txt
+++ b/src/kudu/codegen/CMakeLists.txt
@@ -203,6 +203,7 @@ add_library(codegen
code_cache.cc
code_generator.cc
compilation_manager.cc
+ jit_frame_manager.cc
jit_wrapper.cc
module_builder.cc
row_projector.cc
diff --git a/src/kudu/codegen/codegen-test.cc b/src/kudu/codegen/codegen-test.cc
index 4a4d289cc..a1eda8c8b 100644
--- a/src/kudu/codegen/codegen-test.cc
+++ b/src/kudu/codegen/codegen-test.cc
@@ -21,6 +21,7 @@
#include <memory>
#include <ostream>
#include <string>
+#include <thread>
#include <vector>
// IWYU pragma: no_include "testing/base/public/gunit.h"
@@ -40,6 +41,7 @@
#include "kudu/common/schema.h"
#include "kudu/gutil/ref_counted.h"
#include "kudu/gutil/singleton.h"
+#include "kudu/util/countdown_latch.h"
#include "kudu/util/logging_test_util.h"
#include "kudu/util/memory/arena.h"
#include "kudu/util/random.h"
@@ -51,10 +53,12 @@
using std::string;
using std::unique_ptr;
+using std::thread;
using std::vector;
DECLARE_bool(codegen_dump_mc);
DECLARE_int32(codegen_cache_capacity);
+DECLARE_int32(codegen_compiler_manager_pool_max_threads_num);
namespace kudu {
@@ -449,4 +453,61 @@ TEST_F(CodegenTest, TestCodeCache) {
}
}
+// Test scenario to reproduce the race condition that leads to the behavior
+// described by KUDU-3545 before it was addressed. This works accross different
+// compilers/toolchains, while KUDU-3545 originally stipulated that the issue
+// is specific to GCC13 and the related toolchain.
+TEST_F(CodegenTest, CodegenEHFrameRace) {
+ constexpr const size_t kNumThreads = 2;
+
+ // For easier reproduction of the race, allow for multiple threads in the
+ // codegen compile thread pool. It results in a race condition in libgcc
+ // when concurrently registering EH frames.
+ FLAGS_codegen_compiler_manager_pool_max_threads_num = 3;
+
+ // A smaller codegen cache might help to create situations when a race
+ // condition happens when just a single codegen compiler thread is active.
+ // Upon adding a new entry into the cache, an entry would be evicted
+ // if it's not enough space, and its EH frames would unregistered,
+ // while another compiler request might be running at the only active thread
+ // in the compiler thread pool, registering correspoding EH frames.
+ // It creates a race condition in libgcc, and that's the original issue
+ // behind KUDU-3545.
+ FLAGS_codegen_cache_capacity = 10;
+ Singleton<CompilationManager>::UnsafeReset();
+ CompilationManager* cm = CompilationManager::GetSingleton();
+
+ CountDownLatch start(kNumThreads);
+ vector<thread> threads;
+ threads.reserve(kNumThreads);
+ for (size_t idx = 0; idx < kNumThreads; ++idx) {
+ threads.emplace_back([&]() {
+ start.Wait();
+ for (auto pass = 0; pass < 10; ++pass) {
+ // Generate all permutations of the first 4 columns (24 permutations).
+ // For each such permutation, reate a projection and request code
+ // generation.
+ vector<size_t> perm = { 0, 1, 2, 4 };
+ do {
+ SCOPED_TRACE(perm);
+ Schema projection;
+ const auto s = CreatePartialSchema(perm, &projection);
+ if (!s.ok()) {
+ LOG(WARNING) << s.ToString();
+ return;
+ }
+
+ unique_ptr<CodegenRP> projector;
+ cm->RequestRowProjector(&base_, &projection, &projector);
+ } while (std::next_permutation(perm.begin(), perm.end()));
+ }
+ });
+ }
+ start.CountDown(kNumThreads);
+
+ for (auto& t : threads) {
+ t.join();
+ }
+}
+
} // namespace kudu
diff --git a/src/kudu/codegen/compilation_manager.cc
b/src/kudu/codegen/compilation_manager.cc
index 9dc2d2b05..de16d4a82 100644
--- a/src/kudu/codegen/compilation_manager.cc
+++ b/src/kudu/codegen/compilation_manager.cc
@@ -62,6 +62,10 @@ DEFINE_int32(codegen_queue_capacity, 100, "Number of tasks
which may be put in t
"generation task queue.");
TAG_FLAG(codegen_queue_capacity, experimental);
+DEFINE_int32(codegen_compiler_manager_pool_max_threads_num, 1,
+ "Maximum number of threads in the compiler manager pool");
+TAG_FLAG(codegen_compiler_manager_pool_max_threads_num, hidden);
+
METRIC_DEFINE_gauge_uint64(server, code_cache_hits, "Codegen Cache Hits",
kudu::MetricUnit::kCacheHits,
"Number of codegen cache hits since start",
@@ -118,8 +122,7 @@ class CompilationTask {
RETURN_NOT_OK(generator_->CompileRowProjector(base_, proj_, &functions));
}
- RETURN_NOT_OK(cache_->AddEntry(functions));
- return Status::OK();
+ return cache_->AddEntry(functions);
}
Schema base_;
@@ -138,7 +141,7 @@ CompilationManager::CompilationManager()
query_counter_(0) {
CHECK_OK(ThreadPoolBuilder("compiler_manager_pool")
.set_min_threads(0)
- .set_max_threads(1)
+
.set_max_threads(FLAGS_codegen_compiler_manager_pool_max_threads_num)
.set_max_queue_size(FLAGS_codegen_queue_capacity)
.set_idle_timeout(MonoDelta::FromMilliseconds(100))
.Build(&pool_));
diff --git a/src/kudu/codegen/jit_frame_manager.cc
b/src/kudu/codegen/jit_frame_manager.cc
new file mode 100644
index 000000000..5adefa258
--- /dev/null
+++ b/src/kudu/codegen/jit_frame_manager.cc
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/codegen/jit_frame_manager.h"
+
+#include <cstdint>
+#include <iterator>
+
+#include <llvm/ExecutionEngine/SectionMemoryManager.h>
+
+// External symbols from libgcc/libunwind.
+extern "C" void __register_frame(void*); //
NOLINT(bugprone-reserved-identifier)
+extern "C" void __deregister_frame(void*);//
NOLINT(bugprone-reserved-identifier)
+
+using llvm::SectionMemoryManager;
+using llvm::StringRef;
+using std::lock_guard;
+
+namespace kudu {
+namespace codegen {
+
+// Initialize the static mutex
+std::mutex JITFrameManager::kRegistrationMutex;
+
+JITFrameManager::~JITFrameManager() {
+ // Be explicit about avoiding the virtual dispatch: invoke
+ // deregisterEHFramesImpl() instead of deregisterEHFrames().
+ deregisterEHFramesImpl();
+}
+
+uint8_t* JITFrameManager::allocateCodeSection(uintptr_t size,
+ unsigned alignment,
+ unsigned section_id,
+ StringRef section_name) {
+ // Add extra padding for EH frame section: it's zeroed out later upon
+ // registerEHFrames() calls.
+ if (section_name == ".eh_frame") {
+ size += 4;
+ }
+ return SectionMemoryManager::allocateCodeSection(
+ size, alignment, section_id, section_name);
+ }
+
+void JITFrameManager::registerEHFrames(uint8_t* addr,
+ uint64_t /*load_addr*/,
+ size_t size) {
+ lock_guard guard(kRegistrationMutex);
+
+ // libgcc expects a null-terminated list of FDEs: write 4 zero bytes in the
+ // end of the allocated section.
+ auto* terminator = reinterpret_cast<uint32_t*>(addr + size);
+ *terminator = 0;
+
+ __register_frame(addr);
+ registered_frames_.push_back(addr);
+}
+
+void JITFrameManager::deregisterEHFrames() {
+ return deregisterEHFramesImpl();
+}
+
+void JITFrameManager::deregisterEHFramesImpl() {
+ lock_guard guard(kRegistrationMutex);
+ for (auto it = registered_frames_.rbegin(); it != registered_frames_.rend();
++it) {
+ __deregister_frame(*it);
+ }
+ registered_frames_.clear();
+}
+
+} // namespace codegen
+} // namespace kudu
diff --git a/src/kudu/codegen/jit_frame_manager.h
b/src/kudu/codegen/jit_frame_manager.h
new file mode 100644
index 000000000..7d1099e20
--- /dev/null
+++ b/src/kudu/codegen/jit_frame_manager.h
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <deque>
+#include <mutex>
+
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
+#include <llvm/ADT/StringRef.h>
+
+namespace kudu {
+namespace codegen {
+
+class JITFrameManager : public llvm::SectionMemoryManager {
+ public:
+ JITFrameManager() = default;
+ ~JITFrameManager() override;
+
+ // Override to add space for the 4-byte null terminator.
+ uint8_t* allocateCodeSection(uintptr_t size,
+ unsigned alignment,
+ unsigned section_id,
+ llvm::StringRef section_name) override;
+
+ void registerEHFrames(uint8_t* addr, uint64_t load_addr, size_t size)
override;
+ void deregisterEHFrames() override;
+
+ private:
+ void deregisterEHFramesImpl();
+
+ // Mutex to prevent races in libgcc/libunwind. Since it should work across
+ // multiple instances, it's a static one.
+ static std::mutex kRegistrationMutex;
+
+ // Container to keep track of registered frames: this information is
necessary
+ // for unregistring all of them.
+ std::deque<uint8_t*> registered_frames_;
+};
+
+} // namespace codegen
+} // namespace kudu
diff --git a/src/kudu/codegen/module_builder.cc
b/src/kudu/codegen/module_builder.cc
index 3da31585c..4c61796c4 100644
--- a/src/kudu/codegen/module_builder.cc
+++ b/src/kudu/codegen/module_builder.cc
@@ -35,6 +35,7 @@
#include <llvm/ADT/iterator.h>
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/ExecutionEngine/MCJIT.h> // IWYU pragma: keep
+#include <llvm/ExecutionEngine/RTDyldMemoryManager.h>
#include <llvm/IR/Attributes.h>
#include <llvm/IR/Constant.h>
#include <llvm/IR/Constants.h>
@@ -58,6 +59,7 @@
#include <llvm/Transforms/IPO/AlwaysInliner.h>
#include <llvm/Transforms/IPO/PassManagerBuilder.h>
+#include "kudu/codegen/jit_frame_manager.h"
#include "kudu/codegen/precompiled.ll.h"
#include "kudu/gutil/basictypes.h"
#include "kudu/gutil/map-util.h"
@@ -331,6 +333,7 @@ Status ModuleBuilder::Compile(unique_ptr<ExecutionEngine>*
out) {
#endif
Module* module = module_.get();
EngineBuilder ebuilder(std::move(module_));
+ ebuilder.setMCJITMemoryManager(std::make_unique<JITFrameManager>());
ebuilder.setErrorStr(&str);
ebuilder.setOptLevel(opt_level);
ebuilder.setMCPU(llvm::sys::getHostCPUName());