This is an automated email from the ASF dual-hosted git repository.
swebb2066 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/logging-log4cxx.git
The following commit(s) were added to refs/heads/master by this push:
new 0046ba5c Fix application hang on SocketAppender shutdown when offline
(#707)
0046ba5c is described below
commit 0046ba5cbacbd227076a262be9297f3f254e5e33
Author: sahvx655-wq <[email protected]>
AuthorDate: Sun Jun 7 07:41:37 2026 +0530
Fix application hang on SocketAppender shutdown when offline (#707)
---
src/main/cpp/threadutility.cpp | 98 +++++++++++++++++++-------
src/test/cpp/helpers/threadutilitytestcase.cpp | 33 +++++++++
2 files changed, 107 insertions(+), 24 deletions(-)
diff --git a/src/main/cpp/threadutility.cpp b/src/main/cpp/threadutility.cpp
index cfe07507..4ea4c078 100644
--- a/src/main/cpp/threadutility.cpp
+++ b/src/main/cpp/threadutility.cpp
@@ -369,39 +369,88 @@ void ThreadUtility::priv_data::doPeriodicTasks()
{
auto currentTime = std::chrono::system_clock::now();
TimePoint nextOperationTime = currentTime + this->maxDelay;
+
+ // Run each due task with job_mutex released, so a long-running
callback
+ // (e.g. a reconnect blocked on network I/O) does not stall
removePeriodicTask()
+ while (true)
{
- std::lock_guard<std::recursive_mutex>
lock(this->job_mutex);
- for (auto& item : this->jobs)
+ std::function<void()> taskCallback;
+ LogString taskName;
+ Period taskDelay;
+ bool foundTask = false;
+
+ // Take a copy of the next due task while holding the
lock
{
if (this->terminated.load())
return;
- if (item.removed)
- ;
- else if (item.nextRun <= currentTime)
+ std::lock_guard<std::recursive_mutex>
lock(this->job_mutex);
+ auto pItem = std::find_if(this->jobs.begin(),
this->jobs.end()
+ , [currentTime](const
NamedPeriodicFunction& item)
+ { return !item.removed && item.nextRun
<= currentTime; }
+ );
+ if (pItem != this->jobs.end())
{
- try
- {
- item.f();
- item.nextRun =
std::chrono::system_clock::now() + item.delay;
- if (item.nextRun <
nextOperationTime)
- nextOperationTime =
item.nextRun;
- item.errorCount = 0;
- }
- catch (std::exception& ex)
- {
- LogLog::warn(item.name, ex);
- ++item.errorCount;
- }
- catch (...)
- {
- LogLog::warn(item.name +
LOG4CXX_STR(" threw an exception"));
- ++item.errorCount;
- }
+ taskCallback = pItem->f;
+ taskName = pItem->name;
+ taskDelay = pItem->delay;
+ foundTask = true;
}
- else if (item.nextRun < nextOperationTime)
+ }
+
+ // No more tasks are due, leave the loop
+ if (!foundTask)
+ {
+ break;
+ }
+
+ // Execute the callback outside the lock
+ bool success = false;
+ try
+ {
+ taskCallback();
+ success = true;
+ }
+ catch (std::exception& ex)
+ {
+ LogLog::warn(taskName, ex);
+ }
+ catch (...)
+ {
+ LogLog::warn(taskName + LOG4CXX_STR(" threw an
exception"));
+ }
+
+ // Re-find the task (it may have been removed while
running) and reschedule it
+ {
+ std::lock_guard<std::recursive_mutex>
lock(this->job_mutex);
+ auto pItem = std::find_if(this->jobs.begin(),
this->jobs.end()
+ , [&taskName](const
NamedPeriodicFunction& item)
+ { return !item.removed && taskName ==
item.name; }
+ );
+
+ if (pItem != this->jobs.end())
+ {
+ // Always push nextRun out, so a
failing task waits before the next retry
+ pItem->nextRun =
std::chrono::system_clock::now() + taskDelay;
+ if (success)
+ pItem->errorCount = 0;
+ else
+ ++pItem->errorCount;
+ }
+ }
+ }
+
+ // Update nextOperationTime under the lock
+ {
+ std::lock_guard<std::recursive_mutex>
lock(this->job_mutex);
+ for (const auto& item : this->jobs)
+ {
+ if (!item.removed && item.nextRun <
nextOperationTime)
+ {
nextOperationTime = item.nextRun;
+ }
}
}
+
// Delete removed and faulty tasks
while (1)
{
@@ -418,6 +467,7 @@ void ThreadUtility::priv_data::doPeriodicTasks()
return;
}
+ // Wait until the next task is due or an add/remove/shutdown
wakes us
std::unique_lock<std::mutex> lock(this->interrupt_mutex);
this->interrupt.wait_until(lock, nextOperationTime);
}
diff --git a/src/test/cpp/helpers/threadutilitytestcase.cpp
b/src/test/cpp/helpers/threadutilitytestcase.cpp
index 32e1e119..aa5203c7 100644
--- a/src/test/cpp/helpers/threadutilitytestcase.cpp
+++ b/src/test/cpp/helpers/threadutilitytestcase.cpp
@@ -40,6 +40,7 @@ LOGUNIT_CLASS(ThreadUtilityTest)
LOGUNIT_TEST(testCustomFunctions);
LOGUNIT_TEST(testDefaultFunctions);
LOGUNIT_TEST(testPeriodicTaskRestartsAfterEmptyQueue);
+ LOGUNIT_TEST(testNoDeadlockDuringTaskExecution);
#if LOG4CXX_HAS_PTHREAD_SETNAME || defined(_WIN32)
LOGUNIT_TEST(testThreadNameLogging);
#endif
@@ -155,6 +156,38 @@ public:
std::this_thread::sleep_for(std::chrono::milliseconds(30));
}
+ void testNoDeadlockDuringTaskExecution()
+ {
+ auto thrUtil = ThreadUtility::instance();
+ const LogString slowTask(LOG4CXX_STR("ThreadUtilityTest.slow"));
+ std::atomic<bool> taskRunning{false};
+ std::atomic<bool> releaseTask{false};
+
+ thrUtil->removeAllPeriodicTasks();
+ // A callback that blocks, as a reconnect attempt would when
the log server is offline
+ thrUtil->addPeriodicTask(slowTask, [&taskRunning,
&releaseTask]() {
+ taskRunning = true;
+ for (int i = 0; i < 500 && !releaseTask.load(); ++i)
+
std::this_thread::sleep_for(std::chrono::milliseconds(10));
+ }, std::chrono::milliseconds(1));
+
+ // Wait for the callback to start executing
+ for (int i = 0; i < 100 && !taskRunning.load(); ++i)
+
std::this_thread::sleep_for(std::chrono::milliseconds(10));
+ LOGUNIT_ASSERT(taskRunning.load());
+
+ // removePeriodicTask() must return while the slow callback is
still running
+ auto startTime = std::chrono::steady_clock::now();
+ thrUtil->removePeriodicTask(slowTask);
+ auto elapsed = std::chrono::steady_clock::now() - startTime;
+
+ releaseTask = true;
+
LOGUNIT_ASSERT(std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count()
< 1000);
+
+ // wait for the periodic task thread to settle
+ std::this_thread::sleep_for(std::chrono::milliseconds(50));
+ }
+
void testThreadNameLogging()
{
auto layout = std::make_shared<PatternLayout>(LOG4CXX_STR("%T
%m%n"));