This is an automated email from the ASF dual-hosted git repository.

swebb2066 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/logging-log4cxx.git


The following commit(s) were added to refs/heads/master by this push:
     new 0046ba5c Fix application hang on SocketAppender shutdown when offline 
(#707)
0046ba5c is described below

commit 0046ba5cbacbd227076a262be9297f3f254e5e33
Author: sahvx655-wq <[email protected]>
AuthorDate: Sun Jun 7 07:41:37 2026 +0530

    Fix application hang on SocketAppender shutdown when offline (#707)
---
 src/main/cpp/threadutility.cpp                 | 98 +++++++++++++++++++-------
 src/test/cpp/helpers/threadutilitytestcase.cpp | 33 +++++++++
 2 files changed, 107 insertions(+), 24 deletions(-)

diff --git a/src/main/cpp/threadutility.cpp b/src/main/cpp/threadutility.cpp
index cfe07507..4ea4c078 100644
--- a/src/main/cpp/threadutility.cpp
+++ b/src/main/cpp/threadutility.cpp
@@ -369,39 +369,88 @@ void ThreadUtility::priv_data::doPeriodicTasks()
        {
                auto currentTime = std::chrono::system_clock::now();
                TimePoint nextOperationTime = currentTime + this->maxDelay;
+
+               // Run each due task with job_mutex released, so a long-running 
callback
+               // (e.g. a reconnect blocked on network I/O) does not stall 
removePeriodicTask()
+               while (true)
                {
-                       std::lock_guard<std::recursive_mutex> 
lock(this->job_mutex);
-                       for (auto& item : this->jobs)
+                       std::function<void()> taskCallback;
+                       LogString taskName;
+                       Period taskDelay;
+                       bool foundTask = false;
+
+                       // Take a copy of the next due task while holding the 
lock
                        {
                                if (this->terminated.load())
                                        return;
-                               if (item.removed)
-                                       ;
-                               else if (item.nextRun <= currentTime)
+                               std::lock_guard<std::recursive_mutex> 
lock(this->job_mutex);
+                               auto pItem = std::find_if(this->jobs.begin(), 
this->jobs.end()
+                                       , [currentTime](const 
NamedPeriodicFunction& item)
+                                       { return !item.removed && item.nextRun 
<= currentTime; }
+                                       );
+                               if (pItem != this->jobs.end())
                                {
-                                       try
-                                       {
-                                               item.f();
-                                               item.nextRun = 
std::chrono::system_clock::now() + item.delay;
-                                               if (item.nextRun < 
nextOperationTime)
-                                                       nextOperationTime = 
item.nextRun;
-                                               item.errorCount = 0;
-                                       }
-                                       catch (std::exception& ex)
-                                       {
-                                               LogLog::warn(item.name, ex);
-                                               ++item.errorCount;
-                                       }
-                                       catch (...)
-                                       {
-                                               LogLog::warn(item.name + 
LOG4CXX_STR(" threw an exception"));
-                                               ++item.errorCount;
-                                       }
+                                       taskCallback = pItem->f;
+                                       taskName = pItem->name;
+                                       taskDelay = pItem->delay;
+                                       foundTask = true;
                                }
-                               else if (item.nextRun < nextOperationTime)
+                       }
+
+                       // No more tasks are due, leave the loop
+                       if (!foundTask)
+                       {
+                               break;
+                       }
+
+                       // Execute the callback outside the lock
+                       bool success = false;
+                       try
+                       {
+                               taskCallback();
+                               success = true;
+                       }
+                       catch (std::exception& ex)
+                       {
+                               LogLog::warn(taskName, ex);
+                       }
+                       catch (...)
+                       {
+                               LogLog::warn(taskName + LOG4CXX_STR(" threw an 
exception"));
+                       }
+
+                       // Re-find the task (it may have been removed while 
running) and reschedule it
+                       {
+                               std::lock_guard<std::recursive_mutex> 
lock(this->job_mutex);
+                               auto pItem = std::find_if(this->jobs.begin(), 
this->jobs.end()
+                                       , [&taskName](const 
NamedPeriodicFunction& item)
+                                       { return !item.removed && taskName == 
item.name; }
+                                       );
+
+                               if (pItem != this->jobs.end())
+                               {
+                                       // Always push nextRun out, so a 
failing task waits before the next retry
+                                       pItem->nextRun = 
std::chrono::system_clock::now() + taskDelay;
+                                       if (success)
+                                               pItem->errorCount = 0;
+                                       else
+                                               ++pItem->errorCount;
+                               }
+                       }
+               }
+
+               // Update nextOperationTime under the lock
+               {
+                       std::lock_guard<std::recursive_mutex> 
lock(this->job_mutex);
+                       for (const auto& item : this->jobs)
+                       {
+                               if (!item.removed && item.nextRun < 
nextOperationTime)
+                               {
                                        nextOperationTime = item.nextRun;
+                               }
                        }
                }
+
                // Delete removed and faulty tasks
                while (1)
                {
@@ -418,6 +467,7 @@ void ThreadUtility::priv_data::doPeriodicTasks()
                                return;
                }
 
+               // Wait until the next task is due or an add/remove/shutdown 
wakes us
                std::unique_lock<std::mutex> lock(this->interrupt_mutex);
                this->interrupt.wait_until(lock, nextOperationTime);
        }
diff --git a/src/test/cpp/helpers/threadutilitytestcase.cpp 
b/src/test/cpp/helpers/threadutilitytestcase.cpp
index 32e1e119..aa5203c7 100644
--- a/src/test/cpp/helpers/threadutilitytestcase.cpp
+++ b/src/test/cpp/helpers/threadutilitytestcase.cpp
@@ -40,6 +40,7 @@ LOGUNIT_CLASS(ThreadUtilityTest)
        LOGUNIT_TEST(testCustomFunctions);
        LOGUNIT_TEST(testDefaultFunctions);
        LOGUNIT_TEST(testPeriodicTaskRestartsAfterEmptyQueue);
+       LOGUNIT_TEST(testNoDeadlockDuringTaskExecution);
 #if LOG4CXX_HAS_PTHREAD_SETNAME || defined(_WIN32)
        LOGUNIT_TEST(testThreadNameLogging);
 #endif
@@ -155,6 +156,38 @@ public:
                std::this_thread::sleep_for(std::chrono::milliseconds(30));
        }
 
+       void testNoDeadlockDuringTaskExecution()
+       {
+               auto thrUtil = ThreadUtility::instance();
+               const LogString slowTask(LOG4CXX_STR("ThreadUtilityTest.slow"));
+               std::atomic<bool> taskRunning{false};
+               std::atomic<bool> releaseTask{false};
+
+               thrUtil->removeAllPeriodicTasks();
+               // A callback that blocks, as a reconnect attempt would when 
the log server is offline
+               thrUtil->addPeriodicTask(slowTask, [&taskRunning, 
&releaseTask]() {
+                       taskRunning = true;
+                       for (int i = 0; i < 500 && !releaseTask.load(); ++i)
+                               
std::this_thread::sleep_for(std::chrono::milliseconds(10));
+               }, std::chrono::milliseconds(1));
+
+               // Wait for the callback to start executing
+               for (int i = 0; i < 100 && !taskRunning.load(); ++i)
+                       
std::this_thread::sleep_for(std::chrono::milliseconds(10));
+               LOGUNIT_ASSERT(taskRunning.load());
+
+               // removePeriodicTask() must return while the slow callback is 
still running
+               auto startTime = std::chrono::steady_clock::now();
+               thrUtil->removePeriodicTask(slowTask);
+               auto elapsed = std::chrono::steady_clock::now() - startTime;
+
+               releaseTask = true;
+               
LOGUNIT_ASSERT(std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count()
 < 1000);
+
+               // wait for the periodic task thread to settle
+               std::this_thread::sleep_for(std::chrono::milliseconds(50));
+       }
+
        void testThreadNameLogging()
        {
                auto layout = std::make_shared<PatternLayout>(LOG4CXX_STR("%T 
%m%n"));

Reply via email to