Here is a fix for a deadlock seen under Windows using OpenThreads
Barrier operations. The error is with atomic operations in the
win32 condition implementation. The attached sample program will
reliably trigger with as few as three threads and a dual core system,
though sometimes it will take 65,000 iterations.
2.8.1 was the base for these changes
Win32ConditionPrivateData.h
Win32ConditionPrivateData::wait does two operations to decrement
waiters_ then read, when InterlockedDecrement decrements and returns
the value in one operation. The two operations allows another thread
to also decrement with both getting 0 for an answer.
Win32ConditionPrivateData::broadcast is using waiters_ directly
instead of using the w value read earlier, if it was safe to use
waiters_ directly there would be no need for InterlockedGet or w.
overview of deadlock in barrier with three threads
one thread in broadcast, 2 threads in wait,
release semaphore 2, waits on waiters_done_
both threads wake, decrement waiters_, get 0 for w,
<logic error here>
one calls set waiters_done_,
broadcast thread comes out of waiters_done_,
other thread calls waiters_done_, (which leaves waiters_done_ in the
signaled state)
<sets the trap>
broadcast thread returns releases mutex, other threads get
mutex and also return,
next barrier, first two threads enter wait, one goes to broadcast, release
semaphore 2, skips waiters_done_ as it had been released last time
returns, processes, enters the barrier for the next barrier operation
and waits,
three threads are now in wait, two have the previous barrier phase,
one the current phase, there's one count left in the semaphore which a
thread gets, returns, enters the barrier as a waiter, sleeps, and the
deadlock is completed
--
David Fries <[email protected]>
http://fries.net/~david/ (PGP encryption key available)
// Test OpenThreads::Barrier operations.
#include <iostream>
#include <stdlib.h>
#include <OpenThreads/Thread>
#include <OpenThreads/Barrier>
#include <osg/Timer>
using namespace std;
static int operations;
static OpenThreads::Barrier b;
class AddRemove : public OpenThreads::Thread
{
public:
virtual void run();
};
void AddRemove::run()
{
b.block(); // start them at the same time
for(int i=0; i<operations; ++i)
{
b.block();
}
b.block(); // work complete
}
int main(int argc, char **argv)
{
if(argc!=3)
{
cerr << "Usage: " << argv[0] << " threads operations\n";
exit(1);
}
int i;
int tcount = atoi(argv[1]);
operations = atoi(argv[2]);
AddRemove *t = new AddRemove[tcount];
for(i=0; i<tcount; ++i)
{
t[i].start();
}
osg::Timer timer;
// start them at the same time
b.block(tcount+1);
timer.setStartTick();
for(int i=0; i<operations; ++i)
{
b.block(tcount+1);
}
// work complete
b.block(tcount+1);
cout << "threads\toperations\tseconds\n";
cout << tcount << '\t' << operations << "\t\t" << timer.time_s() << endl;
for(i=0; i<tcount; ++i)
{
t[i].join();
}
delete [] t;
return 0;
}
/* -*-c++-*- OpenThreads library, Copyright (C) 2002 - 2007 The Open Thread Group
*
* This library is open source and may be redistributed and/or modified under
* the terms of the OpenSceneGraph Public License (OSGPL) version 0.0 or
* (at your option) any later version. The full license is in LICENSE file
* included with this distribution, and on the openscenegraph.org website.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* OpenSceneGraph Public License for more details.
*/
//
//
// WIN32ConditionPrivateData.h - Private data structure for Condition
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//
#ifndef _WIN32CONDITIONPRIVATEDATA_H_
#define _WIN32CONDITIONPRIVATEDATA_H_
#ifndef _WINDOWS_
#define WIN32_LEAN_AND_MEAN
#define _WIN32_WINNT 0x0400
#include <windows.h>
#endif
#include <OpenThreads/ScopedLock>
#include "Win32ThreadPrivateData.h"
#include "HandleHolder.h"
#define InterlockedGet(x) InterlockedExchangeAdd(x,0)
namespace OpenThreads {
class Condition;
class Win32ConditionPrivateData {
public:
friend class Condition;
/// number of waiters.
long waiters_;
Win32ConditionPrivateData ()
:waiters_(0),
was_broadcast_(0),
sema_(CreateSemaphore(NULL,0,0x7fffffff,NULL)),
waiters_done_(CreateEvent(NULL,FALSE,FALSE,NULL))
{
}
~Win32ConditionPrivateData ();
inline int broadcast ()
{
int have_waiters = 0;
long w = InterlockedGet(&waiters_);
if (w > 0)
{
// we are broadcasting.
was_broadcast_ = 1;
have_waiters = 1;
}
int result = 0;
if (have_waiters)
{
// Wake up all the waiters.
ReleaseSemaphore(sema_.get(), w, NULL);
cooperativeWait(waiters_done_.get(), INFINITE);
//end of broadcasting
was_broadcast_ = 0;
}
return result;
}
inline int signal()
{
long w = InterlockedGet(&waiters_);
int have_waiters = w > 0;
int result = 0;
if (have_waiters)
{
if( !ReleaseSemaphore(sema_.get(),1,NULL) )
result = -1;
}
return result;
}
inline int wait (Mutex& external_mutex, long timeout_ms)
{
// Prevent race conditions on the <waiters_> count.
InterlockedIncrement(&waiters_);
int result = 0;
ReverseScopedLock<Mutex> lock(external_mutex);
// wait in timeslices, giving testCancel() a change to
// exit the thread if requested.
try {
DWORD dwResult = cooperativeWait(sema_.get(), timeout_ms);
if(dwResult != WAIT_OBJECT_0)
result = (int)dwResult;
}
catch(...){
// thread is canceled in cooperative wait , do cleanup
long w = InterlockedDecrement(&waiters_);
int last_waiter = was_broadcast_ && w == 0;
if (last_waiter) SetEvent(waiters_done_.get());
// rethrow
throw;
}
// We're ready to return, so there's one less waiter.
long w = InterlockedDecrement(&waiters_);
int last_waiter = was_broadcast_ && w == 0;
if (result != -1 && last_waiter)
SetEvent(waiters_done_.get());
return result;
}
protected:
/// Serialize access to the waiters count.
/// Mutex waiters_lock_;
/// Queue up threads waiting for the condition to become signaled.
HandleHolder sema_;
/**
* An auto reset event used by the broadcast/signal thread to wait
* for the waiting thread(s) to wake up and get a chance at the
* semaphore.
*/
HandleHolder waiters_done_;
/// Keeps track of whether we were broadcasting or just signaling.
size_t was_broadcast_;
};
#undef InterlockedGet
}
#endif // !_WIN32CONDITIONPRIVATEDATA_H_
_______________________________________________
osg-submissions mailing list
[email protected]
http://lists.openscenegraph.org/listinfo.cgi/osg-submissions-openscenegraph.org