Modified: trunk/Source/WTF/ChangeLog (223964 => 223965)
--- trunk/Source/WTF/ChangeLog 2017-10-25 18:28:54 UTC (rev 223964)
+++ trunk/Source/WTF/ChangeLog 2017-10-25 18:32:46 UTC (rev 223965)
@@ -1,3 +1,16 @@
+2017-10-25 Commit Queue <[email protected]>
+
+ Unreviewed, rolling out r222945.
+ https://bugs.webkit.org/show_bug.cgi?id=178818
+
+ "It made WasmBench crash" (Requested by saamyjoon on #webkit).
+
+ Reverted changeset:
+
+ "bmalloc mutex should be adaptive"
+ https://bugs.webkit.org/show_bug.cgi?id=177839
+ https://trac.webkit.org/changeset/222945
+
2017-10-25 Zan Dobersek <[email protected]>
Make SERVICE_WORKER feature buildable on GTK, WPE
Modified: trunk/Source/WTF/wtf/LockAlgorithmInlines.h (223964 => 223965)
--- trunk/Source/WTF/wtf/LockAlgorithmInlines.h 2017-10-25 18:28:54 UTC (rev 223964)
+++ trunk/Source/WTF/wtf/LockAlgorithmInlines.h 2017-10-25 18:32:46 UTC (rev 223965)
@@ -33,10 +33,6 @@
// the lock algorithm slow path without recompiling the world. Right now this should be included in two
// places (Lock.cpp and JSCell.cpp).
-// NOTE: It's a bug to use any memory order other than seq_cst in this code. The cost of seq_cst
-// fencing is negligible on slow paths, so any use of a more relaxed memory model is all risk and no
-// reward.
-
namespace WTF {
template<typename LockType, LockType isHeldBit, LockType hasParkedBit>
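The NOTE removed above states a general rule for slow-path lock code: seq_cst fencing costs almost nothing next to the parking and yielding around it, so a weaker ordering is all risk and no reward. A standalone sketch (not part of the changeset) of the style it prescribes:

    #include <atomic>
    #include <cstdint>

    // Slow-path style per the removed NOTE: default (seq_cst) atomics
    // throughout; the fence cost is dwarfed by the waiting around it.
    bool acquireBitSlow(std::atomic<uintptr_t>& word, uintptr_t bit)
    {
        for (;;) {
            uintptr_t current = word.load(); // seq_cst by default
            if (current & bit)
                return false; // the bit is already held
            if (word.compare_exchange_weak(current, current | bit))
                return true; // seq_cst on both success and failure
        }
    }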
Modified: trunk/Source/WTF/wtf/WordLock.cpp (223964 => 223965)
--- trunk/Source/WTF/wtf/WordLock.cpp 2017-10-25 18:28:54 UTC (rev 223964)
+++ trunk/Source/WTF/wtf/WordLock.cpp 2017-10-25 18:32:46 UTC (rev 223965)
@@ -76,10 +76,6 @@
} // anonymous namespace
-// NOTE: It's a bug to use any memory order other than seq_cst in this code. The cost of seq_cst
-// fencing is negligible on slow paths, so any use of a more relaxed memory model is all risk and no
-// reward.
-
NEVER_INLINE void WordLockBase::lockSlow()
{
unsigned spinCount = 0;
@@ -227,8 +223,7 @@
ASSERT(currentWordValue & isLockedBit);
ASSERT(currentWordValue & isQueueLockedBit);
ThreadData* queueHead = bitwise_cast<ThreadData*>(currentWordValue & ~queueHeadMask);
- RELEASE_ASSERT(queueHead);
- RELEASE_ASSERT(queueHead->shouldPark); // This would have been set before the thread was enqueued, so it must still be set now.
+ ASSERT(queueHead);
ThreadData* newQueueHead = queueHead->nextInQueue;
// Either this was the only thread on the queue, in which case we delete the queue, or there
// are still more threads on the queue, in which case we create a new queue head.
@@ -261,12 +256,10 @@
{
std::lock_guard<std::mutex> locker(queueHead->parkingLock);
queueHead->shouldPark = false;
- // We have to do the notify while still holding the lock, since otherwise, we could try to
- // do it after the queueHead has destructed. It's impossible for queueHead to destruct
- // while we hold the lock, since it is either currently in the wait loop or it's before it
- // so it has to grab the lock before destructing.
- queueHead->parkingCondition.notify_one();
}
+ // Doesn't matter if we notify_all() or notify_one() here since the only thread that could be
+ // waiting is queueHead.
+ queueHead->parkingCondition.notify_one();
// The old queue head can now contend for the lock again. We're done!
}
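The comments on both sides of this hunk describe the same lifetime constraint. The removed comment requires the notify while parkingLock is held, because queueHead may destruct the moment it observes shouldPark == false; the restored code notifies after the critical section instead. A standalone sketch (hypothetical Waiter type, not WebKit API) of the hazard the removed comment guards against:

    #include <condition_variable>
    #include <mutex>

    struct Waiter {
        bool shouldPark { true };
        std::mutex parkingLock;
        std::condition_variable parkingCondition;
    };

    // Waiter side: once wait() returns, the caller may destroy *waiter.
    void park(Waiter* waiter)
    {
        std::unique_lock<std::mutex> locker(waiter->parkingLock);
        while (waiter->shouldPark)
            waiter->parkingCondition.wait(locker);
    }

    // Waker side, in the style of the removed comment: notifying while
    // still holding parkingLock guarantees park() cannot return (and the
    // Waiter cannot destruct) before notify_one() has finished.
    void wakeSafely(Waiter* waiter)
    {
        std::lock_guard<std::mutex> locker(waiter->parkingLock);
        waiter->shouldPark = false;
        waiter->parkingCondition.notify_one();
    }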
Modified: trunk/Source/bmalloc/ChangeLog (223964 => 223965)
--- trunk/Source/bmalloc/ChangeLog 2017-10-25 18:28:54 UTC (rev 223964)
+++ trunk/Source/bmalloc/ChangeLog 2017-10-25 18:32:46 UTC (rev 223965)
@@ -1,3 +1,16 @@
+2017-10-25 Commit Queue <[email protected]>
+
+ Unreviewed, rolling out r222945.
+ https://bugs.webkit.org/show_bug.cgi?id=178818
+
+ "It made WasmBench crash" (Requested by saamyjoon on #webkit).
+
+ Reverted changeset:
+
+ "bmalloc mutex should be adaptive"
+ https://bugs.webkit.org/show_bug.cgi?id=177839
+ https://trac.webkit.org/changeset/222945
+
2017-10-24 Zan Dobersek <[email protected]>
[Linux] Enable Gigacage in x64 Linux environment
Modified: trunk/Source/bmalloc/bmalloc/Algorithm.h (223964 => 223965)
--- trunk/Source/bmalloc/bmalloc/Algorithm.h 2017-10-25 18:28:54 UTC (rev 223964)
+++ trunk/Source/bmalloc/bmalloc/Algorithm.h 2017-10-25 18:32:46 UTC (rev 223965)
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2014-2017 Apple Inc. All rights reserved.
+ * Copyright (C) 2014 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -28,7 +28,6 @@
#include "BAssert.h"
#include <algorithm>
-#include <atomic>
#include <cstdint>
#include <cstddef>
#include <limits>
@@ -159,24 +158,6 @@
return bitCount<unsigned long>() - 1 - __builtin_clzl(value);
}
-// We need a CAS API that isn't the badly designed one from C++.
-template<typename T>
-bool compareExchangeWeak(std::atomic<T>& word, T expected, T desired, std::memory_order order = std::memory_order_seq_cst)
-{
- // They could have made a sensible CAS API, but they didn't. Sneaky fact: for no good reason, the
- // C++ API will mutate expected. I think that apologists will say something about how it saves you
- // reloading the value around the back of your CAS loop, but that's nonsense. The delay along the
- // back edge of a CAS loop has almost no impact on CAS loop performance. More often than not, we
- // want to *add* delays to the back edge, not remove them.
- return word.compare_exchange_weak(expected, desired, order, std::memory_order_relaxed);
-}
-
-template<typename T>
-bool compareExchangeStrong(std::atomic<T>& word, T expected, T desired, std::memory_order order = std::memory_order_seq_cst)
-{
- return word.compare_exchange_strong(expected, desired, order, std::memory_order_relaxed);
-}
-
} // namespace bmalloc
#endif // Algorithm_h
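The removed wrappers exist because std::atomic's native CAS takes `expected` by non-const reference and overwrites it on failure; the wrappers take it by value so the caller's copy survives. A small standalone illustration (not part of the changeset):

    #include <atomic>

    void demo(std::atomic<int>& word)
    {
        int expected = 0;
        // Standard API: on failure, `expected` is overwritten with the
        // current value of `word`.
        word.compare_exchange_weak(expected, 1);

        // Removed bmalloc helper: `expected` is passed by value, so the
        // caller's variable is never mutated.
        //     compareExchangeWeak(word, 0, 1);
    }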
Modified: trunk/Source/bmalloc/bmalloc/PerThread.h (223964 => 223965)
--- trunk/Source/bmalloc/bmalloc/PerThread.h 2017-10-25 18:28:54 UTC (rev 223964)
+++ trunk/Source/bmalloc/bmalloc/PerThread.h 2017-10-25 18:32:46 UTC (rev 223965)
@@ -60,11 +60,11 @@
static void destructor(void*);
};
+#if HAVE_PTHREAD_MACHDEP_H
+
class Cache;
template<typename T> struct PerThreadStorage;
-#if HAVE_PTHREAD_MACHDEP_H
-
// For now, we only support PerThread<PerHeapKind<Cache>>. We can expand to other types by
// using more keys.
template<> struct PerThreadStorage<PerHeapKind<Cache>> {
@@ -82,7 +82,7 @@
}
};
-#endif
+#else
template<typename T> struct PerThreadStorage {
static bool s_didInitialize;
@@ -112,6 +112,8 @@
template<typename T> pthread_key_t PerThreadStorage<T>::s_key;
template<typename T> std::once_flag PerThreadStorage<T>::s_onceFlag;
+#endif
+
template<typename T>
BINLINE T* PerThread<T>::getFastCase()
{
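This hunk restores the either/or structure: the PerThreadStorage specialization for the fast thread-local keys lives in the HAVE_PTHREAD_MACHDEP_H arm, and the generic pthread_key_t implementation becomes the #else arm rather than an unconditional second definition. A condensed, hypothetical sketch of the generic arm (names simplified; the destructor lambda is an assumption, PerThread.h uses its own destructor hook):

    #include <pthread.h>
    #include <mutex>

    template<typename T>
    struct PerThreadStorageSketch {
        static pthread_key_t s_key;
        static std::once_flag s_onceFlag;

        // Lazily create the key, then read the per-thread slot.
        static void* get()
        {
            std::call_once(s_onceFlag, [] {
                pthread_key_create(&s_key, [](void* p) { delete static_cast<T*>(p); });
            });
            return pthread_getspecific(s_key);
        }

        static void init(T* object)
        {
            get(); // ensure the key exists
            pthread_setspecific(s_key, object);
        }
    };

    template<typename T> pthread_key_t PerThreadStorageSketch<T>::s_key;
    template<typename T> std::once_flag PerThreadStorageSketch<T>::s_onceFlag;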
Modified: trunk/Source/bmalloc/bmalloc/StaticMutex.cpp (223964 => 223965)
--- trunk/Source/bmalloc/bmalloc/StaticMutex.cpp 2017-10-25 18:28:54 UTC (rev 223964)
+++ trunk/Source/bmalloc/bmalloc/StaticMutex.cpp 2017-10-25 18:32:46 UTC (rev 223965)
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2014-2017 Apple Inc. All rights reserved.
+ * Copyright (C) 2014 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -23,245 +23,31 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include "ScopeExit.h"
#include "StaticMutex.h"
-
-#include "PerThread.h"
-#include "ScopeExit.h"
-#include <condition_variable>
-#include <mutex>
#include <thread>
namespace bmalloc {
-// FIXME: This duplicates code from WTF::WordLock.
-// https://bugs.webkit.org/show_bug.cgi?id=177719
-
-namespace {
-
-// This data structure serves three purposes:
-//
-// 1) A parking mechanism for threads that go to sleep. That involves just a system mutex and
-// condition variable.
-//
-// 2) A queue node for when a thread is on some lock's queue.
-//
-// 3) The queue head. This is kind of funky. When a thread is the head of a queue, it also serves as
-// the basic queue bookkeeping data structure. When a thread is dequeued, the next thread in the
-// queue takes on the queue head duties.
-struct ThreadData {
- // The parking mechanism.
- bool shouldPark { false };
- std::mutex parkingLock;
- std::condition_variable parkingCondition;
-
- // The queue node.
- ThreadData* nextInQueue { nullptr };
-
- // The queue itself.
- ThreadData* queueTail { nullptr };
-};
-
-ThreadData* myThreadData()
+void StaticMutex::lockSlowCase()
{
- return PerThread<ThreadData>::get();
-}
-
-} // anonymous namespace
-
-// NOTE: It's a bug to use any memory order other than seq_cst in this code. The cost of seq_cst
-// fencing is negligible on slow paths, so any use of a more relaxed memory model is all risk and no
-// reward.
-
-BNO_INLINE void StaticMutex::lockSlow()
-{
- unsigned spinCount = 0;
-
- // This magic number turns out to be optimal based on past JikesRVM experiments.
- const unsigned spinLimit = 40;
+ // The longest critical section in bmalloc is much shorter than the
+ // time it takes to make a system call to yield to the OS scheduler.
+ // So, we try again a lot before we yield.
+ static const size_t aLot = 256;
- for (;;) {
- uintptr_t currentWordValue = m_word.load();
-
- if (!(currentWordValue & isLockedBit)) {
- // It's not possible for someone to hold the queue lock while the lock itself is no longer
- // held, since we will only attempt to acquire the queue lock when the lock is held and
- // the queue lock prevents unlock.
- BASSERT(!(currentWordValue & isQueueLockedBit));
- if (compareExchangeWeak(m_word, currentWordValue, currentWordValue | isLockedBit)) {
- // Success! We acquired the lock.
- return;
- }
- }
+ if (!m_isSpinning.test_and_set()) {
+ auto clear = makeScopeExit([&] { m_isSpinning.clear(); });
- // If there is no queue and we haven't spun too much, we can just try to spin around again.
- if (!(currentWordValue & ~queueHeadMask) && spinCount < spinLimit) {
- spinCount++;
- sched_yield();
- continue;
- }
-
- // Need to put ourselves on the queue. Create the queue if one does not exist. This requires
- // owning the queue for a little bit. The lock that controls the queue is itself a spinlock.
- // But before we acquire the queue spinlock, we make sure that we have a ThreadData for this
- // thread.
- ThreadData* me = myThreadData();
- BASSERT(!me->shouldPark);
- BASSERT(!me->nextInQueue);
- BASSERT(!me->queueTail);
-
- // Reload the current word value, since some time may have passed.
- currentWordValue = m_word.load();
-
- // We proceed only if the queue lock is not held, the lock is held, and we succeed in
- // acquiring the queue lock.
- if ((currentWordValue & isQueueLockedBit)
- || !(currentWordValue & isLockedBit)
- || !compareExchangeWeak(m_word, currentWordValue, currentWordValue | isQueueLockedBit)) {
- sched_yield();
- continue;
- }
-
- me->shouldPark = true;
-
- // We own the queue. Nobody can enqueue or dequeue until we're done. Also, it's not possible
- // to release the lock while we hold the queue lock.
- ThreadData* queueHead = reinterpret_cast<ThreadData*>(currentWordValue & ~queueHeadMask);
- if (queueHead) {
- // Put this thread at the end of the queue.
- queueHead->queueTail->nextInQueue = me;
- queueHead->queueTail = me;
-
- // Release the queue lock.
- currentWordValue = m_word.load();
- BASSERT(currentWordValue & ~queueHeadMask);
- BASSERT(currentWordValue & isQueueLockedBit);
- BASSERT(currentWordValue & isLockedBit);
- m_word.store(currentWordValue & ~isQueueLockedBit);
- } else {
- // Make this thread be the queue-head.
- queueHead = me;
- me->queueTail = me;
-
- // Release the queue lock and install ourselves as the head. No need for a CAS loop, since
- // we own the queue lock.
- currentWordValue = m_word.load();
- BASSERT(!(currentWordValue & ~queueHeadMask));
- BASSERT(currentWordValue & isQueueLockedBit);
- BASSERT(currentWordValue & isLockedBit);
- uintptr_t newWordValue = currentWordValue;
- newWordValue |= reinterpret_cast<uintptr_t>(queueHead);
- newWordValue &= ~isQueueLockedBit;
- m_word.store(newWordValue);
- }
-
- // At this point everyone who acquires the queue lock will see me on the queue, and anyone who
- // acquires me's lock will see that me wants to park. Note that shouldPark may have been
- // cleared as soon as the queue lock was released above, but it will happen while the
- // releasing thread holds me's parkingLock.
-
- {
- std::unique_lock<std::mutex> locker(me->parkingLock);
- while (me->shouldPark)
- me->parkingCondition.wait(locker);
- }
-
- BASSERT(!me->shouldPark);
- BASSERT(!me->nextInQueue);
- BASSERT(!me->queueTail);
-
- // Now we can loop around and try to acquire the lock again.
- }
-}
-
-BNO_INLINE void StaticMutex::unlockSlow()
-{
- // The fast path can fail either because of spurious weak CAS failure, or because someone put a
- // thread on the queue, or the queue lock is held. If the queue lock is held, it can only be
- // because someone *will* enqueue a thread onto the queue.
-
- // Acquire the queue lock, or release the lock. This loop handles both lock release in case the
- // fast path's weak CAS spuriously failed and it handles queue lock acquisition if there is
- // actually something interesting on the queue.
- for (;;) {
- uintptr_t currentWordValue = m_word.load();
-
- BASSERT(currentWordValue & isLockedBit);
-
- if (currentWordValue == isLockedBit) {
- if (compareExchangeWeak(m_word, isLockedBit, clear)) {
- // The fast path's weak CAS had spuriously failed, and now we succeeded. The lock is
- // unlocked and we're done!
+ for (size_t i = 0; i < aLot; ++i) {
+ if (try_lock())
return;
- }
- // Loop around and try again.
- sched_yield();
- continue;
}
-
- if (currentWordValue & isQueueLockedBit) {
- sched_yield();
- continue;
- }
-
- // If it wasn't just a spurious weak CAS failure and if the queue lock is not held, then there
- // must be an entry on the queue.
- BASSERT(currentWordValue & ~queueHeadMask);
-
- if (compareExchangeWeak(m_word, currentWordValue, currentWordValue | isQueueLockedBit))
- break;
}
- uintptr_t currentWordValue = m_word.load();
-
- // After we acquire the queue lock, the lock must still be held and the queue must be
- // non-empty. The queue must be non-empty since only the lockSlow() method could have held the
- // queue lock and if it did then it only releases it after putting something on the queue.
- BASSERT(currentWordValue & isLockedBit);
- BASSERT(currentWordValue & isQueueLockedBit);
- ThreadData* queueHead = reinterpret_cast<ThreadData*>(currentWordValue & ~queueHeadMask);
- RELEASE_BASSERT(queueHead);
- RELEASE_BASSERT(queueHead->shouldPark);
-
- ThreadData* newQueueHead = queueHead->nextInQueue;
- // Either this was the only thread on the queue, in which case we delete the queue, or there
- // are still more threads on the queue, in which case we create a new queue head.
- if (newQueueHead)
- newQueueHead->queueTail = queueHead->queueTail;
-
- // Change the queue head, possibly removing it if newQueueHead is null. No need for a CAS loop,
- // since we hold the queue lock and the lock itself so nothing about the lock can change right
- // now.
- currentWordValue = m_word.load();
- BASSERT(currentWordValue & isLockedBit);
- BASSERT(currentWordValue & isQueueLockedBit);
- BASSERT((currentWordValue & ~queueHeadMask) == reinterpret_cast<uintptr_t>(queueHead));
- uintptr_t newWordValue = currentWordValue;
- newWordValue &= ~isLockedBit; // Release the lock.
- newWordValue &= ~isQueueLockedBit; // Release the queue lock.
- newWordValue &= queueHeadMask; // Clear out the old queue head.
- newWordValue |= reinterpret_cast<uintptr_t>(newQueueHead); // Install new queue head.
- m_word.store(newWordValue);
-
- // Now the lock is available for acquisition. But we just have to wake up the old queue head.
- // After that, we're done!
-
- queueHead->nextInQueue = nullptr;
- queueHead->queueTail = nullptr;
-
- // We do this carefully because this may run either before or during the parkingLock critical
- // section in lockSlow().
- {
- std::lock_guard<std::mutex> locker(queueHead->parkingLock);
- RELEASE_BASSERT(queueHead->shouldPark);
- queueHead->shouldPark = false;
- // We have to do the notify while still holding the lock, since otherwise, we could try to
- // do it after the queueHead has destructed. It's impossible for queueHead to destruct
- // while we hold the lock, since it is either currently in the wait loop or it's before it
- // so it has to grab the lock before destructing.
- queueHead->parkingCondition.notify_one();
- }
-
- // The old queue head can now contend for the lock again. We're done!
+ // Avoid spinning pathologically.
+ while (!try_lock())
+ sched_yield();
}
} // namespace bmalloc
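Assembled from the '+' lines above, the restored slow path reads as one piece: the first contender to grab m_isSpinning retries try_lock() aLot (256) times, and any thread that cannot spin, or exhausts its budget, falls back to sched_yield(). A standalone sketch (the scope-exit is replaced by explicit clears; not the changeset's exact code):

    #include <atomic>
    #include <sched.h>

    struct SpinThenYieldLock {
        std::atomic_flag m_flag = ATOMIC_FLAG_INIT;
        std::atomic_flag m_isSpinning = ATOMIC_FLAG_INIT;

        bool try_lock() { return !m_flag.test_and_set(std::memory_order_acquire); }
        void unlock() { m_flag.clear(std::memory_order_release); }

        void lock()
        {
            if (try_lock())
                return;

            // Only one thread spins at a time; everyone else yields.
            static const size_t aLot = 256;
            if (!m_isSpinning.test_and_set()) {
                for (size_t i = 0; i < aLot; ++i) {
                    if (try_lock()) {
                        m_isSpinning.clear();
                        return;
                    }
                }
                m_isSpinning.clear();
            }

            // Avoid spinning pathologically.
            while (!try_lock())
                sched_yield();
        }
    };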
Modified: trunk/Source/bmalloc/bmalloc/StaticMutex.h (223964 => 223965)
--- trunk/Source/bmalloc/bmalloc/StaticMutex.h 2017-10-25 18:28:54 UTC (rev 223964)
+++ trunk/Source/bmalloc/bmalloc/StaticMutex.h 2017-10-25 18:32:46 UTC (rev 223965)
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2014-2017 Apple Inc. All rights reserved.
+ * Copyright (C) 2014 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -26,9 +26,7 @@
#ifndef StaticMutex_h
#define StaticMutex_h
-#include "Algorithm.h"
#include "BAssert.h"
-#include "BExport.h"
#include <atomic>
#include <mutex>
#include <thread>
@@ -38,16 +36,6 @@
// Use StaticMutex in static storage, where global constructors and exit-time
// destructors are prohibited, but all memory is zero-initialized automatically.
-// This uses the code from what WTF used to call WordLock. It's a fully adaptive mutex that uses
-// sizeof(void*) storage. It has a fast path that is similar to a spinlock, and a slow path that is
-// similar to std::mutex.
-
-// NOTE: In general, this is a great lock to use if you are very low in the stack. WTF continues to
-// have a copy of this code.
-
-// FIXME: Either fold bmalloc into WTF or find a better way to share code between them.
-// https://bugs.webkit.org/show_bug.cgi?id=177719
-
namespace bmalloc {
class StaticMutex {
@@ -57,49 +45,57 @@
public:
void lock();
- bool tryLock();
- bool try_lock() { return tryLock(); }
+ bool try_lock();
void unlock();
-
- bool isHeld() const;
- bool isLocked() const { return isHeld(); }
private:
- BEXPORT void lockSlow();
- BEXPORT void unlockSlow();
+ void lockSlowCase();
- static const uintptr_t clear = 0;
- static const uintptr_t isLockedBit = 1;
- static const uintptr_t isQueueLockedBit = 2;
- static const uintptr_t queueHeadMask = 3;
-
- std::atomic<uintptr_t> m_word;
+ std::atomic_flag m_flag;
+ std::atomic_flag m_isSpinning;
};
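The four removed constants encode the whole adaptive lock in one word: bit 0 is the lock, bit 1 is the queue lock, and the remaining bits hold the queue-head pointer (ThreadData is sufficiently aligned that its low two bits are always zero). A standalone decoding sketch:

    #include <cstdint>

    constexpr uintptr_t isLockedBit      = 1;
    constexpr uintptr_t isQueueLockedBit = 2;
    constexpr uintptr_t queueHeadMask    = 3;

    struct ThreadData; // opaque here

    inline bool isLocked(uintptr_t word)      { return word & isLockedBit; }
    inline bool isQueueLocked(uintptr_t word) { return word & isQueueLockedBit; }

    // Mask off the two flag bits to recover the queue-head pointer.
    inline ThreadData* queueHead(uintptr_t word)
    {
        return reinterpret_cast<ThreadData*>(word & ~queueHeadMask);
    }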
+static inline void sleep(
+ std::unique_lock<StaticMutex>& lock, std::chrono::milliseconds duration)
+{
+ if (duration == std::chrono::milliseconds(0))
+ return;
+
+ lock.unlock();
+ std::this_thread::sleep_for(duration);
+ lock.lock();
+}
+
+static inline void waitUntilFalse(
+ std::unique_lock<StaticMutex>& lock, std::chrono::milliseconds sleepDuration,
+ bool& condition)
+{
+ while (condition) {
+ condition = false;
+ sleep(lock, sleepDuration);
+ }
+}
+
inline void StaticMutex::init()
{
- m_word.store(0, std::memory_order_relaxed);
+ m_flag.clear();
+ m_isSpinning.clear();
}
-inline bool StaticMutex::tryLock()
+inline bool StaticMutex::try_lock()
{
- return compareExchangeWeak(m_word, clear, isLockedBit, std::memory_order_acquire);
+ return !m_flag.test_and_set(std::memory_order_acquire);
}
inline void StaticMutex::lock()
{
- if (compareExchangeWeak(m_word, clear, isLockedBit, std::memory_order_acquire))
- return;
-
- lockSlow();
+ if (!try_lock())
+ lockSlowCase();
}
inline void StaticMutex::unlock()
{
- if (compareExchangeWeak(m_word, isLockedBit, clear, std::memory_order_release))
- return;
-
- unlockSlow();
+ m_flag.clear(std::memory_order_release);
}
} // namespace bmalloc
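One consequence of the rename from tryLock() to try_lock(): together with lock() and unlock(), StaticMutex now satisfies the standard Lockable requirements directly (the old code needed a forwarding alias), which is what lets the sleep() and waitUntilFalse() helpers above take a std::unique_lock<StaticMutex>. A sketch with a hypothetical stand-in type:

    #include <chrono>
    #include <mutex>
    #include <thread>

    struct MutexLike { // stand-in; any type with lock/try_lock/unlock works
        std::mutex m;
        void lock() { m.lock(); }
        bool try_lock() { return m.try_lock(); }
        void unlock() { m.unlock(); }
    };

    // Mirrors the sleep() helper above: drop the lock, sleep, reacquire.
    void sleepUnderLock(std::unique_lock<MutexLike>& lock,
        std::chrono::milliseconds duration)
    {
        lock.unlock();
        std::this_thread::sleep_for(duration);
        lock.lock();
    }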