Diff
Modified: trunk/Source/_javascript_Core/ChangeLog (206313 => 206314)
--- trunk/Source/_javascript_Core/ChangeLog 2016-09-23 17:49:15 UTC (rev 206313)
+++ trunk/Source/_javascript_Core/ChangeLog 2016-09-23 18:09:44 UTC (rev 206314)
@@ -1,3 +1,69 @@
+2016-09-22 Filip Pizlo <[email protected]>
+
+ Need a store-load fence between setting cell state and visiting the object in SlotVisitor
+ https://bugs.webkit.org/show_bug.cgi?id=162354
+
+ Reviewed by Mark Lam.
+
+ This was meant to be a small change, but then it became bigger as I found small
+ opportunities for improving this code. This adds a store-load fence and is performance-
+ neutral. That's probably partly due to other optimizations that I did to visitChildren().
+
+ Initially, I found that adding an mfence as a store-load fence was terribly expensive. So,
+ I thought that I needed to buffer up a bunch of objects, set their states, do one mfence,
+ and then visit all of them. This seemed like a win, so I went with it. Unfortunately, this
+ made no sense for two reasons:
+
+ - I shouldn't use mfence. I should use ortop (lock orl $0, (%rsp)) instead. Ortop is
+ basically free, and it's what WTF now uses for storeLoadFence().
+
+ - My data saying that buffering up objects was not a slow-down was wrong. That was actually
+ almost as expensive as the mfence.
+
+ But in order to implement that, I made some other improvements that I think we should stick
+ with:
+
+ - SlotVisitor::visitChildren() now uses a switch on type. This replaces what used to be
+ some nasty ClassInfo look-ups.
+
+ - We no longer save the object's old CellState. We would do that so that we would know what
+ state the object had been before we blackened it. But I believe that the more logical
+ solution is to have two kinds of black - one for black-for-the-first-time objects and one
+ for repeat offenders. This is a lot easier to reason about, since you can now just figure
+ this out by looking at the cell directly.
+
+ The latter change meant rewiring a bunch of barriers. It didn't make them any more
+ expensive.
+
+ * ftl/FTLLowerDFGToB3.cpp:
+ (JSC::FTL::DFG::LowerDFGToB3::emitStoreBarrier):
+ * heap/CellState.h:
+ (JSC::blacken):
+ * heap/Heap.cpp:
+ (JSC::Heap::addToRememberedSet):
+ * heap/Heap.h:
+ * heap/HeapInlines.h:
+ (JSC::Heap::writeBarrier):
+ (JSC::Heap::reportExtraMemoryVisited):
+ (JSC::Heap::reportExternalMemoryVisited):
+ * heap/MarkStack.cpp:
+ * heap/MarkStack.h:
+ * heap/SlotVisitor.cpp:
+ (JSC::SlotVisitor::visitChildren):
+ * heap/SlotVisitor.h:
+ * heap/SlotVisitorInlines.h:
+ (JSC::SlotVisitor::reportExtraMemoryVisited):
+ (JSC::SlotVisitor::reportExternalMemoryVisited):
+ * jit/AssemblyHelpers.h:
+ (JSC::AssemblyHelpers::jumpIfIsRememberedOrInEden):
+ * llint/LLIntData.cpp:
+ (JSC::LLInt::Data::performAssertions):
+ * llint/LowLevelInterpreter.asm:
+ * llint/LowLevelInterpreter32_64.asm:
+ * llint/LowLevelInterpreter64.asm:
+ * runtime/JSObject.h:
+ (JSC::isJSFinalObject):
+
2016-09-23 Csaba Osztrogonác <[email protected]>
ARM EABI buildfix after r206289
Modified: trunk/Source/_javascript_Core/ftl/FTLLowerDFGToB3.cpp (206313 => 206314)
--- trunk/Source/_javascript_Core/ftl/FTLLowerDFGToB3.cpp 2016-09-23 17:49:15 UTC (rev 206313)
+++ trunk/Source/_javascript_Core/ftl/FTLLowerDFGToB3.cpp 2016-09-23 18:09:44 UTC (rev 206314)
@@ -11435,7 +11435,8 @@
LBasicBlock continuation = m_out.newBlock();
m_out.branch(
- m_out.notZero32(loadCellState(base)), usually(continuation), rarely(slowPath));
+ m_out.above(loadCellState(base), m_out.constInt32(blackThreshold)),
+ usually(continuation), rarely(slowPath));
LBasicBlock lastNext = m_out.appendTo(slowPath, continuation);
Modified: trunk/Source/_javascript_Core/heap/CellState.h (206313 => 206314)
--- trunk/Source/_javascript_Core/heap/CellState.h 2016-09-23 17:49:15 UTC (rev 206313)
+++ trunk/Source/_javascript_Core/heap/CellState.h 2016-09-23 18:09:44 UTC (rev 206314)
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2015 Apple Inc. All rights reserved.
+ * Copyright (C) 2015-2016 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -26,34 +26,47 @@
#ifndef CellState_h
#define CellState_h
+#include <wtf/Assertions.h>
+
namespace JSC {
enum class CellState : uint8_t {
- // The object is black as far as this GC is concerned. When not in GC, this just means that it's an
- // old gen object. Note that we deliberately arrange OldBlack to be zero, so that the store barrier on
- // a target object "from" is just:
- //
- // if (!from->cellState())
- // slowPath(from);
- //
- // There is a bunch of code in the LLInt and JITs that rely on this being the case. You'd have to
- // change a lot of code if you ever wanted the store barrier to be anything but a non-zero check on
- // cellState.
- OldBlack = 0,
+ // The object is black for the first time during this GC.
+ NewBlack = 0,
+ // The object is black for the Nth time during this full GC cycle (N > 1). An object may get to
+ // this state if it transitions from black back to grey during a concurrent GC, or because it
+ // wound up in the remembered set because of a generational barrier.
+ OldBlack = 1,
+
// The object is in eden. During GC, this means that the object has not been marked yet.
- NewWhite = 1,
+ NewWhite = 2,
+ // The object is grey - i.e. it will be scanned - and this is the first time in this GC that we are
+ // going to scan it. If this is an eden GC, this also means that the object is in eden.
+ NewGrey = 3,
+
// The object is grey - i.e. it will be scanned - but it either belongs to old gen (if this is eden
// GC) or it is grey a second time in this current GC (because a concurrent store barrier requested
// re-greying).
- OldGrey = 2,
-
- // The object is grey - i.e. it will be scanned - and this is the first time in this GC that we are
- // going to scan it. If this is an eden GC, this also means that the object is in eden.
- NewGrey = 3
+ OldGrey = 4
};
+static const unsigned blackThreshold = 1; // x <= blackThreshold means x is black.
+
+inline bool isBlack(CellState cellState)
+{
+ return static_cast<unsigned>(cellState) <= blackThreshold;
+}
+
+inline CellState blacken(CellState cellState)
+{
+ if (cellState == CellState::NewGrey)
+ return CellState::NewBlack;
+ ASSERT(cellState == CellState::NewBlack || cellState == CellState::OldBlack || cellState == CellState::OldGrey);
+ return CellState::OldBlack;
+}
+
} // namespace JSC
#endif // CellState_h
Modified: trunk/Source/_javascript_Core/heap/Heap.cpp (206313 => 206314)
--- trunk/Source/_javascript_Core/heap/Heap.cpp 2016-09-23 17:49:15 UTC (rev 206313)
+++ trunk/Source/_javascript_Core/heap/Heap.cpp 2016-09-23 18:09:44 UTC (rev 206314)
@@ -28,6 +28,7 @@
#include "FullGCActivityCallback.h"
#include "GCActivityCallback.h"
#include "GCIncomingRefCountedSetInlines.h"
+#include "GCSegmentedArrayInlines.h"
#include "GCTypeMap.h"
#include "HasOwnPropertyCache.h"
#include "HeapHelperPool.h"
@@ -914,7 +915,7 @@
{
ASSERT(cell);
ASSERT(!Options::useConcurrentJIT() || !isCompilationThread());
- ASSERT(cell->cellState() == CellState::OldBlack);
+ ASSERT(isBlack(cell->cellState()));
// Indicate that this object is grey and that it's one of the following:
// - A re-greyed object during a concurrent collection.
// - An old remembered object.
Modified: trunk/Source/_javascript_Core/heap/Heap.h (206313 => 206314)
--- trunk/Source/_javascript_Core/heap/Heap.h 2016-09-23 17:49:15 UTC (rev 206313)
+++ trunk/Source/_javascript_Core/heap/Heap.h 2016-09-23 18:09:44 UTC (rev 206314)
@@ -175,11 +175,11 @@
// call both of these functions: Calling only one may trigger catastropic
// memory growth.
void reportExtraMemoryAllocated(size_t);
- void reportExtraMemoryVisited(CellState cellStateBeforeVisiting, size_t);
+ void reportExtraMemoryVisited(JSCell*, size_t);
#if ENABLE(RESOURCE_USAGE)
// Use this API to report the subset of extra memory that lives outside this process.
- void reportExternalMemoryVisited(CellState cellStateBeforeVisiting, size_t);
+ void reportExternalMemoryVisited(JSCell*, size_t);
size_t externalMemorySize() { return m_externalMemorySize; }
#endif
Modified: trunk/Source/_javascript_Core/heap/HeapInlines.h (206313 => 206314)
--- trunk/Source/_javascript_Core/heap/HeapInlines.h 2016-09-23 17:49:15 UTC (rev 206313)
+++ trunk/Source/_javascript_Core/heap/HeapInlines.h 2016-09-23 18:09:44 UTC (rev 206314)
@@ -125,7 +125,7 @@
#if ENABLE(WRITE_BARRIER_PROFILING)
WriteBarrierCounters::countWriteBarrier();
#endif
- if (!from || from->cellState() != CellState::OldBlack)
+ if (!from || !isBlack(from->cellState()))
return;
if (!to || to->cellState() != CellState::NewWhite)
return;
@@ -135,7 +135,7 @@
inline void Heap::writeBarrier(const JSCell* from)
{
ASSERT_GC_OBJECT_LOOKS_VALID(const_cast<JSCell*>(from));
- if (!from || from->cellState() != CellState::OldBlack)
+ if (!from || !isBlack(from->cellState()))
return;
addToRememberedSet(from);
}
@@ -146,10 +146,10 @@
reportExtraMemoryAllocatedSlowCase(size);
}
-inline void Heap::reportExtraMemoryVisited(CellState dataBeforeVisiting, size_t size)
+inline void Heap::reportExtraMemoryVisited(JSCell* cell, size_t size)
{
// We don't want to double-count the extra memory that was reported in previous collections.
- if (operationInProgress() == EdenCollection && dataBeforeVisiting == CellState::OldGrey)
+ if (operationInProgress() == EdenCollection && cell->cellState() == CellState::OldBlack)
return;
size_t* counter = &m_extraMemorySize;
@@ -162,10 +162,10 @@
}
#if ENABLE(RESOURCE_USAGE)
-inline void Heap::reportExternalMemoryVisited(CellState dataBeforeVisiting, size_t size)
+inline void Heap::reportExternalMemoryVisited(JSCell* cell, size_t size)
{
// We don't want to double-count the external memory that was reported in previous collections.
- if (operationInProgress() == EdenCollection && dataBeforeVisiting == CellState::OldGrey)
+ if (operationInProgress() == EdenCollection && cell->cellState() == CellState::OldBlack)
return;
size_t* counter = &m_externalMemorySize;
Modified: trunk/Source/_javascript_Core/heap/MarkStack.cpp (206313 => 206314)
--- trunk/Source/_javascript_Core/heap/MarkStack.cpp 2016-09-23 17:49:15 UTC (rev 206313)
+++ trunk/Source/_javascript_Core/heap/MarkStack.cpp 2016-09-23 18:09:44 UTC (rev 206314)
@@ -26,6 +26,7 @@
#include "config.h"
#include "MarkStack.h"
+#include "GCSegmentedArrayInlines.h"
#include "JSCInlines.h"
namespace JSC {
Modified: trunk/Source/_javascript_Core/heap/MarkStack.h (206313 => 206314)
--- trunk/Source/_javascript_Core/heap/MarkStack.h 2016-09-23 17:49:15 UTC (rev 206313)
+++ trunk/Source/_javascript_Core/heap/MarkStack.h 2016-09-23 18:09:44 UTC (rev 206314)
@@ -26,7 +26,7 @@
#ifndef MarkStack_h
#define MarkStack_h
-#include "GCSegmentedArrayInlines.h"
+#include "GCSegmentedArray.h"
namespace JSC {
Modified: trunk/Source/_javascript_Core/heap/SlotVisitor.cpp (206313 => 206314)
--- trunk/Source/_javascript_Core/heap/SlotVisitor.cpp 2016-09-23 17:49:15 UTC (rev 206313)
+++ trunk/Source/_javascript_Core/heap/SlotVisitor.cpp 2016-09-23 18:09:44 UTC (rev 206314)
@@ -25,9 +25,9 @@
#include "config.h"
#include "SlotVisitor.h"
-#include "SlotVisitorInlines.h"
#include "ConservativeRoots.h"
+#include "GCSegmentedArrayInlines.h"
#include "HeapCellInlines.h"
#include "HeapProfiler.h"
#include "HeapSnapshotBuilder.h"
@@ -36,6 +36,7 @@
#include "JSObject.h"
#include "JSString.h"
#include "JSCInlines.h"
+#include "SlotVisitorInlines.h"
#include "SuperSampler.h"
#include "VM.h"
#include <wtf/Lock.h>
@@ -296,25 +297,32 @@
SetCurrentCellScope currentCellScope(*this, cell);
- m_currentObjectCellStateBeforeVisiting = cell->cellState();
- cell->setCellState(CellState::OldBlack);
+ cell->setCellState(blacken(cell->cellState()));
- if (isJSString(cell)) {
+ // FIXME: Make this work on ARM also.
+ // https://bugs.webkit.org/show_bug.cgi?id=162461
+ if (isX86())
+ WTF::storeLoadFence();
+
+ switch (cell->type()) {
+ case StringType:
JSString::visitChildren(const_cast<JSCell*>(cell), *this);
- return;
- }
-
- if (isJSFinalObject(cell)) {
+ break;
+
+ case FinalObjectType:
JSFinalObject::visitChildren(const_cast<JSCell*>(cell), *this);
- return;
- }
+ break;
- if (isJSArray(cell)) {
+ case ArrayType:
JSArray::visitChildren(const_cast<JSCell*>(cell), *this);
- return;
+ break;
+
+ default:
+ // FIXME: This could be so much better.
+ // https://bugs.webkit.org/show_bug.cgi?id=162462
+ cell->methodTable()->visitChildren(const_cast<JSCell*>(cell), *this);
+ break;
}
-
- cell->methodTable()->visitChildren(const_cast<JSCell*>(cell), *this);
}
void SlotVisitor::donateKnownParallel()
Modified: trunk/Source/_javascript_Core/heap/SlotVisitor.h (206313 => 206314)
--- trunk/Source/_javascript_Core/heap/SlotVisitor.h 2016-09-23 17:49:15 UTC (rev 206313)
+++ trunk/Source/_javascript_Core/heap/SlotVisitor.h 2016-09-23 18:09:44 UTC (rev 206314)
@@ -168,8 +168,6 @@
HeapSnapshotBuilder* m_heapSnapshotBuilder { nullptr };
JSCell* m_currentCell { nullptr };
- CellState m_currentObjectCellStateBeforeVisiting { CellState::NewWhite };
-
public:
#if !ASSERT_DISABLED
bool m_isCheckingForDefaultMarkViolation;
Modified: trunk/Source/_javascript_Core/heap/SlotVisitorInlines.h (206313 => 206314)
--- trunk/Source/_javascript_Core/heap/SlotVisitorInlines.h 2016-09-23 17:49:15 UTC (rev 206313)
+++ trunk/Source/_javascript_Core/heap/SlotVisitorInlines.h 2016-09-23 18:09:44 UTC (rev 206314)
@@ -106,13 +106,13 @@
inline void SlotVisitor::reportExtraMemoryVisited(size_t size)
{
- heap()->reportExtraMemoryVisited(m_currentObjectCellStateBeforeVisiting, size);
+ heap()->reportExtraMemoryVisited(m_currentCell, size);
}
#if ENABLE(RESOURCE_USAGE)
inline void SlotVisitor::reportExternalMemoryVisited(size_t size)
{
- heap()->reportExternalMemoryVisited(m_currentObjectCellStateBeforeVisiting, size);
+ heap()->reportExternalMemoryVisited(m_currentCell, size);
}
#endif
Modified: trunk/Source/_javascript_Core/jit/AssemblyHelpers.h (206313 => 206314)
--- trunk/Source/_javascript_Core/jit/AssemblyHelpers.h 2016-09-23 17:49:15 UTC (rev 206313)
+++ trunk/Source/_javascript_Core/jit/AssemblyHelpers.h 2016-09-23 18:09:44 UTC (rev 206314)
@@ -1308,13 +1308,13 @@
Jump jumpIfIsRememberedOrInEden(GPRReg cell)
{
- return branchTest8(MacroAssembler::NonZero, MacroAssembler::Address(cell, JSCell::cellStateOffset()));
+ return branch8(Above, Address(cell, JSCell::cellStateOffset()), TrustedImm32(blackThreshold));
}
Jump jumpIfIsRememberedOrInEden(JSCell* cell)
{
uint8_t* address = reinterpret_cast<uint8_t*>(cell) + JSCell::cellStateOffset();
- return branchTest8(MacroAssembler::NonZero, MacroAssembler::AbsoluteAddress(address));
+ return branch8(Above, AbsoluteAddress(address), TrustedImm32(blackThreshold));
}
// Emits the branch structure for typeof. The code emitted by this doesn't fall through. The
Modified: trunk/Source/_javascript_Core/llint/LLIntData.cpp (206313 => 206314)
--- trunk/Source/_javascript_Core/llint/LLIntData.cpp 2016-09-23 17:49:15 UTC (rev 206313)
+++ trunk/Source/_javascript_Core/llint/LLIntData.cpp 2016-09-23 18:09:44 UTC (rev 206314)
@@ -214,6 +214,7 @@
STATIC_ASSERT(GetPutInfo::initializationBits == 0xffc00);
STATIC_ASSERT(MarkedBlock::blockSize == 16 * 1024);
+ STATIC_ASSERT(blackThreshold == 1);
ASSERT(bitwise_cast<uintptr_t>(ShadowChicken::Packet::tailMarker()) == static_cast<uintptr_t>(0x7a11));
Modified: trunk/Source/_javascript_Core/llint/LowLevelInterpreter.asm (206313 => 206314)
--- trunk/Source/_javascript_Core/llint/LowLevelInterpreter.asm 2016-09-23 17:49:15 UTC (rev 206313)
+++ trunk/Source/_javascript_Core/llint/LowLevelInterpreter.asm 2016-09-23 18:09:44 UTC (rev 206314)
@@ -409,6 +409,8 @@
const MarkedBlockSize = 16 * 1024
const MarkedBlockMask = ~(MarkedBlockSize - 1)
+const BlackThreshold = 1
+
# Allocation constants
if JSVALUE64
const JSFinalObjectSizeClassIndex = 1
@@ -888,9 +890,10 @@
loadb JSCell::m_indexingType[cell], indexingType
end
-macro skipIfIsRememberedOrInEden(cell, scratch1, scratch2, continuation)
- loadb JSCell::m_cellState[cell], scratch1
- continuation(scratch1)
+macro skipIfIsRememberedOrInEden(cell, slowPath)
+ bba JSCell::m_cellState[cell], BlackThreshold, .done
+ slowPath()
+.done:
end
macro notifyWrite(set, slow)
Modified: trunk/Source/_javascript_Core/llint/LowLevelInterpreter32_64.asm (206313 => 206314)
--- trunk/Source/_javascript_Core/llint/LowLevelInterpreter32_64.asm 2016-09-23 17:49:15 UTC (rev 206313)
+++ trunk/Source/_javascript_Core/llint/LowLevelInterpreter32_64.asm 2016-09-23 18:09:44 UTC (rev 206314)
@@ -500,9 +500,9 @@
macro writeBarrierOnOperand(cellOperand)
loadisFromInstruction(cellOperand, t1)
loadConstantOrVariablePayload(t1, CellTag, t2, .writeBarrierDone)
- skipIfIsRememberedOrInEden(t2, t1, t3,
- macro(cellState)
- btbnz cellState, .writeBarrierDone
+ skipIfIsRememberedOrInEden(
+ t2,
+ macro()
push cfr, PC
# We make two extra slots because cCall2 will poke.
subp 8, sp
@@ -511,8 +511,7 @@
cCall2Void(_llint_write_barrier_slow)
addp 8, sp
pop PC, cfr
- end
- )
+ end)
.writeBarrierDone:
end
@@ -532,9 +531,9 @@
loadHelper(t3)
- skipIfIsRememberedOrInEden(t3, t1, t2,
- macro(gcData)
- btbnz gcData, .writeBarrierDone
+ skipIfIsRememberedOrInEden(
+ t3,
+ macro()
push cfr, PC
# We make two extra slots because cCall2 will poke.
subp 8, sp
@@ -543,8 +542,7 @@
cCall2Void(_llint_write_barrier_slow)
addp 8, sp
pop PC, cfr
- end
- )
+ end)
.writeBarrierDone:
end
Modified: trunk/Source/_javascript_Core/llint/LowLevelInterpreter64.asm (206313 => 206314)
--- trunk/Source/_javascript_Core/llint/LowLevelInterpreter64.asm 2016-09-23 17:49:15 UTC (rev 206313)
+++ trunk/Source/_javascript_Core/llint/LowLevelInterpreter64.asm 2016-09-23 18:09:44 UTC (rev 206314)
@@ -404,16 +404,15 @@
macro writeBarrierOnOperand(cellOperand)
loadisFromInstruction(cellOperand, t1)
loadConstantOrVariableCell(t1, t2, .writeBarrierDone)
- skipIfIsRememberedOrInEden(t2, t1, t3,
- macro(cellState)
- btbnz cellState, .writeBarrierDone
+ skipIfIsRememberedOrInEden(
+ t2,
+ macro()
push PB, PC
move t2, a1 # t2 can be a0 (not on 64 bits, but better safe than sorry)
move cfr, a0
cCall2Void(_llint_write_barrier_slow)
pop PC, PB
- end
- )
+ end)
.writeBarrierDone:
end
@@ -432,9 +431,9 @@
btpz t0, .writeBarrierDone
loadHelper(t3)
- skipIfIsRememberedOrInEden(t3, t1, t2,
- macro(gcData)
- btbnz gcData, .writeBarrierDone
+ skipIfIsRememberedOrInEden(
+ t3,
+ macro()
push PB, PC
move cfr, a0
move t3, a1
Modified: trunk/Source/_javascript_Core/runtime/JSObject.h (206313 => 206314)
--- trunk/Source/_javascript_Core/runtime/JSObject.h 2016-09-23 17:49:15 UTC (rev 206313)
+++ trunk/Source/_javascript_Core/runtime/JSObject.h 2016-09-23 18:09:44 UTC (rev 206314)
@@ -1093,7 +1093,7 @@
inline bool isJSFinalObject(JSCell* cell)
{
- return cell->classInfo() == JSFinalObject::info();
+ return cell->type() == FinalObjectType;
}
inline bool isJSFinalObject(JSValue value)
Modified: trunk/Source/WTF/ChangeLog (206313 => 206314)
--- trunk/Source/WTF/ChangeLog 2016-09-23 17:49:15 UTC (rev 206313)
+++ trunk/Source/WTF/ChangeLog 2016-09-23 18:09:44 UTC (rev 206314)
@@ -14,6 +14,18 @@
2016-09-22 Filip Pizlo <[email protected]>
+ Need a store-load fence between setting cell state and visiting the object in SlotVisitor
+ https://bugs.webkit.org/show_bug.cgi?id=162354
+
+ Reviewed by Mark Lam.
+
+ Fix this on x86-32.
+
+ * wtf/Atomics.h:
+ (WTF::x86_ortop):
+
+2016-09-22 Filip Pizlo <[email protected]>
+
Fences on x86 should be a lot cheaper
https://bugs.webkit.org/show_bug.cgi?id=162417
Modified: trunk/Source/WTF/wtf/Atomics.h (206313 => 206314)
--- trunk/Source/WTF/wtf/Atomics.h 2016-09-23 17:49:15 UTC (rev 206313)
+++ trunk/Source/WTF/wtf/Atomics.h 2016-09-23 18:09:44 UTC (rev 206314)
@@ -175,10 +175,12 @@
// know that it is equivalent for our purposes, but it would be good to
// investigate if that is actually better.
MemoryBarrier();
-#else
+#elif CPU(X86_64)
// This has acqrel semantics and is much cheaper than mfence. For exampe, in the JSC GC, using
// mfence as a store-load fence was a 9% slow-down on Octane/splay while using this was neutral.
asm volatile("lock; orl $0, (%%rsp)" ::: "memory");
+#else
+ asm volatile("lock; orl $0, (%%esp)" ::: "memory");
#endif
}