Diff
Modified: trunk/Source/_javascript_Core/ChangeLog (206538 => 206539)
--- trunk/Source/_javascript_Core/ChangeLog 2016-09-28 20:19:47 UTC (rev 206538)
+++ trunk/Source/_javascript_Core/ChangeLog 2016-09-28 20:30:44 UTC (rev 206539)
@@ -1,3 +1,59 @@
+2016-09-28 Filip Pizlo <[email protected]>
+
+ Optimize B3->Air lowering of Fence on ARM
+ https://bugs.webkit.org/show_bug.cgi?id=162342
+
+ Reviewed by Geoffrey Garen.
+
+ This gives us comprehensive support for standalone fences on x86 and ARM. The changes are as
+ follows:
+
+ - Sets in stone the rule that the heaps of a B3::Fence tell you what the fence protects. If the
+ fence reads, it protects motion of stores. If the fence writes, it protects motion of loads.
+ This allows us to express for example load-load fences in a portable way: on x86 they will just
+ block B3 optimizations and emit no code, while on ARM you will get some fence.
+
+ - Adds comprehensive support for WTF-style fences in the ARM assembler. I simplified it just a bit
+ to match what B3, the main client, knows. There are three fences: MemoryFence, StoreFence, and
+ LoadFence. On x86, MemoryFence is ortop while StoreFence and LoadFence emit no code. On ARM64,
+ MemoryFence and LoadFence are dmb ish while StoreFence is dmb ishst.
+
+ - Tests! To test this, I needed to teach the disassembler how to disassemble dmb ish and dmb
+ ishst. I think that the canonical way to do it would be to create a group for dmb and then teach
+ that group how to decode the operands. But I don't actually know what are all of the ways of
+ encoding dmb, so I'd rather that unrecognized encodings fall through to the ".long blah"
+ bailout. So, this creates explicit matching rules for "dmb ish" and "dmb ishst", which is the
+ most conservative thing we can do.
+
+ * assembler/ARM64Assembler.h:
+ (JSC::ARM64Assembler::dmbISH):
+ (JSC::ARM64Assembler::dmbISHST):
+ (JSC::ARM64Assembler::dmbSY): Deleted.
+ * assembler/MacroAssemblerARM64.h:
+ (JSC::MacroAssemblerARM64::memoryFence):
+ (JSC::MacroAssemblerARM64::storeFence):
+ (JSC::MacroAssemblerARM64::loadFence):
+ * assembler/MacroAssemblerX86Common.h:
+ (JSC::MacroAssemblerX86Common::storeFence):
+ (JSC::MacroAssemblerX86Common::loadFence):
+ * b3/B3FenceValue.h:
+ * b3/B3LowerToAir.cpp:
+ (JSC::B3::Air::LowerToAir::lower):
+ * b3/air/AirOpcode.opcodes:
+ * b3/testb3.cpp:
+ (JSC::B3::testMemoryFence):
+ (JSC::B3::testStoreFence):
+ (JSC::B3::testLoadFence):
+ (JSC::B3::run):
+ (JSC::B3::testX86MFence): Deleted.
+ (JSC::B3::testX86CompilerFence): Deleted.
+ * disassembler/ARM64/A64DOpcode.cpp:
+ (JSC::ARM64Disassembler::A64DOpcodeDmbIsh::format):
+ (JSC::ARM64Disassembler::A64DOpcodeDmbIshSt::format):
+ * disassembler/ARM64/A64DOpcode.h:
+ (JSC::ARM64Disassembler::A64DOpcodeDmbIsh::opName):
+ (JSC::ARM64Disassembler::A64DOpcodeDmbIshSt::opName):
+
2016-09-28 Joseph Pecoraro <[email protected]>
Adopt #pragma once in some generated resources
Modified: trunk/Source/_javascript_Core/assembler/ARM64Assembler.h (206538 => 206539)
--- trunk/Source/_javascript_Core/assembler/ARM64Assembler.h 2016-09-28 20:19:47 UTC (rev 206538)
+++ trunk/Source/_javascript_Core/assembler/ARM64Assembler.h 2016-09-28 20:30:44 UTC (rev 206539)
@@ -1496,11 +1496,16 @@
}
}
- ALWAYS_INLINE void dmbSY()
+ ALWAYS_INLINE void dmbISH()
{
- insn(0xd5033fbf);
+ insn(0xd5033bbf);
}
+ ALWAYS_INLINE void dmbISHST()
+ {
+ insn(0xd5033abf);
+ }
+
template<int datasize>
ALWAYS_INLINE void orn(RegisterID rd, RegisterID rn, RegisterID rm)
{
Modified: trunk/Source/_javascript_Core/assembler/MacroAssemblerARM64.h (206538 => 206539)
--- trunk/Source/_javascript_Core/assembler/MacroAssemblerARM64.h 2016-09-28 20:19:47 UTC (rev 206538)
+++ trunk/Source/_javascript_Core/assembler/MacroAssemblerARM64.h 2016-09-28 20:30:44 UTC (rev 206539)
@@ -3215,12 +3215,27 @@
m_assembler.nop();
}
+ // We take memoryFence to mean acqrel. This has acqrel semantics on ARM64.
void memoryFence()
{
- m_assembler.dmbSY();
+ m_assembler.dmbISH();
}
+ // We take this to mean that it prevents motion of normal stores. That's a store fence on ARM64 (hence the "ST").
+ void storeFence()
+ {
+ m_assembler.dmbISHST();
+ }
+ // We take this to mean that it prevents motion of normal loads. Ideally we'd have expressed this
+ // using dependencies or half fences, but there are cases where this is as good as it gets. The only
+ // way to get a standalone load fence instruction on ARM is to use the ISH fence, which is just like
+ // the memoryFence().
+ void loadFence()
+ {
+ m_assembler.dmbISH();
+ }
+
// Misc helper functions.
// Invert a relational condition, e.g. == becomes !=, < becomes >=, etc.
Modified: trunk/Source/_javascript_Core/assembler/MacroAssemblerX86Common.h (206538 => 206539)
--- trunk/Source/_javascript_Core/assembler/MacroAssemblerX86Common.h 2016-09-28 20:19:47 UTC (rev 206538)
+++ trunk/Source/_javascript_Core/assembler/MacroAssemblerX86Common.h 2016-09-28 20:30:44 UTC (rev 206539)
@@ -2636,6 +2636,16 @@
m_assembler.orl_im(0, 0, X86Registers::esp);
}
+ // We take this to mean that it prevents motion of normal stores. So, it's a no-op on x86.
+ void storeFence()
+ {
+ }
+
+ // We take this to mean that it prevents motion of normal loads. So, it's a no-op on x86.
+ void loadFence()
+ {
+ }
+
static void replaceWithJump(CodeLocationLabel instructionStart, CodeLocationLabel destination)
{
X86Assembler::replaceWithJump(instructionStart.executableAddress(), destination.executableAddress());
Modified: trunk/Source/_javascript_Core/b3/B3FenceValue.h (206538 => 206539)
--- trunk/Source/_javascript_Core/b3/B3FenceValue.h 2016-09-28 20:19:47 UTC (rev 206538)
+++ trunk/Source/_javascript_Core/b3/B3FenceValue.h 2016-09-28 20:30:44 UTC (rev 206539)
@@ -41,7 +41,9 @@
// The read/write heaps are reflected in the effects() of this value. The compiler may change
// the lowering of a Fence based on the heaps. For example, if a fence does not write anything
// then it is understood to be a store-store fence. On x86, this may lead us to not emit any
- // code, while on ARM we may emit a cheaper fence (dmb ishst instead of dmb ish).
+ // code, while on ARM we may emit a cheaper fence (dmb ishst instead of dmb ish). We will do
+ // the same optimization for load-load fences, which are expressed as a Fence that writes but
+ // does not read.
//
// This abstraction allows us to cover all of the fences on x86 and all of the standalone fences
// on ARM. X86 really just has one fence: mfence. This fence should be used to protect stores
@@ -65,7 +67,6 @@
// On ARM there are many more fences. The Fence instruction is meant to model just two of them:
// dmb ish and dmb ishst. You can emit a dmb ishst by using a Fence with an empty write heap.
// Otherwise, you will get a dmb ish.
- // FIXME: Make this work right on ARM. https://bugs.webkit.org/show_bug.cgi?id=162342
// FIXME: Add fenced memory accesses. https://bugs.webkit.org/show_bug.cgi?id=162349
// FIXME: Add a Depend operation. https://bugs.webkit.org/show_bug.cgi?id=162350
HeapRange read { HeapRange::top() };
Modified: trunk/Source/_javascript_Core/b3/B3LowerToAir.cpp (206538 => 206539)
--- trunk/Source/_javascript_Core/b3/B3LowerToAir.cpp 2016-09-28 20:19:47 UTC (rev 206538)
+++ trunk/Source/_javascript_Core/b3/B3LowerToAir.cpp 2016-09-28 20:30:44 UTC (rev 206539)
@@ -2049,10 +2049,18 @@
case Fence: {
FenceValue* fence = m_value->as<FenceValue>();
- if (isX86() && !fence->write)
+ if (!fence->write && !fence->read)
return;
- // FIXME: Optimize this on ARM.
- // https://bugs.webkit.org/show_bug.cgi?id=162342
+ if (!fence->write) {
+ // A fence that reads but does not write is for protecting motion of stores.
+ append(StoreFence);
+ return;
+ }
+ if (!fence->read) {
+ // A fence that writes but does not read is for protecting motion of loads.
+ append(LoadFence);
+ return;
+ }
append(MemoryFence);
return;
}
Modified: trunk/Source/_javascript_Core/b3/air/AirOpcode.opcodes (206538 => 206539)
--- trunk/Source/_javascript_Core/b3/air/AirOpcode.opcodes 2016-09-28 20:19:47 UTC (rev 206538)
+++ trunk/Source/_javascript_Core/b3/air/AirOpcode.opcodes 2016-09-28 20:30:44 UTC (rev 206539)
@@ -844,6 +844,8 @@
DoubleCond, Tmp, Tmp, Tmp, Tmp, Tmp
MemoryFence /effects
+StoreFence /effects
+LoadFence /effects
Jump /branch
Modified: trunk/Source/_javascript_Core/b3/testb3.cpp (206538 => 206539)
--- trunk/Source/_javascript_Core/b3/testb3.cpp 2016-09-28 20:19:47 UTC (rev 206538)
+++ trunk/Source/_javascript_Core/b3/testb3.cpp 2016-09-28 20:30:44 UTC (rev 206539)
@@ -13052,7 +13052,7 @@
CHECK_EQ(invoke<int>(*code, -1), 666);
}
-void testX86MFence()
+void testMemoryFence()
{
Procedure proc;
@@ -13059,14 +13059,19 @@
BasicBlock* root = proc.addBlock();
root->appendNew<FenceValue>(proc, Origin());
- root->appendNew<Value>(proc, Return, Origin());
+ root->appendNew<Value>(proc, Return, Origin(), root->appendIntConstant(proc, Origin(), Int32, 42));
auto code = compile(proc);
- checkUsesInstruction(*code, "lock or $0x0, (%rsp)");
+ CHECK_EQ(invoke<int>(*code), 42);
+ if (isX86())
+ checkUsesInstruction(*code, "lock or $0x0, (%rsp)");
+ if (isARM64())
+ checkUsesInstruction(*code, "dmb ish");
checkDoesNotUseInstruction(*code, "mfence");
+ checkDoesNotUseInstruction(*code, "dmb ishst");
}
-void testX86CompilerFence()
+void testStoreFence()
{
Procedure proc;
@@ -13073,13 +13078,34 @@
BasicBlock* root = proc.addBlock();
root->appendNew<FenceValue>(proc, Origin(), HeapRange::top(), HeapRange());
- root->appendNew<Value>(proc, Return, Origin());
+ root->appendNew<Value>(proc, Return, Origin(), root->appendIntConstant(proc, Origin(), Int32, 42));
auto code = compile(proc);
+ CHECK_EQ(invoke<int>(*code), 42);
checkDoesNotUseInstruction(*code, "lock");
checkDoesNotUseInstruction(*code, "mfence");
+ if (isARM64())
+ checkUsesInstruction(*code, "dmb ishst");
}
+void testLoadFence()
+{
+ Procedure proc;
+
+ BasicBlock* root = proc.addBlock();
+
+ root->appendNew<FenceValue>(proc, Origin(), HeapRange(), HeapRange::top());
+ root->appendNew<Value>(proc, Return, Origin(), root->appendIntConstant(proc, Origin(), Int32, 42));
+
+ auto code = compile(proc);
+ CHECK_EQ(invoke<int>(*code), 42);
+ checkDoesNotUseInstruction(*code, "lock");
+ checkDoesNotUseInstruction(*code, "mfence");
+ if (isARM64())
+ checkUsesInstruction(*code, "dmb ish");
+ checkDoesNotUseInstruction(*code, "dmb ishst");
+}
+
// Make sure the compiler does not try to optimize anything out.
NEVER_INLINE double zero()
{
@@ -14510,8 +14536,6 @@
RUN(testBranchBitAndImmFusion(Load, Int32, 1, Air::BranchTest32, Air::Arg::Addr));
RUN(testBranchBitAndImmFusion(Load, Int64, 1, Air::BranchTest32, Air::Arg::Addr));
- RUN(testX86MFence());
- RUN(testX86CompilerFence());
}
if (isARM64()) {
@@ -14519,6 +14543,10 @@
RUN(testTernarySubInstructionSelection(Trunc, Int32, Air::Sub32));
}
+ RUN(testMemoryFence());
+ RUN(testStoreFence());
+ RUN(testLoadFence());
+
if (tasks.isEmpty())
usage();
Modified: trunk/Source/_javascript_Core/disassembler/ARM64/A64DOpcode.cpp (206538 => 206539)
--- trunk/Source/_javascript_Core/disassembler/ARM64/A64DOpcode.cpp 2016-09-28 20:19:47 UTC (rev 206538)
+++ trunk/Source/_javascript_Core/disassembler/ARM64/A64DOpcode.cpp 2016-09-28 20:30:44 UTC (rev 206539)
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012 Apple Inc. All rights reserved.
+ * Copyright (C) 2012, 2016 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -84,6 +84,8 @@
OPCODE_GROUP_ENTRY(0x15, A64DOpcodeConditionalBranchImmediate),
OPCODE_GROUP_ENTRY(0x15, A64DOpcodeCompareAndBranchImmediate),
OPCODE_GROUP_ENTRY(0x15, A64DOpcodeHint),
+ OPCODE_GROUP_ENTRY(0x15, A64DOpcodeDmbIsh),
+ OPCODE_GROUP_ENTRY(0x15, A64DOpcodeDmbIshSt),
OPCODE_GROUP_ENTRY(0x16, A64DOpcodeUnconditionalBranchImmediate),
OPCODE_GROUP_ENTRY(0x16, A64DOpcodeUnconditionalBranchRegister),
OPCODE_GROUP_ENTRY(0x16, A64DOpcodeTestAndBranchImmediate),
@@ -823,6 +825,20 @@
return m_formatBuffer;
}
+const char* A64DOpcodeDmbIsh::format()
+{
+ appendInstructionName("dmb");
+ appendString("ish");
+ return m_formatBuffer;
+}
+
+const char* A64DOpcodeDmbIshSt::format()
+{
+ appendInstructionName("dmb");
+ appendString("ishst");
+ return m_formatBuffer;
+}
+
// A zero in an entry of the table means the instruction is Unallocated
const char* const A64DOpcodeLoadStore::s_opNames[32] = {
"strb", "ldrb", "ldrsb", "ldrsb", "str", "ldr", "str", "ldr",
Modified: trunk/Source/_javascript_Core/disassembler/ARM64/A64DOpcode.h (206538 => 206539)
--- trunk/Source/_javascript_Core/disassembler/ARM64/A64DOpcode.h 2016-09-28 20:19:47 UTC (rev 206538)
+++ trunk/Source/_javascript_Core/disassembler/ARM64/A64DOpcode.h 2016-09-28 20:30:44 UTC (rev 206539)
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012 Apple Inc. All rights reserved.
+ * Copyright (C) 2012, 2016 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -509,6 +509,30 @@
unsigned immediate7() { return (m_opcode >> 5) & 0x7f; }
};
+class A64DOpcodeDmbIsh : public A64DOpcode {
+public:
+ static const uint32_t mask = 0xffffffff;
+ static const uint32_t pattern = 0xd5033bbf;
+
+ DEFINE_STATIC_FORMAT(A64DOpcodeDmbIsh, thisObj);
+
+ const char* format();
+
+ const char* opName() { return "dmb"; }
+};
+
+class A64DOpcodeDmbIshSt : public A64DOpcode {
+public:
+ static const uint32_t mask = 0xffffffff;
+ static const uint32_t pattern = 0xd5033abf;
+
+ DEFINE_STATIC_FORMAT(A64DOpcodeDmbIshSt, thisObj);
+
+ const char* format();
+
+ const char* opName() { return "dmb"; }
+};
+
class A64DOpcodeLoadStore : public A64DOpcode {
private:
static const char* const s_opNames[32];