Log Message
The JITs should be able to emit fast TLS loads https://bugs.webkit.org/show_bug.cgi?id=169483
Reviewed by Keith Miller. Source/JavaScriptCore: Added loadFromTLS32/64/Ptr to the MacroAssembler and added a B3 test for this. * assembler/ARM64Assembler.h: (JSC::ARM64Assembler::mrs_TPIDRRO_EL0): * assembler/MacroAssembler.h: (JSC::MacroAssembler::loadFromTLSPtr): * assembler/MacroAssemblerARM64.h: (JSC::MacroAssemblerARM64::loadFromTLS32): (JSC::MacroAssemblerARM64::loadFromTLS64): * assembler/MacroAssemblerX86Common.h: (JSC::MacroAssemblerX86Common::loadFromTLS32): * assembler/MacroAssemblerX86_64.h: (JSC::MacroAssemblerX86_64::loadFromTLS64): * assembler/X86Assembler.h: (JSC::X86Assembler::adcl_im): (JSC::X86Assembler::addl_mr): (JSC::X86Assembler::addl_im): (JSC::X86Assembler::andl_im): (JSC::X86Assembler::orl_im): (JSC::X86Assembler::orl_rm): (JSC::X86Assembler::subl_im): (JSC::X86Assembler::cmpb_im): (JSC::X86Assembler::cmpl_rm): (JSC::X86Assembler::cmpl_im): (JSC::X86Assembler::testb_im): (JSC::X86Assembler::movb_i8m): (JSC::X86Assembler::movb_rm): (JSC::X86Assembler::movl_mr): (JSC::X86Assembler::movq_mr): (JSC::X86Assembler::movsxd_rr): (JSC::X86Assembler::gs): (JSC::X86Assembler::X86InstructionFormatter::SingleInstructionBufferWriter::memoryModRM): * b3/testb3.cpp: (JSC::B3::testFastTLS): (JSC::B3::run): Source/WTF: Consolidated what we know about fast TLS in FastTLS.h. * WTF.xcodeproj/project.pbxproj: * wtf/CMakeLists.txt: * wtf/FastTLS.h: Added. (WTF::loadFastTLS): (WTF::fastTLSOffsetForKey): * wtf/Platform.h: * wtf/WTFThreadData.cpp: (WTF::WTFThreadData::createAndRegisterForGetspecificDirect): * wtf/WTFThreadData.h: (WTF::wtfThreadData):
Modified Paths
- trunk/Source/JavaScriptCore/ChangeLog
- trunk/Source/JavaScriptCore/assembler/ARM64Assembler.h
- trunk/Source/JavaScriptCore/assembler/MacroAssembler.h
- trunk/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h
- trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86Common.h
- trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h
- trunk/Source/JavaScriptCore/assembler/X86Assembler.h
- trunk/Source/JavaScriptCore/b3/testb3.cpp
- trunk/Source/WTF/ChangeLog
- trunk/Source/WTF/WTF.xcodeproj/project.pbxproj
- trunk/Source/WTF/wtf/CMakeLists.txt
- trunk/Source/WTF/wtf/Platform.h
- trunk/Source/WTF/wtf/WTFThreadData.cpp
- trunk/Source/WTF/wtf/WTFThreadData.h
Added Paths
- trunk/Source/WTF/wtf/FastTLS.h
Diff
Modified: trunk/Source/JavaScriptCore/ChangeLog (213752 => 213753)
--- trunk/Source/JavaScriptCore/ChangeLog 2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/JavaScriptCore/ChangeLog 2017-03-11 07:17:48 UTC (rev 213753)
@@ -1,3 +1,46 @@
+2017-03-10 Filip Pizlo <fpi...@apple.com>
+
+ The JITs should be able to emit fast TLS loads
+ https://bugs.webkit.org/show_bug.cgi?id=169483
+
+ Reviewed by Keith Miller.
+
+ Added loadFromTLS32/64/Ptr to the MacroAssembler and added a B3 test for this.
+
+ * assembler/ARM64Assembler.h:
+ (JSC::ARM64Assembler::mrs_TPIDRRO_EL0):
+ * assembler/MacroAssembler.h:
+ (JSC::MacroAssembler::loadFromTLSPtr):
+ * assembler/MacroAssemblerARM64.h:
+ (JSC::MacroAssemblerARM64::loadFromTLS32):
+ (JSC::MacroAssemblerARM64::loadFromTLS64):
+ * assembler/MacroAssemblerX86Common.h:
+ (JSC::MacroAssemblerX86Common::loadFromTLS32):
+ * assembler/MacroAssemblerX86_64.h:
+ (JSC::MacroAssemblerX86_64::loadFromTLS64):
+ * assembler/X86Assembler.h:
+ (JSC::X86Assembler::adcl_im):
+ (JSC::X86Assembler::addl_mr):
+ (JSC::X86Assembler::addl_im):
+ (JSC::X86Assembler::andl_im):
+ (JSC::X86Assembler::orl_im):
+ (JSC::X86Assembler::orl_rm):
+ (JSC::X86Assembler::subl_im):
+ (JSC::X86Assembler::cmpb_im):
+ (JSC::X86Assembler::cmpl_rm):
+ (JSC::X86Assembler::cmpl_im):
+ (JSC::X86Assembler::testb_im):
+ (JSC::X86Assembler::movb_i8m):
+ (JSC::X86Assembler::movb_rm):
+ (JSC::X86Assembler::movl_mr):
+ (JSC::X86Assembler::movq_mr):
+ (JSC::X86Assembler::movsxd_rr):
+ (JSC::X86Assembler::gs):
+ (JSC::X86Assembler::X86InstructionFormatter::SingleInstructionBufferWriter::memoryModRM):
+ * b3/testb3.cpp:
+ (JSC::B3::testFastTLS):
+ (JSC::B3::run):
+
2017-03-10 Alex Christensen <achristen...@webkit.org>
Fix watch and tv builds after r213294
Modified: trunk/Source/JavaScriptCore/assembler/ARM64Assembler.h (213752 => 213753)
--- trunk/Source/JavaScriptCore/assembler/ARM64Assembler.h 2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/JavaScriptCore/assembler/ARM64Assembler.h 2017-03-11 07:17:48 UTC (rev 213753)
@@ -1588,6 +1588,13 @@
CHECK_DATASIZE();
insn(exoticStore(MEMOPSIZE, ExoticStoreFence_Release, result, src, dst));
}
+
+#if ENABLE(FAST_TLS_JIT)
+ void mrs_TPIDRRO_EL0(RegisterID dst) // Emits the instruction that reads the TPIDRRO_EL0 system register into dst.
+ {
+ insn(0xd53bd060 | dst); // Thanks, otool -t! The destination register number is OR'ed into the fixed encoding.
+ }
+#endif
template<int datasize>
ALWAYS_INLINE void orn(RegisterID rd, RegisterID rn, RegisterID rm)
Modified: trunk/Source/JavaScriptCore/assembler/MacroAssembler.h (213752 => 213753)
--- trunk/Source/JavaScriptCore/assembler/MacroAssembler.h 2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/JavaScriptCore/assembler/MacroAssembler.h 2017-03-11 07:17:48 UTC (rev 213753)
@@ -627,6 +627,13 @@
load32(address, dest);
}
+#if ENABLE(FAST_TLS_JIT)
+ void loadFromTLSPtr(uint32_t offset, RegisterID dst) // Pointer-width TLS load; this variant forwards to the 32-bit load.
+ {
+ loadFromTLS32(offset, dst); // NOTE(review): presumably the section of the class where pointers are 32 bits wide - confirm against the enclosing #if.
+ }
+#endif
+
DataLabel32 loadPtrWithAddressOffsetPatch(Address address, RegisterID dest)
{
return load32WithAddressOffsetPatch(address, dest);
@@ -934,6 +941,13 @@
load64(address, dest);
}
+#if ENABLE(FAST_TLS_JIT)
+ void loadFromTLSPtr(uint32_t offset, RegisterID dst) // Pointer-width TLS load; this variant forwards to the 64-bit load.
+ {
+ loadFromTLS64(offset, dst); // NOTE(review): presumably the section of the class where pointers are 64 bits wide - confirm against the enclosing #if.
+ }
+#endif
+
DataLabel32 loadPtrWithAddressOffsetPatch(Address address, RegisterID dest)
{
return load64WithAddressOffsetPatch(address, dest);
Modified: trunk/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h (213752 => 213753)
--- trunk/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h 2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h 2017-03-11 07:17:48 UTC (rev 213753)
@@ -3598,6 +3598,24 @@
m_assembler.eor<64>(dest, src, src);
}
+#if ENABLE(FAST_TLS_JIT)
+ // This will use scratch registers if the offset is not legal.
+
+ void loadFromTLS32(uint32_t offset, RegisterID dst) // Loads the 32-bit value at (TPIDRRO_EL0 & ~7) + offset into dst.
+ {
+ m_assembler.mrs_TPIDRRO_EL0(dst); // dst = TPIDRRO_EL0.
+ and64(TrustedImm32(~7), dst); // Clear the low three bits before using the value as a base address.
+ load32(Address(dst, offset), dst); // dst serves as both the base register and the destination.
+ }
+
+ void loadFromTLS64(uint32_t offset, RegisterID dst) // Loads the 64-bit value at (TPIDRRO_EL0 & ~7) + offset into dst.
+ {
+ m_assembler.mrs_TPIDRRO_EL0(dst); // dst = TPIDRRO_EL0.
+ and64(TrustedImm32(~7), dst); // Clear the low three bits before using the value as a base address.
+ load64(Address(dst, offset), dst); // dst serves as both the base register and the destination.
+ }
+#endif // ENABLE(FAST_TLS_JIT)
+
// Misc helper functions.
// Invert a relational condition, e.g. == becomes !=, < becomes >=, etc.
Modified: trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86Common.h (213752 => 213753)
--- trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86Common.h 2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86Common.h 2017-03-11 07:17:48 UTC (rev 213753)
@@ -3857,6 +3857,14 @@
void loadFence()
{
}
+
+#if ENABLE(FAST_TLS_JIT)
+ void loadFromTLS32(uint32_t offset, RegisterID dst) // Loads the 32-bit value at %gs:offset into dst.
+ {
+ m_assembler.gs(); // Prefix: the memory access of the next instruction is offset by %gs.
+ m_assembler.movl_mr(offset, dst); // 32-bit absolute-address load; the "address" is the offset from %gs.
+ }
+#endif
static void replaceWithBreakpoint(CodeLocationLabel instructionStart)
{
Modified: trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h (213752 => 213753)
--- trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h 2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h 2017-03-11 07:17:48 UTC (rev 213753)
@@ -1701,6 +1701,14 @@
store64(imm, dest);
}
+#if ENABLE(FAST_TLS_JIT)
+ void loadFromTLS64(uint32_t offset, RegisterID dst) // Loads the full 64-bit value at %gs:offset into dst.
+ {
+ m_assembler.gs(); // Prefix: the memory access of the next instruction is offset by %gs.
+ m_assembler.movq_mr(offset, dst); // Must be the 64-bit (REX.W) load; movl_mr would read only the low 32 bits of the slot.
+ }
+#endif
+
void truncateDoubleToUint32(FPRegisterID src, RegisterID dest)
{
m_assembler.cvttsd2siq_rr(src, dest);
Modified: trunk/Source/JavaScriptCore/assembler/X86Assembler.h (213752 => 213753)
--- trunk/Source/JavaScriptCore/assembler/X86Assembler.h 2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/JavaScriptCore/assembler/X86Assembler.h 2017-03-11 07:17:48 UTC (rev 213753)
@@ -218,6 +218,7 @@
#if CPU(X86_64)
OP_MOVSXD_GvEv = 0x63,
#endif
+ PRE_GS = 0x65,
PRE_OPERAND_SIZE = 0x66,
PRE_SSE_66 = 0x66,
OP_PUSH_Iz = 0x68,
@@ -431,10 +432,10 @@
void adcl_im(int imm, const void* addr)
{
if (CAN_SIGN_EXTEND_8_32(imm)) {
- m_formatter.oneByteOp(OP_GROUP1_EvIb, GROUP1_OP_ADC, addr);
+ m_formatter.oneByteOpAddr(OP_GROUP1_EvIb, GROUP1_OP_ADC, bitwise_cast<uint32_t>(addr));
m_formatter.immediate8(imm);
} else {
- m_formatter.oneByteOp(OP_GROUP1_EvIz, GROUP1_OP_ADC, addr);
+ m_formatter.oneByteOpAddr(OP_GROUP1_EvIz, GROUP1_OP_ADC, bitwise_cast<uint32_t>(addr));
m_formatter.immediate32(imm);
}
}
@@ -458,7 +459,7 @@
#if !CPU(X86_64)
void addl_mr(const void* addr, RegisterID dst)
{
- m_formatter.oneByteOp(OP_ADD_GvEv, dst, addr);
+ m_formatter.oneByteOpAddr(OP_ADD_GvEv, dst, bitwise_cast<uint32_t>(addr));
}
#endif
@@ -631,10 +632,10 @@
void addl_im(int imm, const void* addr)
{
if (CAN_SIGN_EXTEND_8_32(imm)) {
- m_formatter.oneByteOp(OP_GROUP1_EvIb, GROUP1_OP_ADD, addr);
+ m_formatter.oneByteOpAddr(OP_GROUP1_EvIb, GROUP1_OP_ADD, bitwise_cast<uint32_t>(addr));
m_formatter.immediate8(imm);
} else {
- m_formatter.oneByteOp(OP_GROUP1_EvIz, GROUP1_OP_ADD, addr);
+ m_formatter.oneByteOpAddr(OP_GROUP1_EvIz, GROUP1_OP_ADD, bitwise_cast<uint32_t>(addr));
m_formatter.immediate32(imm);
}
}
@@ -818,10 +819,10 @@
void andl_im(int imm, const void* addr)
{
if (CAN_SIGN_EXTEND_8_32(imm)) {
- m_formatter.oneByteOp(OP_GROUP1_EvIb, GROUP1_OP_AND, addr);
+ m_formatter.oneByteOpAddr(OP_GROUP1_EvIb, GROUP1_OP_AND, bitwise_cast<uint32_t>(addr));
m_formatter.immediate8(imm);
} else {
- m_formatter.oneByteOp(OP_GROUP1_EvIz, GROUP1_OP_AND, addr);
+ m_formatter.oneByteOpAddr(OP_GROUP1_EvIz, GROUP1_OP_AND, bitwise_cast<uint32_t>(addr));
m_formatter.immediate32(imm);
}
}
@@ -1159,10 +1160,10 @@
void orl_im(int imm, const void* addr)
{
if (CAN_SIGN_EXTEND_8_32(imm)) {
- m_formatter.oneByteOp(OP_GROUP1_EvIb, GROUP1_OP_OR, addr);
+ m_formatter.oneByteOpAddr(OP_GROUP1_EvIb, GROUP1_OP_OR, bitwise_cast<uint32_t>(addr));
m_formatter.immediate8(imm);
} else {
- m_formatter.oneByteOp(OP_GROUP1_EvIz, GROUP1_OP_OR, addr);
+ m_formatter.oneByteOpAddr(OP_GROUP1_EvIz, GROUP1_OP_OR, bitwise_cast<uint32_t>(addr));
m_formatter.immediate32(imm);
}
}
@@ -1169,7 +1170,7 @@
void orl_rm(RegisterID src, const void* addr)
{
- m_formatter.oneByteOp(OP_OR_EvGv, src, addr);
+ m_formatter.oneByteOpAddr(OP_OR_EvGv, src, bitwise_cast<uint32_t>(addr));
}
#endif
@@ -1357,10 +1358,10 @@
void subl_im(int imm, const void* addr)
{
if (CAN_SIGN_EXTEND_8_32(imm)) {
- m_formatter.oneByteOp(OP_GROUP1_EvIb, GROUP1_OP_SUB, addr);
+ m_formatter.oneByteOpAddr(OP_GROUP1_EvIb, GROUP1_OP_SUB, bitwise_cast<uint32_t>(addr));
m_formatter.immediate8(imm);
} else {
- m_formatter.oneByteOp(OP_GROUP1_EvIz, GROUP1_OP_SUB, addr);
+ m_formatter.oneByteOpAddr(OP_GROUP1_EvIz, GROUP1_OP_SUB, bitwise_cast<uint32_t>(addr));
m_formatter.immediate32(imm);
}
}
@@ -1857,7 +1858,7 @@
#if CPU(X86)
void cmpb_im(int imm, const void* addr)
{
- m_formatter.oneByteOp(OP_GROUP1_EbIb, GROUP1_OP_CMP, addr);
+ m_formatter.oneByteOpAddr(OP_GROUP1_EbIb, GROUP1_OP_CMP, bitwise_cast<uint32_t>(addr));
m_formatter.immediate8(imm);
}
#endif
@@ -1938,16 +1939,16 @@
#else
void cmpl_rm(RegisterID reg, const void* addr)
{
- m_formatter.oneByteOp(OP_CMP_EvGv, reg, addr);
+ m_formatter.oneByteOpAddr(OP_CMP_EvGv, reg, bitwise_cast<uint32_t>(addr));
}
void cmpl_im(int imm, const void* addr)
{
if (CAN_SIGN_EXTEND_8_32(imm)) {
- m_formatter.oneByteOp(OP_GROUP1_EvIb, GROUP1_OP_CMP, addr);
+ m_formatter.oneByteOpAddr(OP_GROUP1_EvIb, GROUP1_OP_CMP, bitwise_cast<uint32_t>(addr));
m_formatter.immediate8(imm);
} else {
- m_formatter.oneByteOp(OP_GROUP1_EvIz, GROUP1_OP_CMP, addr);
+ m_formatter.oneByteOpAddr(OP_GROUP1_EvIz, GROUP1_OP_CMP, bitwise_cast<uint32_t>(addr));
m_formatter.immediate32(imm);
}
}
@@ -2025,7 +2026,7 @@
#if CPU(X86)
void testb_im(int imm, const void* addr)
{
- m_formatter.oneByteOp(OP_GROUP3_EbIb, GROUP3_OP_TEST, addr);
+ m_formatter.oneByteOpAddr(OP_GROUP3_EbIb, GROUP3_OP_TEST, bitwise_cast<uint32_t>(addr));
m_formatter.immediate8(imm);
}
#endif
@@ -2279,7 +2280,7 @@
void movb_i8m(int imm, const void* addr)
{
ASSERT(-128 <= imm && imm < 128);
- m_formatter.oneByteOp(OP_GROUP11_EvIb, GROUP11_MOV, addr);
+ m_formatter.oneByteOpAddr(OP_GROUP11_EvIb, GROUP11_MOV, bitwise_cast<uint32_t>(addr));
m_formatter.immediate8(imm);
}
#endif
@@ -2301,7 +2302,7 @@
#if !CPU(X86_64)
void movb_rm(RegisterID src, const void* addr)
{
- m_formatter.oneByteOp(OP_MOV_EbGb, src, addr);
+ m_formatter.oneByteOpAddr(OP_MOV_EbGb, src, bitwise_cast<uint32_t>(addr));
}
#endif
@@ -2354,6 +2355,11 @@
#endif
}
+ void movl_mr(uint32_t addr, RegisterID dst) // Emits OP_MOV_GvEv loading into dst from a 32-bit absolute address.
+ {
+ m_formatter.oneByteOpAddr(OP_MOV_GvEv, dst, addr); // Non-REX.W formatter path, i.e. the 32-bit flavour of the load.
+ }
+
#if CPU(X86_64)
void movq_rr(RegisterID src, RegisterID dst)
{
@@ -2407,6 +2413,11 @@
m_formatter.oneByteOp64(OP_MOV_GvEv, dst, base, index, scale, offset);
}
+ void movq_mr(uint32_t addr, RegisterID dst) // Emits the 64-bit form of OP_MOV_GvEv loading into dst from a 32-bit absolute address.
+ {
+ m_formatter.oneByteOp64Addr(OP_MOV_GvEv, dst, addr); // The "64" formatter variant is the one that emits REX.W.
+ }
+
void movq_i32m(int imm, int offset, RegisterID base)
{
m_formatter.oneByteOp64(OP_GROUP11_EvIz, GROUP11_MOV, base, offset);
@@ -2435,28 +2446,26 @@
{
m_formatter.oneByteOp64(OP_MOVSXD_GvEv, dst, src);
}
-
-
#else
+ void movl_mr(const void* addr, RegisterID dst)
+ {
+ if (dst == X86Registers::eax)
+ movl_mEAX(addr);
+ else
+ m_formatter.oneByteOpAddr(OP_MOV_GvEv, dst, bitwise_cast<uint32_t>(addr));
+ }
+
void movl_rm(RegisterID src, const void* addr)
{
if (src == X86Registers::eax)
movl_EAXm(addr);
else
- m_formatter.oneByteOp(OP_MOV_EvGv, src, addr);
+ m_formatter.oneByteOpAddr(OP_MOV_EvGv, src, bitwise_cast<uint32_t>(addr));
}
- void movl_mr(const void* addr, RegisterID dst)
- {
- if (dst == X86Registers::eax)
- movl_mEAX(addr);
- else
- m_formatter.oneByteOp(OP_MOV_GvEv, dst, addr);
- }
-
void movl_i32m(int imm, const void* addr)
{
- m_formatter.oneByteOp(OP_GROUP11_EvIz, GROUP11_MOV, addr);
+ m_formatter.oneByteOpAddr(OP_GROUP11_EvIz, GROUP11_MOV, bitwise_cast<uint32_t>(addr));
m_formatter.immediate32(imm);
}
#endif
@@ -2494,7 +2503,7 @@
#if !CPU(X86_64)
void movzbl_mr(const void* address, RegisterID dst)
{
- m_formatter.twoByteOp(OP2_MOVZX_GvEb, dst, address);
+ m_formatter.twoByteOpAddr(OP2_MOVZX_GvEb, dst, bitwise_cast<uint32_t>(address));
}
#endif
@@ -2604,7 +2613,7 @@
#else
void cmovl_mr(Condition cond, const void* addr, RegisterID dst)
{
- m_formatter.twoByteOp(cmovcc(cond), dst, addr);
+ m_formatter.twoByteOpAddr(cmovcc(cond), dst, bitwise_cast<uint32_t>(addr));
}
#endif
@@ -2677,7 +2686,7 @@
#if !CPU(X86_64)
void jmp_m(const void* address)
{
- m_formatter.oneByteOp(OP_GROUP5_Ev, GROUP5_OP_JMPN, address);
+ m_formatter.oneByteOpAddr(OP_GROUP5_Ev, GROUP5_OP_JMPN, bitwise_cast<uint32_t>(address));
}
#endif
@@ -2853,7 +2862,7 @@
void addsd_mr(const void* address, XMMRegisterID dst)
{
m_formatter.prefix(PRE_SSE_F2);
- m_formatter.twoByteOp(OP2_ADDSD_VsdWsd, (RegisterID)dst, address);
+ m_formatter.twoByteOpAddr(OP2_ADDSD_VsdWsd, (RegisterID)dst, bitwise_cast<uint32_t>(address));
}
#endif
@@ -2911,7 +2920,7 @@
void cvtsi2sd_mr(const void* address, XMMRegisterID dst)
{
m_formatter.prefix(PRE_SSE_F2);
- m_formatter.twoByteOp(OP2_CVTSI2SD_VsdEd, (RegisterID)dst, address);
+ m_formatter.twoByteOpAddr(OP2_CVTSI2SD_VsdEd, (RegisterID)dst, bitwise_cast<uint32_t>(address));
}
#endif
@@ -3068,12 +3077,12 @@
void movsd_mr(const void* address, XMMRegisterID dst)
{
m_formatter.prefix(PRE_SSE_F2);
- m_formatter.twoByteOp(OP2_MOVSD_VsdWsd, (RegisterID)dst, address);
+ m_formatter.twoByteOpAddr(OP2_MOVSD_VsdWsd, (RegisterID)dst, bitwise_cast<uint32_t>(address));
}
void movsd_rm(XMMRegisterID src, const void* address)
{
m_formatter.prefix(PRE_SSE_F2);
- m_formatter.twoByteOp(OP2_MOVSD_WsdVsd, (RegisterID)src, address);
+ m_formatter.twoByteOpAddr(OP2_MOVSD_WsdVsd, (RegisterID)src, bitwise_cast<uint32_t>(address));
}
#endif
@@ -3400,6 +3409,15 @@
m_formatter.prefix(PRE_LOCK);
}
+ // Causes the memory access in the next instruction to be offset by %gs. Usually you use
+ // this with a 32-bit absolute address load. That "address" ends up being the offset to
+ // %gs. This prefix is ignored by lea. Getting the value of %gs is hard - you can pretty
+ // much just use it as a secret offset.
+ void gs() // Emits only the prefix byte; the caller must emit the memory-accessing instruction right after.
+ {
+ m_formatter.prefix(PRE_GS); // PRE_GS is 0x65 in the one-byte opcode table.
+ }
+
void cmpxchgb_rm(RegisterID src, int offset, RegisterID base)
{
m_formatter.twoByteOp8(OP2_CMPXCHGb, src, base, offset);
@@ -4053,14 +4071,17 @@
}
}
-#if !CPU(X86_64)
- ALWAYS_INLINE void memoryModRM(int reg, const void* address)
+ ALWAYS_INLINE void memoryModRMAddr(int reg, uint32_t address)
{
+#if CPU(X86_64)
+ putModRmSib(ModRmMemoryNoDisp, reg, noBase, noIndex, 0);
+#else
// noBase + ModRmMemoryNoDisp means noBase + ModRmMemoryDisp32!
putModRm(ModRmMemoryNoDisp, reg, noBase);
- putIntUnchecked(reinterpret_cast<int32_t>(address));
+#endif
+ putIntUnchecked(address);
}
-#endif
+
ALWAYS_INLINE void twoBytesVex(OneByteOpcodeID simdPrefix, RegisterID inOpReg, RegisterID r)
{
putByteUnchecked(VexPrefix::TwoBytes);
@@ -4185,14 +4206,12 @@
writer.memoryModRM(reg, base, index, scale, offset);
}
-#if !CPU(X86_64)
- void oneByteOp(OneByteOpcodeID opcode, int reg, const void* address)
+ void oneByteOpAddr(OneByteOpcodeID opcode, int reg, uint32_t address)
{
SingleInstructionBufferWriter writer(m_buffer);
writer.putByteUnchecked(opcode);
- writer.memoryModRM(reg, address);
+ writer.memoryModRMAddr(reg, address);
}
-#endif
void twoByteOp(TwoByteOpcodeID opcode)
{
@@ -4228,15 +4247,14 @@
writer.memoryModRM(reg, base, index, scale, offset);
}
-#if !CPU(X86_64)
- void twoByteOp(TwoByteOpcodeID opcode, int reg, const void* address)
+ void twoByteOpAddr(TwoByteOpcodeID opcode, int reg, uint32_t address)
{
SingleInstructionBufferWriter writer(m_buffer);
writer.putByteUnchecked(OP_2BYTE_ESCAPE);
writer.putByteUnchecked(opcode);
- writer.memoryModRM(reg, address);
+ writer.memoryModRMAddr(reg, address);
}
-#endif
+
void vexNdsLigWigTwoByteOp(OneByteOpcodeID simdPrefix, TwoByteOpcodeID opcode, RegisterID dest, RegisterID a, RegisterID b)
{
SingleInstructionBufferWriter writer(m_buffer);
@@ -4367,6 +4385,14 @@
writer.memoryModRM(reg, base, index, scale, offset);
}
+ void oneByteOp64Addr(OneByteOpcodeID opcode, int reg, uint32_t address) // 64-bit one-byte-opcode instruction whose memory operand is a 32-bit absolute address.
+ {
+ SingleInstructionBufferWriter writer(m_buffer);
+ writer.emitRexW(reg, 0, 0); // REX.W with only the reg field supplied; index and base are passed as 0.
+ writer.putByteUnchecked(opcode);
+ writer.memoryModRMAddr(reg, address); // ModRM (and SIB on x86-64) followed by the 32-bit address.
+ }
+
void twoByteOp64(TwoByteOpcodeID opcode, int reg, RegisterID rm)
{
SingleInstructionBufferWriter writer(m_buffer);
Modified: trunk/Source/JavaScriptCore/b3/testb3.cpp (213752 => 213753)
--- trunk/Source/JavaScriptCore/b3/testb3.cpp 2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/JavaScriptCore/b3/testb3.cpp 2017-03-11 07:17:48 UTC (rev 213753)
@@ -69,6 +69,7 @@
#include "VM.h"
#include <cmath>
#include <string>
+#include <wtf/FastTLS.h>
#include <wtf/ListDump.h>
#include <wtf/Lock.h>
#include <wtf/NumberOfCores.h>
@@ -15210,6 +15211,28 @@
CHECK_EQ(numToStore, value);
}
+void testFastTLS() // Checks that a JIT-emitted loadFromTLSPtr reads back a value stored with _pthread_setspecific_direct.
+{
+#if ENABLE(FAST_TLS_JIT)
+ _pthread_setspecific_direct(WTF_TESTING_KEY, bitwise_cast<void*>(static_cast<uintptr_t>(0xbeef))); // Seed the testing slot with a recognizable value.
+
+ Procedure proc;
+ BasicBlock* root = proc.addBlock();
+
+ PatchpointValue* patchpoint = root->appendNew<PatchpointValue>(proc, pointerType(), Origin()); // The patchpoint produces the pointer-typed result.
+ patchpoint->clobber(RegisterSet::macroScratchRegisters()); // The TLS load may use macro scratch registers, so declare them clobbered.
+ patchpoint->setGenerator(
+ [&] (CCallHelpers& jit, const StackmapGenerationParams& params) {
+ AllowMacroScratchRegisterUsage allowScratch(jit);
+ jit.loadFromTLSPtr(fastTLSOffsetForKey(WTF_TESTING_KEY), params[0].gpr()); // params[0] supplies the destination register.
+ });
+
+ root->appendNew<Value>(proc, Return, Origin(), patchpoint);
+
+ CHECK_EQ(compileAndRun<uintptr_t>(proc), static_cast<uintptr_t>(0xbeef)); // The compiled procedure must return the seeded value.
+#endif
+}
+
// Make sure the compiler does not try to optimize anything out.
NEVER_INLINE double zero()
{
@@ -16736,6 +16759,8 @@
RUN(testWasmBoundsCheck(10000));
RUN(testWasmBoundsCheck(std::numeric_limits<unsigned>::max() - 5));
RUN(testWasmAddress());
+
+ RUN(testFastTLS());
if (isX86()) {
RUN(testBranchBitAndImmFusion(Identity, Int64, 1, Air::BranchTest32, Air::Arg::Tmp));
Modified: trunk/Source/WTF/ChangeLog (213752 => 213753)
--- trunk/Source/WTF/ChangeLog 2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/WTF/ChangeLog 2017-03-11 07:17:48 UTC (rev 213753)
@@ -1,3 +1,23 @@
+2017-03-10 Filip Pizlo <fpi...@apple.com>
+
+ The JITs should be able to emit fast TLS loads
+ https://bugs.webkit.org/show_bug.cgi?id=169483
+
+ Reviewed by Keith Miller.
+
+ Consolidated what we know about fast TLS in FastTLS.h.
+
+ * WTF.xcodeproj/project.pbxproj:
+ * wtf/CMakeLists.txt:
+ * wtf/FastTLS.h: Added.
+ (WTF::loadFastTLS):
+ (WTF::fastTLSOffsetForKey):
+ * wtf/Platform.h:
+ * wtf/WTFThreadData.cpp:
+ (WTF::WTFThreadData::createAndRegisterForGetspecificDirect):
+ * wtf/WTFThreadData.h:
+ (WTF::wtfThreadData):
+
2017-03-10 Mark Lam <mark....@apple.com>
Turn ENABLE(MASM_PROBE) on by default for OS(DARWIN) release builds.
Modified: trunk/Source/WTF/WTF.xcodeproj/project.pbxproj (213752 => 213753)
--- trunk/Source/WTF/WTF.xcodeproj/project.pbxproj 2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/WTF/WTF.xcodeproj/project.pbxproj 2017-03-11 07:17:48 UTC (rev 213753)
@@ -43,6 +43,7 @@
0F66B2921DC97BAB004A1D3F /* WallTime.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0F66B2881DC97BAB004A1D3F /* WallTime.cpp */; };
0F66B2931DC97BAB004A1D3F /* WallTime.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F66B2891DC97BAB004A1D3F /* WallTime.h */; };
0F725CAC1C50461600AD943A /* RangeSet.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F725CAB1C50461600AD943A /* RangeSet.h */; };
+ 0F79C7C41E73511800EB34D1 /* FastTLS.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F79C7C31E73511800EB34D1 /* FastTLS.h */; };
0F7C5FB61D885CF20044F5E2 /* FastBitVector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0F7C5FB51D885CF20044F5E2 /* FastBitVector.cpp */; };
0F824A681B7443A0002E345D /* ParkingLot.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0F824A641B7443A0002E345D /* ParkingLot.cpp */; };
0F824A691B7443A0002E345D /* ParkingLot.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F824A651B7443A0002E345D /* ParkingLot.h */; };
@@ -421,6 +422,7 @@
0F66B2881DC97BAB004A1D3F /* WallTime.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = WallTime.cpp; sourceTree = "<group>"; };
0F66B2891DC97BAB004A1D3F /* WallTime.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = WallTime.h; sourceTree = "<group>"; };
0F725CAB1C50461600AD943A /* RangeSet.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RangeSet.h; sourceTree = "<group>"; };
+ 0F79C7C31E73511800EB34D1 /* FastTLS.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = FastTLS.h; sourceTree = "<group>"; };
0F7C5FB51D885CF20044F5E2 /* FastBitVector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = FastBitVector.cpp; sourceTree = "<group>"; };
0F824A641B7443A0002E345D /* ParkingLot.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ParkingLot.cpp; sourceTree = "<group>"; };
0F824A651B7443A0002E345D /* ParkingLot.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ParkingLot.h; sourceTree = "<group>"; };
@@ -974,6 +976,7 @@
0FD81AC4154FB22E00983E72 /* FastBitVector.h */,
A8A472A1151A825A004123FF /* FastMalloc.cpp */,
A8A472A2151A825A004123FF /* FastMalloc.h */,
+ 0F79C7C31E73511800EB34D1 /* FastTLS.h */,
B38FD7BC168953E80065C969 /* FeatureDefines.h */,
0F9D335B165DBA73005AD387 /* FilePrintStream.cpp */,
0F9D335C165DBA73005AD387 /* FilePrintStream.h */,
@@ -1486,6 +1489,7 @@
0F0D85B417234CC100338210 /* NoLock.h in Headers */,
A8A473EF151A825B004123FF /* Noncopyable.h in Headers */,
CE46516E19DB1FB4003ECA05 /* NSMapTableSPI.h in Headers */,
+ 0F79C7C41E73511800EB34D1 /* FastTLS.h in Headers */,
A8A473F5151A825B004123FF /* NumberOfCores.h in Headers */,
7E29C33E15FFD79B00516D61 /* ObjcRuntimeExtras.h in Headers */,
1AFDE6531953B23D00C48FFA /* Optional.h in Headers */,
Modified: trunk/Source/WTF/wtf/CMakeLists.txt (213752 => 213753)
--- trunk/Source/WTF/wtf/CMakeLists.txt 2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/WTF/wtf/CMakeLists.txt 2017-03-11 07:17:48 UTC (rev 213753)
@@ -30,6 +30,7 @@
DisallowCType.h
DoublyLinkedList.h
FastMalloc.h
+ FastTLS.h
FeatureDefines.h
FilePrintStream.h
FlipBytes.h
Added: trunk/Source/WTF/wtf/FastTLS.h (0 => 213753)
--- trunk/Source/WTF/wtf/FastTLS.h (rev 0)
+++ trunk/Source/WTF/wtf/FastTLS.h 2017-03-11 07:17:48 UTC (rev 213753)
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2017 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if HAVE(FAST_TLS)
+
+#include <pthread.h>
+#include <System/pthread_machdep.h>
+
+namespace WTF {
+
+// __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY0 is taken by bmalloc, so WTF's KEY0 maps to the
+// system's KEY1.
+#define WTF_FAST_TLS_KEY0 __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY1
+#define WTF_FAST_TLS_KEY1 __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY2
+#define WTF_FAST_TLS_KEY2 __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY3
+#define WTF_FAST_TLS_KEY3 __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4
+
+// NOTE: We should manage our use of these keys here. If you want to use a key for something,
+// put a #define in here to give your key a symbolic name. This ensures that we don't
+// accidentally use the same key for more than one thing.
+
+#define WTF_THREAD_DATA_KEY WTF_FAST_TLS_KEY0
+#define WTF_TESTING_KEY WTF_FAST_TLS_KEY3
+
+#if ENABLE(FAST_TLS_JIT)
+// Below is the code that the JIT will emit.
+
+#if CPU(X86_64)
+inline uintptr_t loadFastTLS(unsigned offset) // x86-64 reference version: reads a pointer-sized value at the given offset from %gs.
+{
+ uintptr_t result; // Output operand %0 of the asm below.
+ asm volatile(
+ "movq %%gs:%1, %0"
+ : "=r"(result)
+ : "r"(offset)
+ : "memory");
+ return result;
+}
+#elif CPU(ARM64)
+inline uintptr_t loadFastTLS(unsigned passedOffset) // ARM64 reference version: reads a pointer-sized value at (TPIDRRO_EL0 & ~7) + passedOffset.
+{
+ uintptr_t result; // Holds the thread register value first, then the loaded value (operand %0).
+ uintptr_t offset = passedOffset; // Widened so the register operand %1 is pointer-sized.
+ asm volatile(
+ "mrs %0, TPIDRRO_EL0\n\t"
+ "and %0, %0, #0xfffffffffffffff8\n\t"
+ "ldr %0, [%0, %1]"
+ : "=r"(result)
+ : "r"(offset)
+ : "memory");
+ return result;
+}
+#else
+#error "Bad architecture"
+#endif
+#endif // ENABLE(FAST_TLS_JIT)
+
+inline unsigned fastTLSOffsetForKey(unsigned long slot) // Converts a fast TLS key (slot index) into a byte offset.
+{
+ return slot * sizeof(void*); // One pointer-sized entry per slot.
+}
+
+} // namespace WTF
+
+using WTF::fastTLSOffsetForKey;
+
+#if ENABLE(FAST_TLS_JIT)
+using WTF::loadFastTLS;
+#endif
+
+#endif // HAVE(FAST_TLS)
+
Modified: trunk/Source/WTF/wtf/Platform.h (213752 => 213753)
--- trunk/Source/WTF/wtf/Platform.h 2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/WTF/wtf/Platform.h 2017-03-11 07:17:48 UTC (rev 213753)
@@ -761,6 +761,14 @@
#define HAVE_LL_SC 1
#endif // CPU(ARM64)
+#if __has_include(<System/pthread_machdep.h>)
+#define HAVE_FAST_TLS 1
+#endif
+
+#if (CPU(X86_64) || CPU(ARM64)) && HAVE(FAST_TLS)
+#define ENABLE_FAST_TLS_JIT 1
+#endif
+
/* This controls whether B3 is built. B3 is needed for FTL JIT and WebAssembly */
#if ENABLE(FTL_JIT) || ENABLE(WEBASSEMBLY)
#define ENABLE_B3_JIT 1
Modified: trunk/Source/WTF/wtf/WTFThreadData.cpp (213752 => 213753)
--- trunk/Source/WTF/wtf/WTFThreadData.cpp 2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/WTF/wtf/WTFThreadData.cpp 2017-03-11 07:17:48 UTC (rev 213753)
@@ -35,7 +35,7 @@
namespace WTF {
-#if !USE(PTHREAD_GETSPECIFIC_DIRECT)
+#if !HAVE(FAST_TLS)
ThreadSpecific<WTFThreadData>* WTFThreadData::staticData;
#endif
@@ -61,12 +61,12 @@
m_atomicStringTableDestructor(m_defaultAtomicStringTable);
}
-#if USE(PTHREAD_GETSPECIFIC_DIRECT)
+#if HAVE(FAST_TLS)
WTFThreadData& WTFThreadData::createAndRegisterForGetspecificDirect()
{
WTFThreadData* data = new WTFThreadData;
- _pthread_setspecific_direct(directKey, data);
- pthread_key_init_np(directKey, [](void* data){
+ _pthread_setspecific_direct(WTF_THREAD_DATA_KEY, data);
+ pthread_key_init_np(WTF_THREAD_DATA_KEY, [](void* data){
delete static_cast<WTFThreadData*>(data);
});
return *data;
Modified: trunk/Source/WTF/wtf/WTFThreadData.h (213752 => 213753)
--- trunk/Source/WTF/wtf/WTFThreadData.h 2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/WTF/wtf/WTFThreadData.h 2017-03-11 07:17:48 UTC (rev 213753)
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2008 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2008-2017 Apple Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -27,25 +27,15 @@
#ifndef WTFThreadData_h
#define WTFThreadData_h
+#include <wtf/FastTLS.h>
#include <wtf/HashMap.h>
#include <wtf/HashSet.h>
#include <wtf/Noncopyable.h>
#include <wtf/StackBounds.h>
#include <wtf/StackStats.h>
-#include <wtf/text/StringHash.h>
-
-#if USE(APPLE_INTERNAL_SDK)
-#include <System/pthread_machdep.h>
-#endif
-
-#if defined(__PTK_FRAMEWORK_JAVASCRIPTCORE_KEY1)
-#define USE_PTHREAD_GETSPECIFIC_DIRECT 1
-#endif
-
-#if !USE(PTHREAD_GETSPECIFIC_DIRECT)
#include <wtf/ThreadSpecific.h>
#include <wtf/Threading.h>
-#endif
+#include <wtf/text/StringHash.h>
namespace WTF {
@@ -122,8 +112,7 @@
void* m_savedStackPointerAtVMEntry;
void* m_savedLastStackTop;
-#if USE(PTHREAD_GETSPECIFIC_DIRECT)
- static const pthread_key_t directKey = __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY1;
+#if HAVE(FAST_TLS)
WTF_EXPORT_PRIVATE static WTFThreadData& createAndRegisterForGetspecificDirect();
#else
static WTF_EXPORTDATA ThreadSpecific<WTFThreadData>* staticData;
@@ -141,12 +130,12 @@
// WRT JavaScriptCore:
// wtfThreadData() is initially called from initializeThreading(), ensuring
// this is initially called in a pthread_once locked context.
-#if !USE(PTHREAD_GETSPECIFIC_DIRECT)
+#if !HAVE(FAST_TLS)
if (!WTFThreadData::staticData)
WTFThreadData::staticData = new ThreadSpecific<WTFThreadData>;
return **WTFThreadData::staticData;
#else
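- if (WTFThreadData* data = static_cast<WTFThreadData*>(_pthread_getspecific_direct(WTFThreadData::directKey)))
+ if (WTFThreadData* data = static_cast<WTFThreadData*>(_pthread_getspecific_direct(WTF_THREAD_DATA_KEY)))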
return *data;
return WTFThreadData::createAndRegisterForGetspecificDirect();
#endif
_______________________________________________ webkit-changes mailing list webkit-changes@lists.webkit.org https://lists.webkit.org/mailman/listinfo/webkit-changes