Title: [213753] trunk/Source
Revision
213753
Author
fpi...@apple.com
Date
2017-03-10 23:17:48 -0800 (Fri, 10 Mar 2017)

Log Message

The JITs should be able to emit fast TLS loads
https://bugs.webkit.org/show_bug.cgi?id=169483

Reviewed by Keith Miller.
        
Source/JavaScriptCore:

Added loadFromTLS32/64/Ptr to the MacroAssembler and added a B3 test for this.

* assembler/ARM64Assembler.h:
(JSC::ARM64Assembler::mrs_TPIDRRO_EL0):
* assembler/MacroAssembler.h:
(JSC::MacroAssembler::loadFromTLSPtr):
* assembler/MacroAssemblerARM64.h:
(JSC::MacroAssemblerARM64::loadFromTLS32):
(JSC::MacroAssemblerARM64::loadFromTLS64):
* assembler/MacroAssemblerX86Common.h:
(JSC::MacroAssemblerX86Common::loadFromTLS32):
* assembler/MacroAssemblerX86_64.h:
(JSC::MacroAssemblerX86_64::loadFromTLS64):
* assembler/X86Assembler.h:
(JSC::X86Assembler::adcl_im):
(JSC::X86Assembler::addl_mr):
(JSC::X86Assembler::addl_im):
(JSC::X86Assembler::andl_im):
(JSC::X86Assembler::orl_im):
(JSC::X86Assembler::orl_rm):
(JSC::X86Assembler::subl_im):
(JSC::X86Assembler::cmpb_im):
(JSC::X86Assembler::cmpl_rm):
(JSC::X86Assembler::cmpl_im):
(JSC::X86Assembler::testb_im):
(JSC::X86Assembler::movb_i8m):
(JSC::X86Assembler::movb_rm):
(JSC::X86Assembler::movl_mr):
(JSC::X86Assembler::movq_mr):
(JSC::X86Assembler::movsxd_rr):
(JSC::X86Assembler::gs):
(JSC::X86Assembler::X86InstructionFormatter::SingleInstructionBufferWriter::memoryModRM):
* b3/testb3.cpp:
(JSC::B3::testFastTLS):
(JSC::B3::run):

Source/WTF:

Consolidated what we know about fast TLS in FastTLS.h.

* WTF.xcodeproj/project.pbxproj:
* wtf/CMakeLists.txt:
* wtf/FastTLS.h: Added.
(WTF::loadFastTLS):
(WTF::fastTLSOffsetForKey):
* wtf/Platform.h:
* wtf/WTFThreadData.cpp:
(WTF::WTFThreadData::createAndRegisterForGetspecificDirect):
* wtf/WTFThreadData.h:
(WTF::wtfThreadData):

Modified Paths

Added Paths

Diff

Modified: trunk/Source/JavaScriptCore/ChangeLog (213752 => 213753)


--- trunk/Source/JavaScriptCore/ChangeLog	2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/JavaScriptCore/ChangeLog	2017-03-11 07:17:48 UTC (rev 213753)
@@ -1,3 +1,46 @@
+2017-03-10  Filip Pizlo  <fpi...@apple.com>
+
+        The JITs should be able to emit fast TLS loads
+        https://bugs.webkit.org/show_bug.cgi?id=169483
+
+        Reviewed by Keith Miller.
+        
+        Added loadFromTLS32/64/Ptr to the MacroAssembler and added a B3 test for this.
+
+        * assembler/ARM64Assembler.h:
+        (JSC::ARM64Assembler::mrs_TPIDRRO_EL0):
+        * assembler/MacroAssembler.h:
+        (JSC::MacroAssembler::loadFromTLSPtr):
+        * assembler/MacroAssemblerARM64.h:
+        (JSC::MacroAssemblerARM64::loadFromTLS32):
+        (JSC::MacroAssemblerARM64::loadFromTLS64):
+        * assembler/MacroAssemblerX86Common.h:
+        (JSC::MacroAssemblerX86Common::loadFromTLS32):
+        * assembler/MacroAssemblerX86_64.h:
+        (JSC::MacroAssemblerX86_64::loadFromTLS64):
+        * assembler/X86Assembler.h:
+        (JSC::X86Assembler::adcl_im):
+        (JSC::X86Assembler::addl_mr):
+        (JSC::X86Assembler::addl_im):
+        (JSC::X86Assembler::andl_im):
+        (JSC::X86Assembler::orl_im):
+        (JSC::X86Assembler::orl_rm):
+        (JSC::X86Assembler::subl_im):
+        (JSC::X86Assembler::cmpb_im):
+        (JSC::X86Assembler::cmpl_rm):
+        (JSC::X86Assembler::cmpl_im):
+        (JSC::X86Assembler::testb_im):
+        (JSC::X86Assembler::movb_i8m):
+        (JSC::X86Assembler::movb_rm):
+        (JSC::X86Assembler::movl_mr):
+        (JSC::X86Assembler::movq_mr):
+        (JSC::X86Assembler::movsxd_rr):
+        (JSC::X86Assembler::gs):
+        (JSC::X86Assembler::X86InstructionFormatter::SingleInstructionBufferWriter::memoryModRM):
+        * b3/testb3.cpp:
+        (JSC::B3::testFastTLS):
+        (JSC::B3::run):
+
 2017-03-10  Alex Christensen  <achristen...@webkit.org>
 
         Fix watch and tv builds after r213294

Modified: trunk/Source/JavaScriptCore/assembler/ARM64Assembler.h (213752 => 213753)


--- trunk/Source/JavaScriptCore/assembler/ARM64Assembler.h	2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/JavaScriptCore/assembler/ARM64Assembler.h	2017-03-11 07:17:48 UTC (rev 213753)
@@ -1588,6 +1588,13 @@
         CHECK_DATASIZE();
         insn(exoticStore(MEMOPSIZE, ExoticStoreFence_Release, result, src, dst));
     }
+    
+#if ENABLE(FAST_TLS_JIT)
+    void mrs_TPIDRRO_EL0(RegisterID dst) // Emits a single instruction word; per the name, an MRS of TPIDRRO_EL0 into dst.
+    {
+        insn(0xd53bd060 | dst); // Fixed encoding with dst's register number ORed in. Thanks, otool -t!
+    }
+#endif
 
     template<int datasize>
     ALWAYS_INLINE void orn(RegisterID rd, RegisterID rn, RegisterID rm)

Modified: trunk/Source/JavaScriptCore/assembler/MacroAssembler.h (213752 => 213753)


--- trunk/Source/JavaScriptCore/assembler/MacroAssembler.h	2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/JavaScriptCore/assembler/MacroAssembler.h	2017-03-11 07:17:48 UTC (rev 213753)
@@ -627,6 +627,13 @@
         load32(address, dest);
     }
 
+#if ENABLE(FAST_TLS_JIT)
+    void loadFromTLSPtr(uint32_t offset, RegisterID dst) // Pointer-sized TLS load; in this section it forwards to the 32-bit variant.
+    {
+        loadFromTLS32(offset, dst);
+    }
+#endif
+
     DataLabel32 loadPtrWithAddressOffsetPatch(Address address, RegisterID dest)
     {
         return load32WithAddressOffsetPatch(address, dest);
@@ -934,6 +941,13 @@
         load64(address, dest);
     }
 
+#if ENABLE(FAST_TLS_JIT)
+    void loadFromTLSPtr(uint32_t offset, RegisterID dst) // Pointer-sized TLS load; in this section it forwards to the 64-bit variant.
+    {
+        loadFromTLS64(offset, dst);
+    }
+#endif
+
     DataLabel32 loadPtrWithAddressOffsetPatch(Address address, RegisterID dest)
     {
         return load64WithAddressOffsetPatch(address, dest);

Modified: trunk/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h (213752 => 213753)


--- trunk/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h	2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h	2017-03-11 07:17:48 UTC (rev 213753)
@@ -3598,6 +3598,24 @@
         m_assembler.eor<64>(dest, src, src);
     }
     
+#if ENABLE(FAST_TLS_JIT)
+    // This will use scratch registers if the offset is not legal.
+    
+    void loadFromTLS32(uint32_t offset, RegisterID dst) // Loads the 32-bit value located `offset` bytes past the masked TPIDRRO_EL0 value into dst.
+    {
+        m_assembler.mrs_TPIDRRO_EL0(dst); // dst receives the raw system-register value.
+        and64(TrustedImm32(~7), dst); // Clear the low three bits before using dst as the base address.
+        load32(Address(dst, offset), dst); // dst serves as both base and destination.
+    }
+    
+    void loadFromTLS64(uint32_t offset, RegisterID dst) // Loads the 64-bit value located `offset` bytes past the masked TPIDRRO_EL0 value into dst.
+    {
+        m_assembler.mrs_TPIDRRO_EL0(dst); // dst receives the raw system-register value.
+        and64(TrustedImm32(~7), dst); // Clear the low three bits before using dst as the base address.
+        load64(Address(dst, offset), dst); // dst serves as both base and destination.
+    }
+#endif // ENABLE(FAST_TLS_JIT)
+    
     // Misc helper functions.
 
     // Invert a relational condition, e.g. == becomes !=, < becomes >=, etc.

Modified: trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86Common.h (213752 => 213753)


--- trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86Common.h	2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86Common.h	2017-03-11 07:17:48 UTC (rev 213753)
@@ -3857,6 +3857,14 @@
     void loadFence()
     {
     }
+    
+#if ENABLE(FAST_TLS_JIT)
+    void loadFromTLS32(uint32_t offset, RegisterID dst) // Loads the 32-bit value at %gs:offset into dst.
+    {
+        m_assembler.gs(); // The prefix affects the next instruction, so the load must be emitted directly after it.
+        m_assembler.movl_mr(offset, dst); // Absolute-address 32-bit load; with the prefix, `offset` is relative to %gs.
+    }
+#endif
 
     static void replaceWithBreakpoint(CodeLocationLabel instructionStart)
     {

Modified: trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h (213752 => 213753)


--- trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h	2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h	2017-03-11 07:17:48 UTC (rev 213753)
@@ -1701,6 +1701,14 @@
         store64(imm, dest);
     }
     
+#if ENABLE(FAST_TLS_JIT)
+    void loadFromTLS64(uint32_t offset, RegisterID dst) // Loads the 64-bit value at %gs:offset into dst.
+    {
+        m_assembler.gs(); // The prefix affects the next instruction, so the load must be emitted directly after it.
+        m_assembler.movq_mr(offset, dst); // Must be the 64-bit form; movl_mr would load only 32 bits of the slot.
+    }
+#endif
+
     void truncateDoubleToUint32(FPRegisterID src, RegisterID dest)
     {
         m_assembler.cvttsd2siq_rr(src, dest);

Modified: trunk/Source/JavaScriptCore/assembler/X86Assembler.h (213752 => 213753)


--- trunk/Source/JavaScriptCore/assembler/X86Assembler.h	2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/JavaScriptCore/assembler/X86Assembler.h	2017-03-11 07:17:48 UTC (rev 213753)
@@ -218,6 +218,7 @@
 #if CPU(X86_64)
         OP_MOVSXD_GvEv                  = 0x63,
 #endif
+        PRE_GS                          = 0x65,
         PRE_OPERAND_SIZE                = 0x66,
         PRE_SSE_66                      = 0x66,
         OP_PUSH_Iz                      = 0x68,
@@ -431,10 +432,10 @@
     void adcl_im(int imm, const void* addr)
     {
         if (CAN_SIGN_EXTEND_8_32(imm)) {
-            m_formatter.oneByteOp(OP_GROUP1_EvIb, GROUP1_OP_ADC, addr);
+            m_formatter.oneByteOpAddr(OP_GROUP1_EvIb, GROUP1_OP_ADC, bitwise_cast<uint32_t>(addr));
             m_formatter.immediate8(imm);
         } else {
-            m_formatter.oneByteOp(OP_GROUP1_EvIz, GROUP1_OP_ADC, addr);
+            m_formatter.oneByteOpAddr(OP_GROUP1_EvIz, GROUP1_OP_ADC, bitwise_cast<uint32_t>(addr));
             m_formatter.immediate32(imm);
         }
     }
@@ -458,7 +459,7 @@
 #if !CPU(X86_64)
     void addl_mr(const void* addr, RegisterID dst)
     {
-        m_formatter.oneByteOp(OP_ADD_GvEv, dst, addr);
+        m_formatter.oneByteOpAddr(OP_ADD_GvEv, dst, bitwise_cast<uint32_t>(addr));
     }
 #endif
 
@@ -631,10 +632,10 @@
     void addl_im(int imm, const void* addr)
     {
         if (CAN_SIGN_EXTEND_8_32(imm)) {
-            m_formatter.oneByteOp(OP_GROUP1_EvIb, GROUP1_OP_ADD, addr);
+            m_formatter.oneByteOpAddr(OP_GROUP1_EvIb, GROUP1_OP_ADD, bitwise_cast<uint32_t>(addr));
             m_formatter.immediate8(imm);
         } else {
-            m_formatter.oneByteOp(OP_GROUP1_EvIz, GROUP1_OP_ADD, addr);
+            m_formatter.oneByteOpAddr(OP_GROUP1_EvIz, GROUP1_OP_ADD, bitwise_cast<uint32_t>(addr));
             m_formatter.immediate32(imm);
         }
     }
@@ -818,10 +819,10 @@
     void andl_im(int imm, const void* addr)
     {
         if (CAN_SIGN_EXTEND_8_32(imm)) {
-            m_formatter.oneByteOp(OP_GROUP1_EvIb, GROUP1_OP_AND, addr);
+            m_formatter.oneByteOpAddr(OP_GROUP1_EvIb, GROUP1_OP_AND, bitwise_cast<uint32_t>(addr));
             m_formatter.immediate8(imm);
         } else {
-            m_formatter.oneByteOp(OP_GROUP1_EvIz, GROUP1_OP_AND, addr);
+            m_formatter.oneByteOpAddr(OP_GROUP1_EvIz, GROUP1_OP_AND, bitwise_cast<uint32_t>(addr));
             m_formatter.immediate32(imm);
         }
     }
@@ -1159,10 +1160,10 @@
     void orl_im(int imm, const void* addr)
     {
         if (CAN_SIGN_EXTEND_8_32(imm)) {
-            m_formatter.oneByteOp(OP_GROUP1_EvIb, GROUP1_OP_OR, addr);
+            m_formatter.oneByteOpAddr(OP_GROUP1_EvIb, GROUP1_OP_OR, bitwise_cast<uint32_t>(addr));
             m_formatter.immediate8(imm);
         } else {
-            m_formatter.oneByteOp(OP_GROUP1_EvIz, GROUP1_OP_OR, addr);
+            m_formatter.oneByteOpAddr(OP_GROUP1_EvIz, GROUP1_OP_OR, bitwise_cast<uint32_t>(addr));
             m_formatter.immediate32(imm);
         }
     }
@@ -1169,7 +1170,7 @@
 
     void orl_rm(RegisterID src, const void* addr)
     {
-        m_formatter.oneByteOp(OP_OR_EvGv, src, addr);
+        m_formatter.oneByteOpAddr(OP_OR_EvGv, src, bitwise_cast<uint32_t>(addr));
     }
 #endif
 
@@ -1357,10 +1358,10 @@
     void subl_im(int imm, const void* addr)
     {
         if (CAN_SIGN_EXTEND_8_32(imm)) {
-            m_formatter.oneByteOp(OP_GROUP1_EvIb, GROUP1_OP_SUB, addr);
+            m_formatter.oneByteOpAddr(OP_GROUP1_EvIb, GROUP1_OP_SUB, bitwise_cast<uint32_t>(addr));
             m_formatter.immediate8(imm);
         } else {
-            m_formatter.oneByteOp(OP_GROUP1_EvIz, GROUP1_OP_SUB, addr);
+            m_formatter.oneByteOpAddr(OP_GROUP1_EvIz, GROUP1_OP_SUB, bitwise_cast<uint32_t>(addr));
             m_formatter.immediate32(imm);
         }
     }
@@ -1857,7 +1858,7 @@
 #if CPU(X86)
     void cmpb_im(int imm, const void* addr)
     {
-        m_formatter.oneByteOp(OP_GROUP1_EbIb, GROUP1_OP_CMP, addr);
+        m_formatter.oneByteOpAddr(OP_GROUP1_EbIb, GROUP1_OP_CMP, bitwise_cast<uint32_t>(addr));
         m_formatter.immediate8(imm);
     }
 #endif
@@ -1938,16 +1939,16 @@
 #else
     void cmpl_rm(RegisterID reg, const void* addr)
     {
-        m_formatter.oneByteOp(OP_CMP_EvGv, reg, addr);
+        m_formatter.oneByteOpAddr(OP_CMP_EvGv, reg, bitwise_cast<uint32_t>(addr));
     }
 
     void cmpl_im(int imm, const void* addr)
     {
         if (CAN_SIGN_EXTEND_8_32(imm)) {
-            m_formatter.oneByteOp(OP_GROUP1_EvIb, GROUP1_OP_CMP, addr);
+            m_formatter.oneByteOpAddr(OP_GROUP1_EvIb, GROUP1_OP_CMP, bitwise_cast<uint32_t>(addr));
             m_formatter.immediate8(imm);
         } else {
-            m_formatter.oneByteOp(OP_GROUP1_EvIz, GROUP1_OP_CMP, addr);
+            m_formatter.oneByteOpAddr(OP_GROUP1_EvIz, GROUP1_OP_CMP, bitwise_cast<uint32_t>(addr));
             m_formatter.immediate32(imm);
         }
     }
@@ -2025,7 +2026,7 @@
 #if CPU(X86)
     void testb_im(int imm, const void* addr)
     {
-        m_formatter.oneByteOp(OP_GROUP3_EbIb, GROUP3_OP_TEST, addr);
+        m_formatter.oneByteOpAddr(OP_GROUP3_EbIb, GROUP3_OP_TEST, bitwise_cast<uint32_t>(addr));
         m_formatter.immediate8(imm);
     }
 #endif
@@ -2279,7 +2280,7 @@
     void movb_i8m(int imm, const void* addr)
     {
         ASSERT(-128 <= imm && imm < 128);
-        m_formatter.oneByteOp(OP_GROUP11_EvIb, GROUP11_MOV, addr);
+        m_formatter.oneByteOpAddr(OP_GROUP11_EvIb, GROUP11_MOV, bitwise_cast<uint32_t>(addr));
         m_formatter.immediate8(imm);
     }
 #endif
@@ -2301,7 +2302,7 @@
 #if !CPU(X86_64)
     void movb_rm(RegisterID src, const void* addr)
     {
-        m_formatter.oneByteOp(OP_MOV_EbGb, src, addr);
+        m_formatter.oneByteOpAddr(OP_MOV_EbGb, src, bitwise_cast<uint32_t>(addr));
     }
 #endif
     
@@ -2354,6 +2355,11 @@
 #endif
     }
 
+    void movl_mr(uint32_t addr, RegisterID dst) // Emits a MOV load into dst from a 32-bit absolute address.
+    {
+        m_formatter.oneByteOpAddr(OP_MOV_GvEv, dst, addr);
+    }
+
 #if CPU(X86_64)
     void movq_rr(RegisterID src, RegisterID dst)
     {
@@ -2407,6 +2413,11 @@
         m_formatter.oneByteOp64(OP_MOV_GvEv, dst, base, index, scale, offset);
     }
 
+    void movq_mr(uint32_t addr, RegisterID dst) // Emits a 64-bit-operand MOV load into dst from a 32-bit absolute address.
+    {
+        m_formatter.oneByteOp64Addr(OP_MOV_GvEv, dst, addr);
+    }
+
     void movq_i32m(int imm, int offset, RegisterID base)
     {
         m_formatter.oneByteOp64(OP_GROUP11_EvIz, GROUP11_MOV, base, offset);
@@ -2435,28 +2446,26 @@
     {
         m_formatter.oneByteOp64(OP_MOVSXD_GvEv, dst, src);
     }
-    
-    
 #else
+    void movl_mr(const void* addr, RegisterID dst)
+    {
+        if (dst == X86Registers::eax)
+            movl_mEAX(addr);
+        else
+            m_formatter.oneByteOpAddr(OP_MOV_GvEv, dst, bitwise_cast<uint32_t>(addr));
+    }
+
     void movl_rm(RegisterID src, const void* addr)
     {
         if (src == X86Registers::eax)
             movl_EAXm(addr);
         else 
-            m_formatter.oneByteOp(OP_MOV_EvGv, src, addr);
+            m_formatter.oneByteOpAddr(OP_MOV_EvGv, src, bitwise_cast<uint32_t>(addr));
     }
     
-    void movl_mr(const void* addr, RegisterID dst)
-    {
-        if (dst == X86Registers::eax)
-            movl_mEAX(addr);
-        else
-            m_formatter.oneByteOp(OP_MOV_GvEv, dst, addr);
-    }
-
     void movl_i32m(int imm, const void* addr)
     {
-        m_formatter.oneByteOp(OP_GROUP11_EvIz, GROUP11_MOV, addr);
+        m_formatter.oneByteOpAddr(OP_GROUP11_EvIz, GROUP11_MOV, bitwise_cast<uint32_t>(addr));
         m_formatter.immediate32(imm);
     }
 #endif
@@ -2494,7 +2503,7 @@
 #if !CPU(X86_64)
     void movzbl_mr(const void* address, RegisterID dst)
     {
-        m_formatter.twoByteOp(OP2_MOVZX_GvEb, dst, address);
+        m_formatter.twoByteOpAddr(OP2_MOVZX_GvEb, dst, bitwise_cast<uint32_t>(address));
     }
 #endif
 
@@ -2604,7 +2613,7 @@
 #else
     void cmovl_mr(Condition cond, const void* addr, RegisterID dst)
     {
-        m_formatter.twoByteOp(cmovcc(cond), dst, addr);
+        m_formatter.twoByteOpAddr(cmovcc(cond), dst, bitwise_cast<uint32_t>(addr));
     }
 #endif
 
@@ -2677,7 +2686,7 @@
 #if !CPU(X86_64)
     void jmp_m(const void* address)
     {
-        m_formatter.oneByteOp(OP_GROUP5_Ev, GROUP5_OP_JMPN, address);
+        m_formatter.oneByteOpAddr(OP_GROUP5_Ev, GROUP5_OP_JMPN, bitwise_cast<uint32_t>(address));
     }
 #endif
 
@@ -2853,7 +2862,7 @@
     void addsd_mr(const void* address, XMMRegisterID dst)
     {
         m_formatter.prefix(PRE_SSE_F2);
-        m_formatter.twoByteOp(OP2_ADDSD_VsdWsd, (RegisterID)dst, address);
+        m_formatter.twoByteOpAddr(OP2_ADDSD_VsdWsd, (RegisterID)dst, bitwise_cast<uint32_t>(address));
     }
 #endif
 
@@ -2911,7 +2920,7 @@
     void cvtsi2sd_mr(const void* address, XMMRegisterID dst)
     {
         m_formatter.prefix(PRE_SSE_F2);
-        m_formatter.twoByteOp(OP2_CVTSI2SD_VsdEd, (RegisterID)dst, address);
+        m_formatter.twoByteOpAddr(OP2_CVTSI2SD_VsdEd, (RegisterID)dst, bitwise_cast<uint32_t>(address));
     }
 #endif
 
@@ -3068,12 +3077,12 @@
     void movsd_mr(const void* address, XMMRegisterID dst)
     {
         m_formatter.prefix(PRE_SSE_F2);
-        m_formatter.twoByteOp(OP2_MOVSD_VsdWsd, (RegisterID)dst, address);
+        m_formatter.twoByteOpAddr(OP2_MOVSD_VsdWsd, (RegisterID)dst, bitwise_cast<uint32_t>(address));
     }
     void movsd_rm(XMMRegisterID src, const void* address)
     {
         m_formatter.prefix(PRE_SSE_F2);
-        m_formatter.twoByteOp(OP2_MOVSD_WsdVsd, (RegisterID)src, address);
+        m_formatter.twoByteOpAddr(OP2_MOVSD_WsdVsd, (RegisterID)src, bitwise_cast<uint32_t>(address));
     }
 #endif
 
@@ -3400,6 +3409,15 @@
         m_formatter.prefix(PRE_LOCK);
     }
     
+    // Causes the memory access in the next instruction to be offset by %gs. Usually you use
+    // this with a 32-bit absolute address load. That "address" ends up being the offset to
+    // %gs. This prefix is ignored by lea. Getting the value of %gs is hard - you can pretty
+    // much just use it as a secret offset.
+    void gs() // Emits only the prefix byte; the caller emits the instruction it applies to.
+    {
+        m_formatter.prefix(PRE_GS); // PRE_GS is 0x65 in the one-byte opcode table.
+    }
+    
     void cmpxchgb_rm(RegisterID src, int offset, RegisterID base)
     {
         m_formatter.twoByteOp8(OP2_CMPXCHGb, src, base, offset);
@@ -4053,14 +4071,17 @@
                 }
             }
 
-#if !CPU(X86_64)
-            ALWAYS_INLINE void memoryModRM(int reg, const void* address)
+            ALWAYS_INLINE void memoryModRMAddr(int reg, uint32_t address)
             {
+#if CPU(X86_64)
+                putModRmSib(ModRmMemoryNoDisp, reg, noBase, noIndex, 0);
+#else
                 // noBase + ModRmMemoryNoDisp means noBase + ModRmMemoryDisp32!
                 putModRm(ModRmMemoryNoDisp, reg, noBase);
-                putIntUnchecked(reinterpret_cast<int32_t>(address));
+#endif
+                putIntUnchecked(address);
             }
-#endif
+
             ALWAYS_INLINE void twoBytesVex(OneByteOpcodeID simdPrefix, RegisterID inOpReg, RegisterID r)
             {
                 putByteUnchecked(VexPrefix::TwoBytes);
@@ -4185,14 +4206,12 @@
             writer.memoryModRM(reg, base, index, scale, offset);
         }
 
-#if !CPU(X86_64)
-        void oneByteOp(OneByteOpcodeID opcode, int reg, const void* address)
+        void oneByteOpAddr(OneByteOpcodeID opcode, int reg, uint32_t address)
         {
             SingleInstructionBufferWriter writer(m_buffer);
             writer.putByteUnchecked(opcode);
-            writer.memoryModRM(reg, address);
+            writer.memoryModRMAddr(reg, address);
         }
-#endif
 
         void twoByteOp(TwoByteOpcodeID opcode)
         {
@@ -4228,15 +4247,14 @@
             writer.memoryModRM(reg, base, index, scale, offset);
         }
 
-#if !CPU(X86_64)
-        void twoByteOp(TwoByteOpcodeID opcode, int reg, const void* address)
+        void twoByteOpAddr(TwoByteOpcodeID opcode, int reg, uint32_t address)
         {
             SingleInstructionBufferWriter writer(m_buffer);
             writer.putByteUnchecked(OP_2BYTE_ESCAPE);
             writer.putByteUnchecked(opcode);
-            writer.memoryModRM(reg, address);
+            writer.memoryModRMAddr(reg, address);
         }
-#endif
+
         void vexNdsLigWigTwoByteOp(OneByteOpcodeID simdPrefix, TwoByteOpcodeID opcode, RegisterID dest, RegisterID a, RegisterID b)
         {
             SingleInstructionBufferWriter writer(m_buffer);
@@ -4367,6 +4385,14 @@
             writer.memoryModRM(reg, base, index, scale, offset);
         }
 
+        void oneByteOp64Addr(OneByteOpcodeID opcode, int reg, uint32_t address) // Like oneByteOpAddr, but emits a REX.W prefix first.
+        {
+            SingleInstructionBufferWriter writer(m_buffer);
+            writer.emitRexW(reg, 0, 0); // Only `reg` is passed; presumably the zeros are the unused index/base fields.
+            writer.putByteUnchecked(opcode);
+            writer.memoryModRMAddr(reg, address); // ModRM (with SIB on x86-64) followed by the 32-bit address.
+        }
+
         void twoByteOp64(TwoByteOpcodeID opcode, int reg, RegisterID rm)
         {
             SingleInstructionBufferWriter writer(m_buffer);

Modified: trunk/Source/JavaScriptCore/b3/testb3.cpp (213752 => 213753)


--- trunk/Source/JavaScriptCore/b3/testb3.cpp	2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/JavaScriptCore/b3/testb3.cpp	2017-03-11 07:17:48 UTC (rev 213753)
@@ -69,6 +69,7 @@
 #include "VM.h"
 #include <cmath>
 #include <string>
+#include <wtf/FastTLS.h>
 #include <wtf/ListDump.h>
 #include <wtf/Lock.h>
 #include <wtf/NumberOfCores.h>
@@ -15210,6 +15211,28 @@
         CHECK_EQ(numToStore, value);
 }
 
+void testFastTLS() // Stores a value in the testing TLS slot from C++ and checks that JIT-emitted code loads it back.
+{
+#if ENABLE(FAST_TLS_JIT)
+    _pthread_setspecific_direct(WTF_TESTING_KEY, bitwise_cast<void*>(static_cast<uintptr_t>(0xbeef))); // NOTE(review): 0xbeef fits in 32 bits, so a load that dropped the upper half would still pass.
+    
+    Procedure proc;
+    BasicBlock* root = proc.addBlock();
+    
+    PatchpointValue* patchpoint = root->appendNew<PatchpointValue>(proc, pointerType(), Origin());
+    patchpoint->clobber(RegisterSet::macroScratchRegisters()); // The generator below opts in to macro scratch register usage.
+    patchpoint->setGenerator(
+        [&] (CCallHelpers& jit, const StackmapGenerationParams& params) {
+            AllowMacroScratchRegisterUsage allowScratch(jit);
+            jit.loadFromTLSPtr(fastTLSOffsetForKey(WTF_TESTING_KEY), params[0].gpr()); // params[0] is presumably the patchpoint's result register - TODO confirm.
+        });
+    
+    root->appendNew<Value>(proc, Return, Origin(), patchpoint);
+    
+    CHECK_EQ(compileAndRun<uintptr_t>(proc), static_cast<uintptr_t>(0xbeef));
+#endif // Without FAST_TLS_JIT the test body is empty.
+}
+
 // Make sure the compiler does not try to optimize anything out.
 NEVER_INLINE double zero()
 {
@@ -16736,6 +16759,8 @@
     RUN(testWasmBoundsCheck(10000));
     RUN(testWasmBoundsCheck(std::numeric_limits<unsigned>::max() - 5));
     RUN(testWasmAddress());
+    
+    RUN(testFastTLS());
 
     if (isX86()) {
         RUN(testBranchBitAndImmFusion(Identity, Int64, 1, Air::BranchTest32, Air::Arg::Tmp));

Modified: trunk/Source/WTF/ChangeLog (213752 => 213753)


--- trunk/Source/WTF/ChangeLog	2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/WTF/ChangeLog	2017-03-11 07:17:48 UTC (rev 213753)
@@ -1,3 +1,23 @@
+2017-03-10  Filip Pizlo  <fpi...@apple.com>
+
+        The JITs should be able to emit fast TLS loads
+        https://bugs.webkit.org/show_bug.cgi?id=169483
+
+        Reviewed by Keith Miller.
+        
+        Consolidated what we know about fast TLS in FastTLS.h.
+
+        * WTF.xcodeproj/project.pbxproj:
+        * wtf/CMakeLists.txt:
+        * wtf/FastTLS.h: Added.
+        (WTF::loadFastTLS):
+        (WTF::fastTLSOffsetForKey):
+        * wtf/Platform.h:
+        * wtf/WTFThreadData.cpp:
+        (WTF::WTFThreadData::createAndRegisterForGetspecificDirect):
+        * wtf/WTFThreadData.h:
+        (WTF::wtfThreadData):
+
 2017-03-10  Mark Lam  <mark....@apple.com>
 
         Turn ENABLE(MASM_PROBE) on by default for OS(DARWIN) release builds.

Modified: trunk/Source/WTF/WTF.xcodeproj/project.pbxproj (213752 => 213753)


--- trunk/Source/WTF/WTF.xcodeproj/project.pbxproj	2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/WTF/WTF.xcodeproj/project.pbxproj	2017-03-11 07:17:48 UTC (rev 213753)
@@ -43,6 +43,7 @@
 		0F66B2921DC97BAB004A1D3F /* WallTime.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0F66B2881DC97BAB004A1D3F /* WallTime.cpp */; };
 		0F66B2931DC97BAB004A1D3F /* WallTime.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F66B2891DC97BAB004A1D3F /* WallTime.h */; };
 		0F725CAC1C50461600AD943A /* RangeSet.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F725CAB1C50461600AD943A /* RangeSet.h */; };
+		0F79C7C41E73511800EB34D1 /* FastTLS.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F79C7C31E73511800EB34D1 /* FastTLS.h */; };
 		0F7C5FB61D885CF20044F5E2 /* FastBitVector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0F7C5FB51D885CF20044F5E2 /* FastBitVector.cpp */; };
 		0F824A681B7443A0002E345D /* ParkingLot.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0F824A641B7443A0002E345D /* ParkingLot.cpp */; };
 		0F824A691B7443A0002E345D /* ParkingLot.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F824A651B7443A0002E345D /* ParkingLot.h */; };
@@ -421,6 +422,7 @@
 		0F66B2881DC97BAB004A1D3F /* WallTime.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = WallTime.cpp; sourceTree = "<group>"; };
 		0F66B2891DC97BAB004A1D3F /* WallTime.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = WallTime.h; sourceTree = "<group>"; };
 		0F725CAB1C50461600AD943A /* RangeSet.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RangeSet.h; sourceTree = "<group>"; };
+		0F79C7C31E73511800EB34D1 /* FastTLS.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = FastTLS.h; sourceTree = "<group>"; };
 		0F7C5FB51D885CF20044F5E2 /* FastBitVector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = FastBitVector.cpp; sourceTree = "<group>"; };
 		0F824A641B7443A0002E345D /* ParkingLot.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ParkingLot.cpp; sourceTree = "<group>"; };
 		0F824A651B7443A0002E345D /* ParkingLot.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ParkingLot.h; sourceTree = "<group>"; };
@@ -974,6 +976,7 @@
 				0FD81AC4154FB22E00983E72 /* FastBitVector.h */,
 				A8A472A1151A825A004123FF /* FastMalloc.cpp */,
 				A8A472A2151A825A004123FF /* FastMalloc.h */,
+				0F79C7C31E73511800EB34D1 /* FastTLS.h */,
 				B38FD7BC168953E80065C969 /* FeatureDefines.h */,
 				0F9D335B165DBA73005AD387 /* FilePrintStream.cpp */,
 				0F9D335C165DBA73005AD387 /* FilePrintStream.h */,
@@ -1486,6 +1489,7 @@
 				0F0D85B417234CC100338210 /* NoLock.h in Headers */,
 				A8A473EF151A825B004123FF /* Noncopyable.h in Headers */,
 				CE46516E19DB1FB4003ECA05 /* NSMapTableSPI.h in Headers */,
+				0F79C7C41E73511800EB34D1 /* FastTLS.h in Headers */,
 				A8A473F5151A825B004123FF /* NumberOfCores.h in Headers */,
 				7E29C33E15FFD79B00516D61 /* ObjcRuntimeExtras.h in Headers */,
 				1AFDE6531953B23D00C48FFA /* Optional.h in Headers */,

Modified: trunk/Source/WTF/wtf/CMakeLists.txt (213752 => 213753)


--- trunk/Source/WTF/wtf/CMakeLists.txt	2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/WTF/wtf/CMakeLists.txt	2017-03-11 07:17:48 UTC (rev 213753)
@@ -30,6 +30,7 @@
     DisallowCType.h
     DoublyLinkedList.h
     FastMalloc.h
+    FastTLS.h
     FeatureDefines.h
     FilePrintStream.h
     FlipBytes.h

Added: trunk/Source/WTF/wtf/FastTLS.h (0 => 213753)


--- trunk/Source/WTF/wtf/FastTLS.h	                        (rev 0)
+++ trunk/Source/WTF/wtf/FastTLS.h	2017-03-11 07:17:48 UTC (rev 213753)
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2017 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#pragma once
+
+#if HAVE(FAST_TLS)
+
+#include <pthread.h>
+#include <System/pthread_machdep.h>
+
+namespace WTF {
+
+// __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY0 is taken by bmalloc, so WTF's KEY0 maps to the
+// system's KEY1.
+#define WTF_FAST_TLS_KEY0 __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY1
+#define WTF_FAST_TLS_KEY1 __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY2
+#define WTF_FAST_TLS_KEY2 __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY3
+#define WTF_FAST_TLS_KEY3 __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4
+
+// NOTE: We should manage our use of these keys here. If you want to use a key for something,
+// put a #define in here to give your key a symbolic name. This ensures that we don't
+// accidentally use the same key for more than one thing.
+
+#define WTF_THREAD_DATA_KEY WTF_FAST_TLS_KEY0
+#define WTF_TESTING_KEY WTF_FAST_TLS_KEY3
+
+#if ENABLE(FAST_TLS_JIT)
+// Below is the code that the JIT will emit.
+
+#if CPU(X86_64)
+inline uintptr_t loadFastTLS(unsigned offset) // x86-64 version: returns the pointer-sized value read via a %gs-relative movq.
+{
+    uintptr_t result;
+    asm volatile(
+        "movq %%gs:%1, %0" // %0 is result, %1 is offset.
+        : "=r"(result)
+        : "r"(offset)
+        : "memory");
+    return result;
+}
+#elif CPU(ARM64)
+inline uintptr_t loadFastTLS(unsigned passedOffset) // ARM64 version: returns the pointer-sized value read relative to TPIDRRO_EL0.
+{
+    uintptr_t result;
+    uintptr_t offset = passedOffset; // Widened copy; presumably so the asm operand is a 64-bit register.
+    asm volatile(
+        "mrs %0, TPIDRRO_EL0\n\t" // result = TPIDRRO_EL0.
+        "and %0, %0, #0xfffffffffffffff8\n\t" // Clear the low three bits.
+        "ldr %0, [%0, %1]" // result = value at (result + offset).
+        : "=r"(result)
+        : "r"(offset)
+        : "memory");
+    return result;
+}
+#else
+#error "Bad architecture"
+#endif
+#endif // ENABLE(FAST_TLS_JIT)
+
+inline unsigned fastTLSOffsetForKey(unsigned long slot) // Converts a key (slot index) into a byte offset.
+{
+    return slot * sizeof(void*); // One pointer-sized entry per slot.
+}
+
+} // namespace WTF
+
+using WTF::fastTLSOffsetForKey;
+
+#if ENABLE(FAST_TLS_JIT)
+using WTF::loadFastTLS;
+#endif
+
+#endif // HAVE(FAST_TLS)
+

Modified: trunk/Source/WTF/wtf/Platform.h (213752 => 213753)


--- trunk/Source/WTF/wtf/Platform.h	2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/WTF/wtf/Platform.h	2017-03-11 07:17:48 UTC (rev 213753)
@@ -761,6 +761,14 @@
 #define HAVE_LL_SC 1
 #endif // CPU(ARM64)
 
+#if __has_include(<System/pthread_machdep.h>)
+#define HAVE_FAST_TLS 1
+#endif
+
+#if (CPU(X86_64) || CPU(ARM64)) && HAVE(FAST_TLS)
+#define ENABLE_FAST_TLS_JIT 1
+#endif
+
 /* This controls whether B3 is built. B3 is needed for FTL JIT and WebAssembly */
 #if ENABLE(FTL_JIT) || ENABLE(WEBASSEMBLY)
 #define ENABLE_B3_JIT 1

Modified: trunk/Source/WTF/wtf/WTFThreadData.cpp (213752 => 213753)


--- trunk/Source/WTF/wtf/WTFThreadData.cpp	2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/WTF/wtf/WTFThreadData.cpp	2017-03-11 07:17:48 UTC (rev 213753)
@@ -35,7 +35,7 @@
 
 namespace WTF {
 
-#if !USE(PTHREAD_GETSPECIFIC_DIRECT)
+#if !HAVE(FAST_TLS)
 ThreadSpecific<WTFThreadData>* WTFThreadData::staticData;
 #endif
 
@@ -61,12 +61,12 @@
         m_atomicStringTableDestructor(m_defaultAtomicStringTable);
 }
 
-#if USE(PTHREAD_GETSPECIFIC_DIRECT)
+#if HAVE(FAST_TLS)
 WTFThreadData& WTFThreadData::createAndRegisterForGetspecificDirect()
 {
     WTFThreadData* data = new WTFThreadData;
-    _pthread_setspecific_direct(directKey, data);
-    pthread_key_init_np(directKey, [](void* data){
+    _pthread_setspecific_direct(WTF_THREAD_DATA_KEY, data);
+    pthread_key_init_np(WTF_THREAD_DATA_KEY, [](void* data){
         delete static_cast<WTFThreadData*>(data);
     });
     return *data;

Modified: trunk/Source/WTF/wtf/WTFThreadData.h (213752 => 213753)


--- trunk/Source/WTF/wtf/WTFThreadData.h	2017-03-11 06:56:12 UTC (rev 213752)
+++ trunk/Source/WTF/wtf/WTFThreadData.h	2017-03-11 07:17:48 UTC (rev 213753)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2008-2017 Apple Inc. All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -27,25 +27,15 @@
 #ifndef WTFThreadData_h
 #define WTFThreadData_h
 
+#include <wtf/FastTLS.h>
 #include <wtf/HashMap.h>
 #include <wtf/HashSet.h>
 #include <wtf/Noncopyable.h>
 #include <wtf/StackBounds.h>
 #include <wtf/StackStats.h>
-#include <wtf/text/StringHash.h>
-
-#if USE(APPLE_INTERNAL_SDK)
-#include <System/pthread_machdep.h>
-#endif
-
-#if defined(__PTK_FRAMEWORK_JAVASCRIPTCORE_KEY1)
-#define USE_PTHREAD_GETSPECIFIC_DIRECT 1
-#endif
-
-#if !USE(PTHREAD_GETSPECIFIC_DIRECT)
 #include <wtf/ThreadSpecific.h>
 #include <wtf/Threading.h>
-#endif
+#include <wtf/text/StringHash.h>
 
 namespace WTF {
 
@@ -122,8 +112,7 @@
     void* m_savedStackPointerAtVMEntry;
     void* m_savedLastStackTop;
 
-#if USE(PTHREAD_GETSPECIFIC_DIRECT)
-    static const pthread_key_t directKey = __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY1;
+#if HAVE(FAST_TLS)
     WTF_EXPORT_PRIVATE static WTFThreadData& createAndRegisterForGetspecificDirect();
 #else
     static WTF_EXPORTDATA ThreadSpecific<WTFThreadData>* staticData;
@@ -141,12 +130,12 @@
     //    WRT JavaScriptCore:
     //    wtfThreadData() is initially called from initializeThreading(), ensuring
     //    this is initially called in a pthread_once locked context.
-#if !USE(PTHREAD_GETSPECIFIC_DIRECT)
+#if !HAVE(FAST_TLS)
     if (!WTFThreadData::staticData)
         WTFThreadData::staticData = new ThreadSpecific<WTFThreadData>;
     return **WTFThreadData::staticData;
 #else
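-    if (WTFThreadData* data = static_cast<WTFThreadData*>(_pthread_getspecific_direct(WTFThreadData::directKey)))
+    if (WTFThreadData* data = static_cast<WTFThreadData*>(_pthread_getspecific_direct(WTF_THREAD_DATA_KEY)))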
         return *data;
     return WTFThreadData::createAndRegisterForGetspecificDirect();
 #endif
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to