Title: [97903] trunk/Source/JavaScriptCore
Revision
97903
Author
[email protected]
Date
2011-10-19 17:48:22 -0700 (Wed, 19 Oct 2011)

Log Message

DFG JIT 32_64 - improve double boxing/unboxing
https://bugs.webkit.org/show_bug.cgi?id=70418

Patch by Yuqiang Xian <[email protected]> on 2011-10-19
Reviewed by Gavin Barraclough.

Double boxing/unboxing in DFG JIT 32_64 is currently implemented inefficiently:
it exchanges data between registers through memory.
On X86, some SSE instructions let us perform these operations directly in registers, with better performance.
This improves 32-bit DFG performance by 29% on Kraken, 7% on SunSpider,
and 2% on V8, tested on Linux X86 (Core i7 Nehalem).

* assembler/MacroAssemblerX86Common.h:
(JSC::MacroAssemblerX86Common::lshiftPacked):
(JSC::MacroAssemblerX86Common::rshiftPacked):
(JSC::MacroAssemblerX86Common::orPacked):
(JSC::MacroAssemblerX86Common::moveInt32ToPacked):
(JSC::MacroAssemblerX86Common::movePackedToInt32):
* assembler/X86Assembler.h:
(JSC::X86Assembler::movd_rr):
(JSC::X86Assembler::psllq_i8r):
(JSC::X86Assembler::psrlq_i8r):
(JSC::X86Assembler::por_rr):
* dfg/DFGJITCodeGenerator.h:
(JSC::DFG::JITCodeGenerator::boxDouble):
(JSC::DFG::JITCodeGenerator::unboxDouble):
* dfg/DFGJITCodeGenerator32_64.cpp:
(JSC::DFG::JITCodeGenerator::fillDouble):
(JSC::DFG::JITCodeGenerator::fillJSValue):
(JSC::DFG::JITCodeGenerator::nonSpeculativeValueToNumber):
(JSC::DFG::JITCodeGenerator::nonSpeculativeUInt32ToNumber):
(JSC::DFG::JITCodeGenerator::nonSpeculativeKnownConstantArithOp):
(JSC::DFG::JITCodeGenerator::nonSpeculativeBasicArithOp):
* dfg/DFGJITCompiler.h:
(JSC::DFG::JITCompiler::boxDouble):
(JSC::DFG::JITCompiler::unboxDouble):
* dfg/DFGSpeculativeJIT32_64.cpp:
(JSC::DFG::SpeculativeJIT::fillSpeculateDouble):
(JSC::DFG::SpeculativeJIT::convertToDouble):
(JSC::DFG::SpeculativeJIT::compile):

Modified Paths

Diff

Modified: trunk/Source/JavaScriptCore/ChangeLog (97902 => 97903)


--- trunk/Source/JavaScriptCore/ChangeLog	2011-10-20 00:44:28 UTC (rev 97902)
+++ trunk/Source/JavaScriptCore/ChangeLog	2011-10-20 00:48:22 UTC (rev 97903)
@@ -1,3 +1,45 @@
+2011-10-19  Yuqiang Xian  <[email protected]>
+
+        DFG JIT 32_64 - improve double boxing/unboxing
+        https://bugs.webkit.org/show_bug.cgi?id=70418
+
+        Reviewed by Gavin Barraclough.
+
+        Double boxing/unboxing in DFG JIT 32_64 is currently implemented inefficiently:
+        it exchanges data between registers through memory.
+        On X86, some SSE instructions let us perform these operations directly in registers, with better performance.
+        This improves 32-bit DFG performance by 29% on Kraken, 7% on SunSpider,
+        and 2% on V8, tested on Linux X86 (Core i7 Nehalem).
+
+        * assembler/MacroAssemblerX86Common.h:
+        (JSC::MacroAssemblerX86Common::lshiftPacked):
+        (JSC::MacroAssemblerX86Common::rshiftPacked):
+        (JSC::MacroAssemblerX86Common::orPacked):
+        (JSC::MacroAssemblerX86Common::moveInt32ToPacked):
+        (JSC::MacroAssemblerX86Common::movePackedToInt32):
+        * assembler/X86Assembler.h:
+        (JSC::X86Assembler::movd_rr):
+        (JSC::X86Assembler::psllq_i8r):
+        (JSC::X86Assembler::psrlq_i8r):
+        (JSC::X86Assembler::por_rr):
+        * dfg/DFGJITCodeGenerator.h:
+        (JSC::DFG::JITCodeGenerator::boxDouble):
+        (JSC::DFG::JITCodeGenerator::unboxDouble):
+        * dfg/DFGJITCodeGenerator32_64.cpp:
+        (JSC::DFG::JITCodeGenerator::fillDouble):
+        (JSC::DFG::JITCodeGenerator::fillJSValue):
+        (JSC::DFG::JITCodeGenerator::nonSpeculativeValueToNumber):
+        (JSC::DFG::JITCodeGenerator::nonSpeculativeUInt32ToNumber):
+        (JSC::DFG::JITCodeGenerator::nonSpeculativeKnownConstantArithOp):
+        (JSC::DFG::JITCodeGenerator::nonSpeculativeBasicArithOp):
+        * dfg/DFGJITCompiler.h:
+        (JSC::DFG::JITCompiler::boxDouble):
+        (JSC::DFG::JITCompiler::unboxDouble):
+        * dfg/DFGSpeculativeJIT32_64.cpp:
+        (JSC::DFG::SpeculativeJIT::fillSpeculateDouble):
+        (JSC::DFG::SpeculativeJIT::convertToDouble):
+        (JSC::DFG::SpeculativeJIT::compile):
+
 2011-10-19  Gyuyoung Kim  <[email protected]>
 
         [EFL] Fix DSO linkage of wtf_efl.

Modified: trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86Common.h (97902 => 97903)


--- trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86Common.h	2011-10-20 00:44:28 UTC (rev 97902)
+++ trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86Common.h	2011-10-20 00:48:22 UTC (rev 97903)
@@ -40,6 +40,7 @@
 
 public:
     typedef X86Assembler::FPRegisterID FPRegisterID;
+    typedef X86Assembler::XMMRegisterID XMMRegisterID;
     
     static const int MaximumCompactPtrAlignedAddressOffset = 127;
 
@@ -735,6 +736,36 @@
         return branchDouble(DoubleEqualOrUnordered, reg, scratch);
     }
 
+    void lshiftPacked(TrustedImm32 imm, XMMRegisterID reg)
+    {
+        ASSERT(isSSE2Present());
+        m_assembler.psllq_i8r(imm.m_value, reg);
+    }
+
+    void rshiftPacked(TrustedImm32 imm, XMMRegisterID reg)
+    {
+        ASSERT(isSSE2Present());
+        m_assembler.psrlq_i8r(imm.m_value, reg);
+    }
+
+    void orPacked(XMMRegisterID src, XMMRegisterID dst)
+    {
+        ASSERT(isSSE2Present());
+        m_assembler.por_rr(src, dst);
+    }
+
+    void moveInt32ToPacked(RegisterID src, XMMRegisterID dst)
+    {
+        ASSERT(isSSE2Present());
+        m_assembler.movd_rr(src, dst);
+    }
+
+    void movePackedToInt32(XMMRegisterID src, RegisterID dst)
+    {
+        ASSERT(isSSE2Present());
+        m_assembler.movd_rr(src, dst);
+    }
+
     // Stack manipulation operations:
     //
     // The ABI is assumed to provide a stack abstraction to memory,

Modified: trunk/Source/JavaScriptCore/assembler/X86Assembler.h (97902 => 97903)


--- trunk/Source/JavaScriptCore/assembler/X86Assembler.h	2011-10-20 00:44:28 UTC (rev 97902)
+++ trunk/Source/JavaScriptCore/assembler/X86Assembler.h	2011-10-20 00:48:22 UTC (rev 97903)
@@ -183,6 +183,9 @@
         OP2_MOVZX_GvEb      = 0xB6,
         OP2_MOVZX_GvEw      = 0xB7,
         OP2_PEXTRW_GdUdIb   = 0xC5,
+        OP2_PSLLQ_UdqIb     = 0x73,
+        OP2_PSRLQ_UdqIb     = 0x73,
+        OP2_POR_VdqWdq      = 0XEB,
     } TwoByteOpcodeID;
 
     TwoByteOpcodeID jccRel32(Condition cond)
@@ -221,6 +224,9 @@
 
         GROUP11_MOV = 0,
 
+        GROUP14_OP_PSLLQ = 6,
+        GROUP14_OP_PSRLQ = 2,
+
         ESCAPE_DD_FSTP_doubleReal = 3,
     } GroupOpcodeID;
     
@@ -1436,6 +1442,12 @@
         m_formatter.twoByteOp(OP2_MOVD_EdVd, (RegisterID)src, dst);
     }
 
+    void movd_rr(RegisterID src, XMMRegisterID dst)
+    {
+        m_formatter.prefix(PRE_SSE_66);
+        m_formatter.twoByteOp(OP2_MOVD_VdEd, (RegisterID)dst, src);
+    }
+
 #if CPU(X86_64)
     void movq_rr(XMMRegisterID src, RegisterID dst)
     {
@@ -1500,6 +1512,26 @@
         m_formatter.immediate8(whichWord);
     }
 
+    void psllq_i8r(int imm, XMMRegisterID dst)
+    {
+        m_formatter.prefix(PRE_SSE_66);
+        m_formatter.twoByteOp8(OP2_PSLLQ_UdqIb, GROUP14_OP_PSLLQ, (RegisterID)dst);
+        m_formatter.immediate8(imm);
+    }
+
+    void psrlq_i8r(int imm, XMMRegisterID dst)
+    {
+        m_formatter.prefix(PRE_SSE_66);
+        m_formatter.twoByteOp8(OP2_PSRLQ_UdqIb, GROUP14_OP_PSRLQ, (RegisterID)dst);
+        m_formatter.immediate8(imm);
+    }
+
+    void por_rr(XMMRegisterID src, XMMRegisterID dst)
+    {
+        m_formatter.prefix(PRE_SSE_66);
+        m_formatter.twoByteOp(OP2_POR_VdqWdq, (RegisterID)dst, (RegisterID)src);
+    }
+
     void subsd_rr(XMMRegisterID src, XMMRegisterID dst)
     {
         m_formatter.prefix(PRE_SSE_F2);

Modified: trunk/Source/JavaScriptCore/dfg/DFGJITCodeGenerator.h (97902 => 97903)


--- trunk/Source/JavaScriptCore/dfg/DFGJITCodeGenerator.h	2011-10-20 00:44:28 UTC (rev 97902)
+++ trunk/Source/JavaScriptCore/dfg/DFGJITCodeGenerator.h	2011-10-20 00:48:22 UTC (rev 97903)
@@ -479,13 +479,13 @@
         return boxDouble(fpr, allocate());
     }
 #elif USE(JSVALUE32_64)
-    void boxDouble(FPRReg fpr, GPRReg tagGPR, GPRReg payloadGPR, VirtualRegister virtualRegister)
+    void boxDouble(FPRReg fpr, GPRReg tagGPR, GPRReg payloadGPR)
     {
-        m_jit.boxDouble(fpr, tagGPR, payloadGPR, virtualRegister);
+        m_jit.boxDouble(fpr, tagGPR, payloadGPR);
     }
-    void unboxDouble(GPRReg tagGPR, GPRReg payloadGPR, FPRReg fpr, VirtualRegister virtualRegister)
+    void unboxDouble(GPRReg tagGPR, GPRReg payloadGPR, FPRReg fpr, FPRReg scratchFPR)
     {
-        m_jit.unboxDouble(tagGPR, payloadGPR, fpr, virtualRegister);
+        m_jit.unboxDouble(tagGPR, payloadGPR, fpr, scratchFPR);
     }
 #endif
 

Modified: trunk/Source/JavaScriptCore/dfg/DFGJITCodeGenerator32_64.cpp (97902 => 97903)


--- trunk/Source/JavaScriptCore/dfg/DFGJITCodeGenerator32_64.cpp	2011-10-20 00:44:28 UTC (rev 97902)
+++ trunk/Source/JavaScriptCore/dfg/DFGJITCodeGenerator32_64.cpp	2011-10-20 00:48:22 UTC (rev 97903)
@@ -165,9 +165,10 @@
         JITCompiler::Jump hasUnboxedDouble;
 
         if (info.registerFormat() != DataFormatJSInteger) {
+            FPRTemporary scratch(this);
             JITCompiler::Jump isInteger = m_jit.branch32(MacroAssembler::Equal, tagGPR, TrustedImm32(JSValue::Int32Tag));
             m_jit.jitAssertIsJSDouble(tagGPR);
-            unboxDouble(tagGPR, payloadGPR, fpr, virtualRegister);
+            unboxDouble(tagGPR, payloadGPR, fpr, scratch.fpr());
             hasUnboxedDouble = m_jit.jump();
             isInteger.link(&m_jit);
         }
@@ -276,7 +277,7 @@
         m_fprs.lock(oldFPR);
         tagGPR = allocate();
         payloadGPR = allocate();
-        boxDouble(oldFPR, tagGPR, payloadGPR, virtualRegister);
+        boxDouble(oldFPR, tagGPR, payloadGPR);
         m_fprs.unlock(oldFPR);
         m_fprs.release(oldFPR);
         m_gprs.retain(tagGPR, virtualRegister, SpillOrderJS);
@@ -348,7 +349,7 @@
     nonNumeric.link(&m_jit);
     silentSpillAllRegisters(resultTagGPR, resultPayloadGPR);
     callOperation(dfgConvertJSValueToNumber, FPRInfo::returnValueFPR, tagGPR, payloadGPR);
-    boxDouble(FPRInfo::returnValueFPR, resultTagGPR, resultPayloadGPR, at(m_compileIndex).virtualRegister());
+    boxDouble(FPRInfo::returnValueFPR, resultTagGPR, resultPayloadGPR);
     silentFillAllRegisters(resultTagGPR, resultPayloadGPR);
     JITCompiler::Jump hasCalledToNumber = m_jit.jump();
     
@@ -433,7 +434,7 @@
     m_jit.move(JITCompiler::TrustedImmPtr(&twoToThe32), resultPayload.gpr()); // reuse resultPayload register here.
     m_jit.addDouble(JITCompiler::Address(resultPayload.gpr(), 0), boxer.fpr());
         
-    boxDouble(boxer.fpr(), resultTag.gpr(), resultPayload.gpr(), at(m_compileIndex).virtualRegister());
+    boxDouble(boxer.fpr(), resultTag.gpr(), resultPayload.gpr());
         
     JITCompiler::Jump done = m_jit.jump();
         
@@ -553,7 +554,7 @@
         failureCases.link(&m_jit);
     }
     
-    boxDouble(tmp2FPR, resultTagGPR, resultPayloadGPR, at(m_compileIndex).virtualRegister());
+    boxDouble(tmp2FPR, resultTagGPR, resultPayloadGPR);
         
     if (!isKnownNumeric(regChild)) {
         ASSERT(notInt.isSet());
@@ -635,7 +636,7 @@
     if (arg1.isDouble()) {
         arg1TagGPR = tmpTag.gpr();
         arg1PayloadGPR = tmpPayload.gpr();
-        boxDouble(arg1.fpr(), arg1TagGPR, arg1PayloadGPR, at(arg1.index()).virtualRegister());
+        boxDouble(arg1.fpr(), arg1TagGPR, arg1PayloadGPR);
         arg2TagGPR = arg2.tagGPR();
         arg2PayloadGPR = arg2.payloadGPR();
     } else if (arg2.isDouble()) {
@@ -643,7 +644,7 @@
         arg1PayloadGPR = arg1.payloadGPR();
         arg2TagGPR = tmpTag.gpr();
         arg2PayloadGPR = tmpPayload.gpr();
-        boxDouble(arg2.fpr(), arg2TagGPR, arg2PayloadGPR, at(arg2.index()).virtualRegister());
+        boxDouble(arg2.fpr(), arg2TagGPR, arg2PayloadGPR);
     } else {
         arg1TagGPR = arg1.tagGPR();
         arg1PayloadGPR = arg1.payloadGPR();
@@ -705,6 +706,7 @@
     JITCompiler::Jump child2NotInt2;
         
     if (!isKnownInteger(node.child1())) {
+        FPRTemporary scratch(this);
         child1NotInt.link(&m_jit);
             
         if (!isKnownNumeric(node.child1())) {
@@ -715,7 +717,7 @@
         if (arg1.isDouble())
             m_jit.moveDouble(arg1.fpr(), tmp1FPR);
         else
-            unboxDouble(arg1TagGPR, arg1PayloadGPR, tmp1FPR, at(arg1.index()).virtualRegister());
+            unboxDouble(arg1TagGPR, arg1PayloadGPR, tmp1FPR, scratch.fpr());
             
         // child1 is converted to a double; child2 may either be an int or
         // a boxed double
@@ -739,6 +741,7 @@
     }
         
     if (!isKnownInteger(node.child2())) {
+        FPRTemporary scratch(this);
         child2NotInt.link(&m_jit);
             
         if (!isKnownNumeric(node.child2())) {
@@ -755,7 +758,7 @@
         if (arg2.isDouble())
             m_jit.moveDouble(arg2.fpr(), tmp2FPR);
         else
-            unboxDouble(arg2TagGPR, arg2PayloadGPR, tmp2FPR, at(arg2.index()).virtualRegister());
+            unboxDouble(arg2TagGPR, arg2PayloadGPR, tmp2FPR, scratch.fpr());
     }
         
     haveFPRArguments.link(&m_jit);
@@ -790,7 +793,7 @@
         failureCases.link(&m_jit);
     }
         
-    boxDouble(tmp1FPR, resultTagGPR, resultPayloadGPR, at(m_compileIndex).virtualRegister());
+    boxDouble(tmp1FPR, resultTagGPR, resultPayloadGPR);
         
     if (!notNumbers.empty()) {
         ASSERT(op == ValueAdd);

Modified: trunk/Source/JavaScriptCore/dfg/DFGJITCompiler.h (97902 => 97903)


--- trunk/Source/JavaScriptCore/dfg/DFGJITCompiler.h	2011-10-20 00:44:28 UTC (rev 97902)
+++ trunk/Source/JavaScriptCore/dfg/DFGJITCompiler.h	2011-10-20 00:48:22 UTC (rev 97903)
@@ -345,20 +345,23 @@
         return fpr;
     }
 #elif USE(JSVALUE32_64)
-    // FIXME: The box/unbox of doubles could be improved without exchanging data through memory,
-    // for example on x86 some SSE instructions can help do this.
-    void boxDouble(FPRReg fpr, GPRReg tagGPR, GPRReg payloadGPR, VirtualRegister virtualRegister)
+    void boxDouble(FPRReg fpr, GPRReg tagGPR, GPRReg payloadGPR)
     {
-        storeDouble(fpr, addressFor(virtualRegister));
-        load32(tagFor(virtualRegister), tagGPR);
-        load32(payloadFor(virtualRegister), payloadGPR);
+#if CPU(X86)
+        movePackedToInt32(fpr, payloadGPR);
+        rshiftPacked(TrustedImm32(32), fpr);
+        movePackedToInt32(fpr, tagGPR);
+#endif
     }
-    void unboxDouble(GPRReg tagGPR, GPRReg payloadGPR, FPRReg fpr, VirtualRegister virtualRegister)
+    void unboxDouble(GPRReg tagGPR, GPRReg payloadGPR, FPRReg fpr, FPRReg scratchFPR)
     {
         jitAssertIsJSDouble(tagGPR);
-        store32(tagGPR, tagFor(virtualRegister));
-        store32(payloadGPR, payloadFor(virtualRegister));
-        loadDouble(addressFor(virtualRegister), fpr);
+#if CPU(X86)
+        moveInt32ToPacked(payloadGPR, fpr);
+        moveInt32ToPacked(tagGPR, scratchFPR);
+        lshiftPacked(TrustedImm32(32), scratchFPR);
+        orPacked(scratchFPR, fpr);
+#endif
     }
 #endif
 

Modified: trunk/Source/JavaScriptCore/dfg/DFGSpeculativeJIT32_64.cpp (97902 => 97903)


--- trunk/Source/JavaScriptCore/dfg/DFGSpeculativeJIT32_64.cpp	2011-10-20 00:44:28 UTC (rev 97902)
+++ trunk/Source/JavaScriptCore/dfg/DFGSpeculativeJIT32_64.cpp	2011-10-20 00:48:22 UTC (rev 97903)
@@ -218,9 +218,10 @@
         JITCompiler::Jump hasUnboxedDouble;
 
         if (info.registerFormat() != DataFormatJSInteger) {
+            FPRTemporary scratch(this);
             JITCompiler::Jump isInteger = m_jit.branch32(MacroAssembler::Equal, tagGPR, TrustedImm32(JSValue::Int32Tag));
             speculationCheck(m_jit.branch32(MacroAssembler::AboveOrEqual, tagGPR, TrustedImm32(JSValue::LowestTag)));
-            unboxDouble(tagGPR, payloadGPR, fpr, virtualRegister);
+            unboxDouble(tagGPR, payloadGPR, fpr, scratch.fpr());
             hasUnboxedDouble = m_jit.jump();
             isInteger.link(&m_jit);
         }
@@ -348,10 +349,12 @@
 
 JITCompiler::Jump SpeculativeJIT::convertToDouble(JSValueOperand& op, FPRReg result)
 {
+    FPRTemporary scratch(this);
+
     JITCompiler::Jump isInteger = m_jit.branch32(MacroAssembler::Equal, op.tagGPR(), TrustedImm32(JSValue::Int32Tag));
     JITCompiler::Jump notNumber = m_jit.branch32(MacroAssembler::AboveOrEqual, op.payloadGPR(), TrustedImm32(JSValue::LowestTag));
 
-    unboxDouble(op.tagGPR(), op.payloadGPR(), result, at(op.index()).virtualRegister());
+    unboxDouble(op.tagGPR(), op.payloadGPR(), result, scratch.fpr());
     JITCompiler::Jump done = m_jit.jump();
 
     isInteger.link(&m_jit);
@@ -1638,7 +1641,7 @@
         JSValueOperand op1(this, node.child1());
         op1.fill();
         if (op1.isDouble())
-            boxDouble(op1.fpr(), GPRInfo::returnValueGPR2, GPRInfo::returnValueGPR, at(op1.index()).virtualRegister());
+            boxDouble(op1.fpr(), GPRInfo::returnValueGPR2, GPRInfo::returnValueGPR);
         else {
             if (op1.payloadGPR() == GPRInfo::returnValueGPR2 && op1.tagGPR() == GPRInfo::returnValueGPR)
                 m_jit.swap(GPRInfo::returnValueGPR, GPRInfo::returnValueGPR2);
_______________________________________________
webkit-changes mailing list
[email protected]
http://lists.webkit.org/mailman/listinfo.cgi/webkit-changes

Reply via email to