Diff
Modified: trunk/Source/_javascript_Core/ChangeLog (198872 => 198873)
--- trunk/Source/_javascript_Core/ChangeLog 2016-03-31 02:03:57 UTC (rev 198872)
+++ trunk/Source/_javascript_Core/ChangeLog 2016-03-31 02:05:13 UTC (rev 198873)
@@ -1,3 +1,90 @@
+2016-03-30 Benjamin Poulain <[email protected]>
+
+ [JSC][x86] Add the 3 operands forms of floating point addition and multiplication
+ https://bugs.webkit.org/show_bug.cgi?id=156043
+
+ Reviewed by Geoffrey Garen.
+
+ When they are available, VADD and VMUL are better options to lower
+ floating point addition and multiplication.
+
+ In the simple cases when one of the operands is aliased to the destination,
+ those forms have the same size or are 1 byte shorter depending on the registers.
+
+ In the more advanced cases, we gain nice advantages with the new forms:
+ -We can get rid of the MoveDouble in front of the instruction when we cannot
+ alias.
+ -We can disable aliasing entirely in Air. That is useful for latency
+ since computing coalescing is not exactly cheap.
+
+ * assembler/MacroAssemblerX86Common.cpp:
+ * assembler/MacroAssemblerX86Common.h:
+ (JSC::MacroAssemblerX86Common::and32):
+ (JSC::MacroAssemblerX86Common::mul32):
+ (JSC::MacroAssemblerX86Common::or32):
+ (JSC::MacroAssemblerX86Common::xor32):
+ (JSC::MacroAssemblerX86Common::branchAdd32):
+ The change in B3LowerToAir exposed a bug in the fake 3 operands
+ forms of those instructions. If the address is equal to
+ the destination, we were nuking the address.
+
+ For example,
+ Add32([%r11], %eax, %r11)
+ would generate:
+ move %eax, %r11
+ add32 [%r11], %r11
+ which crashes.
+
+ I updated the codegen of those instructions to support that case through
+ load32 [%r11], %r11
+ add32 %eax, %r11
+
+ The weird case where all arguments have the same register
+ is handled too.
+
+ (JSC::MacroAssemblerX86Common::addDouble):
+ (JSC::MacroAssemblerX86Common::addFloat):
+ (JSC::MacroAssemblerX86Common::mulDouble):
+ (JSC::MacroAssemblerX86Common::mulFloat):
+ (JSC::MacroAssemblerX86Common::supportsFloatingPointRounding):
+ (JSC::MacroAssemblerX86Common::supportsAVX):
+ (JSC::MacroAssemblerX86Common::updateEax1EcxFlags):
+ * assembler/MacroAssemblerX86_64.h:
+ (JSC::MacroAssemblerX86_64::branchAdd64):
+ * assembler/X86Assembler.h:
+ (JSC::X86Assembler::vaddsd_rr):
+ (JSC::X86Assembler::vaddsd_mr):
+ (JSC::X86Assembler::vaddss_rr):
+ (JSC::X86Assembler::vaddss_mr):
+ (JSC::X86Assembler::vmulsd_rr):
+ (JSC::X86Assembler::vmulsd_mr):
+ (JSC::X86Assembler::vmulss_rr):
+ (JSC::X86Assembler::vmulss_mr):
+ (JSC::X86Assembler::X86InstructionFormatter::SingleInstructionBufferWriter::memoryModRM):
+ * b3/B3LowerToAir.cpp:
+ (JSC::B3::Air::LowerToAir::appendBinOp):
+ Add the 3 operand forms so that we lower Add and Mul
+ to the best form directly.
+
+ I will change how we lower the fake 3 operands instructions
+ but the codegen should end up the same in most cases.
+ The new codegen is the load32 + op above.
+
+ * b3/air/AirInstInlines.h:
+ (JSC::B3::Air::Inst::shouldTryAliasingDef):
+ * b3/air/testair.cpp:
+ (JSC::B3::Air::testX86VMULSD):
+ (JSC::B3::Air::testX86VMULSDDestRex):
+ (JSC::B3::Air::testX86VMULSDOp1DestRex):
+ (JSC::B3::Air::testX86VMULSDOp2DestRex):
+ (JSC::B3::Air::testX86VMULSDOpsDestRex):
+ (JSC::B3::Air::testX86VMULSDAddr):
+ (JSC::B3::Air::testX86VMULSDAddrOpRexAddr):
+ (JSC::B3::Air::testX86VMULSDDestRexAddr):
+ (JSC::B3::Air::testX86VMULSDRegOpDestRexAddr):
+ (JSC::B3::Air::testX86VMULSDAddrOpDestRexAddr):
+ Make sure we have some coverage for AVX encoding of instructions.
+
2016-03-30 Saam Barati <[email protected]>
Change some release asserts in CodeBlock linking into debug asserts
Modified: trunk/Source/_javascript_Core/assembler/MacroAssemblerX86Common.cpp (198872 => 198873)
--- trunk/Source/_javascript_Core/assembler/MacroAssemblerX86Common.cpp 2016-03-31 02:03:57 UTC (rev 198872)
+++ trunk/Source/_javascript_Core/assembler/MacroAssemblerX86Common.cpp 2016-03-31 02:05:13 UTC (rev 198873)
@@ -553,6 +553,7 @@
#endif
MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_sse4_1CheckState = CPUIDCheckState::NotChecked;
+MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_avxCheckState = CPUIDCheckState::NotChecked;
MacroAssemblerX86Common::CPUIDCheckState MacroAssemblerX86Common::s_lzcntCheckState = CPUIDCheckState::NotChecked;
} // namespace JSC
Modified: trunk/Source/_javascript_Core/assembler/MacroAssemblerX86Common.h (198872 => 198873)
--- trunk/Source/_javascript_Core/assembler/MacroAssemblerX86Common.h 2016-03-31 02:03:57 UTC (rev 198872)
+++ trunk/Source/_javascript_Core/assembler/MacroAssemblerX86Common.h 2016-03-31 02:05:13 UTC (rev 198873)
@@ -270,14 +270,20 @@
void and32(Address op1, RegisterID op2, RegisterID dest)
{
- move32IfNeeded(op2, dest);
- and32(op1, dest);
+ if (op2 == dest)
+ and32(op1, dest);
+ else if (op1.base == dest) {
+ load32(op1, dest);
+ and32(op2, dest);
+ } else {
+ zeroExtend32ToPtr(op2, dest);
+ and32(op1, dest);
+ }
}
void and32(RegisterID op1, Address op2, RegisterID dest)
{
- move32IfNeeded(op1, dest);
- and32(op2, dest);
+ and32(op2, op1, dest);
}
void and32(TrustedImm32 imm, RegisterID src, RegisterID dest)
@@ -360,16 +366,22 @@
m_assembler.imull_mr(src.offset, src.base, dest);
}
- void mul32(Address src1, RegisterID src2, RegisterID dest)
+ void mul32(Address op1, RegisterID op2, RegisterID dest)
{
- move32IfNeeded(src2, dest);
- mul32(src1, dest);
+ if (op2 == dest)
+ mul32(op1, dest);
+ else if (op1.base == dest) {
+ load32(op1, dest);
+ mul32(op2, dest);
+ } else {
+ zeroExtend32ToPtr(op2, dest);
+ mul32(op1, dest);
+ }
}
void mul32(RegisterID src1, Address src2, RegisterID dest)
{
- move32IfNeeded(src1, dest);
- mul32(src2, dest);
+ mul32(src2, src1, dest);
}
void mul32(TrustedImm32 imm, RegisterID src, RegisterID dest)
@@ -450,14 +462,20 @@
void or32(Address op1, RegisterID op2, RegisterID dest)
{
- move32IfNeeded(op2, dest);
- or32(op1, dest);
+ if (op2 == dest)
+ or32(op1, dest);
+ else if (op1.base == dest) {
+ load32(op1, dest);
+ or32(op2, dest);
+ } else {
+ zeroExtend32ToPtr(op2, dest);
+ or32(op1, dest);
+ }
}
void or32(RegisterID op1, Address op2, RegisterID dest)
{
- move32IfNeeded(op1, dest);
- or32(op2, dest);
+ or32(op2, op1, dest);
}
void or32(TrustedImm32 imm, RegisterID src, RegisterID dest)
@@ -609,14 +627,20 @@
void xor32(Address op1, RegisterID op2, RegisterID dest)
{
- move32IfNeeded(op2, dest);
- xor32(op1, dest);
+ if (op2 == dest)
+ xor32(op1, dest);
+ else if (op1.base == dest) {
+ load32(op1, dest);
+ xor32(op2, dest);
+ } else {
+ zeroExtend32ToPtr(op2, dest);
+ xor32(op1, dest);
+ }
}
void xor32(RegisterID op1, Address op2, RegisterID dest)
{
- move32IfNeeded(op1, dest);
- xor32(op2, dest);
+ xor32(op2, op1, dest);
}
void xor32(TrustedImm32 imm, RegisterID src, RegisterID dest)
@@ -1066,96 +1090,94 @@
void addDouble(FPRegisterID src, FPRegisterID dest)
{
- ASSERT(isSSE2Present());
- m_assembler.addsd_rr(src, dest);
+ addDouble(src, dest, dest);
}
void addDouble(FPRegisterID op1, FPRegisterID op2, FPRegisterID dest)
{
- ASSERT(isSSE2Present());
- if (op1 == dest)
- addDouble(op2, dest);
+ if (supportsAVX())
+ m_assembler.vaddsd_rr(op1, op2, dest);
else {
- moveDouble(op2, dest);
- addDouble(op1, dest);
+ ASSERT(isSSE2Present());
+ if (op1 == dest)
+ m_assembler.addsd_rr(op2, dest);
+ else {
+ moveDouble(op2, dest);
+ m_assembler.addsd_rr(op1, dest);
+ }
}
}
void addDouble(Address src, FPRegisterID dest)
{
- ASSERT(isSSE2Present());
- m_assembler.addsd_mr(src.offset, src.base, dest);
+ addDouble(src, dest, dest);
}
void addDouble(Address op1, FPRegisterID op2, FPRegisterID dest)
{
- ASSERT(isSSE2Present());
- if (op2 == dest) {
- addDouble(op1, dest);
- return;
+ if (supportsAVX())
+ m_assembler.vaddsd_mr(op1.offset, op1.base, op2, dest);
+ else {
+ ASSERT(isSSE2Present());
+ if (op2 == dest) {
+ m_assembler.addsd_mr(op1.offset, op1.base, dest);
+ return;
+ }
+
+ loadDouble(op1, dest);
+ addDouble(op2, dest);
}
-
- loadDouble(op1, dest);
- addDouble(op2, dest);
}
void addDouble(FPRegisterID op1, Address op2, FPRegisterID dest)
{
- ASSERT(isSSE2Present());
- if (op1 == dest) {
- addDouble(op2, dest);
- return;
- }
-
- loadDouble(op2, dest);
- addDouble(op1, dest);
+ addDouble(op2, op1, dest);
}
void addFloat(FPRegisterID src, FPRegisterID dest)
{
- ASSERT(isSSE2Present());
- m_assembler.addss_rr(src, dest);
+ addFloat(src, dest, dest);
}
void addFloat(Address src, FPRegisterID dest)
{
- ASSERT(isSSE2Present());
- m_assembler.addss_mr(src.offset, src.base, dest);
+ addFloat(src, dest, dest);
}
void addFloat(FPRegisterID op1, FPRegisterID op2, FPRegisterID dest)
{
- ASSERT(isSSE2Present());
- if (op1 == dest)
- addFloat(op2, dest);
+ if (supportsAVX())
+ m_assembler.vaddss_rr(op1, op2, dest);
else {
- moveDouble(op2, dest);
- addFloat(op1, dest);
+ ASSERT(isSSE2Present());
+ if (op1 == dest)
+ m_assembler.addss_rr(op2, dest);
+ else {
+ moveDouble(op2, dest);
+ m_assembler.addss_rr(op1, dest);
+ }
}
}
void addFloat(Address op1, FPRegisterID op2, FPRegisterID dest)
{
- ASSERT(isSSE2Present());
- if (op2 == dest) {
- addFloat(op1, dest);
- return;
+ if (supportsAVX())
+ m_assembler.vaddss_mr(op1.offset, op1.base, op2, dest);
+ else {
+ ASSERT(isSSE2Present());
+ if (op2 == dest) {
+ m_assembler.addss_mr(op1.offset, op1.base, dest);
+ return;
+ }
+
+ loadFloat(op1, dest);
+ addFloat(op2, dest);
}
-
- loadFloat(op1, dest);
- addFloat(op2, dest);
}
void addFloat(FPRegisterID op1, Address op2, FPRegisterID dest)
{
- ASSERT(isSSE2Present());
- if (op1 == dest) {
- addFloat(op2, dest);
- return;
- }
-
- loadFloat(op2, dest);
- addFloat(op1, dest);
+ addFloat(op1, op2, dest);
}
void divDouble(FPRegisterID src, FPRegisterID dest)
@@ -1226,92 +1248,92 @@
void mulDouble(FPRegisterID src, FPRegisterID dest)
{
- ASSERT(isSSE2Present());
- m_assembler.mulsd_rr(src, dest);
+ mulDouble(src, dest, dest);
}
void mulDouble(FPRegisterID op1, FPRegisterID op2, FPRegisterID dest)
{
- ASSERT(isSSE2Present());
- if (op1 == dest)
- mulDouble(op2, dest);
+ if (supportsAVX())
+ m_assembler.vmulsd_rr(op1, op2, dest);
else {
- moveDouble(op2, dest);
- mulDouble(op1, dest);
+ ASSERT(isSSE2Present());
+ if (op1 == dest)
+ m_assembler.mulsd_rr(op2, dest);
+ else {
+ moveDouble(op2, dest);
+ m_assembler.mulsd_rr(op1, dest);
+ }
}
}
void mulDouble(Address src, FPRegisterID dest)
{
- ASSERT(isSSE2Present());
- m_assembler.mulsd_mr(src.offset, src.base, dest);
+ mulDouble(src, dest, dest);
}
void mulDouble(Address op1, FPRegisterID op2, FPRegisterID dest)
{
- ASSERT(isSSE2Present());
- if (op2 == dest) {
- mulDouble(op1, dest);
- return;
+ if (supportsAVX())
+ m_assembler.vmulsd_mr(op1.offset, op1.base, op2, dest);
+ else {
+ ASSERT(isSSE2Present());
+ if (op2 == dest) {
+ m_assembler.mulsd_mr(op1.offset, op1.base, dest);
+ return;
+ }
+ loadDouble(op1, dest);
+ mulDouble(op2, dest);
}
- loadDouble(op1, dest);
- mulDouble(op2, dest);
}
void mulDouble(FPRegisterID op1, Address op2, FPRegisterID dest)
{
- ASSERT(isSSE2Present());
- if (op1 == dest) {
- mulDouble(op2, dest);
- return;
- }
- loadDouble(op2, dest);
- mulDouble(op1, dest);
+ return mulDouble(op2, op1, dest);
}
void mulFloat(FPRegisterID src, FPRegisterID dest)
{
- ASSERT(isSSE2Present());
- m_assembler.mulss_rr(src, dest);
+ mulFloat(src, dest, dest);
}
void mulFloat(Address src, FPRegisterID dest)
{
- ASSERT(isSSE2Present());
- m_assembler.mulss_mr(src.offset, src.base, dest);
+ mulFloat(src, dest, dest);
}
void mulFloat(FPRegisterID op1, FPRegisterID op2, FPRegisterID dest)
{
- ASSERT(isSSE2Present());
- if (op1 == dest)
- mulFloat(op2, dest);
+ if (supportsAVX())
+ m_assembler.vmulss_rr(op1, op2, dest);
else {
- moveDouble(op2, dest);
- mulFloat(op1, dest);
+ ASSERT(isSSE2Present());
+ if (op1 == dest)
+ m_assembler.mulss_rr(op2, dest);
+ else {
+ moveDouble(op2, dest);
+ m_assembler.mulss_rr(op1, dest);
+ }
}
}
void mulFloat(Address op1, FPRegisterID op2, FPRegisterID dest)
{
- ASSERT(isSSE2Present());
- if (op2 == dest) {
- mulFloat(op1, dest);
- return;
+ if (supportsAVX())
+ m_assembler.vmulss_mr(op1.offset, op1.base, op2, dest);
+ else {
+ ASSERT(isSSE2Present());
+ if (op2 == dest) {
+ m_assembler.mulss_mr(op1.offset, op1.base, dest);
+ return;
+ }
+ loadFloat(op1, dest);
+ mulFloat(op2, dest);
}
- loadFloat(op1, dest);
- mulFloat(op2, dest);
}
void mulFloat(FPRegisterID op1, Address op2, FPRegisterID dest)
{
- ASSERT(isSSE2Present());
- if (op1 == dest) {
- mulFloat(op2, dest);
- return;
- }
- loadFloat(op2, dest);
- mulFloat(op1, dest);
+ mulFloat(op2, op1, dest);
}
void andDouble(FPRegisterID src, FPRegisterID dst)
@@ -2143,16 +2165,21 @@
return branchAdd32(cond, src1, dest);
}
- Jump branchAdd32(ResultCondition cond, Address src1, RegisterID src2, RegisterID dest)
+ Jump branchAdd32(ResultCondition cond, Address op1, RegisterID op2, RegisterID dest)
{
- move32IfNeeded(src2, dest);
- return branchAdd32(cond, src1, dest);
+ if (op2 == dest)
+ return branchAdd32(cond, op1, dest);
+ if (op1.base == dest) {
+ load32(op1, dest);
+ return branchAdd32(cond, op2, dest);
+ }
+ zeroExtend32ToPtr(op2, dest);
+ return branchAdd32(cond, op1, dest);
}
Jump branchAdd32(ResultCondition cond, RegisterID src1, Address src2, RegisterID dest)
{
- move32IfNeeded(src1, dest);
- return branchAdd32(cond, src2, dest);
+ return branchAdd32(cond, src2, src1, dest);
}
Jump branchAdd32(ResultCondition cond, RegisterID src, TrustedImm32 imm, RegisterID dest)
@@ -2452,38 +2479,50 @@
static bool supportsFloatingPointRounding()
{
- if (s_sse4_1CheckState == CPUIDCheckState::NotChecked) {
- int flags = 0;
+ if (s_sse4_1CheckState == CPUIDCheckState::NotChecked)
+ updateEax1EcxFlags();
+ return s_sse4_1CheckState == CPUIDCheckState::Set;
+ }
+
+ static bool supportsAVX()
+ {
+ if (s_avxCheckState == CPUIDCheckState::NotChecked)
+ updateEax1EcxFlags();
+ return s_avxCheckState == CPUIDCheckState::Set;
+ }
+
+ static void updateEax1EcxFlags()
+ {
+ int flags = 0;
#if COMPILER(MSVC)
- int cpuInfo[4];
- __cpuid(cpuInfo, 0x1);
- flags = cpuInfo[2];
+ int cpuInfo[4];
+ __cpuid(cpuInfo, 0x1);
+ flags = cpuInfo[2];
#elif COMPILER(GCC_OR_CLANG)
#if CPU(X86_64)
- asm (
- "movl $0x1, %%eax;"
- "cpuid;"
- "movl %%ecx, %0;"
- : "=g" (flags)
- :
- : "%eax", "%ebx", "%ecx", "%edx"
- );
+ asm (
+ "movl $0x1, %%eax;"
+ "cpuid;"
+ "movl %%ecx, %0;"
+ : "=g" (flags)
+ :
+ : "%eax", "%ebx", "%ecx", "%edx"
+ );
#else
- asm (
- "movl $0x1, %%eax;"
- "pushl %%ebx;"
- "cpuid;"
- "popl %%ebx;"
- "movl %%ecx, %0;"
- : "=g" (flags)
- :
- : "%eax", "%ecx", "%edx"
- );
+ asm (
+ "movl $0x1, %%eax;"
+ "pushl %%ebx;"
+ "cpuid;"
+ "popl %%ebx;"
+ "movl %%ecx, %0;"
+ : "=g" (flags)
+ :
+ : "%eax", "%ecx", "%edx"
+ );
#endif
#endif // COMPILER(GCC_OR_CLANG)
- s_sse4_1CheckState = (flags & (1 << 19)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear;
- }
- return s_sse4_1CheckState == CPUIDCheckState::Set;
+ s_sse4_1CheckState = (flags & (1 << 19)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear;
+ s_avxCheckState = (flags & (1 << 28)) ? CPUIDCheckState::Set : CPUIDCheckState::Clear;
}
#if ENABLE(MASM_PROBE)
@@ -2731,7 +2770,8 @@
Clear,
Set
};
- static CPUIDCheckState s_sse4_1CheckState;
+ JS_EXPORT_PRIVATE static CPUIDCheckState s_sse4_1CheckState;
+ JS_EXPORT_PRIVATE static CPUIDCheckState s_avxCheckState;
static CPUIDCheckState s_lzcntCheckState;
};
Modified: trunk/Source/_javascript_Core/assembler/MacroAssemblerX86_64.h (198872 => 198873)
--- trunk/Source/_javascript_Core/assembler/MacroAssemblerX86_64.h 2016-03-31 02:03:57 UTC (rev 198872)
+++ trunk/Source/_javascript_Core/assembler/MacroAssemblerX86_64.h 2016-03-31 02:05:13 UTC (rev 198873)
@@ -922,16 +922,21 @@
return branchAdd64(cond, src1, dest);
}
- Jump branchAdd64(ResultCondition cond, Address src1, RegisterID src2, RegisterID dest)
+ Jump branchAdd64(ResultCondition cond, Address op1, RegisterID op2, RegisterID dest)
{
- move(src2, dest);
- return branchAdd64(cond, src1, dest);
+ if (op2 == dest)
+ return branchAdd64(cond, op1, dest);
+ if (op1.base == dest) {
+ load32(op1, dest);
+ return branchAdd64(cond, op2, dest);
+ }
+ move(op2, dest);
+ return branchAdd64(cond, op1, dest);
}
Jump branchAdd64(ResultCondition cond, RegisterID src1, Address src2, RegisterID dest)
{
- move(src1, dest);
- return branchAdd64(cond, src2, dest);
+ return branchAdd64(cond, src2, src1, dest);
}
Jump branchAdd64(ResultCondition cond, RegisterID src, RegisterID dest)
Modified: trunk/Source/_javascript_Core/assembler/X86Assembler.h (198872 => 198873)
--- trunk/Source/_javascript_Core/assembler/X86Assembler.h 2016-03-31 02:03:57 UTC (rev 198872)
+++ trunk/Source/_javascript_Core/assembler/X86Assembler.h 2016-03-31 02:05:13 UTC (rev 198873)
@@ -305,6 +305,17 @@
OP3_MFENCE = 0xF0,
} ThreeByteOpcodeID;
+ struct VexPrefix {
+ enum : uint8_t {
+ TwoBytes = 0xC5,
+ ThreeBytes = 0xC4
+ };
+ };
+ enum class VexImpliedBytes : uint8_t {
+ TwoBytesOp = 1,
+ ThreeBytesOp38 = 2,
+ ThreeBytesOp3A = 3
+ };
TwoByteOpcodeID cmovcc(Condition cond)
{
@@ -2087,24 +2098,44 @@
m_formatter.twoByteOp(OP2_ADDSD_VsdWsd, (RegisterID)dst, (RegisterID)src);
}
+ void vaddsd_rr(XMMRegisterID a, XMMRegisterID b, XMMRegisterID dst)
+ {
+ m_formatter.vexNdsLigWigCommutativeTwoByteOp(PRE_SSE_F2, OP2_ADDSD_VsdWsd, (RegisterID)dst, (RegisterID)a, (RegisterID)b);
+ }
+
void addsd_mr(int offset, RegisterID base, XMMRegisterID dst)
{
m_formatter.prefix(PRE_SSE_F2);
m_formatter.twoByteOp(OP2_ADDSD_VsdWsd, (RegisterID)dst, base, offset);
}
+ void vaddsd_mr(int offset, RegisterID base, XMMRegisterID b, XMMRegisterID dst)
+ {
+ m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_F2, OP2_ADDSD_VsdWsd, (RegisterID)dst, (RegisterID)b, base, offset);
+ }
+
void addss_rr(XMMRegisterID src, XMMRegisterID dst)
{
m_formatter.prefix(PRE_SSE_F3);
m_formatter.twoByteOp(OP2_ADDSD_VsdWsd, (RegisterID)dst, (RegisterID)src);
}
+ void vaddss_rr(XMMRegisterID a, XMMRegisterID b, XMMRegisterID dst)
+ {
+ m_formatter.vexNdsLigWigCommutativeTwoByteOp(PRE_SSE_F3, OP2_ADDSD_VsdWsd, (RegisterID)dst, (RegisterID)a, (RegisterID)b);
+ }
+
void addss_mr(int offset, RegisterID base, XMMRegisterID dst)
{
m_formatter.prefix(PRE_SSE_F3);
m_formatter.twoByteOp(OP2_ADDSD_VsdWsd, (RegisterID)dst, base, offset);
}
+ void vaddss_mr(int offset, RegisterID base, XMMRegisterID b, XMMRegisterID dst)
+ {
+ m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_F3, OP2_ADDSD_VsdWsd, (RegisterID)dst, (RegisterID)b, base, offset);
+ }
+
#if !CPU(X86_64)
void addsd_mr(const void* address, XMMRegisterID dst)
{
@@ -2295,24 +2326,44 @@
m_formatter.twoByteOp(OP2_MULSD_VsdWsd, (RegisterID)dst, (RegisterID)src);
}
+ void vmulsd_rr(XMMRegisterID a, XMMRegisterID b, XMMRegisterID dst)
+ {
+ m_formatter.vexNdsLigWigCommutativeTwoByteOp(PRE_SSE_F2, OP2_MULSD_VsdWsd, (RegisterID)dst, (RegisterID)a, (RegisterID)b);
+ }
+
void mulsd_mr(int offset, RegisterID base, XMMRegisterID dst)
{
m_formatter.prefix(PRE_SSE_F2);
m_formatter.twoByteOp(OP2_MULSD_VsdWsd, (RegisterID)dst, base, offset);
}
+ void vmulsd_mr(int offset, RegisterID base, XMMRegisterID b, XMMRegisterID dst)
+ {
+ m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_F2, OP2_MULSD_VsdWsd, (RegisterID)dst, (RegisterID)b, base, offset);
+ }
+
void mulss_rr(XMMRegisterID src, XMMRegisterID dst)
{
m_formatter.prefix(PRE_SSE_F3);
m_formatter.twoByteOp(OP2_MULSD_VsdWsd, (RegisterID)dst, (RegisterID)src);
}
+ void vmulss_rr(XMMRegisterID a, XMMRegisterID b, XMMRegisterID dst)
+ {
+ m_formatter.vexNdsLigWigCommutativeTwoByteOp(PRE_SSE_F3, OP2_MULSD_VsdWsd, (RegisterID)dst, (RegisterID)a, (RegisterID)b);
+ }
+
void mulss_mr(int offset, RegisterID base, XMMRegisterID dst)
{
m_formatter.prefix(PRE_SSE_F3);
m_formatter.twoByteOp(OP2_MULSD_VsdWsd, (RegisterID)dst, base, offset);
}
+ void vmulss_mr(int offset, RegisterID base, XMMRegisterID b, XMMRegisterID dst)
+ {
+ m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_F3, OP2_MULSD_VsdWsd, (RegisterID)dst, (RegisterID)b, base, offset);
+ }
+
void pextrw_irr(int whichWord, XMMRegisterID src, RegisterID dst)
{
m_formatter.prefix(PRE_SSE_66);
@@ -3068,6 +3119,46 @@
putIntUnchecked(reinterpret_cast<int32_t>(address));
}
#endif
+ ALWAYS_INLINE void twoBytesVex(OneByteOpcodeID simdPrefix, RegisterID inOpReg, RegisterID r)
+ {
+ putByteUnchecked(VexPrefix::TwoBytes);
+
+ uint8_t secondByte = vexEncodeSimdPrefix(simdPrefix);
+ secondByte |= (~inOpReg & 0xf) << 3;
+ secondByte |= !regRequiresRex(r) << 7;
+ putByteUnchecked(secondByte);
+ }
+
+ ALWAYS_INLINE void threeBytesVexNds(OneByteOpcodeID simdPrefix, VexImpliedBytes impliedBytes, RegisterID r, RegisterID inOpReg, RegisterID b)
+ {
+ putByteUnchecked(VexPrefix::ThreeBytes);
+
+ uint8_t secondByte = static_cast<uint8_t>(impliedBytes);
+ secondByte |= !regRequiresRex(r) << 7;
+ secondByte |= 1 << 6; // REX.X
+ secondByte |= !regRequiresRex(b) << 5;
+ putByteUnchecked(secondByte);
+
+ uint8_t thirdByte = vexEncodeSimdPrefix(simdPrefix);
+ thirdByte |= (~inOpReg & 0xf) << 3;
+ putByteUnchecked(thirdByte);
+ }
+ private:
+ uint8_t vexEncodeSimdPrefix(OneByteOpcodeID simdPrefix)
+ {
+ switch (simdPrefix) {
+ case 0x66:
+ return 1;
+ case 0xF3:
+ return 2;
+ case 0xF2:
+ return 3;
+ default:
+ RELEASE_ASSERT_NOT_REACHED();
+ }
+ return 0;
+ }
+
};
// Word-sized operands / no operand instruction formatters.
@@ -3189,7 +3280,33 @@
writer.memoryModRM(reg, address);
}
#endif
+ void vexNdsLigWigCommutativeTwoByteOp(OneByteOpcodeID simdPrefix, TwoByteOpcodeID opcode, RegisterID dest, RegisterID a, RegisterID b)
+ {
+ SingleInstructionBufferWriter writer(m_buffer);
+ // Since this is a commutative operation, we can try switching the arguments.
+ if (regRequiresRex(b))
+ std::swap(a, b);
+
+ if (regRequiresRex(b))
+ writer.threeBytesVexNds(simdPrefix, VexImpliedBytes::TwoBytesOp, dest, a, b);
+ else
+ writer.twoBytesVex(simdPrefix, a, dest);
+ writer.putByteUnchecked(opcode);
+ writer.registerModRM(dest, b);
+ }
+
+ void vexNdsLigWigTwoByteOp(OneByteOpcodeID simdPrefix, TwoByteOpcodeID opcode, RegisterID dest, RegisterID a, RegisterID base, int offset)
+ {
+ SingleInstructionBufferWriter writer(m_buffer);
+ if (regRequiresRex(base))
+ writer.threeBytesVexNds(simdPrefix, VexImpliedBytes::TwoBytesOp, dest, a, base);
+ else
+ writer.twoBytesVex(simdPrefix, a, dest);
+ writer.putByteUnchecked(opcode);
+ writer.memoryModRM(dest, base, offset);
+ }
+
void threeByteOp(TwoByteOpcodeID twoBytePrefix, ThreeByteOpcodeID opcode)
{
SingleInstructionBufferWriter writer(m_buffer);
Modified: trunk/Source/_javascript_Core/b3/B3LowerToAir.cpp (198872 => 198873)
--- trunk/Source/_javascript_Core/b3/B3LowerToAir.cpp 2016-03-31 02:03:57 UTC (rev 198872)
+++ trunk/Source/_javascript_Core/b3/B3LowerToAir.cpp 2016-03-31 02:05:13 UTC (rev 198873)
@@ -715,8 +715,13 @@
// over three operand forms.
if (left != right) {
+ ArgPromise leftAddr = loadPromise(left);
+ if (isValidForm(opcode, leftAddr.kind(), Arg::Tmp, Arg::Tmp)) {
+ append(opcode, leftAddr.consume(*this), tmp(right), result);
+ return;
+ }
+
if (commutativity == Commutative) {
- ArgPromise leftAddr = loadPromise(left);
if (isValidForm(opcode, leftAddr.kind(), Arg::Tmp)) {
append(relaxedMoveForType(m_value->type()), tmp(right), result);
append(opcode, leftAddr.consume(*this), result);
@@ -725,6 +730,10 @@
}
ArgPromise rightAddr = loadPromise(right);
+ if (isValidForm(opcode, Arg::Tmp, rightAddr.kind(), Arg::Tmp)) {
+ append(opcode, tmp(left), rightAddr.consume(*this), result);
+ return;
+ }
if (isValidForm(opcode, rightAddr.kind(), Arg::Tmp)) {
append(relaxedMoveForType(m_value->type()), tmp(left), result);
append(opcode, rightAddr.consume(*this), result);
Modified: trunk/Source/_javascript_Core/b3/air/AirInstInlines.h (198872 => 198873)
--- trunk/Source/_javascript_Core/b3/air/AirInstInlines.h 2016-03-31 02:03:57 UTC (rev 198872)
+++ trunk/Source/_javascript_Core/b3/air/AirInstInlines.h 2016-03-31 02:05:13 UTC (rev 198873)
@@ -180,17 +180,24 @@
case Or64:
case Xor32:
case Xor64:
- case AddDouble:
- case AddFloat:
case AndFloat:
case AndDouble:
- case MulDouble:
- case MulFloat:
case XorDouble:
case XorFloat:
if (args.size() == 3)
return 2;
break;
+ case AddDouble:
+ case AddFloat:
+ case MulDouble:
+ case MulFloat:
+#if CPU(X86) || CPU(X86_64)
+ if (MacroAssembler::supportsAVX())
+ return Nullopt;
+#endif
+ if (args.size() == 3)
+ return 2;
+ break;
case BranchAdd32:
case BranchAdd64:
if (args.size() == 4)
Modified: trunk/Source/_javascript_Core/b3/air/testair.cpp (198872 => 198873)
--- trunk/Source/_javascript_Core/b3/air/testair.cpp 2016-03-31 02:03:57 UTC (rev 198872)
+++ trunk/Source/_javascript_Core/b3/air/testair.cpp 2016-03-31 02:05:13 UTC (rev 198873)
@@ -1633,6 +1633,151 @@
CHECK(things[3] == 3);
}
+#if CPU(X86) || CPU(X86_64)
+void testX86VMULSD()
+{
+ B3::Procedure proc;
+ Code& code = proc.code();
+
+ BasicBlock* root = code.addBlock();
+ root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Tmp(FPRInfo::argumentFPR1), Tmp(FPRInfo::argumentFPR2));
+ root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR2), Tmp(FPRInfo::returnValueFPR));
+ root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+ CHECK(compileAndRun<double>(proc, 2.4, 4.2, pureNaN()) == 2.4 * 4.2);
+}
+
+void testX86VMULSDDestRex()
+{
+ B3::Procedure proc;
+ Code& code = proc.code();
+
+ BasicBlock* root = code.addBlock();
+ root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Tmp(FPRInfo::argumentFPR1), Tmp(X86Registers::xmm15));
+ root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
+ root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+ CHECK(compileAndRun<double>(proc, 2.4, 4.2, pureNaN()) == 2.4 * 4.2);
+}
+
+void testX86VMULSDOp1DestRex()
+{
+ B3::Procedure proc;
+ Code& code = proc.code();
+
+ BasicBlock* root = code.addBlock();
+ root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm14));
+ root->append(MulDouble, nullptr, Tmp(X86Registers::xmm14), Tmp(FPRInfo::argumentFPR1), Tmp(X86Registers::xmm15));
+ root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
+ root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+ CHECK(compileAndRun<double>(proc, 2.4, 4.2, pureNaN()) == 2.4 * 4.2);
+}
+
+void testX86VMULSDOp2DestRex()
+{
+ B3::Procedure proc;
+ Code& code = proc.code();
+
+ BasicBlock* root = code.addBlock();
+ root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR1), Tmp(X86Registers::xmm14));
+ root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm14), Tmp(X86Registers::xmm15));
+ root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
+ root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+ CHECK(compileAndRun<double>(proc, 2.4, 4.2, pureNaN()) == 2.4 * 4.2);
+}
+
+void testX86VMULSDOpsDestRex()
+{
+ B3::Procedure proc;
+ Code& code = proc.code();
+
+ BasicBlock* root = code.addBlock();
+ root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm14));
+ root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR1), Tmp(X86Registers::xmm13));
+ root->append(MulDouble, nullptr, Tmp(X86Registers::xmm14), Tmp(X86Registers::xmm13), Tmp(X86Registers::xmm15));
+ root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
+ root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+ CHECK(compileAndRun<double>(proc, 2.4, 4.2, pureNaN()) == 2.4 * 4.2);
+}
+
+void testX86VMULSDAddr()
+{
+ B3::Procedure proc;
+ Code& code = proc.code();
+
+ BasicBlock* root = code.addBlock();
+ root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Arg::addr(Tmp(GPRInfo::argumentGPR0), - 16), Tmp(FPRInfo::argumentFPR2));
+ root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR2), Tmp(FPRInfo::returnValueFPR));
+ root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+ double secondArg = 4.2;
+ CHECK(compileAndRun<double>(proc, 2.4, &secondArg + 2, pureNaN()) == 2.4 * 4.2);
+}
+
+void testX86VMULSDAddrOpRexAddr()
+{
+ B3::Procedure proc;
+ Code& code = proc.code();
+
+ BasicBlock* root = code.addBlock();
+ root->append(Move, nullptr, Tmp(GPRInfo::argumentGPR0), Tmp(X86Registers::r13));
+ root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Arg::addr(Tmp(X86Registers::r13), - 16), Tmp(FPRInfo::argumentFPR2));
+ root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR2), Tmp(FPRInfo::returnValueFPR));
+ root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+ double secondArg = 4.2;
+ CHECK(compileAndRun<double>(proc, 2.4, &secondArg + 2, pureNaN()) == 2.4 * 4.2);
+}
+
+void testX86VMULSDDestRexAddr()
+{
+ B3::Procedure proc;
+ Code& code = proc.code();
+
+ BasicBlock* root = code.addBlock();
+ root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Arg::addr(Tmp(GPRInfo::argumentGPR0), 16), Tmp(X86Registers::xmm15));
+ root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
+ root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+ double secondArg = 4.2;
+ CHECK(compileAndRun<double>(proc, 2.4, &secondArg - 2, pureNaN()) == 2.4 * 4.2);
+}
+
+void testX86VMULSDRegOpDestRexAddr()
+{
+ B3::Procedure proc;
+ Code& code = proc.code();
+
+ BasicBlock* root = code.addBlock();
+ root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm14));
+ root->append(MulDouble, nullptr, Arg::addr(Tmp(GPRInfo::argumentGPR0)), Tmp(X86Registers::xmm14), Tmp(X86Registers::xmm15));
+ root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
+ root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+ double secondArg = 4.2;
+ CHECK(compileAndRun<double>(proc, 2.4, &secondArg, pureNaN()) == 2.4 * 4.2);
+}
+
+void testX86VMULSDAddrOpDestRexAddr()
+{
+ B3::Procedure proc;
+ Code& code = proc.code();
+
+ BasicBlock* root = code.addBlock();
+ root->append(Move, nullptr, Tmp(GPRInfo::argumentGPR0), Tmp(X86Registers::r13));
+ root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Arg::addr(Tmp(X86Registers::r13), 8), Tmp(X86Registers::xmm15));
+ root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
+ root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+ double secondArg = 4.2;
+ CHECK(compileAndRun<double>(proc, 2.4, &secondArg - 1, pureNaN()) == 2.4 * 4.2);
+}
+
+#endif
+
#define RUN(test) do { \
if (!shouldRun(#test)) \
break; \
@@ -1693,6 +1838,20 @@
RUN(testShuffleSwapDouble());
RUN(testShuffleShiftDouble());
+#if CPU(X86) || CPU(X86_64)
+ RUN(testX86VMULSD());
+ RUN(testX86VMULSDDestRex());
+ RUN(testX86VMULSDOp1DestRex());
+ RUN(testX86VMULSDOp2DestRex());
+ RUN(testX86VMULSDOpsDestRex());
+
+ RUN(testX86VMULSDAddr());
+ RUN(testX86VMULSDAddrOpRexAddr());
+ RUN(testX86VMULSDDestRexAddr());
+ RUN(testX86VMULSDRegOpDestRexAddr());
+ RUN(testX86VMULSDAddrOpDestRexAddr());
+#endif
+
if (tasks.isEmpty())
usage();