Reviewers: William Hesse,
Description:
X64: Add a macro to load a double from memory, using the SSE3 movddup
instruction when available.
The SSE3 instruction loads the entire XMM register, avoiding partial-register
dependencies.
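For reviewers unfamiliar with movddup (SSE3, encoded F2 0F 12 /r, as emitted
in the assembler hunk below): it reads 64 bits from memory and duplicates
them into both halves of the destination, so all 128 bits of the XMM register
are written. A standalone sketch of the semantics via the corresponding
compiler intrinsic (illustration only, not part of this patch):

  #include <pmmintrin.h>  // SSE3 intrinsics; pulls in SSE2's emmintrin.h
  #include <stdio.h>

  int main() {
    double d = 1.5;
    __m128d v = _mm_loaddup_pd(&d);  // typically compiles to movddup
    double halves[2];
    _mm_storeu_pd(halves, v);        // both halves hold the loaded value
    printf("%f %f\n", halves[0], halves[1]);  // 1.500000 1.500000
    return 0;
  }

Call sites change mechanically from __ movsd(dst, src) to __ LoadDbl(dst, src);
the macro emits movddup when SSE3 is supported and falls back to movsd
otherwise.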
Please review this at http://codereview.chromium.org/6883159/
SVN Base: https://v8.googlecode.com/svn/branches/bleeding_edge
Affected files:
M src/x64/assembler-x64.h
M src/x64/assembler-x64.cc
M src/x64/code-stubs-x64.cc
M src/x64/codegen-x64.cc
M src/x64/deoptimizer-x64.cc
M src/x64/disasm-x64.cc
M src/x64/lithium-codegen-x64.cc
M src/x64/lithium-gap-resolver-x64.cc
M src/x64/macro-assembler-x64.h
M src/x64/macro-assembler-x64.cc
M src/x64/stub-cache-x64.cc
Index: src/x64/assembler-x64.cc
diff --git a/src/x64/assembler-x64.cc b/src/x64/assembler-x64.cc
index c06bc0c4517581a50811fba07b389bd19e0bf1b0..288c8e2af5c40f9686106485eb3e84a84def4ebf 100644
--- a/src/x64/assembler-x64.cc
+++ b/src/x64/assembler-x64.cc
@@ -2657,6 +2657,17 @@ void Assembler::movapd(XMMRegister dst, XMMRegister src) {
}
+void Assembler::movddup(XMMRegister dst, const Operand& src) {
+ ASSERT(CpuFeatures::IsEnabled(SSE3));
+ EnsureSpace ensure_space(this);
+ emit(0xF2);
+ emit_optional_rex_32(dst, src);
+ emit(0x0F);
+ emit(0x12);
+ emit_sse_operand(dst, src);
+}
+
+
void Assembler::movss(XMMRegister dst, const Operand& src) {
EnsureSpace ensure_space(this);
emit(0xF3); // single
Index: src/x64/assembler-x64.h
diff --git a/src/x64/assembler-x64.h b/src/x64/assembler-x64.h
index 8a9938ba09c84cee769ed182b8fbc3ce59d795f5..a79d75ed7f114652b44ee3b45461f4367a5ce792 100644
--- a/src/x64/assembler-x64.h
+++ b/src/x64/assembler-x64.h
@@ -1312,6 +1312,8 @@ class Assembler : public AssemblerBase {
void movss(XMMRegister dst, const Operand& src);
void movss(const Operand& dst, XMMRegister src);
+ void movddup(XMMRegister dst, const Operand& src);
+
void cvttss2si(Register dst, const Operand& src);
void cvttss2si(Register dst, XMMRegister src);
void cvttsd2si(Register dst, const Operand& src);
Index: src/x64/code-stubs-x64.cc
diff --git a/src/x64/code-stubs-x64.cc b/src/x64/code-stubs-x64.cc
index 76fcc88cbfad2b30df277c6ae50859fce69f8921..fdcfa14649859542c581f2470e96ffb8ccdbb40a 100644
--- a/src/x64/code-stubs-x64.cc
+++ b/src/x64/code-stubs-x64.cc
@@ -1032,7 +1032,7 @@ void TranscendentalCacheStub::Generate(MacroAssembler* masm) {
__ fstp(0); // Clear FPU stack.
__ ret(kPointerSize);
} else { // UNTAGGED.
- __ movsd(xmm1, FieldOperand(rax, HeapNumber::kValueOffset));
+ __ LoadDbl(xmm1, FieldOperand(rax, HeapNumber::kValueOffset));
__ Ret();
}
@@ -1052,7 +1052,7 @@ void TranscendentalCacheStub::Generate(MacroAssembler* masm) {
if (tagged) {
__ ret(kPointerSize);
} else { // UNTAGGED.
- __ movsd(xmm1, FieldOperand(rax, HeapNumber::kValueOffset));
+ __ LoadDbl(xmm1, FieldOperand(rax, HeapNumber::kValueOffset));
__ Ret();
// Skip cache and return answer directly, only in untagged case.
@@ -1062,7 +1062,7 @@ void TranscendentalCacheStub::Generate(MacroAssembler* masm) {
__ fld_d(Operand(rsp, 0));
GenerateOperation(masm);
__ fstp_d(Operand(rsp, 0));
- __ movsd(xmm1, Operand(rsp, 0));
+ __ LoadDbl(xmm1, Operand(rsp, 0));
__ addq(rsp, Immediate(kDoubleSize));
// We return the value in xmm1 without adding it to the cache, but
// we cause a scavenging GC so that future allocations will succeed.
@@ -1090,7 +1090,7 @@ void TranscendentalCacheStub::Generate(MacroAssembler* masm) {
__ push(rax);
__ CallRuntime(RuntimeFunction(), 1);
__ LeaveInternalFrame();
- __ movsd(xmm1, FieldOperand(rax, HeapNumber::kValueOffset));
+ __ LoadDbl(xmm1, FieldOperand(rax, HeapNumber::kValueOffset));
__ Ret();
}
}
@@ -1359,11 +1359,11 @@ void FloatingPointHelper::LoadSSE2NumberOperands(MacroAssembler* masm) {
Label load_smi_rdx, load_nonsmi_rax, load_smi_rax, done;
// Load operand in rdx into xmm0.
__ JumpIfSmi(rdx, &load_smi_rdx);
- __ movsd(xmm0, FieldOperand(rdx, HeapNumber::kValueOffset));
+ __ LoadDbl(xmm0, FieldOperand(rdx, HeapNumber::kValueOffset));
// Load operand in rax into xmm1.
__ JumpIfSmi(rax, &load_smi_rax);
__ bind(&load_nonsmi_rax);
- __ movsd(xmm1, FieldOperand(rax, HeapNumber::kValueOffset));
+ __ LoadDbl(xmm1, FieldOperand(rax, HeapNumber::kValueOffset));
__ jmp(&done);
__ bind(&load_smi_rdx);
@@ -1387,14 +1387,14 @@ void FloatingPointHelper::LoadSSE2UnknownOperands(MacroAssembler* masm,
__ JumpIfSmi(rdx, &load_smi_rdx);
__ cmpq(FieldOperand(rdx, HeapObject::kMapOffset), rcx);
__ j(not_equal, not_numbers); // Argument in rdx is not a number.
- __ movsd(xmm0, FieldOperand(rdx, HeapNumber::kValueOffset));
+ __ LoadDbl(xmm0, FieldOperand(rdx, HeapNumber::kValueOffset));
// Load operand in rax into xmm1, or branch to not_numbers.
__ JumpIfSmi(rax, &load_smi_rax);
__ bind(&load_nonsmi_rax);
__ cmpq(FieldOperand(rax, HeapObject::kMapOffset), rcx);
__ j(not_equal, not_numbers);
- __ movsd(xmm1, FieldOperand(rax, HeapNumber::kValueOffset));
+ __ LoadDbl(xmm1, FieldOperand(rax, HeapNumber::kValueOffset));
__ jmp(&done);
__ bind(&load_smi_rdx);
@@ -1428,7 +1428,7 @@ void FloatingPointHelper::NumbersToSmis(MacroAssembler* masm,
__ cmpq(FieldOperand(first, HeapObject::kMapOffset), heap_number_map);
__ j(not_equal, on_not_smis);
// Convert HeapNumber to smi if possible.
- __ movsd(xmm0, FieldOperand(first, HeapNumber::kValueOffset));
+ __ LoadDbl(xmm0, FieldOperand(first, HeapNumber::kValueOffset));
__ movq(scratch2, xmm0);
__ cvttsd2siq(smi_result, xmm0);
// Check if conversion was successful by converting back and
@@ -1449,7 +1449,7 @@ void FloatingPointHelper::NumbersToSmis(MacroAssembler* masm,
__ cmpq(FieldOperand(second, HeapObject::kMapOffset), heap_number_map);
__ j(not_equal, on_not_smis);
// Convert second to smi, if possible.
- __ movsd(xmm0, FieldOperand(second, HeapNumber::kValueOffset));
+ __ LoadDbl(xmm0, FieldOperand(second, HeapNumber::kValueOffset));
__ movq(scratch2, xmm0);
__ cvttsd2siq(smi_result, xmm0);
__ cvtlsi2sd(xmm1, smi_result);
@@ -1583,7 +1583,7 @@ void MathPowStub::Generate(MacroAssembler* masm) {
Heap::kHeapNumberMapRootIndex);
__ j(not_equal, &call_runtime);
- __ movsd(xmm0, FieldOperand(rdx, HeapNumber::kValueOffset));
+ __ LoadDbl(xmm0, FieldOperand(rdx, HeapNumber::kValueOffset));
// Optimized version of pow if exponent is a smi.
// xmm0 contains the base.
@@ -1633,7 +1633,7 @@ void MathPowStub::Generate(MacroAssembler* masm) {
__ CompareRoot(FieldOperand(rax, HeapObject::kMapOffset),
Heap::kHeapNumberMapRootIndex);
__ j(not_equal, &call_runtime);
- __ movsd(xmm1, FieldOperand(rax, HeapNumber::kValueOffset));
+ __ LoadDbl(xmm1, FieldOperand(rax, HeapNumber::kValueOffset));
// Test if exponent is nan.
__ ucomisd(xmm1, xmm1);
__ j(parity_even, &call_runtime);
@@ -1654,7 +1654,7 @@ void MathPowStub::Generate(MacroAssembler* masm) {
__ cmpl(rcx, Immediate(HeapNumber::kExponentMask));
// base is NaN or +/-Infinity
__ j(greater_equal, &call_runtime);
- __ movsd(xmm0, FieldOperand(rdx, HeapNumber::kValueOffset));
+ __ LoadDbl(xmm0, FieldOperand(rdx, HeapNumber::kValueOffset));
// base is in xmm0 and exponent is in xmm1.
__ bind(&handle_special_cases);
@@ -2381,8 +2381,8 @@ void NumberToStringStub::GenerateLookupNumberStringCache(MacroAssembler* masm,
times_1,
FixedArray::kHeaderSize));
__ JumpIfSmi(probe, not_found);
- __ movsd(xmm0, FieldOperand(object, HeapNumber::kValueOffset));
- __ movsd(xmm1, FieldOperand(probe, HeapNumber::kValueOffset));
+ __ LoadDbl(xmm0, FieldOperand(object, HeapNumber::kValueOffset));
+ __ LoadDbl(xmm1, FieldOperand(probe, HeapNumber::kValueOffset));
__ ucomisd(xmm0, xmm1);
__ j(parity_even, not_found); // Bail out if NaN is involved.
__ j(not_equal, not_found); // The cache did not contain this value.
@@ -2524,7 +2524,7 @@ void CompareStub::Generate(MacroAssembler* masm) {
// greater-equal. Return -1 for them, so the comparison yields
// false for all conditions except not-equal.
__ Set(rax, EQUAL);
- __ movsd(xmm0, FieldOperand(rdx, HeapNumber::kValueOffset));
+ __ LoadDbl(xmm0, FieldOperand(rdx, HeapNumber::kValueOffset));
__ ucomisd(xmm0, xmm0);
__ setcc(parity_even, rax);
// rax is 0 for equal non-NaN heapnumbers, 1 for NaNs.
@@ -4467,8 +4467,8 @@ void ICCompareStub::GenerateHeapNumbers(MacroAssembler* masm) {
__ j(not_equal, &miss);
// Load left and right operand
- __ movsd(xmm0, FieldOperand(rdx, HeapNumber::kValueOffset));
- __ movsd(xmm1, FieldOperand(rax, HeapNumber::kValueOffset));
+ __ LoadDbl(xmm0, FieldOperand(rdx, HeapNumber::kValueOffset));
+ __ LoadDbl(xmm1, FieldOperand(rax, HeapNumber::kValueOffset));
// Compare operands
__ ucomisd(xmm0, xmm1);
Index: src/x64/codegen-x64.cc
diff --git a/src/x64/codegen-x64.cc b/src/x64/codegen-x64.cc
index f8f2d6e687896f4150e9a3068dbb280693cce6c9..4af3d7bd9f985b984e7fe89890c61a968506d6f9 100644
--- a/src/x64/codegen-x64.cc
+++ b/src/x64/codegen-x64.cc
@@ -110,13 +110,13 @@ ModuloFunction CreateModuloFunction() {
int64_t kNaNValue = V8_INT64_C(0x7ff8000000000000);
__ movq(rcx, kNaNValue, RelocInfo::NONE);
__ movq(Operand(rsp, kPointerSize), rcx);
- __ movsd(xmm0, Operand(rsp, kPointerSize));
+ __ LoadDbl(xmm0, Operand(rsp, kPointerSize));
__ jmp(&return_result);
// If result is valid, return that.
__ bind(&valid_result);
__ fstp_d(Operand(rsp, kPointerSize));
- __ movsd(xmm0, Operand(rsp, kPointerSize));
+ __ LoadDbl(xmm0, Operand(rsp, kPointerSize));
// Clean up FPU stack and exceptions and return xmm0
__ bind(&return_result);
Index: src/x64/deoptimizer-x64.cc
diff --git a/src/x64/deoptimizer-x64.cc b/src/x64/deoptimizer-x64.cc
index abac2b6b38c8979cd7dccdc31a45c4eca187ccae..20650065768e7e92f891939ea9f3064a83f8d23c 100644
--- a/src/x64/deoptimizer-x64.cc
+++ b/src/x64/deoptimizer-x64.cc
@@ -756,7 +756,7 @@ void Deoptimizer::EntryGenerator::Generate() {
for (int i = 0; i < XMMRegister::kNumAllocatableRegisters; ++i) {
XMMRegister xmm_reg = XMMRegister::FromAllocationIndex(i);
int src_offset = i * kDoubleSize + double_regs_offset;
- __ movsd(xmm_reg, Operand(rbx, src_offset));
+ __ LoadDbl(xmm_reg, Operand(rbx, src_offset));
}
}
Index: src/x64/disasm-x64.cc
diff --git a/src/x64/disasm-x64.cc b/src/x64/disasm-x64.cc
index 82bc6ef93b932933c0219a5ae5aab277dc4210f8..876950ae7cb1ced9e9bf08add28a43c1ca964220 100644
--- a/src/x64/disasm-x64.cc
+++ b/src/x64/disasm-x64.cc
@@ -1098,6 +1098,12 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
AppendToBuffer("%s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
}
+ } else if (opcode == 0x12) {
+ // movddup xmm1, xmm2/m64
+ int mod, regop, rm;
+ get_modrm(*current, &mod, &regop, &rm);
+ AppendToBuffer("movddup %s,", NameOfXMMRegister(regop));
+ current += PrintRightXMMOperand(current);
} else if (opcode == 0x2A) {
// CVTSI2SD: integer to XMM double conversion.
int mod, regop, rm;
Index: src/x64/lithium-codegen-x64.cc
diff --git a/src/x64/lithium-codegen-x64.cc b/src/x64/lithium-codegen-x64.cc
index 688a557ee8710819abb682b569a939f45819b4cc..4f3eed3b7b8c6b584228530d067659b6b058c1d9 100644
--- a/src/x64/lithium-codegen-x64.cc
+++ b/src/x64/lithium-codegen-x64.cc
@@ -2833,7 +2833,7 @@ void LCodeGen::DoPower(LPower* instr) {
__ bind(&non_smi);
__ CmpObjectType(right_reg, HEAP_NUMBER_TYPE , kScratchRegister);
DeoptimizeIf(not_equal, instr->environment());
- __ movsd(xmm1, FieldOperand(right_reg, HeapNumber::kValueOffset));
+ __ LoadDbl(xmm1, FieldOperand(right_reg, HeapNumber::kValueOffset));
__ bind(&call);
__ PrepareCallCFunction(2);
@@ -3436,7 +3436,7 @@ void LCodeGen::EmitNumberUntagD(Register input_reg,
// Heap number to XMM conversion.
__ bind(&heap_number);
- __ movsd(result_reg, FieldOperand(input_reg, HeapNumber::kValueOffset));
+ __ LoadDbl(result_reg, FieldOperand(input_reg, HeapNumber::kValueOffset));
__ jmp(&done);
// Smi to XMM conversion
@@ -3476,7 +3476,7 @@ void LCodeGen::DoDeferredTaggedToI(LTaggedToI* instr) {
__ bind(&heap_number);
- __ movsd(xmm0, FieldOperand(input_reg, HeapNumber::kValueOffset));
+ __ LoadDbl(xmm0, FieldOperand(input_reg, HeapNumber::kValueOffset));
__ cvttsd2siq(input_reg, xmm0);
__ Set(kScratchRegister, V8_UINT64_C(0x8000000000000000));
__ cmpq(input_reg, kScratchRegister);
@@ -3486,7 +3486,7 @@ void LCodeGen::DoDeferredTaggedToI(LTaggedToI* instr) {
DeoptimizeIf(not_equal, instr->environment());
XMMRegister xmm_temp = ToDoubleRegister(instr->TempAt(0));
- __ movsd(xmm0, FieldOperand(input_reg, HeapNumber::kValueOffset));
+ __ LoadDbl(xmm0, FieldOperand(input_reg, HeapNumber::kValueOffset));
__ cvttsd2si(input_reg, xmm0);
__ cvtlsi2sd(xmm_temp, input_reg);
__ ucomisd(xmm0, xmm_temp);
Index: src/x64/lithium-gap-resolver-x64.cc
diff --git a/src/x64/lithium-gap-resolver-x64.cc b/src/x64/lithium-gap-resolver-x64.cc
index c3c617c456d03c40a6c1dcbc627116aae78115f1..c6d317ffe86b26d6e4fcb4d8337cb8dbd89a85b2 100644
--- a/src/x64/lithium-gap-resolver-x64.cc
+++ b/src/x64/lithium-gap-resolver-x64.cc
@@ -222,10 +222,10 @@ void LGapResolver::EmitMove(int index) {
} else if (source->IsDoubleStackSlot()) {
Operand src = cgen_->ToOperand(source);
if (destination->IsDoubleRegister()) {
- __ movsd(cgen_->ToDoubleRegister(destination), src);
+ __ LoadDbl(cgen_->ToDoubleRegister(destination), src);
} else {
ASSERT(destination->IsDoubleStackSlot());
- __ movsd(xmm0, src);
+ __ LoadDbl(xmm0, src);
__ movsd(cgen_->ToOperand(destination), xmm0);
}
} else {
@@ -264,7 +264,7 @@ void LGapResolver::EmitSwap(int index) {
// Swap two stack slots or two double stack slots.
Operand src = cgen_->ToOperand(source);
Operand dst = cgen_->ToOperand(destination);
- __ movsd(xmm0, src);
+ __ LoadDbl(xmm0, src);
__ movq(kScratchRegister, dst);
__ movsd(dst, xmm0);
__ movq(src, kScratchRegister);
@@ -287,7 +287,7 @@ void LGapResolver::EmitSwap(int index) {
LOperand* other = source->IsDoubleRegister() ? destination : source;
ASSERT(other->IsDoubleStackSlot());
Operand other_operand = cgen_->ToOperand(other);
- __ movsd(xmm0, other_operand);
+ __ LoadDbl(xmm0, other_operand);
__ movsd(other_operand, reg);
__ movsd(reg, xmm0);
Index: src/x64/macro-assembler-x64.cc
diff --git a/src/x64/macro-assembler-x64.cc b/src/x64/macro-assembler-x64.cc
index 339420679184b15ead2d8a4921d07368eeb86106..26c6e3324b20102565245cbb7f5bc50fd38c0bee 100644
--- a/src/x64/macro-assembler-x64.cc
+++ b/src/x64/macro-assembler-x64.cc
@@ -2260,7 +2260,7 @@ void MacroAssembler::LeaveExitFrame(bool save_doubles) {
int offset = -2 * kPointerSize;
for (int i = 0; i < XMMRegister::kNumAllocatableRegisters; i++) {
XMMRegister reg = XMMRegister::FromAllocationIndex(i);
- movsd(reg, Operand(rbp, offset - ((i + 1) * kDoubleSize)));
+ LoadDbl(reg, Operand(rbp, offset - ((i + 1) * kDoubleSize)));
}
}
// Get the return address from the stack and restore the frame pointer.
@@ -2894,6 +2894,16 @@ void MacroAssembler::CallCFunction(Register function, int num_arguments) {
}
+void MacroAssembler::LoadDbl(XMMRegister dst, const Operand& src) {
+ if (CpuFeatures::IsSupported(SSE3)) {
+ CpuFeatures::Scope enable(SSE3);
+ movddup(dst, src);
+ } else {
+ movsd(dst, src);
+ }
+}
+
+
CodePatcher::CodePatcher(byte* address, int size)
: address_(address),
size_(size),
Index: src/x64/macro-assembler-x64.h
diff --git a/src/x64/macro-assembler-x64.h b/src/x64/macro-assembler-x64.h
index 4c177205b6be4f143933f751678641b9dbc14f52..7f8846af652d87825758be7c63699f712f634fb0 100644
--- a/src/x64/macro-assembler-x64.h
+++ b/src/x64/macro-assembler-x64.h
@@ -928,6 +928,11 @@ class MacroAssembler: public Assembler {
// function and map can be the same.
void LoadGlobalFunctionInitialMap(Register function, Register map);
+ // Loads a double value from memory to the low half of an XMMRegister.
+ // May or may not change the top half of the destination register,
+ // depending on the opcode used.
+ void LoadDbl(XMMRegister dst, const Operand& src);
+
// ---------------------------------------------------------------------------
// Runtime calls
Index: src/x64/stub-cache-x64.cc
diff --git a/src/x64/stub-cache-x64.cc b/src/x64/stub-cache-x64.cc
index ef6f7583882016c5133b4706b3cf4047aab6bf79..35f659b7bf069d54b5f59d95f563351f77037f6a 100644
--- a/src/x64/stub-cache-x64.cc
+++ b/src/x64/stub-cache-x64.cc
@@ -3391,7 +3391,7 @@ MaybeObject* ExternalArrayStubCompiler::CompileKeyedStoreStub(
// The WebGL specification leaves the behavior of storing NaN and
// +/-Infinity into integer arrays basically undefined. For more
// reproducible behavior, convert these to zero.
- __ movsd(xmm0, FieldOperand(rax, HeapNumber::kValueOffset));
+ __ LoadDbl(xmm0, FieldOperand(rax, HeapNumber::kValueOffset));
__ movq(rbx, FieldOperand(rbx, ExternalArray::kExternalPointerOffset));
// rdi: untagged index
// rbx: base pointer of external storage
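With the disasm-x64.cc hunk above, the new instruction disassembles in the
same style as the other XMM loads; a hypothetical sample line (operand text
per PrintRightXMMOperand, address chosen for illustration):

  movddup xmm1,[rax+0x7]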