Revision: 4934
Author: [email protected]
Date: Thu Jun 24 02:03:49 2010
Log: X64: Remove more fpu code. Unroll more local initialization loops.
Review URL: http://codereview.chromium.org/2815028
http://code.google.com/p/v8/source/detail?r=4934
Modified:
/branches/bleeding_edge/src/x64/assembler-x64.cc
/branches/bleeding_edge/src/x64/assembler-x64.h
/branches/bleeding_edge/src/x64/codegen-x64.cc
/branches/bleeding_edge/src/x64/ic-x64.cc
/branches/bleeding_edge/src/x64/virtual-frame-x64.cc
/branches/bleeding_edge/src/x64/virtual-frame-x64.h
=======================================
--- /branches/bleeding_edge/src/x64/assembler-x64.cc Wed Jun 23 07:05:18 2010
+++ /branches/bleeding_edge/src/x64/assembler-x64.cc Thu Jun 24 02:03:49 2010
@@ -2736,6 +2736,28 @@
emit(0x5A);
emit_sse_operand(dst, src);
}
+
+
+void Assembler::cvtsd2si(Register dst, XMMRegister src) {
+ EnsureSpace ensure_space(this);
+ last_pc_ = pc_;
+ emit(0xF2);
+ emit_optional_rex_32(dst, src);
+ emit(0x0F);
+ emit(0x2D);
+ emit_sse_operand(dst, src);
+}
+
+
+void Assembler::cvtsd2siq(Register dst, XMMRegister src) {
+ EnsureSpace ensure_space(this);
+ last_pc_ = pc_;
+ emit(0xF2);
+ emit_rex_64(dst, src);
+ emit(0x0F);
+ emit(0x2D);
+ emit_sse_operand(dst, src);
+}
void Assembler::addsd(XMMRegister dst, XMMRegister src) {
=======================================
--- /branches/bleeding_edge/src/x64/assembler-x64.h Wed Jun 23 07:05:18 2010
+++ /branches/bleeding_edge/src/x64/assembler-x64.h Thu Jun 24 02:03:49 2010
@@ -1128,6 +1128,9 @@
void cvtss2sd(XMMRegister dst, const Operand& src);
void cvtsd2ss(XMMRegister dst, XMMRegister src);
+ void cvtsd2si(Register dst, XMMRegister src);
+ void cvtsd2siq(Register dst, XMMRegister src);
+
void addsd(XMMRegister dst, XMMRegister src);
void subsd(XMMRegister dst, XMMRegister src);
void mulsd(XMMRegister dst, XMMRegister src);
=======================================
--- /branches/bleeding_edge/src/x64/codegen-x64.cc Wed Jun 23 07:05:18 2010
+++ /branches/bleeding_edge/src/x64/codegen-x64.cc Thu Jun 24 02:03:49 2010
@@ -2641,7 +2641,7 @@
// Generate code to set the elements in the array that are not
// literals.
- for (int i = 0; i < node->values()->length(); i++) {
+ for (int i = 0; i < length; i++) {
Expression* value = node->values()->at(i);
// If value is a literal the property value is already set in the
=======================================
--- /branches/bleeding_edge/src/x64/ic-x64.cc Wed Jun 23 07:05:18 2010
+++ /branches/bleeding_edge/src/x64/ic-x64.cc Thu Jun 24 02:03:49 2010
@@ -791,7 +791,6 @@
// Allocate a HeapNumber for the int and perform int-to-double
// conversion.
- ASSERT(array_type == kExternalUnsignedIntArray);
// The value is zero-extended since we loaded the value from memory
// with movl.
__ cvtqsi2sd(xmm0, rcx);
@@ -1121,55 +1120,41 @@
// The WebGL specification leaves the behavior of storing NaN and
// +/-Infinity into integer arrays basically undefined. For more
// reproducible behavior, convert these to zero.
- __ fld_d(FieldOperand(rax, HeapNumber::kValueOffset));
+ __ movsd(xmm0, FieldOperand(rax, HeapNumber::kValueOffset));
__ movq(rbx, FieldOperand(rbx, ExternalArray::kExternalPointerOffset));
// rdi: untagged index
// rbx: base pointer of external storage
// top of FPU stack: value
if (array_type == kExternalFloatArray) {
- __ fstp_s(Operand(rbx, rdi, times_4, 0));
+ __ cvtsd2ss(xmm0, xmm0);
+ __ movss(Operand(rbx, rdi, times_4, 0), xmm0);
__ ret(0);
} else {
// Need to perform float-to-int conversion.
- // Test the top of the FP stack for NaN.
- Label is_nan;
- __ fucomi(0);
- __ j(parity_even, &is_nan);
-
- __ push(rdx); // Make room on the stack. Receiver is no longer needed.
- // TODO(lrn): If the rounding of this conversion is not deliberate, maybe
- // switch to xmm registers.
- __ fistp_d(Operand(rsp, 0));
- __ pop(rdx);
+ // Test the value for NaN.
+
+ // Convert to int32 and store the low byte/word.
+ // If the value is NaN or +/-infinity, the result is 0x80000000,
+ // which is automatically zero when taken mod 2^n, n < 32.
// rdx: value (converted to an untagged integer)
// rdi: untagged index
// rbx: base pointer of external storage
switch (array_type) {
case kExternalByteArray:
case kExternalUnsignedByteArray:
+ __ cvtsd2si(rdx, xmm0);
__ movb(Operand(rbx, rdi, times_1, 0), rdx);
break;
case kExternalShortArray:
case kExternalUnsignedShortArray:
+ __ cvtsd2si(rdx, xmm0);
__ movw(Operand(rbx, rdi, times_2, 0), rdx);
break;
case kExternalIntArray:
case kExternalUnsignedIntArray: {
- // We also need to explicitly check for +/-Infinity. These are
- // converted to MIN_INT, but we need to be careful not to
- // confuse with legal uses of MIN_INT. Since MIN_INT truncated
- // to 8 or 16 bits is zero, we only perform this test when storing
- // 32-bit ints.
- Label not_infinity;
- // This test would apparently detect both NaN and Infinity,
- // but we've already checked for NaN using the FPU hardware
- // above.
- __ movzxwq(rcx, FieldOperand(rax, HeapNumber::kValueOffset + 6));
- __ and_(rcx, Immediate(0x7FF0));
- __ cmpw(rcx, Immediate(0x7FF0));
- __ j(not_equal, &not_infinity);
- __ movq(rdx, Immediate(0));
- __ bind(&not_infinity);
+ // Convert to int64, so that NaN and infinities become
+ // 0x8000000000000000, which is zero mod 2^32.
+ __ cvtsd2siq(rdx, xmm0);
__ movl(Operand(rbx, rdi, times_4, 0), rdx);
break;
}
@@ -1177,31 +1162,6 @@
UNREACHABLE();
break;
}
- __ ret(0);
-
- __ bind(&is_nan);
- // rdi: untagged index
- // rbx: base pointer of external storage
- __ ffree();
- __ fincstp();
- __ Set(rdx, 0);
- switch (array_type) {
- case kExternalByteArray:
- case kExternalUnsignedByteArray:
- __ movb(Operand(rbx, rdi, times_1, 0), rdx);
- break;
- case kExternalShortArray:
- case kExternalUnsignedShortArray:
- __ movw(Operand(rbx, rdi, times_2, 0), rdx);
- break;
- case kExternalIntArray:
- case kExternalUnsignedIntArray:
- __ movl(Operand(rbx, rdi, times_4, 0), rdx);
- break;
- default:
- UNREACHABLE();
- break;
- }
__ ret(0);
}
=======================================
--- /branches/bleeding_edge/src/x64/virtual-frame-x64.cc Tue Jun 22 03:07:57 2010
+++ /branches/bleeding_edge/src/x64/virtual-frame-x64.cc Thu Jun 24 02:03:49 2010
@@ -115,25 +115,45 @@
Handle<Object> undefined = Factory::undefined_value();
FrameElement initial_value =
FrameElement::ConstantElement(undefined, FrameElement::SYNCED);
- if (count == 1) {
- __ Push(undefined);
- } else if (count < kLocalVarBound) {
- // For less locals the unrolled loop is more compact.
- __ movq(kScratchRegister, undefined, RelocInfo::EMBEDDED_OBJECT);
+ if (count < kLocalVarBound) {
+ // For fewer locals the unrolled loop is more compact.
+
+ // Hope for one of the first eight registers, where the push operation
+ // takes only one byte (kScratchRegister needs the REX.W bit).
+ Result tmp = cgen()->allocator()->Allocate();
+ ASSERT(tmp.is_valid());
+ __ movq(tmp.reg(), undefined, RelocInfo::EMBEDDED_OBJECT);
for (int i = 0; i < count; i++) {
- __ push(kScratchRegister);
+ __ push(tmp.reg());
}
} else {
// For more locals a loop in generated code is more compact.
Label alloc_locals_loop;
Result cnt = cgen()->allocator()->Allocate();
ASSERT(cnt.is_valid());
- __ movq(cnt.reg(), Immediate(count));
__ movq(kScratchRegister, undefined, RelocInfo::EMBEDDED_OBJECT);
+#ifdef DEBUG
+ Label loop_size;
+ __ bind(&loop_size);
+#endif
+ if (is_uint8(count)) {
+ // Loading imm8 is shorter than loading imm32.
+ // Loading only partial byte register, and using decb below.
+ __ movb(cnt.reg(), Immediate(count));
+ } else {
+ __ movl(cnt.reg(), Immediate(count));
+ }
__ bind(&alloc_locals_loop);
__ push(kScratchRegister);
- __ decl(cnt.reg());
+ if (is_uint8(count)) {
+ __ decb(cnt.reg());
+ } else {
+ __ decl(cnt.reg());
+ }
__ j(not_zero, &alloc_locals_loop);
+#ifdef DEBUG
+ CHECK(masm()->SizeOfCodeGeneratedSince(&loop_size) < kLocalVarBound);
+#endif
}
for (int i = 0; i < count; i++) {
elements_.Add(initial_value);
=======================================
--- /branches/bleeding_edge/src/x64/virtual-frame-x64.h Wed Jun 16 03:03:47
2010
+++ /branches/bleeding_edge/src/x64/virtual-frame-x64.h Thu Jun 24 02:03:49 2010
@@ -200,7 +200,7 @@
inline void PrepareForReturn();
// Number of local variables after when we use a loop for allocating.
- static const int kLocalVarBound = 7;
+ static const int kLocalVarBound = 14;
// Allocate and initialize the frame-allocated locals.
void AllocateStackSlots();
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev