Reviewers: ulan, jochen (OOO Wed-Thu)

Message:
This speeds up Kraken by about 2.3-2.7%. Most of that improvement comes from
the desaturate and fft benchmarks.

Description:
ARM64: Use default-NaN mode to canonicalize NaNs.

BUG=

Please review this at https://codereview.chromium.org/255343004/

SVN Base: https://v8.googlecode.com/svn/branches/bleeding_edge
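
A note on the mechanism: the CL relies on the AArch64 FPCR default-NaN (DN) bit.
With DN set, any floating-point operation whose result is a NaN returns the
single default NaN, so one Fsub(value, 0.0) is enough to canonicalize every NaN
bit pattern. Below is a minimal host-side sketch of that behaviour (not V8 code;
the DN replacement is emulated explicitly, and kArm64DefaultNaN and
CanonicalizedBits are illustrative names):

#include <cassert>
#include <cstdint>
#include <cstring>

static const uint64_t kArm64DefaultNaN = 0x7FF8000000000000ULL;

// Emulates "value - 0.0" executed with FPCR.DN set: a NaN result is replaced
// by the default NaN, everything else comes back unchanged.
static uint64_t CanonicalizedBits(double value) {
  double result = value - 0.0;
  if (result != result) {  // NaN check without <cmath>.
    return kArm64DefaultNaN;
  }
  uint64_t bits;
  std::memcpy(&bits, &result, sizeof(bits));
  return bits;
}

int main() {
  const uint64_t kPayloadNaN = 0x7FF800000000BEEFULL;  // a non-canonical NaN
  double payload_nan;
  std::memcpy(&payload_nan, &kPayloadNaN, sizeof(payload_nan));
  assert(CanonicalizedBits(payload_nan) == kArm64DefaultNaN);
  return 0;
}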

Affected files (+76, -14 lines):
  M src/arm64/code-stubs-arm64.cc
  M src/arm64/ic-arm64.cc
  M src/arm64/lithium-arm64.h
  M src/arm64/lithium-codegen-arm64.cc
  M src/arm64/macro-assembler-arm64.h
  M src/arm64/macro-assembler-arm64.cc
  M src/objects.h


Index: src/arm64/code-stubs-arm64.cc
diff --git a/src/arm64/code-stubs-arm64.cc b/src/arm64/code-stubs-arm64.cc
index 884be5800eeddc32b014eae4b21f249457578282..5bffd19c1a26c3c27a3c5adcb44e807e9d4781bb 100644
--- a/src/arm64/code-stubs-arm64.cc
+++ b/src/arm64/code-stubs-arm64.cc
@@ -1577,6 +1577,7 @@ void CEntryStub::Generate(MacroAssembler* masm) {
   //         jssp[8]:     Preserved x22 (used for argc).
   //         jssp[0]:     Preserved x21 (used for argv).
   __ Drop(x11);
+  __ AssertFPCRState();
   __ Ret();

   // The stack pointer is still csp if we aren't returning, and the frame
@@ -1660,6 +1661,11 @@ void JSEntryStub::GenerateBody(MacroAssembler* masm, bool is_construct) {
   __ Mov(jssp, csp);
   __ SetStackPointer(jssp);

+  // Configure the FPCR. We don't restore it, so this is technically not allowed
+  // according to AAPCS64. However, we only set default-NaN mode and this will
+  // be harmless for most C code. Also, it works for ARM.
+  __ ConfigureFPCR();
+
   ProfileEntryHookStub::MaybeCallEntryHook(masm);

   // Set up the reserved register for 0.0.
@@ -4519,7 +4525,7 @@ void StoreArrayLiteralElementStub::Generate(MacroAssembler* masm) {

   __ Bind(&double_elements);
   __ Ldr(x10, FieldMemOperand(array, JSObject::kElementsOffset));
-  __ StoreNumberToDoubleElements(value, index_smi, x10, x11, d0, d1,
+  __ StoreNumberToDoubleElements(value, index_smi, x10, x11, d0,
                                  &slow_elements);
   __ Ret();
 }
@@ -4621,6 +4627,7 @@ void DirectCEntryStub::Generate(MacroAssembler* masm) {
   __ Blr(x10);
   // Return to calling code.
   __ Peek(lr, 0);
+  __ AssertFPCRState();
   __ Ret();

   __ SetStackPointer(old_stack_pointer);
Index: src/arm64/ic-arm64.cc
diff --git a/src/arm64/ic-arm64.cc b/src/arm64/ic-arm64.cc
index a0a1f03113ba6d60ca82f2069e80a7e3344e53ab..c09b847ba5d469f4cf40177e29c4d605fb0b916a 100644
--- a/src/arm64/ic-arm64.cc
+++ b/src/arm64/ic-arm64.cc
@@ -1021,7 +1021,6 @@ static void KeyedStoreGenerateGenericHelper(
                                  elements,
                                  x10,
                                  d0,
-                                 d1,
                                  &transition_double_elements);
   if (increment_length == kIncrementLength) {
     // Add 1 to receiver->length.
Index: src/arm64/lithium-arm64.h
diff --git a/src/arm64/lithium-arm64.h b/src/arm64/lithium-arm64.h
index b036446746ce391c460b3ca08a081422e44492b7..da91bca7bd060291f969494d2dcba4550f3a37fa 100644
--- a/src/arm64/lithium-arm64.h
+++ b/src/arm64/lithium-arm64.h
@@ -2361,6 +2361,10 @@ class LStoreKeyed : public LTemplateInstruction<0, 3, T> {
   }

   bool NeedsCanonicalization() {
+    if (hydrogen()->value()->IsAdd() || hydrogen()->value()->IsSub() ||
+        hydrogen()->value()->IsMul() || hydrogen()->value()->IsDiv()) {
+      return false;
+    }
     return this->hydrogen()->NeedsCanonicalization();
   }
   uint32_t additional_index() const { return this->hydrogen()->index_offset(); }
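
An aside on the NeedsCanonicalization() change above: with FPCR.DN set, the
results of Fadd/Fsub/Fmul/Fdiv can only ever be the default NaN, so doubles
produced by those hydrogen instructions are already canonical and the extra
pass can be skipped. Values that merely travel through memory keep whatever NaN
payload they had, which is why everything else still defers to
hydrogen()->NeedsCanonicalization(). A tiny host-side sketch, not V8 code:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // A plain load/store round trip performs no FP arithmetic, so an arbitrary
  // (non-canonical) NaN pattern survives untouched and would still need
  // CanonicalizeNaN before being stored into a FixedDoubleArray.
  const uint64_t kOddNaN = 0x7FF0000000000001ULL;
  double loaded;
  std::memcpy(&loaded, &kOddNaN, sizeof(loaded));
  uint64_t round_trip;
  std::memcpy(&round_trip, &loaded, sizeof(round_trip));
  assert(round_trip == kOddNaN);
  return 0;
}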
Index: src/arm64/lithium-codegen-arm64.cc
diff --git a/src/arm64/lithium-codegen-arm64.cc b/src/arm64/lithium-codegen-arm64.cc
index 3ecd2403ba3b6aa0848006d90439a11b746e0bc3..10e0f080a4a6570f6ac897b798c777aab1b23934 100644
--- a/src/arm64/lithium-codegen-arm64.cc
+++ b/src/arm64/lithium-codegen-arm64.cc
@@ -5100,11 +5100,8 @@ void LCodeGen::DoStoreKeyedFixedDouble(LStoreKeyedFixedDouble* instr) {
   }

   if (instr->NeedsCanonicalization()) {
-    DoubleRegister dbl_scratch = double_scratch();
-    __ Fmov(dbl_scratch,
-            FixedDoubleArray::canonical_not_the_hole_nan_as_double());
-    __ Fmaxnm(dbl_scratch, dbl_scratch, value);
-    __ Str(dbl_scratch, FieldMemOperand(store_base, offset));
+    __ CanonicalizeNaN(double_scratch(), value);
+    __ Str(double_scratch(), FieldMemOperand(store_base, offset));
   } else {
     __ Str(value, FieldMemOperand(store_base, offset));
   }
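
An aside on why CanonicalizeNaN can be a single Fsub(dst, src, fp_zero): under
round-to-nearest with flush-to-zero disabled (both asserted by AssertFPCRState),
"x - 0.0" returns x bit-for-bit for every non-NaN x, including -0.0, infinities
and denormals, while any NaN input becomes the default NaN thanks to the DN bit.
A host-side check of the non-NaN identity (not V8 code; Bits is an illustrative
helper):

#include <cassert>
#include <cstdint>
#include <cstring>
#include <limits>

static uint64_t Bits(double d) {
  uint64_t b;
  std::memcpy(&b, &d, sizeof(b));
  return b;
}

int main() {
  const double samples[] = {
      -0.0, 0.0, 1.5, -1e308,
      std::numeric_limits<double>::infinity(),
      -std::numeric_limits<double>::infinity(),
      std::numeric_limits<double>::denorm_min(),
  };
  for (double x : samples) {
    assert(Bits(x - 0.0) == Bits(x));  // the subtraction is a bitwise no-op
  }
  return 0;
}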
Index: src/arm64/macro-assembler-arm64.cc
diff --git a/src/arm64/macro-assembler-arm64.cc b/src/arm64/macro-assembler-arm64.cc
index 3f84321a5e7057781a478d76f11e1c8f3e76dfd9..4d277b72c966ad3e95efd279e7825ee4bcb7bd41 100644
--- a/src/arm64/macro-assembler-arm64.cc
+++ b/src/arm64/macro-assembler-arm64.cc
@@ -1222,6 +1222,58 @@ void MacroAssembler::AssertStackConsistency() {
 }


+void MacroAssembler::AssertFPCRState(Register fpcr) {
+  if (emit_debug_code()) {
+    Label unexpected_mode, done;
+    UseScratchRegisterScope temps(this);
+    if (fpcr.IsNone()) {
+      fpcr = temps.AcquireX();
+      Mrs(fpcr, FPCR);
+    }
+
+    // Assert that default-NaN mode is set.
+    Tbz(fpcr, DN_offset, &unexpected_mode);
+
+    // Assert that flush-to-zero is not set.
+    Tbnz(fpcr, FZ_offset, &unexpected_mode);
+
+    // Assert that the rounding mode is nearest-with-ties-to-even.
+    STATIC_ASSERT(FPTieEven == 0);
+    Tst(fpcr, RMode_mask);
+    B(eq, &done);
+
+    Bind(&unexpected_mode);
+    Abort(kUnexpectedFPCRMode);
+
+    Bind(&done);
+  }
+}
+
+
+void MacroAssembler::ConfigureFPCR() {
+  UseScratchRegisterScope temps(this);
+  Register fpcr = temps.AcquireX();
+  Mrs(fpcr, FPCR);
+
+  // If necessary, enable default-NaN mode. The default values of the other FPCR
+  // options should be suitable.
+  Label no_write_required;
+  Tbnz(fpcr, DN_offset, &no_write_required);
+
+  Orr(fpcr, fpcr, DN_mask);
+  Msr(FPCR, fpcr);
+
+  Bind(&no_write_required);
+  AssertFPCRState(fpcr);
+}
+
+
+void MacroAssembler::CanonicalizeNaN(const FPRegister& dst,
+                                     const FPRegister& src) {
+  Fsub(dst, src, fp_zero);
+}
+
+
 void MacroAssembler::LoadRoot(CPURegister destination,
                               Heap::RootListIndex index) {
   // TODO(jbramley): Most root values are constants, and can be synthesized
@@ -3888,7 +3940,6 @@ void MacroAssembler::StoreNumberToDoubleElements(Register value_reg,
                                                  Register elements_reg,
                                                  Register scratch1,
                                                  FPRegister fpscratch1,
-                                                 FPRegister fpscratch2,
                                                  Label* fail,
                                                  int elements_offset) {
   ASSERT(!AreAliased(value_reg, key_reg, elements_reg, scratch1));
@@ -3906,12 +3957,9 @@ void MacroAssembler::StoreNumberToDoubleElements(Register value_reg,
            fail, DONT_DO_SMI_CHECK);

   Ldr(fpscratch1, FieldMemOperand(value_reg, HeapNumber::kValueOffset));
-  Fmov(fpscratch2, FixedDoubleArray::canonical_not_the_hole_nan_as_double());

-  // Check for NaN by comparing the number to itself: NaN comparison will
-  // report unordered, indicated by the overflow flag being set.
-  Fcmp(fpscratch1, fpscratch1);
-  Fcsel(fpscratch1, fpscratch2, fpscratch1, vs);
+  // Canonicalize NaNs.
+  CanonicalizeNaN(fpscratch1);

   // Store the result.
   Bind(&store_num);
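
For reference, the FPCR fields that AssertFPCRState and ConfigureFPCR deal with
sit at fixed AArch64 bit positions: DN is bit 25, FZ is bit 24, and RMode
occupies bits 23:22, with 0 meaning round-to-nearest, ties-to-even (FPTieEven).
A small host-side decoder sketch (not V8 code; FpcrState and DecodeFpcr are
illustrative names):

#include <cstdint>
#include <cstdio>

struct FpcrState {
  bool default_nan;        // DN: NaN results are replaced by the default NaN.
  bool flush_to_zero;      // FZ: denormal results are flushed to zero.
  unsigned rounding_mode;  // RMode: 0 = round-to-nearest, ties-to-even.
};

static FpcrState DecodeFpcr(uint64_t fpcr) {
  FpcrState state;
  state.default_nan = ((fpcr >> 25) & 1) != 0;
  state.flush_to_zero = ((fpcr >> 24) & 1) != 0;
  state.rounding_mode = static_cast<unsigned>((fpcr >> 22) & 3);
  return state;
}

int main() {
  // The state ConfigureFPCR establishes and AssertFPCRState later re-checks.
  FpcrState s = DecodeFpcr(uint64_t{1} << 25);
  std::printf("DN=%d FZ=%d RMode=%u\n", s.default_nan, s.flush_to_zero,
              s.rounding_mode);  // expected output: DN=1 FZ=0 RMode=0
  return 0;
}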
Index: src/arm64/macro-assembler-arm64.h
diff --git a/src/arm64/macro-assembler-arm64.h b/src/arm64/macro-assembler-arm64.h
index 965d1dbfd9d5ef125b40d84ad83063079d40fcc3..265a0cef1a5acf3f8b65b1d4ae74104a9521c009 100644
--- a/src/arm64/macro-assembler-arm64.h
+++ b/src/arm64/macro-assembler-arm64.h
@@ -788,6 +788,13 @@ class MacroAssembler : public Assembler {
   // Root register.
   inline void InitializeRootRegister();

+  void AssertFPCRState(Register fpcr = NoReg);
+  void ConfigureFPCR();
+  void CanonicalizeNaN(const FPRegister& dst, const FPRegister& src);
+  void CanonicalizeNaN(const FPRegister& reg) {
+    CanonicalizeNaN(reg, reg);
+  }
+
   // Load an object from the root table.
   void LoadRoot(CPURegister destination,
                 Heap::RootListIndex index);
@@ -1533,7 +1540,6 @@ class MacroAssembler : public Assembler {
                                    Register elements_reg,
                                    Register scratch1,
                                    FPRegister fpscratch1,
-                                   FPRegister fpscratch2,
                                    Label* fail,
                                    int elements_offset = 0);

Index: src/objects.h
diff --git a/src/objects.h b/src/objects.h
index 6402f86d341919e6f16fb3ba33db96d819ae5458..2b51b81c990990e3262608226d7edae960be6b9f 100644
--- a/src/objects.h
+++ b/src/objects.h
@@ -1333,6 +1333,7 @@ class MaybeObject BASE_EMBEDDED {
   V(kUnexpectedNegativeValue, "Unexpected negative value")                    \
   V(kUnexpectedNumberOfPreAllocatedPropertyFields,                            \
     "Unexpected number of pre-allocated property fields")                     \
+  V(kUnexpectedFPCRMode, "Unexpected FPCR mode.")                             \
   V(kUnexpectedSmi, "Unexpected smi value")                                   \
   V(kUnexpectedStringFunction, "Unexpected String function")                  \
   V(kUnexpectedStringType, "Unexpected string type")                          \

