[v8-dev] [v8] r13054 committed - Faster implementation of Math.exp()...

codesite-noreply Mon, 26 Nov 2012 05:13:47 -0800

Revision: 13054
Author:   [email protected]
Date:     Mon Nov 26 05:12:35 2012
Log:      Faster implementation of Math.exp()


Review URL: https://codereview.chromium.org/11418149
http://code.google.com/p/v8/source/detail?r=13054

Added:
 /branches/bleeding_edge/test/mjsunit/math-exp-precision.js
Modified:
 /branches/bleeding_edge/src/arm/codegen-arm.cc
 /branches/bleeding_edge/src/arm/codegen-arm.h
 /branches/bleeding_edge/src/arm/lithium-arm.cc
 /branches/bleeding_edge/src/arm/lithium-arm.h
 /branches/bleeding_edge/src/arm/lithium-codegen-arm.cc
 /branches/bleeding_edge/src/arm/simulator-arm.cc
 /branches/bleeding_edge/src/arm/simulator-arm.h
 /branches/bleeding_edge/src/assembler.cc
 /branches/bleeding_edge/src/assembler.h
 /branches/bleeding_edge/src/codegen.h
 /branches/bleeding_edge/src/flag-definitions.h
 /branches/bleeding_edge/src/hydrogen-instructions.h
 /branches/bleeding_edge/src/hydrogen.cc
 /branches/bleeding_edge/src/ia32/assembler-ia32.cc
 /branches/bleeding_edge/src/ia32/assembler-ia32.h
 /branches/bleeding_edge/src/ia32/code-stubs-ia32.cc
 /branches/bleeding_edge/src/ia32/codegen-ia32.cc
 /branches/bleeding_edge/src/ia32/codegen-ia32.h
 /branches/bleeding_edge/src/ia32/disasm-ia32.cc
 /branches/bleeding_edge/src/ia32/lithium-codegen-ia32.cc
 /branches/bleeding_edge/src/ia32/lithium-ia32.cc
 /branches/bleeding_edge/src/ia32/lithium-ia32.h
 /branches/bleeding_edge/src/platform-posix.cc
 /branches/bleeding_edge/src/platform-win32.cc
 /branches/bleeding_edge/src/platform.h
 /branches/bleeding_edge/src/runtime.cc
 /branches/bleeding_edge/src/v8.cc
 /branches/bleeding_edge/src/x64/assembler-x64.cc
 /branches/bleeding_edge/src/x64/assembler-x64.h
 /branches/bleeding_edge/src/x64/code-stubs-x64.cc
 /branches/bleeding_edge/src/x64/codegen-x64.cc
 /branches/bleeding_edge/src/x64/codegen-x64.h
 /branches/bleeding_edge/src/x64/lithium-codegen-x64.cc
 /branches/bleeding_edge/src/x64/lithium-x64.cc
 /branches/bleeding_edge/src/x64/lithium-x64.h

=======================================
--- /dev/null

+++ /branches/bleeding_edge/test/mjsunit/math-exp-precision.js Mon Nov 26 05:12:35 2012

@@ -0,0 +1,64 @@
+// Copyright 2012 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Tests that the --fast-math implementation of Math.exp() has
+// reasonable precision.
+
+function exp(x) {
+  return Math.exp(x);
+}
+
+var first_call_result = exp(Math.PI);
+var second_call_result = exp(Math.PI);
+
+function assertAlmostEquals(expected, actual, x) {
+  if (expected == 0 && actual == 0) return;  // OK
+  if (expected == Number.POSITIVE_INFINITY &&
+      actual == Number.POSITIVE_INFINITY) {
+    return;  // OK
+  }
+  relative_diff = Math.abs(expected/actual - 1);

+  assertTrue(relative_diff < 1e-12, "relative difference of " + relative_diff +

+                                    " for input " + x);
+}
+
+var increment = Math.PI / 35;  // Roughly 0.1, but we want to try many
+                               // different mantissae.
+for (var x = -708; x < 710; x += increment) {
+  var ex = exp(x);
+  var reference = Math.pow(Math.E, x);
+  assertAlmostEquals(reference, ex, x);
+  if (ex > 0 && isFinite(ex)) {
+    var back = Math.log(ex);
+    assertAlmostEquals(x, back, x + " (backwards)");
+  }
+}
+
+// Make sure optimizing the function does not alter the result.
+var last_call_result = exp(Math.PI);
+assertEquals(first_call_result, second_call_result);
+assertEquals(first_call_result, last_call_result);
=======================================
--- /branches/bleeding_edge/src/arm/codegen-arm.cc      Thu Nov 15 05:31:27 2012
+++ /branches/bleeding_edge/src/arm/codegen-arm.cc      Mon Nov 26 05:12:35 2012
@@ -31,11 +31,11 @@

 #include "codegen.h"
 #include "macro-assembler.h"
+#include "simulator-arm.h"

 namespace v8 {
 namespace internal {

-#define __ ACCESS_MASM(masm)

 UnaryMathFunction CreateTranscendentalFunction(TranscendentalCache::Type type) {

   switch (type) {
@@ -47,6 +47,74 @@
   }
   return NULL;
 }
+
+
+#define __ masm.
+
+
+#if defined(USE_SIMULATOR)
+byte* fast_exp_arm_machine_code = NULL;
+double fast_exp_simulator(double x) {
+  return Simulator::current(Isolate::Current())->CallFP(
+      fast_exp_arm_machine_code, x, 0);
+}
+#endif
+
+
+UnaryMathFunction CreateExpFunction() {
+  if (!CpuFeatures::IsSupported(VFP2)) return &exp;
+  if (!FLAG_fast_math) return &exp;
+  size_t actual_size;

+  byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB, &actual_size, true));

+  if (buffer == NULL) return &exp;
+  ExternalReference::InitializeMathExpData();
+
+  MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size));
+
+  {
+    CpuFeatures::Scope use_vfp(VFP2);
+    DoubleRegister input = d0;
+    DoubleRegister result = d1;
+    DoubleRegister double_scratch1 = d2;
+    DoubleRegister double_scratch2 = d3;
+    Register temp1 = r4;
+    Register temp2 = r5;
+    Register temp3 = r6;
+
+    if (masm.use_eabi_hardfloat()) {
+      // Input value is in d0 anyway, nothing to do.
+    } else {
+      __ vmov(input, r0, r1);
+    }
+    __ Push(temp3, temp2, temp1);
+    MathExpGenerator::EmitMathExp(
+        &masm, input, result, double_scratch1, double_scratch2,
+        temp1, temp2, temp3);
+    __ Pop(temp3, temp2, temp1);
+    if (masm.use_eabi_hardfloat()) {
+      __ vmov(d0, result);
+    } else {
+      __ vmov(r0, r1, result);
+    }
+    __ Ret();
+  }
+
+  CodeDesc desc;
+  masm.GetCode(&desc);
+
+  CPU::FlushICache(buffer, actual_size);
+  OS::ProtectCode(buffer, actual_size);
+
+#if !defined(USE_SIMULATOR)
+  return FUNCTION_CAST<UnaryMathFunction>(buffer);
+#else
+  fast_exp_arm_machine_code = buffer;
+  return &fast_exp_simulator;
+#endif
+}
+
+
+#undef __


 UnaryMathFunction CreateSqrtFunction() {
@@ -73,6 +141,8 @@

//-------------------------------------------------------------------------

 // Code generators

+#define __ ACCESS_MASM(masm)
+
 void ElementsTransitionGenerator::GenerateMapChangeElementsTransition(
     MacroAssembler* masm) {
   // ----------- S t a t e -------------
@@ -449,6 +519,78 @@
   __ ldrb(result, MemOperand(string, index));
   __ bind(&done);
 }
+
+
+static MemOperand ExpConstant(int index, Register base) {
+  return MemOperand(base, index * kDoubleSize);
+}
+
+
+void MathExpGenerator::EmitMathExp(MacroAssembler* masm,
+                                   DoubleRegister input,
+                                   DoubleRegister result,
+                                   DoubleRegister double_scratch1,
+                                   DoubleRegister double_scratch2,
+                                   Register temp1,
+                                   Register temp2,
+                                   Register temp3) {
+  ASSERT(!input.is(result));
+  ASSERT(!input.is(double_scratch1));
+  ASSERT(!input.is(double_scratch2));
+  ASSERT(!result.is(double_scratch1));
+  ASSERT(!result.is(double_scratch2));
+  ASSERT(!double_scratch1.is(double_scratch2));
+  ASSERT(!temp1.is(temp2));
+  ASSERT(!temp1.is(temp3));
+  ASSERT(!temp2.is(temp3));
+  ASSERT(ExternalReference::math_exp_constants(0).address() != NULL);
+
+  Label done;
+
+  __ mov(temp3, Operand(ExternalReference::math_exp_constants(0)));
+
+  __ vldr(double_scratch1, ExpConstant(0, temp3));
+  __ vmov(result, kDoubleRegZero);
+  __ VFPCompareAndSetFlags(double_scratch1, input);
+  __ b(ge, &done);
+  __ vldr(double_scratch2, ExpConstant(1, temp3));
+  __ VFPCompareAndSetFlags(input, double_scratch2);
+  __ vldr(result, ExpConstant(2, temp3));
+  __ b(ge, &done);
+  __ vldr(double_scratch1, ExpConstant(3, temp3));
+  __ vldr(result, ExpConstant(4, temp3));
+  __ vmul(double_scratch1, double_scratch1, input);
+  __ vadd(double_scratch1, double_scratch1, result);
+  __ vmov(temp2, temp1, double_scratch1);
+  __ vsub(double_scratch1, double_scratch1, result);
+  __ vldr(result, ExpConstant(6, temp3));
+  __ vldr(double_scratch2, ExpConstant(5, temp3));
+  __ vmul(double_scratch1, double_scratch1, double_scratch2);
+  __ vsub(double_scratch1, double_scratch1, input);
+  __ vsub(result, result, double_scratch1);
+  __ vmul(input, double_scratch1, double_scratch1);
+  __ vmul(result, result, input);
+  __ mov(temp1, Operand(temp2, LSR, 11));
+  __ vldr(double_scratch2, ExpConstant(7, temp3));
+  __ vmul(result, result, double_scratch2);
+  __ vsub(result, result, double_scratch1);
+  __ vldr(double_scratch2, ExpConstant(8, temp3));
+  __ vadd(result, result, double_scratch2);
+  __ movw(ip, 0x7ff);
+  __ and_(temp2, temp2, Operand(ip));
+  __ add(temp1, temp1, Operand(0x3ff));
+  __ mov(temp1, Operand(temp1, LSL, 20));
+
+  // Must not call ExpConstant() after overwriting temp3!
+  __ mov(temp3, Operand(ExternalReference::math_exp_log_table()));
+  __ ldr(ip, MemOperand(temp3, temp2, LSL, 3));
+  __ add(temp3, temp3, Operand(kPointerSize));
+  __ ldr(temp2, MemOperand(temp3, temp2, LSL, 3));
+  __ orr(temp1, temp1, temp2);
+  __ vmov(input, ip, temp1);
+  __ vmul(result, result, input);
+  __ bind(&done);
+}

 #undef __

=======================================
--- /branches/bleeding_edge/src/arm/codegen-arm.h       Thu Nov  8 04:18:11 2012
+++ /branches/bleeding_edge/src/arm/codegen-arm.h       Mon Nov 26 05:12:35 2012
@@ -91,6 +91,22 @@
   DISALLOW_COPY_AND_ASSIGN(StringCharLoadGenerator);
 };

+
+class MathExpGenerator : public AllStatic {
+ public:
+  static void EmitMathExp(MacroAssembler* masm,
+                          DoubleRegister input,
+                          DoubleRegister result,
+                          DoubleRegister double_scratch1,
+                          DoubleRegister double_scratch2,
+                          Register temp1,
+                          Register temp2,
+                          Register temp3);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MathExpGenerator);
+};
+
 } }  // namespace v8::internal

 #endif  // V8_ARM_CODEGEN_ARM_H_
=======================================
--- /branches/bleeding_edge/src/arm/lithium-arm.cc      Thu Nov 22 02:19:05 2012
+++ /branches/bleeding_edge/src/arm/lithium-arm.cc      Mon Nov 26 05:12:35 2012
@@ -295,6 +295,11 @@
   stream->Add("/%s ", hydrogen()->OpName());
   value()->PrintTo(stream);
 }
+
+
+void LMathExp::PrintDataTo(StringStream* stream) {
+  value()->PrintTo(stream);
+}


 void LLoadContextSlot::PrintDataTo(StringStream* stream) {
@@ -1041,6 +1046,15 @@
     LOperand* input = UseFixedDouble(instr->value(), d2);

     LUnaryMathOperation* result = new(zone()) LUnaryMathOperation(input, NULL);

     return MarkAsCall(DefineFixedDouble(result, d2), instr);
+  } else if (op == kMathExp) {
+    ASSERT(instr->representation().IsDouble());
+    ASSERT(instr->value()->representation().IsDouble());
+    LOperand* input = UseTempRegister(instr->value());
+    LOperand* temp1 = TempRegister();
+    LOperand* temp2 = TempRegister();
+    LOperand* double_temp = FixedTemp(d3);  // Chosen by fair dice roll.

+    LMathExp* result = new(zone()) LMathExp(input, double_temp, temp1, temp2);

+    return DefineAsRegister(result);
   } else if (op == kMathPowHalf) {
     LOperand* input = UseFixedDouble(instr->value(), d2);
     LOperand* temp = FixedTemp(d3);
=======================================
--- /branches/bleeding_edge/src/arm/lithium-arm.h       Thu Nov 22 02:19:05 2012
+++ /branches/bleeding_edge/src/arm/lithium-arm.h       Mon Nov 26 05:12:35 2012
@@ -131,6 +131,7 @@
   V(LoadNamedFieldPolymorphic)                  \
   V(LoadNamedGeneric)                           \
   V(MapEnumLength)                              \
+  V(MathExp)                                    \
   V(MathFloorOfDiv)                             \
   V(MathMinMax)                                 \
   V(ModI)                                       \
@@ -681,6 +682,30 @@
 };


+class LMathExp: public LTemplateInstruction<1, 1, 3> {
+ public:
+  LMathExp(LOperand* value,
+           LOperand* double_temp,
+           LOperand* temp1,
+           LOperand* temp2) {
+    inputs_[0] = value;
+    temps_[0] = temp1;
+    temps_[1] = temp2;
+    temps_[2] = double_temp;
+    ExternalReference::InitializeMathExpData();
+  }
+
+  LOperand* value() { return inputs_[0]; }
+  LOperand* temp1() { return temps_[0]; }
+  LOperand* temp2() { return temps_[1]; }
+  LOperand* double_temp() { return temps_[2]; }
+
+  DECLARE_CONCRETE_INSTRUCTION(MathExp, "math-exp")
+
+  virtual void PrintDataTo(StringStream* stream);
+};
+
+
 class LCmpObjectEqAndBranch: public LControlInstruction<2, 0> {
  public:
   LCmpObjectEqAndBranch(LOperand* left, LOperand* right) {
=======================================

--- /branches/bleeding_edge/src/arm/lithium-codegen-arm.cc Fri Nov 23 02:53:03 2012
+++ /branches/bleeding_edge/src/arm/lithium-codegen-arm.cc Mon Nov 26 05:12:35 2012

@@ -3805,6 +3805,20 @@

   __ CallCFunction(ExternalReference::random_uint32_function(isolate()), 1);

   // Return value is in r0.
 }
+
+
+void LCodeGen::DoMathExp(LMathExp* instr) {
+  DoubleRegister input = ToDoubleRegister(instr->value());
+  DoubleRegister result = ToDoubleRegister(instr->result());
+  DoubleRegister double_scratch1 = ToDoubleRegister(instr->double_temp());
+  DoubleRegister double_scratch2 = double_scratch0();
+  Register temp1 = ToRegister(instr->temp1());
+  Register temp2 = ToRegister(instr->temp2());
+
+  MathExpGenerator::EmitMathExp(
+      masm(), input, result, double_scratch1, double_scratch2,
+      temp1, temp2, scratch0());
+}


 void LCodeGen::DoMathLog(LUnaryMathOperation* instr) {
=======================================

--- /branches/bleeding_edge/src/arm/simulator-arm.cc Wed Nov 14 03:01:18 2012
+++ /branches/bleeding_edge/src/arm/simulator-arm.cc Mon Nov 26 05:12:35 2012

@@ -3301,33 +3301,7 @@
 }


-int32_t Simulator::Call(byte* entry, int argument_count, ...) {
-  va_list parameters;
-  va_start(parameters, argument_count);
-  // Set up arguments
-
-  // First four arguments passed in registers.
-  ASSERT(argument_count >= 4);
-  set_register(r0, va_arg(parameters, int32_t));
-  set_register(r1, va_arg(parameters, int32_t));
-  set_register(r2, va_arg(parameters, int32_t));
-  set_register(r3, va_arg(parameters, int32_t));
-
-  // Remaining arguments passed on stack.
-  int original_stack = get_register(sp);
-  // Compute position of stack on entry to generated code.

-  int entry_stack = (original_stack - (argument_count - 4) * sizeof(int32_t));

-  if (OS::ActivationFrameAlignment() != 0) {
-    entry_stack &= -OS::ActivationFrameAlignment();
-  }
-  // Store remaining arguments on stack, from low to high memory.
-  intptr_t* stack_argument = reinterpret_cast<intptr_t*>(entry_stack);
-  for (int i = 4; i < argument_count; i++) {
-    stack_argument[i - 4] = va_arg(parameters, int32_t);
-  }
-  va_end(parameters);
-  set_register(sp, entry_stack);
-
+void Simulator::CallInternal(byte* entry) {
   // Prepare to execute the code at entry
   set_register(pc, reinterpret_cast<int32_t>(entry));

   // Put down marker for end of simulation. The simulator will stop simulation

@@ -3381,6 +3355,37 @@
   set_register(r9, r9_val);
   set_register(r10, r10_val);
   set_register(r11, r11_val);
+}
+
+
+int32_t Simulator::Call(byte* entry, int argument_count, ...) {
+  va_list parameters;
+  va_start(parameters, argument_count);
+  // Set up arguments
+
+  // First four arguments passed in registers.
+  ASSERT(argument_count >= 4);
+  set_register(r0, va_arg(parameters, int32_t));
+  set_register(r1, va_arg(parameters, int32_t));
+  set_register(r2, va_arg(parameters, int32_t));
+  set_register(r3, va_arg(parameters, int32_t));
+
+  // Remaining arguments passed on stack.
+  int original_stack = get_register(sp);
+  // Compute position of stack on entry to generated code.

+  int entry_stack = (original_stack - (argument_count - 4) * sizeof(int32_t));

+  if (OS::ActivationFrameAlignment() != 0) {
+    entry_stack &= -OS::ActivationFrameAlignment();
+  }
+  // Store remaining arguments on stack, from low to high memory.
+  intptr_t* stack_argument = reinterpret_cast<intptr_t*>(entry_stack);
+  for (int i = 4; i < argument_count; i++) {
+    stack_argument[i - 4] = va_arg(parameters, int32_t);
+  }
+  va_end(parameters);
+  set_register(sp, entry_stack);
+
+  CallInternal(entry);

   // Pop stack passed arguments.
   CHECK_EQ(entry_stack, get_register(sp));
@@ -3389,6 +3394,27 @@
   int32_t result = get_register(r0);
   return result;
 }
+
+
+double Simulator::CallFP(byte* entry, double d0, double d1) {
+  if (use_eabi_hardfloat()) {
+    set_d_register_from_double(0, d0);
+    set_d_register_from_double(1, d1);
+  } else {
+    int buffer[2];
+    ASSERT(sizeof(buffer[0]) * 2 == sizeof(d0));
+    memcpy(buffer, &d0, sizeof(d0));
+    set_dw_register(0, buffer);
+    memcpy(buffer, &d1, sizeof(d1));
+    set_dw_register(2, buffer);
+  }
+  CallInternal(entry);
+  if (use_eabi_hardfloat()) {
+    return get_double_from_d_register(0);
+  } else {
+    return get_double_from_register_pair(0);
+  }
+}


 uintptr_t Simulator::PushAddress(uintptr_t address) {
=======================================
--- /branches/bleeding_edge/src/arm/simulator-arm.h     Mon Aug  6 07:28:27 2012
+++ /branches/bleeding_edge/src/arm/simulator-arm.h     Mon Nov 26 05:12:35 2012
@@ -205,6 +205,8 @@

   // generated RegExp code with 7 parameters. This is a convenience function,

   // which sets up the simulator state and grabs the result on return.
   int32_t Call(byte* entry, int argument_count, ...);
+  // Alternative: call a 2-argument double function.
+  double CallFP(byte* entry, double d0, double d1);

   // Push an address onto the JS stack.
   uintptr_t PushAddress(uintptr_t address);
@@ -356,6 +358,8 @@
   template<class InputType, int register_size>
       void SetVFPRegister(int reg_index, const InputType& value);

+  void CallInternal(byte* entry);
+
   // Architecture state.
   // Saturating instructions require a Q flag to indicate saturation.

   // There is currently no way to read the CPSR directly, and thus read the Q

=======================================
--- /branches/bleeding_edge/src/assembler.cc    Thu Nov 22 06:59:52 2012
+++ /branches/bleeding_edge/src/assembler.cc    Mon Nov 26 05:12:35 2012
@@ -103,6 +103,11 @@

 const char* const RelocInfo::kFillerCommentString = "DEOPTIMIZATION PADDING";


+static bool math_exp_data_initialized = false;
+static Mutex* math_exp_data_mutex = NULL;
+static double* math_exp_constants_array = NULL;
+static double* math_exp_log_table_array = NULL;
+

//-----------------------------------------------------------------------------

 // Implementation of AssemblerBase

@@ -836,9 +841,73 @@
   double_constants.canonical_non_hole_nan = OS::nan_value();
   double_constants.the_hole_nan = BitCast<double>(kHoleNanInt64);
   double_constants.negative_infinity = -V8_INFINITY;
+
+  math_exp_data_mutex = OS::CreateMutex();
 }


+void ExternalReference::InitializeMathExpData() {
+  // Early return?
+  if (math_exp_data_initialized) return;
+
+  math_exp_data_mutex->Lock();
+  if (!math_exp_data_initialized) {
+    // If this is changed, generated code must be adapted too.
+    const int kTableSizeBits = 11;
+    const int kTableSize = 1 << kTableSizeBits;
+    const double kTableSizeDouble = static_cast<double>(kTableSize);
+
+    math_exp_constants_array = new double[9];
+    // Input values smaller than this always return 0.
+    math_exp_constants_array[0] = -708.39641853226408;
+    // Input values larger than this always return +Infinity.
+    math_exp_constants_array[1] = 709.78271289338397;
+    math_exp_constants_array[2] = V8_INFINITY;
+    // The rest is black magic. Do not attempt to understand it. It is
+    // loosely based on the "expd" function published at:

+    // http://herumi.blogspot.com/2011/08/fast-double-precision-exponential.html

+    const double constant3 = (1 << kTableSizeBits) / log(2.0);
+    math_exp_constants_array[3] = constant3;
+    math_exp_constants_array[4] =
+        static_cast<double>(static_cast<int64_t>(3) << 51);
+    math_exp_constants_array[5] = 1 / constant3;
+    math_exp_constants_array[6] = 3.0000000027955394;
+    math_exp_constants_array[7] = 0.16666666685227835;
+    math_exp_constants_array[8] = 1;
+
+    math_exp_log_table_array = new double[kTableSize];
+    for (int i = 0; i < kTableSize; i++) {
+      double value = pow(2, i / kTableSizeDouble);
+
+      uint64_t bits = BitCast<uint64_t, double>(value);
+      bits &= (static_cast<uint64_t>(1) << 52) - 1;
+      double mantissa = BitCast<double, uint64_t>(bits);
+
+      // <just testing>
+      uint64_t doublebits;
+      memcpy(&doublebits, &value, sizeof doublebits);
+      doublebits &= (static_cast<uint64_t>(1) << 52) - 1;
+      double mantissa2;
+      memcpy(&mantissa2, &doublebits, sizeof mantissa2);
+      CHECK_EQ(mantissa, mantissa2);
+      // </just testing>
+
+      math_exp_log_table_array[i] = mantissa;
+    }
+
+    math_exp_data_initialized = true;
+  }
+  math_exp_data_mutex->Unlock();
+}
+
+
+void ExternalReference::TearDownMathExpData() {
+  delete[] math_exp_constants_array;
+  delete[] math_exp_log_table_array;
+  delete math_exp_data_mutex;
+}
+
+

 ExternalReference::ExternalReference(Builtins::CFunctionId id, Isolate* isolate)

   : address_(Redirect(isolate, Builtins::c_function_address(id))) {}

@@ -1271,6 +1340,19 @@
                                     FUNCTION_ADDR(math_log_double),
                                     BUILTIN_FP_CALL));
 }
+
+

+ExternalReference ExternalReference::math_exp_constants(int constant_index) {

+  ASSERT(math_exp_data_initialized);
+  return ExternalReference(
+      reinterpret_cast<void*>(math_exp_constants_array + constant_index));
+}
+
+
+ExternalReference ExternalReference::math_exp_log_table() {
+  ASSERT(math_exp_data_initialized);

+  return ExternalReference(reinterpret_cast<void*>(math_exp_log_table_array));

+}


 ExternalReference ExternalReference::page_flags(Page* page) {
=======================================
--- /branches/bleeding_edge/src/assembler.h     Thu Nov 22 06:59:52 2012
+++ /branches/bleeding_edge/src/assembler.h     Mon Nov 26 05:12:35 2012
@@ -604,6 +604,8 @@
   };

   static void SetUp();
+  static void InitializeMathExpData();
+  static void TearDownMathExpData();

   typedef void* ExternalReferenceRedirector(void* original, Type type);

@@ -725,6 +727,9 @@
   static ExternalReference math_tan_double_function(Isolate* isolate);
   static ExternalReference math_log_double_function(Isolate* isolate);

+  static ExternalReference math_exp_constants(int constant_index);
+  static ExternalReference math_exp_log_table();
+
   static ExternalReference page_flags(Page* page);

   Address address() const {return reinterpret_cast<Address>(address_);}
=======================================
--- /branches/bleeding_edge/src/codegen.h       Wed May 23 07:24:29 2012
+++ /branches/bleeding_edge/src/codegen.h       Mon Nov 26 05:12:35 2012
@@ -90,6 +90,7 @@
 typedef double (*UnaryMathFunction)(double x);

 UnaryMathFunction CreateTranscendentalFunction(TranscendentalCache::Type type);

+UnaryMathFunction CreateExpFunction();
 UnaryMathFunction CreateSqrtFunction();


=======================================
--- /branches/bleeding_edge/src/flag-definitions.h      Thu Nov 22 05:04:11 2012
+++ /branches/bleeding_edge/src/flag-definitions.h      Mon Nov 26 05:12:35 2012
@@ -181,6 +181,7 @@
 DEFINE_int(max_inlined_nodes_cumulative, 196,

            "maximum cumulative number of AST nodes considered for inlining")

 DEFINE_bool(loop_invariant_code_motion, true, "loop invariant code motion")

+DEFINE_bool(fast_math, true, "faster (but maybe less accurate) math functions")

 DEFINE_bool(collect_megamorphic_maps_from_stub_cache,
             true,
             "crankshaft harvests type feedback from stub cache")
=======================================

--- /branches/bleeding_edge/src/hydrogen-instructions.h Fri Nov 23 02:53:03 2012
+++ /branches/bleeding_edge/src/hydrogen-instructions.h Mon Nov 26 05:12:35 2012

@@ -2084,6 +2084,9 @@
         set_representation(Representation::Double());
         SetGVNFlag(kChangesNewSpacePromotion);
         break;
+      case kMathExp:
+        set_representation(Representation::Double());
+        break;
       default:
         UNREACHABLE();
     }
@@ -2110,6 +2113,7 @@
         case kMathSqrt:
         case kMathPowHalf:
         case kMathLog:
+        case kMathExp:
         case kMathSin:
         case kMathCos:
         case kMathTan:
=======================================
--- /branches/bleeding_edge/src/hydrogen.cc     Fri Nov 23 02:53:03 2012
+++ /branches/bleeding_edge/src/hydrogen.cc     Mon Nov 26 05:12:35 2012
@@ -7253,6 +7253,9 @@
   if (!expr->target()->shared()->HasBuiltinFunctionId()) return false;
   BuiltinFunctionId id = expr->target()->shared()->builtin_function_id();
   switch (id) {
+    case kMathExp:
+      if (!FLAG_fast_math) break;
+      // Fall through if FLAG_fast_math.
     case kMathRound:
     case kMathAbs:
     case kMathSqrt:
@@ -7313,6 +7316,9 @@
         return true;
       }
       break;
+    case kMathExp:
+      if (!FLAG_fast_math) break;
+      // Fall through if FLAG_fast_math.
     case kMathRound:
     case kMathFloor:
     case kMathAbs:
=======================================

--- /branches/bleeding_edge/src/ia32/assembler-ia32.cc Thu Nov 22 02:28:29 2012
+++ /branches/bleeding_edge/src/ia32/assembler-ia32.cc Mon Nov 26 05:12:35 2012

@@ -1964,6 +1964,16 @@
   EMIT(0x58);
   emit_sse_operand(dst, src);
 }
+
+
+void Assembler::addsd(XMMRegister dst, const Operand& src) {
+  ASSERT(CpuFeatures::IsEnabled(SSE2));
+  EnsureSpace ensure_space(this);
+  EMIT(0xF2);
+  EMIT(0x0F);
+  EMIT(0x58);
+  emit_sse_operand(dst, src);
+}


 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
@@ -1974,6 +1984,16 @@
   EMIT(0x59);
   emit_sse_operand(dst, src);
 }
+
+
+void Assembler::mulsd(XMMRegister dst, const Operand& src) {
+  ASSERT(CpuFeatures::IsEnabled(SSE2));
+  EnsureSpace ensure_space(this);
+  EMIT(0xF2);
+  EMIT(0x0F);
+  EMIT(0x59);
+  emit_sse_operand(dst, src);
+}


 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
@@ -2372,7 +2392,7 @@
 }


-void Assembler::pshufd(XMMRegister dst, XMMRegister src, int8_t shuffle) {
+void Assembler::pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
   ASSERT(CpuFeatures::IsEnabled(SSE2));
   EnsureSpace ensure_space(this);
   EMIT(0x66);
=======================================

--- /branches/bleeding_edge/src/ia32/assembler-ia32.h Thu Nov 22 02:28:29 2012
+++ /branches/bleeding_edge/src/ia32/assembler-ia32.h Mon Nov 26 05:12:35 2012

@@ -990,8 +990,10 @@
   void cvtsd2ss(XMMRegister dst, XMMRegister src);

   void addsd(XMMRegister dst, XMMRegister src);
+  void addsd(XMMRegister dst, const Operand& src);
   void subsd(XMMRegister dst, XMMRegister src);
   void mulsd(XMMRegister dst, XMMRegister src);
+  void mulsd(XMMRegister dst, const Operand& src);
   void divsd(XMMRegister dst, XMMRegister src);
   void xorpd(XMMRegister dst, XMMRegister src);
   void xorps(XMMRegister dst, XMMRegister src);
@@ -1048,7 +1050,7 @@
   void psllq(XMMRegister dst, XMMRegister src);
   void psrlq(XMMRegister reg, int8_t shift);
   void psrlq(XMMRegister dst, XMMRegister src);
-  void pshufd(XMMRegister dst, XMMRegister src, int8_t shuffle);
+  void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle);
   void pextrd(Register dst, XMMRegister src, int8_t offset) {
     pextrd(Operand(dst), src, offset);
   }
=======================================

--- /branches/bleeding_edge/src/ia32/code-stubs-ia32.cc Wed Nov 21 23:05:20 2012
+++ /branches/bleeding_edge/src/ia32/code-stubs-ia32.cc Mon Nov 26 05:12:35 2012

@@ -3115,10 +3115,10 @@
     // F2XM1 calculates 2^st(0) - 1 for -1 < st(0) < 1
     __ f2xm1();    // 2^(X-rnd(X)) - 1, rnd(X)
     __ fld1();     // 1, 2^(X-rnd(X)) - 1, rnd(X)
-    __ faddp(1);   // 1, 2^(X-rnd(X)), rnd(X)
+    __ faddp(1);   // 2^(X-rnd(X)), rnd(X)
     // FSCALE calculates st(0) * 2^st(1)
     __ fscale();   // 2^X, rnd(X)
-    __ fstp(1);
+    __ fstp(1);    // 2^X
     // Bail out to runtime in case of exceptions in the status word.
     __ fnstsw_ax();
     __ test_b(eax, 0x5F);  // We check for all but precision exception.
=======================================

--- /branches/bleeding_edge/src/ia32/codegen-ia32.cc Thu Nov 15 05:31:27 2012
+++ /branches/bleeding_edge/src/ia32/codegen-ia32.cc Mon Nov 26 05:12:35 2012

@@ -100,6 +100,43 @@
   OS::ProtectCode(buffer, actual_size);
   return FUNCTION_CAST<UnaryMathFunction>(buffer);
 }
+
+
+UnaryMathFunction CreateExpFunction() {
+  if (!CpuFeatures::IsSupported(SSE2)) return &exp;
+  if (!FLAG_fast_math) return &exp;
+  size_t actual_size;

+  byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB, &actual_size, true));

+  if (buffer == NULL) return &exp;
+  ExternalReference::InitializeMathExpData();
+
+  MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size));
+  // esp[1 * kPointerSize]: raw double input
+  // esp[0 * kPointerSize]: return address
+  {
+    CpuFeatures::Scope use_sse2(SSE2);
+    XMMRegister input = xmm1;
+    XMMRegister result = xmm2;
+    __ movdbl(input, Operand(esp, 1 * kPointerSize));
+    __ push(eax);
+    __ push(ebx);
+
+    MathExpGenerator::EmitMathExp(&masm, input, result, xmm0, eax, ebx);
+
+    __ pop(ebx);
+    __ pop(eax);
+    __ movdbl(Operand(esp, 1 * kPointerSize), result);
+    __ fld_d(Operand(esp, 1 * kPointerSize));
+    __ Ret();
+  }
+
+  CodeDesc desc;
+  masm.GetCode(&desc);
+
+  CPU::FlushICache(buffer, actual_size);
+  OS::ProtectCode(buffer, actual_size);
+  return FUNCTION_CAST<UnaryMathFunction>(buffer);
+}


 UnaryMathFunction CreateSqrtFunction() {
@@ -754,6 +791,63 @@
                                   SeqOneByteString::kHeaderSize));
   __ bind(&done);
 }
+
+
+static Operand ExpConstant(int index) {

+  return Operand::StaticVariable(ExternalReference::math_exp_constants(index));

+}
+
+
+void MathExpGenerator::EmitMathExp(MacroAssembler* masm,
+                                   XMMRegister input,
+                                   XMMRegister result,
+                                   XMMRegister double_scratch,
+                                   Register temp1,
+                                   Register temp2) {
+  ASSERT(!input.is(double_scratch));
+  ASSERT(!input.is(result));
+  ASSERT(!result.is(double_scratch));
+  ASSERT(!temp1.is(temp2));
+  ASSERT(ExternalReference::math_exp_constants(0).address() != NULL);
+
+  Label done;
+
+  __ movdbl(double_scratch, ExpConstant(0));
+  __ xorpd(result, result);
+  __ ucomisd(double_scratch, input);
+  __ j(above_equal, &done);
+  __ ucomisd(input, ExpConstant(1));
+  __ movdbl(result, ExpConstant(2));
+  __ j(above_equal, &done);
+  __ movdbl(double_scratch, ExpConstant(3));
+  __ movdbl(result, ExpConstant(4));
+  __ mulsd(double_scratch, input);
+  __ addsd(double_scratch, result);
+  __ movd(temp2, double_scratch);
+  __ subsd(double_scratch, result);
+  __ movdbl(result, ExpConstant(6));
+  __ mulsd(double_scratch, ExpConstant(5));
+  __ subsd(double_scratch, input);
+  __ subsd(result, double_scratch);
+  __ movsd(input, double_scratch);
+  __ mulsd(input, double_scratch);
+  __ mulsd(result, input);
+  __ mov(temp1, temp2);
+  __ mulsd(result, ExpConstant(7));
+  __ subsd(result, double_scratch);
+  __ add(temp1, Immediate(0x1ff800));
+  __ addsd(result, ExpConstant(8));
+  __ and_(temp2, Immediate(0x7ff));
+  __ shr(temp1, 11);
+  __ shl(temp1, 20);
+  __ movd(input, temp1);

+  __ pshufd(input, input, static_cast<uint8_t>(0xe1));  // Order: 11 10 00 01

+  __ movdbl(double_scratch, Operand::StaticArray(
+      temp2, times_8, ExternalReference::math_exp_log_table()));
+  __ por(input, double_scratch);
+  __ mulsd(result, input);
+  __ bind(&done);
+}

 #undef __

=======================================
--- /branches/bleeding_edge/src/ia32/codegen-ia32.h     Thu Nov  8 04:18:11 2012
+++ /branches/bleeding_edge/src/ia32/codegen-ia32.h     Mon Nov 26 05:12:35 2012
@@ -92,6 +92,20 @@
   DISALLOW_COPY_AND_ASSIGN(StringCharLoadGenerator);
 };

+
+class MathExpGenerator : public AllStatic {
+ public:
+  static void EmitMathExp(MacroAssembler* masm,
+                          XMMRegister input,
+                          XMMRegister result,
+                          XMMRegister double_scratch,
+                          Register temp1,
+                          Register temp2);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MathExpGenerator);
+};
+
 } }  // namespace v8::internal

 #endif  // V8_IA32_CODEGEN_IA32_H_
=======================================
--- /branches/bleeding_edge/src/ia32/disasm-ia32.cc     Wed Sep 19 05:48:49 2012
+++ /branches/bleeding_edge/src/ia32/disasm-ia32.cc     Mon Nov 26 05:12:35 2012
@@ -869,6 +869,7 @@
     case 0xAF: return "imul";
     case 0xA5: return "shld";
     case 0xAD: return "shrd";
+    case 0xAC: return "shrd";  // 3-operand version.
     case 0xAB: return "bts";
     default: return NULL;
   }
=======================================

--- /branches/bleeding_edge/src/ia32/lithium-codegen-ia32.cc    Fri Nov 23 02:53:03 2012
+++ /branches/bleeding_edge/src/ia32/lithium-codegen-ia32.cc    Mon Nov 26 05:12:35 2012

@@ -3607,6 +3607,16 @@
   __ add(Operand(esp), Immediate(kDoubleSize));
   __ bind(&done);
 }
+
+
+void LCodeGen::DoMathExp(LMathExp* instr) {
+  XMMRegister input = ToDoubleRegister(instr->value());
+  XMMRegister result = ToDoubleRegister(instr->result());
+  Register temp1 = ToRegister(instr->temp1());
+  Register temp2 = ToRegister(instr->temp2());
+
+  MathExpGenerator::EmitMathExp(masm(), input, result, xmm0, temp1, temp2);
+}


 void LCodeGen::DoMathTan(LUnaryMathOperation* instr) {
=======================================

--- /branches/bleeding_edge/src/ia32/lithium-ia32.cc    Thu Nov 22 02:19:05 2012
+++ /branches/bleeding_edge/src/ia32/lithium-ia32.cc    Mon Nov 26 05:12:35 2012

@@ -297,6 +297,11 @@
   stream->Add("/%s ", hydrogen()->OpName());
   value()->PrintTo(stream);
 }
+
+
+void LMathExp::PrintDataTo(StringStream* stream) {
+  value()->PrintTo(stream);
+}


 void LMathPowHalf::PrintDataTo(StringStream* stream) {
@@ -1087,6 +1092,14 @@
     LUnaryMathOperation* result = new(zone()) LUnaryMathOperation(context,
                                                                   input);
     return DefineSameAsFirst(result);
+  } else if (op == kMathExp) {
+    ASSERT(instr->representation().IsDouble());
+    ASSERT(instr->value()->representation().IsDouble());
+    LOperand* value = UseTempRegister(instr->value());
+    LOperand* temp1 = TempRegister();
+    LOperand* temp2 = TempRegister();
+    LMathExp* result = new(zone()) LMathExp(value, temp1, temp2);
+    return DefineAsRegister(result);
   } else if (op == kMathSin || op == kMathCos || op == kMathTan) {
     LOperand* context = UseFixed(instr->context(), esi);
     LOperand* input = UseFixedDouble(instr->value(), xmm1);
=======================================
--- /branches/bleeding_edge/src/ia32/lithium-ia32.h     Thu Nov 22 02:19:05 2012
+++ /branches/bleeding_edge/src/ia32/lithium-ia32.h     Mon Nov 26 05:12:35 2012
@@ -125,6 +125,7 @@
   V(LoadNamedFieldPolymorphic)                  \
   V(LoadNamedGeneric)                           \
   V(MapEnumLength)                              \
+  V(MathExp)                                    \
   V(MathFloorOfDiv)                             \
   V(MathMinMax)                                 \
   V(MathPowHalf)                                \
@@ -639,6 +640,27 @@
 };


+class LMathExp: public LTemplateInstruction<1, 1, 2> {
+ public:
+  LMathExp(LOperand* value,
+           LOperand* temp1,
+           LOperand* temp2) {
+    inputs_[0] = value;
+    temps_[0] = temp1;
+    temps_[1] = temp2;
+    ExternalReference::InitializeMathExpData();
+  }
+
+  LOperand* value() { return inputs_[0]; }
+  LOperand* temp1() { return temps_[0]; }
+  LOperand* temp2() { return temps_[1]; }
+
+  DECLARE_CONCRETE_INSTRUCTION(MathExp, "math-exp")
+
+  virtual void PrintDataTo(StringStream* stream);
+};
+
+
 class LMathPowHalf: public LTemplateInstruction<1, 2, 1> {
  public:
   LMathPowHalf(LOperand* context, LOperand* value, LOperand* temp) {
=======================================
--- /branches/bleeding_edge/src/platform-posix.cc       Tue Jul 10 05:52:36 2012
+++ /branches/bleeding_edge/src/platform-posix.cc       Mon Nov 26 05:12:35 2012
@@ -142,11 +142,19 @@

 UNARY_MATH_FUNCTION(cos, CreateTranscendentalFunction(TranscendentalCache::COS))
 UNARY_MATH_FUNCTION(tan, CreateTranscendentalFunction(TranscendentalCache::TAN))
 UNARY_MATH_FUNCTION(log, CreateTranscendentalFunction(TranscendentalCache::LOG))

+UNARY_MATH_FUNCTION(exp, CreateExpFunction())
 UNARY_MATH_FUNCTION(sqrt, CreateSqrtFunction())

 #undef MATH_FUNCTION


+void lazily_initialize_fast_exp() {
+  if (fast_exp_function == NULL) {
+    init_fast_exp_function();
+  }
+}
+
+
 double OS::nan_value() {
   // NAN from math.h is defined in C99 and not in POSIX.
   return NAN;
@@ -332,6 +340,7 @@
   init_fast_cos_function();
   init_fast_tan_function();
   init_fast_log_function();
+  // fast_exp is initialized lazily.
   init_fast_sqrt_function();
 }

=======================================
--- /branches/bleeding_edge/src/platform-win32.cc       Fri Nov 16 02:38:10 2012
+++ /branches/bleeding_edge/src/platform-win32.cc       Mon Nov 26 05:12:35 2012
@@ -199,11 +199,19 @@

+UNARY_MATH_FUNCTION(exp, CreateExpFunction())
 UNARY_MATH_FUNCTION(sqrt, CreateSqrtFunction())

 #undef MATH_FUNCTION


+void lazily_initialize_fast_exp() {
+  if (fast_exp_function == NULL) {
+    init_fast_exp_function();
+  }
+}
+
+
 void MathSetup() {
 #ifdef _WIN64
   init_modulo_function();
@@ -212,6 +220,7 @@
   init_fast_cos_function();
   init_fast_tan_function();
   init_fast_log_function();
+  // fast_exp is initialized lazily.
   init_fast_sqrt_function();
 }

=======================================
--- /branches/bleeding_edge/src/platform.h      Fri Nov 16 02:38:10 2012
+++ /branches/bleeding_edge/src/platform.h      Mon Nov 26 05:12:35 2012
@@ -119,12 +119,16 @@
 double ceiling(double x);
 double modulo(double x, double y);

-// Custom implementation of sin, cos, tan and log.
+// Custom implementation of math functions.
 double fast_sin(double input);
 double fast_cos(double input);
 double fast_tan(double input);
 double fast_log(double input);
+double fast_exp(double input);
 double fast_sqrt(double input);
+// The custom exp implementation needs 16KB of lookup data; initialize it
+// on demand.
+void lazily_initialize_fast_exp();

 // Forward declarations.
 class Socket;
=======================================
--- /branches/bleeding_edge/src/runtime.cc      Thu Nov 22 02:25:22 2012
+++ /branches/bleeding_edge/src/runtime.cc      Mon Nov 26 05:12:35 2012
@@ -7153,7 +7153,8 @@
   isolate->counters()->math_exp()->Increment();

   CONVERT_DOUBLE_ARG_CHECKED(x, 0);
-  return isolate->transcendental_cache()->Get(TranscendentalCache::EXP, x);
+  lazily_initialize_fast_exp();
+  return isolate->heap()->NumberFromDouble(fast_exp(x));
 }


=======================================
--- /branches/bleeding_edge/src/v8.cc   Thu Nov  8 05:44:59 2012
+++ /branches/bleeding_edge/src/v8.cc   Mon Nov 26 05:12:35 2012
@@ -115,6 +115,7 @@

   ElementsAccessor::TearDown();
   LOperand::TearDownCaches();
+  ExternalReference::TearDownMathExpData();
   RegisteredExtension::UnregisterAll();

   is_running_ = false;
=======================================

--- /branches/bleeding_edge/src/x64/assembler-x64.cc    Thu Nov 22 02:28:29 2012
+++ /branches/bleeding_edge/src/x64/assembler-x64.cc    Mon Nov 26 05:12:35 2012

@@ -2805,6 +2805,16 @@
   emit(0x58);
   emit_sse_operand(dst, src);
 }
+
+
+void Assembler::addsd(XMMRegister dst, const Operand& src) {
+  EnsureSpace ensure_space(this);
+  emit(0xF2);
+  emit_optional_rex_32(dst, src);
+  emit(0x0F);
+  emit(0x58);
+  emit_sse_operand(dst, src);
+}


 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
@@ -2815,6 +2825,16 @@
   emit(0x59);
   emit_sse_operand(dst, src);
 }
+
+
+void Assembler::mulsd(XMMRegister dst, const Operand& src) {
+  EnsureSpace ensure_space(this);
+  emit(0xF2);
+  emit_optional_rex_32(dst, src);
+  emit(0x0F);
+  emit(0x59);
+  emit_sse_operand(dst, src);
+}


 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
=======================================
--- /branches/bleeding_edge/src/x64/assembler-x64.h     Thu Nov 22 02:28:29 2012
+++ /branches/bleeding_edge/src/x64/assembler-x64.h     Mon Nov 26 05:12:35 2012
@@ -1363,8 +1363,10 @@
   void cvtsd2siq(Register dst, XMMRegister src);

   void addsd(XMMRegister dst, XMMRegister src);
+  void addsd(XMMRegister dst, const Operand& src);
   void subsd(XMMRegister dst, XMMRegister src);
   void mulsd(XMMRegister dst, XMMRegister src);
+  void mulsd(XMMRegister dst, const Operand& src);
   void divsd(XMMRegister dst, XMMRegister src);

   void andpd(XMMRegister dst, XMMRegister src);
=======================================

--- /branches/bleeding_edge/src/x64/code-stubs-x64.cc   Wed Nov 21 23:05:20 2012
+++ /branches/bleeding_edge/src/x64/code-stubs-x64.cc   Mon Nov 26 05:12:35 2012

@@ -2221,7 +2221,7 @@
     // F2XM1 calculates 2^st(0) - 1 for -1 < st(0) < 1
     __ f2xm1();    // 2^(X-rnd(X)) - 1, rnd(X)
     __ fld1();     // 1, 2^(X-rnd(X)) - 1, rnd(X)
-    __ faddp(1);   // 1, 2^(X-rnd(X)), rnd(X)
+    __ faddp(1);   // 2^(X-rnd(X)), rnd(X)
     // FSCALE calculates st(0) * 2^st(1)
     __ fscale();   // 2^X, rnd(X)
     __ fstp(1);
=======================================
--- /branches/bleeding_edge/src/x64/codegen-x64.cc      Thu Nov 15 05:31:27 2012
+++ /branches/bleeding_edge/src/x64/codegen-x64.cc      Mon Nov 26 05:12:35 2012
@@ -97,6 +97,36 @@
   OS::ProtectCode(buffer, actual_size);
   return FUNCTION_CAST<UnaryMathFunction>(buffer);
 }
+
+
+UnaryMathFunction CreateExpFunction() {
+  if (!FLAG_fast_math) return &exp;
+  size_t actual_size;

+  byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB, &actual_size, true));

+  if (buffer == NULL) return &exp;
+  ExternalReference::InitializeMathExpData();
+
+  MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size));
+  // xmm0: raw double input.
+  XMMRegister input = xmm0;
+  XMMRegister result = xmm1;
+  __ push(rax);
+  __ push(rbx);
+
+  MathExpGenerator::EmitMathExp(&masm, input, result, xmm2, rax, rbx);
+
+  __ pop(rbx);
+  __ pop(rax);
+  __ movsd(xmm0, result);
+  __ Ret();
+
+  CodeDesc desc;
+  masm.GetCode(&desc);
+
+  CPU::FlushICache(buffer, actual_size);
+  OS::ProtectCode(buffer, actual_size);
+  return FUNCTION_CAST<UnaryMathFunction>(buffer);
+}


 UnaryMathFunction CreateSqrtFunction() {
@@ -574,6 +604,58 @@
                                   SeqOneByteString::kHeaderSize));
   __ bind(&done);
 }
+
+
+void MathExpGenerator::EmitMathExp(MacroAssembler* masm,
+                                   XMMRegister input,
+                                   XMMRegister result,
+                                   XMMRegister double_scratch,
+                                   Register temp1,
+                                   Register temp2) {
+  ASSERT(!input.is(result));
+  ASSERT(!input.is(double_scratch));
+  ASSERT(!result.is(double_scratch));
+  ASSERT(!temp1.is(temp2));
+  ASSERT(ExternalReference::math_exp_constants(0).address() != NULL);
+
+  Label done;
+
+  __ movq(kScratchRegister, ExternalReference::math_exp_constants(0));
+  __ movsd(double_scratch, Operand(kScratchRegister, 0 * kDoubleSize));
+  __ xorpd(result, result);
+  __ ucomisd(double_scratch, input);
+  __ j(above_equal, &done);
+  __ ucomisd(input, Operand(kScratchRegister, 1 * kDoubleSize));
+  __ movsd(result, Operand(kScratchRegister, 2 * kDoubleSize));
+  __ j(above_equal, &done);
+  __ movsd(double_scratch, Operand(kScratchRegister, 3 * kDoubleSize));
+  __ movsd(result, Operand(kScratchRegister, 4 * kDoubleSize));
+  __ mulsd(double_scratch, input);
+  __ addsd(double_scratch, result);
+  __ movq(temp2, double_scratch);
+  __ subsd(double_scratch, result);
+  __ movsd(result, Operand(kScratchRegister, 6 * kDoubleSize));
+  __ lea(temp1, Operand(temp2, 0x1ff800));
+  __ and_(temp2, Immediate(0x7ff));
+  __ shr(temp1, Immediate(11));
+  __ mulsd(double_scratch, Operand(kScratchRegister, 5 * kDoubleSize));
+  __ movq(kScratchRegister, ExternalReference::math_exp_log_table());
+  __ shl(temp1, Immediate(52));
+  __ or_(temp1, Operand(kScratchRegister, temp2, times_8, 0));
+  __ movq(kScratchRegister, ExternalReference::math_exp_constants(0));
+  __ subsd(double_scratch, input);
+  __ movsd(input, double_scratch);
+  __ subsd(result, double_scratch);
+  __ mulsd(input, double_scratch);
+  __ mulsd(result, input);
+  __ movq(input, temp1);
+  __ mulsd(result, Operand(kScratchRegister, 7 * kDoubleSize));
+  __ subsd(result, double_scratch);
+  __ addsd(result, Operand(kScratchRegister, 8 * kDoubleSize));
+  __ mulsd(result, input);
+
+  __ bind(&done);
+}

 #undef __

=======================================
--- /branches/bleeding_edge/src/x64/codegen-x64.h       Thu Nov  8 04:18:11 2012
+++ /branches/bleeding_edge/src/x64/codegen-x64.h       Mon Nov 26 05:12:35 2012
@@ -86,6 +86,20 @@
   DISALLOW_COPY_AND_ASSIGN(StringCharLoadGenerator);
 };

+
+class MathExpGenerator : public AllStatic {
+ public:
+  static void EmitMathExp(MacroAssembler* masm,
+                          XMMRegister input,
+                          XMMRegister result,
+                          XMMRegister double_scratch,
+                          Register temp1,
+                          Register temp2);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MathExpGenerator);
+};
+
 } }  // namespace v8::internal

 #endif  // V8_X64_CODEGEN_X64_H_
=======================================

--- /branches/bleeding_edge/src/x64/lithium-codegen-x64.cc      Fri Nov 23 02:53:03 2012
+++ /branches/bleeding_edge/src/x64/lithium-codegen-x64.cc      Mon Nov 26 05:12:35 2012

@@ -3475,6 +3475,16 @@
   __ movq(rsi, Operand(rbp, StandardFrameConstants::kContextOffset));
   // Return value is in rax.
 }
+
+
+void LCodeGen::DoMathExp(LMathExp* instr) {
+  XMMRegister input = ToDoubleRegister(instr->value());
+  XMMRegister result = ToDoubleRegister(instr->result());
+  Register temp1 = ToRegister(instr->temp1());
+  Register temp2 = ToRegister(instr->temp2());
+
+  MathExpGenerator::EmitMathExp(masm(), input, result, xmm0, temp1, temp2);
+}


 void LCodeGen::DoMathLog(LUnaryMathOperation* instr) {
=======================================
--- /branches/bleeding_edge/src/x64/lithium-x64.cc      Thu Nov 22 02:19:05 2012
+++ /branches/bleeding_edge/src/x64/lithium-x64.cc      Mon Nov 26 05:12:35 2012
@@ -297,6 +297,11 @@
   stream->Add("/%s ", hydrogen()->OpName());
   value()->PrintTo(stream);
 }
+
+
+void LMathExp::PrintDataTo(StringStream* stream) {
+  value()->PrintTo(stream);
+}


 void LLoadContextSlot::PrintDataTo(StringStream* stream) {
@@ -1046,6 +1051,14 @@
     LOperand* input = UseFixedDouble(instr->value(), xmm1);
     LUnaryMathOperation* result = new(zone()) LUnaryMathOperation(input);
     return MarkAsCall(DefineFixedDouble(result, xmm1), instr);
+  } else if (op == kMathExp) {
+    ASSERT(instr->representation().IsDouble());
+    ASSERT(instr->value()->representation().IsDouble());
+    LOperand* value = UseTempRegister(instr->value());
+    LOperand* temp1 = TempRegister();
+    LOperand* temp2 = TempRegister();
+    LMathExp* result = new(zone()) LMathExp(value, temp1, temp2);
+    return DefineAsRegister(result);
   } else {
     LOperand* input = UseRegisterAtStart(instr->value());
     LUnaryMathOperation* result = new(zone()) LUnaryMathOperation(input);
=======================================
--- /branches/bleeding_edge/src/x64/lithium-x64.h       Thu Nov 22 02:19:05 2012
+++ /branches/bleeding_edge/src/x64/lithium-x64.h       Mon Nov 26 05:12:35 2012
@@ -131,6 +131,7 @@
   V(LoadNamedField)                             \
   V(LoadNamedFieldPolymorphic)                  \
   V(LoadNamedGeneric)                           \
+  V(MathExp)                                    \
   V(MathFloorOfDiv)                             \
   V(MathMinMax)                                 \
   V(ModI)                                       \
@@ -643,6 +644,25 @@
 };


+class LMathExp: public LTemplateInstruction<1, 1, 2> {
+ public:
+  LMathExp(LOperand* value, LOperand* temp1, LOperand* temp2) {
+    inputs_[0] = value;
+    temps_[0] = temp1;
+    temps_[1] = temp2;
+    ExternalReference::InitializeMathExpData();
+  }
+
+  LOperand* value() { return inputs_[0]; }
+  LOperand* temp1() { return temps_[0]; }
+  LOperand* temp2() { return temps_[1]; }
+
+  DECLARE_CONCRETE_INSTRUCTION(MathExp, "math-exp")
+
+  virtual void PrintDataTo(StringStream* stream);
+};
+
+
 class LCmpObjectEqAndBranch: public LControlInstruction<2, 0> {
  public:
   LCmpObjectEqAndBranch(LOperand* left, LOperand* right) {

--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

[v8-dev] [v8] r13054 committed - Faster implementation of Math.exp()...

Reply via email to