Revision: 13089
Author:   [email protected]
Date:     Thu Nov 29 01:22:08 2012
Log:      MIPS: Faster implementation of Math.exp()

Port r13054 (636985d7)

BUG=
TEST=

Review URL: https://codereview.chromium.org/11415192
Patch from Akos Palfi <[email protected]>.
http://code.google.com/p/v8/source/detail?r=13089

Modified:
 /branches/bleeding_edge/src/mips/codegen-mips.cc
 /branches/bleeding_edge/src/mips/codegen-mips.h
 /branches/bleeding_edge/src/mips/lithium-codegen-mips.cc
 /branches/bleeding_edge/src/mips/lithium-mips.cc
 /branches/bleeding_edge/src/mips/lithium-mips.h
 /branches/bleeding_edge/src/mips/simulator-mips.cc
 /branches/bleeding_edge/src/mips/simulator-mips.h

=======================================
--- /branches/bleeding_edge/src/mips/codegen-mips.cc Wed Nov 28 23:38:00 2012
+++ /branches/bleeding_edge/src/mips/codegen-mips.cc Thu Nov 29 01:22:08 2012
@@ -31,11 +31,11 @@

 #include "codegen.h"
 #include "macro-assembler.h"
+#include "simulator-mips.h"

 namespace v8 {
 namespace internal {

-#define __ ACCESS_MASM(masm)

 UnaryMathFunction CreateTranscendentalFunction(TranscendentalCache::Type type) {
   switch (type) {
@@ -47,6 +47,74 @@
   }
   return NULL;
 }
+
+
+#define __ masm.
+
+
+#if defined(USE_SIMULATOR)
+byte* fast_exp_mips_machine_code = NULL;
+double fast_exp_simulator(double x) {
+  return Simulator::current(Isolate::Current())->CallFP(
+      fast_exp_mips_machine_code, x, 0);
+}
+#endif
+
+
+UnaryMathFunction CreateExpFunction() {
+  if (!CpuFeatures::IsSupported(FPU)) return &exp;
+  if (!FLAG_fast_math) return &exp;
+  size_t actual_size;
+  byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB, &actual_size, true));
+  if (buffer == NULL) return &exp;
+  ExternalReference::InitializeMathExpData();
+
+  MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size));
+
+  {
+    CpuFeatures::Scope use_fpu(FPU);
+    DoubleRegister input = f12;
+    DoubleRegister result = f0;
+    DoubleRegister double_scratch1 = f4;
+    DoubleRegister double_scratch2 = f6;
+    Register temp1 = t0;
+    Register temp2 = t1;
+    Register temp3 = t2;
+
+    if (!IsMipsSoftFloatABI) {
+      // Input value is in f12 anyway, nothing to do.
+    } else {
+      __ Move(input, a0, a1);
+    }
+    __ Push(temp3, temp2, temp1);
+    MathExpGenerator::EmitMathExp(
+        &masm, input, result, double_scratch1, double_scratch2,
+        temp1, temp2, temp3);
+    __ Pop(temp3, temp2, temp1);
+    if (!IsMipsSoftFloatABI) {
+      // Result is already in f0, nothing to do.
+    } else {
+      __ Move(a0, a1, result);
+    }
+    __ Ret();
+  }
+
+  CodeDesc desc;
+  masm.GetCode(&desc);
+
+  CPU::FlushICache(buffer, actual_size);
+  OS::ProtectCode(buffer, actual_size);
+
+#if !defined(USE_SIMULATOR)
+  return FUNCTION_CAST<UnaryMathFunction>(buffer);
+#else
+  fast_exp_mips_machine_code = buffer;
+  return &fast_exp_simulator;
+#endif
+}
+
+
+#undef __


 UnaryMathFunction CreateSqrtFunction() {
@@ -72,6 +140,8 @@
 // -------------------------------------------------------------------------
 // Code generators

+#define __ ACCESS_MASM(masm)
+
 void ElementsTransitionGenerator::GenerateMapChangeElementsTransition(
     MacroAssembler* masm) {
   // ----------- S t a t e -------------
@@ -445,6 +515,81 @@
   __ lbu(result, MemOperand(at));
   __ bind(&done);
 }
+
+
+static MemOperand ExpConstant(int index, Register base) {
+  return MemOperand(base, index * kDoubleSize);
+}
+
+
+void MathExpGenerator::EmitMathExp(MacroAssembler* masm,
+                                   DoubleRegister input,
+                                   DoubleRegister result,
+                                   DoubleRegister double_scratch1,
+                                   DoubleRegister double_scratch2,
+                                   Register temp1,
+                                   Register temp2,
+                                   Register temp3) {
+  ASSERT(!input.is(result));
+  ASSERT(!input.is(double_scratch1));
+  ASSERT(!input.is(double_scratch2));
+  ASSERT(!result.is(double_scratch1));
+  ASSERT(!result.is(double_scratch2));
+  ASSERT(!double_scratch1.is(double_scratch2));
+  ASSERT(!temp1.is(temp2));
+  ASSERT(!temp1.is(temp3));
+  ASSERT(!temp2.is(temp3));
+  ASSERT(ExternalReference::math_exp_constants(0).address() != NULL);
+
+  Label done;
+
+  __ li(temp3, Operand(ExternalReference::math_exp_constants(0)));
+
+  __ ldc1(double_scratch1, ExpConstant(0, temp3));
+  __ Move(result, kDoubleRegZero);
+  __ BranchF(&done, NULL, ge, double_scratch1, input);
+  __ ldc1(double_scratch2, ExpConstant(1, temp3));
+  __ ldc1(result, ExpConstant(2, temp3));
+  __ BranchF(&done, NULL, ge, input, double_scratch2);
+  __ ldc1(double_scratch1, ExpConstant(3, temp3));
+  __ ldc1(result, ExpConstant(4, temp3));
+  __ mul_d(double_scratch1, double_scratch1, input);
+  __ add_d(double_scratch1, double_scratch1, result);
+  __ Move(temp2, temp1, double_scratch1);
+  __ sub_d(double_scratch1, double_scratch1, result);
+  __ ldc1(result, ExpConstant(6, temp3));
+  __ ldc1(double_scratch2, ExpConstant(5, temp3));
+  __ mul_d(double_scratch1, double_scratch1, double_scratch2);
+  __ sub_d(double_scratch1, double_scratch1, input);
+  __ sub_d(result, result, double_scratch1);
+  __ mul_d(input, double_scratch1, double_scratch1);
+  __ mul_d(result, result, input);
+  __ srl(temp1, temp2, 11);
+  __ ldc1(double_scratch2, ExpConstant(7, temp3));
+  __ mul_d(result, result, double_scratch2);
+  __ sub_d(result, result, double_scratch1);
+  __ ldc1(double_scratch2, ExpConstant(8, temp3));
+  __ add_d(result, result, double_scratch2);
+  __ li(at, 0x7ff);
+  __ And(temp2, temp2, at);
+  __ Addu(temp1, temp1, Operand(0x3ff));
+  __ sll(temp1, temp1, 20);
+
+  // Must not call ExpConstant() after overwriting temp3!
+  __ li(temp3, Operand(ExternalReference::math_exp_log_table()));
+  __ sll(at, temp2, 3);
+  __ addu(at, at, temp3);
+  __ lw(at, MemOperand(at));
+  __ Addu(temp3, temp3, Operand(kPointerSize));
+  __ sll(temp2, temp2, 3);
+  __ addu(temp2, temp2, temp3);
+  __ lw(temp2, MemOperand(temp2));
+  __ Or(temp1, temp1, temp2);
+  __ Move(input, at, temp1);
+  __ mul_d(result, result, input);
+  __ bind(&done);
+}
+

 // nop(CODE_AGE_MARKER_NOP)
 static const uint32_t kCodeAgePatchFirstInstruction = 0x00010180;
=======================================
--- /branches/bleeding_edge/src/mips/codegen-mips.h     Wed Nov 28 23:38:00 2012
+++ /branches/bleeding_edge/src/mips/codegen-mips.h     Thu Nov 29 01:22:08 2012
@@ -90,6 +90,22 @@
   DISALLOW_COPY_AND_ASSIGN(StringCharLoadGenerator);
 };

+
+class MathExpGenerator : public AllStatic {
+ public:
+  static void EmitMathExp(MacroAssembler* masm,
+                          DoubleRegister input,
+                          DoubleRegister result,
+                          DoubleRegister double_scratch1,
+                          DoubleRegister double_scratch2,
+                          Register temp1,
+                          Register temp2,
+                          Register temp3);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MathExpGenerator);
+};
+
 } }  // namespace v8::internal

 #endif  // V8_MIPS_CODEGEN_MIPS_H_
=======================================
--- /branches/bleeding_edge/src/mips/lithium-codegen-mips.cc Thu Nov 29 01:12:31 2012
+++ /branches/bleeding_edge/src/mips/lithium-codegen-mips.cc Thu Nov 29 01:22:08 2012
@@ -3498,6 +3498,20 @@
   __ CallCFunction(ExternalReference::random_uint32_function(isolate()), 1);
   // Return value is in v0.
 }
+
+
+void LCodeGen::DoMathExp(LMathExp* instr) {
+  DoubleRegister input = ToDoubleRegister(instr->value());
+  DoubleRegister result = ToDoubleRegister(instr->result());
+  DoubleRegister double_scratch1 = ToDoubleRegister(instr->double_temp());
+  DoubleRegister double_scratch2 = double_scratch0();
+  Register temp1 = ToRegister(instr->temp1());
+  Register temp2 = ToRegister(instr->temp2());
+
+  MathExpGenerator::EmitMathExp(
+      masm(), input, result, double_scratch1, double_scratch2,
+      temp1, temp2, scratch0());
+}


 void LCodeGen::DoMathLog(LUnaryMathOperation* instr) {
=======================================
--- /branches/bleeding_edge/src/mips/lithium-mips.cc Thu Nov 29 01:09:39 2012
+++ /branches/bleeding_edge/src/mips/lithium-mips.cc Thu Nov 29 01:22:08 2012
@@ -295,6 +295,11 @@
   stream->Add("/%s ", hydrogen()->OpName());
   value()->PrintTo(stream);
 }
+
+
+void LMathExp::PrintDataTo(StringStream* stream) {
+  value()->PrintTo(stream);
+}


 void LLoadContextSlot::PrintDataTo(StringStream* stream) {
@@ -1040,6 +1045,15 @@
     LOperand* input = UseFixedDouble(instr->value(), f4);
     LUnaryMathOperation* result = new(zone()) LUnaryMathOperation(input, NULL);
     return MarkAsCall(DefineFixedDouble(result, f4), instr);
+  } else if (op == kMathExp) {
+    ASSERT(instr->representation().IsDouble());
+    ASSERT(instr->value()->representation().IsDouble());
+    LOperand* input = UseTempRegister(instr->value());
+    LOperand* temp1 = TempRegister();
+    LOperand* temp2 = TempRegister();
+    LOperand* double_temp = FixedTemp(f6);  // Chosen by fair dice roll.
+    LMathExp* result = new(zone()) LMathExp(input, double_temp, temp1, temp2);
+    return DefineAsRegister(result);
   } else if (op == kMathPowHalf) {
     // Input cannot be the same as the result.
     // See lithium-codegen-mips.cc::DoMathPowHalf.
=======================================
--- /branches/bleeding_edge/src/mips/lithium-mips.h     Thu Nov 22 02:19:05 2012
+++ /branches/bleeding_edge/src/mips/lithium-mips.h     Thu Nov 29 01:22:08 2012
@@ -131,6 +131,7 @@
   V(LoadNamedFieldPolymorphic)                  \
   V(LoadNamedGeneric)                           \
   V(MapEnumLength)                              \
+  V(MathExp)                                    \
   V(MathMinMax)                                 \
   V(ModI)                                       \
   V(MulI)                                       \
@@ -641,6 +642,30 @@
 };


+class LMathExp: public LTemplateInstruction<1, 1, 3> {
+ public:
+  LMathExp(LOperand* value,
+           LOperand* double_temp,
+           LOperand* temp1,
+           LOperand* temp2) {
+    inputs_[0] = value;
+    temps_[0] = temp1;
+    temps_[1] = temp2;
+    temps_[2] = double_temp;
+    ExternalReference::InitializeMathExpData();
+  }
+
+  LOperand* value() { return inputs_[0]; }
+  LOperand* temp1() { return temps_[0]; }
+  LOperand* temp2() { return temps_[1]; }
+  LOperand* double_temp() { return temps_[2]; }
+
+  DECLARE_CONCRETE_INSTRUCTION(MathExp, "math-exp")
+
+  virtual void PrintDataTo(StringStream* stream);
+};
+
+
 class LCmpObjectEqAndBranch: public LControlInstruction<2, 0> {
  public:
   LCmpObjectEqAndBranch(LOperand* left, LOperand* right) {
=======================================
--- /branches/bleeding_edge/src/mips/simulator-mips.cc Tue Nov 20 06:11:53 2012
+++ /branches/bleeding_edge/src/mips/simulator-mips.cc Thu Nov 29 01:22:08 2012
@@ -1014,6 +1014,13 @@
   // Zero register always holds 0.
   registers_[reg] = (reg == 0) ? 0 : value;
 }
+
+
+void Simulator::set_dw_register(int reg, const int* dbl) {
+  ASSERT((reg >= 0) && (reg < kNumSimuRegisters));
+  registers_[reg] = dbl[0];
+  registers_[reg + 1] = dbl[1];
+}


 void Simulator::set_fpu_register(int fpureg, int32_t value) {
@@ -1043,6 +1050,19 @@
   else
     return registers_[reg] + ((reg == pc) ? Instruction::kPCReadOffset : 0);
 }
+
+
+double Simulator::get_double_from_register_pair(int reg) {
+  ASSERT((reg >= 0) && (reg < kNumSimuRegisters) && ((reg % 2) == 0));
+
+  double dm_val = 0.0;
+  // Read the bits from the unsigned integer registers_[] array
+  // into the double precision floating point value and return it.
+  char buffer[2 * sizeof(registers_[0])];
+  memcpy(buffer, &registers_[reg], 2 * sizeof(registers_[0]));
+  memcpy(&dm_val, buffer, 2 * sizeof(registers_[0]));
+  return(dm_val);
+}


 int32_t Simulator::get_fpu_register(int fpureg) const {
@@ -2718,34 +2738,7 @@
 }


-int32_t Simulator::Call(byte* entry, int argument_count, ...) {
-  va_list parameters;
-  va_start(parameters, argument_count);
-  // Set up arguments.
-
-  // First four arguments passed in registers.
-  ASSERT(argument_count >= 4);
-  set_register(a0, va_arg(parameters, int32_t));
-  set_register(a1, va_arg(parameters, int32_t));
-  set_register(a2, va_arg(parameters, int32_t));
-  set_register(a3, va_arg(parameters, int32_t));
-
-  // Remaining arguments passed on stack.
-  int original_stack = get_register(sp);
-  // Compute position of stack on entry to generated code.
-  int entry_stack = (original_stack - (argument_count - 4) * sizeof(int32_t)
-                                    - kCArgsSlotsSize);
-  if (OS::ActivationFrameAlignment() != 0) {
-    entry_stack &= -OS::ActivationFrameAlignment();
-  }
-  // Store remaining arguments on stack, from low to high memory.
-  intptr_t* stack_argument = reinterpret_cast<intptr_t*>(entry_stack);
-  for (int i = 4; i < argument_count; i++) {
-    stack_argument[i - 4 + kCArgSlotCount] = va_arg(parameters, int32_t);
-  }
-  va_end(parameters);
-  set_register(sp, entry_stack);
-
+void Simulator::CallInternal(byte* entry) {
   // Prepare to execute the code at entry.
   set_register(pc, reinterpret_cast<int32_t>(entry));
   // Put down marker for end of simulation. The simulator will stop simulation
@@ -2809,6 +2802,38 @@
   set_register(gp, gp_val);
   set_register(sp, sp_val);
   set_register(fp, fp_val);
+}
+
+
+int32_t Simulator::Call(byte* entry, int argument_count, ...) {
+  va_list parameters;
+  va_start(parameters, argument_count);
+  // Set up arguments.
+
+  // First four arguments passed in registers.
+  ASSERT(argument_count >= 4);
+  set_register(a0, va_arg(parameters, int32_t));
+  set_register(a1, va_arg(parameters, int32_t));
+  set_register(a2, va_arg(parameters, int32_t));
+  set_register(a3, va_arg(parameters, int32_t));
+
+  // Remaining arguments passed on stack.
+  int original_stack = get_register(sp);
+  // Compute position of stack on entry to generated code.
+  int entry_stack = (original_stack - (argument_count - 4) * sizeof(int32_t)
+                                    - kCArgsSlotsSize);
+  if (OS::ActivationFrameAlignment() != 0) {
+    entry_stack &= -OS::ActivationFrameAlignment();
+  }
+  // Store remaining arguments on stack, from low to high memory.
+  intptr_t* stack_argument = reinterpret_cast<intptr_t*>(entry_stack);
+  for (int i = 4; i < argument_count; i++) {
+    stack_argument[i - 4 + kCArgSlotCount] = va_arg(parameters, int32_t);
+  }
+  va_end(parameters);
+  set_register(sp, entry_stack);
+
+  CallInternal(entry);

   // Pop stack passed arguments.
   CHECK_EQ(entry_stack, get_register(sp));
@@ -2817,6 +2842,27 @@
   int32_t result = get_register(v0);
   return result;
 }
+
+
+double Simulator::CallFP(byte* entry, double d0, double d1) {
+  if (!IsMipsSoftFloatABI) {
+    set_fpu_register_double(f12, d0);
+    set_fpu_register_double(f14, d1);
+  } else {
+    int buffer[2];
+    ASSERT(sizeof(buffer[0]) * 2 == sizeof(d0));
+    memcpy(buffer, &d0, sizeof(d0));
+    set_dw_register(a0, buffer);
+    memcpy(buffer, &d1, sizeof(d1));
+    set_dw_register(a2, buffer);
+  }
+  CallInternal(entry);
+  if (!IsMipsSoftFloatABI) {
+    return get_fpu_register_double(f0);
+  } else {
+    return get_double_from_register_pair(v0);
+  }
+}


 uintptr_t Simulator::PushAddress(uintptr_t address) {
=======================================
--- /branches/bleeding_edge/src/mips/simulator-mips.h Thu May 24 00:29:49 2012
+++ /branches/bleeding_edge/src/mips/simulator-mips.h Thu Nov 29 01:22:08 2012
@@ -184,7 +184,9 @@
   // architecture specification and is off by a 8 from the currently executing
   // instruction.
   void set_register(int reg, int32_t value);
+  void set_dw_register(int dreg, const int* dbl);
   int32_t get_register(int reg) const;
+  double get_double_from_register_pair(int reg);
   // Same for FPURegisters.
   void set_fpu_register(int fpureg, int32_t value);
   void set_fpu_register_float(int fpureg, float value);
@@ -214,6 +216,8 @@
   // generated RegExp code with 7 parameters. This is a convenience function,
   // which sets up the simulator state and grabs the result on return.
   int32_t Call(byte* entry, int argument_count, ...);
+  // Alternative: call a 2-argument double function.
+  double CallFP(byte* entry, double d0, double d1);

   // Push an address onto the JS stack.
   uintptr_t PushAddress(uintptr_t address);
@@ -353,6 +357,7 @@
   void GetFpArgs(double* x, int32_t* y);
   void SetFpResult(const double& result);

+  void CallInternal(byte* entry);

   // Architecture state.
   // Registers.

--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

Reply via email to