Author: [email protected]
Date: Wed Jun 10 03:20:37 2009
New Revision: 2131

Modified:
    branches/bleeding_edge/src/arm/codegen-arm.cc
    branches/bleeding_edge/src/code-stubs.h
    branches/bleeding_edge/src/codegen.h
    branches/bleeding_edge/src/d8.js
    branches/bleeding_edge/src/ia32/codegen-ia32.cc
    branches/bleeding_edge/src/objects.h

Log:
A bunch of changes to speed up math on ARM.
* Identify heap numbers that contain non-Smi int32s and do bit
ops on them without calling the fp hardware or emulation.
* Identify results that are non-Smi int32s and write them into
heap numbers without calling the fp hardware or emulation.
* Do unary minus on heap numbers without going into the runtime
system.
* On add, sub and mul if we have both Smi and heap number inputs
to the same operation then convert the Smi to a double and do
the op without going into the runtime system.  This also applies
if we have two Smi inputs but the result is not Smi.
Review URL: http://codereview.chromium.org/119241

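For context, the Smi encoding these fast paths assume (on the 32-bit
ports) is a 31 bit signed integer in a tagged word, so the int32s that
need heap numbers are exactly those outside [-2^30, 2^30 - 1].  A minimal
host-side C++ sketch of that range check (illustrative only, not part of
the patch; the helper name is made up):

  #include <cstdint>

  // True if an int32 value can be represented as a Smi on a 32-bit port.
  static bool FitsInSmi(int32_t value) {
    return value >= -(1 << 30) && value <= (1 << 30) - 1;
  }
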
Modified: branches/bleeding_edge/src/arm/codegen-arm.cc
==============================================================================
--- branches/bleeding_edge/src/arm/codegen-arm.cc       (original)
+++ branches/bleeding_edge/src/arm/codegen-arm.cc       Wed Jun 10 03:20:37 2009
@@ -703,6 +703,7 @@
    }

    void Generate(MacroAssembler* masm);
+  void HandleNonSmiBitwiseOp(MacroAssembler* masm);

    const char* GetName() {
      switch (op_) {
@@ -3566,7 +3567,10 @@
          break;

        case Token::SUB: {
-        UnarySubStub stub;
+        bool overwrite =
+            (node->AsBinaryOperation() != NULL &&
+             node->AsBinaryOperation()->ResultOverwriteAllowed());
+        UnarySubStub stub(overwrite);
          frame_->CallStub(&stub, 0);
          break;
        }
@@ -4336,6 +4340,223 @@
  }


+// Count leading zeros in a 32 bit word.  On ARM5 and later it uses the clz
+// instruction.  On pre-ARM5 hardware this routine gives the wrong answer for 0
+// (31 instead of 32).
+static void CountLeadingZeros(
+    MacroAssembler* masm,
+    Register source,
+    Register scratch,
+    Register zeros) {
+#ifdef __ARM_ARCH_5__
+  __ clz(zeros, source);  // This instruction is only supported after ARM5.
+#else
+  __ mov(zeros, Operand(0));
+  __ mov(scratch, source);
+  // Top 16.
+  __ tst(scratch, Operand(0xffff0000));
+  __ add(zeros, zeros, Operand(16), LeaveCC, eq);
+  __ mov(scratch, Operand(scratch, LSL, 16), LeaveCC, eq);
+  // Top 8.
+  __ tst(scratch, Operand(0xff000000));
+  __ add(zeros, zeros, Operand(8), LeaveCC, eq);
+  __ mov(scratch, Operand(scratch, LSL, 8), LeaveCC, eq);
+  // Top 4.
+  __ tst(scratch, Operand(0xf0000000));
+  __ add(zeros, zeros, Operand(4), LeaveCC, eq);
+  __ mov(scratch, Operand(scratch, LSL, 4), LeaveCC, eq);
+  // Top 2.
+  __ tst(scratch, Operand(0xc0000000));
+  __ add(zeros, zeros, Operand(2), LeaveCC, eq);
+  __ mov(scratch, Operand(scratch, LSL, 2), LeaveCC, eq);
+  // Top bit.
+  __ tst(scratch, Operand(0x80000000));
+  __ add(zeros, zeros, Operand(1), LeaveCC, eq);
+#endif
+}
+
+
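As an aside, the pre-ARM5 sequence above is a branch-free binary search for
the highest set bit.  The same algorithm in plain C++ (a sketch for
illustration, not part of the patch), with the same quirk of returning 31
rather than 32 for an input of 0:

  #include <cstdint>

  static int CountLeadingZeros32(uint32_t x) {
    int zeros = 0;
    if ((x & 0xffff0000u) == 0) { zeros += 16; x <<= 16; }  // Top 16.
    if ((x & 0xff000000u) == 0) { zeros += 8;  x <<= 8;  }  // Top 8.
    if ((x & 0xf0000000u) == 0) { zeros += 4;  x <<= 4;  }  // Top 4.
    if ((x & 0xc0000000u) == 0) { zeros += 2;  x <<= 2;  }  // Top 2.
    if ((x & 0x80000000u) == 0) { zeros += 1; }             // Top bit.
    return zeros;  // 31 for x == 0, matching the assembly's behaviour.
  }
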
+// Takes a Smi and converts to an IEEE 64 bit floating point value in two
+// registers.  The format is 1 sign bit, 11 exponent bits (biased 1023) and
+// 52 fraction bits (20 in the first word, 32 in the second).  Zeros is a
+// scratch register.  Destroys the source register.  No GC occurs during this
+// stub so you don't have to set up the frame.
+class ConvertToDoubleStub : public CodeStub {
+ public:
+  ConvertToDoubleStub(Register result_reg_1,
+                      Register result_reg_2,
+                      Register source_reg,
+                      Register scratch_reg)
+      : result1_(result_reg_1),
+        result2_(result_reg_2),
+        source_(source_reg),
+        zeros_(scratch_reg) { }
+
+ private:
+  Register result1_;
+  Register result2_;
+  Register source_;
+  Register zeros_;
+
+  // Minor key encoding in 16 bits.
+  class ModeBits: public BitField<OverwriteMode, 0, 2> {};
+  class OpBits: public BitField<Token::Value, 2, 14> {};
+
+  Major MajorKey() { return ConvertToDouble; }
+  int MinorKey() {
+    // Encode the parameters in a unique 16 bit value.
+    return  result1_.code() +
+           (result2_.code() << 4) +
+           (source_.code() << 8) +
+           (zeros_.code() << 12);
+  }
+
+  void Generate(MacroAssembler* masm);
+
+  const char* GetName() { return "ConvertToDoubleStub"; }
+
+#ifdef DEBUG
+  void Print() { PrintF("ConvertToDoubleStub\n"); }
+#endif
+};
+
+
+void ConvertToDoubleStub::Generate(MacroAssembler* masm) {
+  Label not_special, done;
+  // Convert from Smi to integer.
+  __ mov(source_, Operand(source_, ASR, kSmiTagSize));
+  // Move sign bit from source to destination.  This works because the sign bit
+  // in the exponent word of the double has the same position and polarity as
+  // the 2's complement sign bit in a Smi.
+  ASSERT(HeapNumber::kSignMask == 0x80000000u);
+  __ and_(result1_, source_, Operand(HeapNumber::kSignMask), SetCC);
+  // Subtract from 0 if source was negative.
+  __ rsb(source_, source_, Operand(0), LeaveCC, ne);
+  __ cmp(source_, Operand(1));
+  __ b(gt, &not_special);
+
+  // We have -1, 0 or 1, which we treat specially.
+  __ cmp(source_, Operand(0));
+  // For 1 or -1 we need to or in the 0 exponent (biased to 1023).
+  static const uint32_t exponent_word_for_1 =
+      HeapNumber::kExponentBias << HeapNumber::kExponentShift;
+  __ orr(result1_, result1_, Operand(exponent_word_for_1), LeaveCC, ne);
+  // 1, 0 and -1 all have 0 for the second word.
+  __ mov(result2_, Operand(0));
+  __ jmp(&done);
+
+  __ bind(&not_special);
+  // Count leading zeros.  Uses result2 for a scratch register on pre-ARM5.
+  // Gets the wrong answer for 0, but we already checked for that case above.
+  CountLeadingZeros(masm, source_, result2_, zeros_);
+  // Compute exponent and or it into the exponent register.
+  // We use result2 as a scratch register here.
+  __ rsb(result2_, zeros_, Operand(31 + HeapNumber::kExponentBias));
+  __ orr(result1_,
+         result1_,
+         Operand(result2_, LSL, HeapNumber::kExponentShift));
+  // Shift up the source chopping the top bit off.
+  __ add(zeros_, zeros_, Operand(1));
+  // This wouldn't work for 1.0 or -1.0 as the shift would be 32 which means 0.
+  __ mov(source_, Operand(source_, LSL, zeros_));
+  // Compute lower part of fraction (last 12 bits).
+  __ mov(result2_, Operand(source_, LSL, HeapNumber::kMantissaBitsInTopWord));
+  // And the top (top 20 bits).
+  __ orr(result1_,
+         result1_,
+         Operand(source_, LSR, 32 - HeapNumber::kMantissaBitsInTopWord));
+  __ bind(&done);
+  __ Ret();
+}
+
+
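In C++ terms, the stub computes something like the following (a host-side
sketch under the patch's layout constants; the function name is
hypothetical, the input is already untagged, and the leading-zero count is
inlined to keep the sketch self-contained):

  #include <cstdint>

  // Splits a small signed integer into the two 32 bit words of an IEEE
  // double: special-case -1, 0 and 1, otherwise normalize the magnitude
  // with a leading-zero count and chop off the implicit leading 1.
  static void SmallIntToDoubleWords(int32_t n, uint32_t* hi, uint32_t* lo) {
    const uint32_t kSignMask = 0x80000000u;
    const int kExponentBias = 1023;
    const int kExponentShift = 20;
    const int kMantissaBitsInTopWord = 20;
    uint32_t sign = (n < 0) ? kSignMask : 0;
    uint32_t mag =
        (n < 0) ? 0u - static_cast<uint32_t>(n) : static_cast<uint32_t>(n);
    if (mag <= 1) {  // -1, 0 and 1 have an all-zero mantissa.
      *hi = sign | (mag == 0 ? 0u : kExponentBias << kExponentShift);
      *lo = 0;
      return;
    }
    int zeros = 0;  // mag >= 2 here, so zeros <= 30.
    for (uint32_t probe = kSignMask; (mag & probe) == 0; probe >>= 1) zeros++;
    *hi = sign | ((31 - zeros + kExponentBias) << kExponentShift);
    mag <<= zeros + 1;  // Shift up, dropping the implicit leading 1.
    *hi |= mag >> (32 - kMantissaBitsInTopWord);
    *lo = mag << kMantissaBitsInTopWord;
  }
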
+// This stub can convert a signed int32 to a heap number (double).  It does
+// not work for int32s that are in Smi range!  No GC occurs during this stub
+// so you don't have to set up the frame.
+class WriteInt32ToHeapNumberStub : public CodeStub {
+ public:
+  WriteInt32ToHeapNumberStub(Register the_int,
+                             Register the_heap_number,
+                             Register scratch)
+      : the_int_(the_int),
+        the_heap_number_(the_heap_number),
+        scratch_(scratch) { }
+
+ private:
+  Register the_int_;
+  Register the_heap_number_;
+  Register scratch_;
+
+  // Minor key encoding in 16 bits.
+  class ModeBits: public BitField<OverwriteMode, 0, 2> {};
+  class OpBits: public BitField<Token::Value, 2, 14> {};
+
+  Major MajorKey() { return WriteInt32ToHeapNumber; }
+  int MinorKey() {
+    // Encode the parameters in a unique 16 bit value.
+    return  the_int_.code() +
+           (the_heap_number_.code() << 4) +
+           (scratch_.code() << 8);
+  }
+
+  void Generate(MacroAssembler* masm);
+
+  const char* GetName() { return "WriteInt32ToHeapNumberStub"; }
+
+#ifdef DEBUG
+  void Print() { PrintF("WriteInt32ToHeapNumberStub\n"); }
+#endif
+};
+
+
+// See comment for class.
+void WriteInt32ToHeapNumberStub::Generate(MacroAssembler *masm) {
+  Label max_negative_int;
+  // the_int_ has the answer which is a signed int32 but not a Smi.
+  // We test for the special value that has a different exponent.  This test
+  // has the neat side effect of setting the flags according to the sign.
+  ASSERT(HeapNumber::kSignMask == 0x80000000u);
+  __ cmp(the_int_, Operand(0x80000000));
+  __ b(eq, &max_negative_int);
+  // Set up the correct exponent in scratch_.  All non-Smi int32s have the same.
+  // A non-Smi integer is 1.xxx * 2^30 so the exponent is 30 (biased).
+  uint32_t non_smi_exponent =
+      (HeapNumber::kExponentBias + 30) << HeapNumber::kExponentShift;
+  __ mov(scratch_, Operand(non_smi_exponent));
+  // Set the sign bit in scratch_ if the value was negative.
+  __ orr(scratch_, scratch_, Operand(HeapNumber::kSignMask), LeaveCC, cs);
+  // Subtract from 0 if the value was negative.
+  __ rsb(the_int_, the_int_, Operand(0), LeaveCC, cs);
+  // We should be masking the implicit first digit of the mantissa away here,
+  // but it just ends up combining harmlessly with the last digit of the
+  // exponent that happens to be 1.  The sign bit is 0 so we shift 10 to get
+  // the most significant 1 to hit the last bit of the 12 bit sign and exponent.
+  ASSERT(((1 << HeapNumber::kExponentShift) & non_smi_exponent) != 0);
+  const int shift_distance = HeapNumber::kNonMantissaBitsInTopWord - 2;
+  __ orr(scratch_, scratch_, Operand(the_int_, LSR, shift_distance));
+  __ str(scratch_, FieldMemOperand(the_heap_number_,
+                                   HeapNumber::kExponentOffset));
+  __ mov(scratch_, Operand(the_int_, LSL, 32 - shift_distance));
+  __ str(scratch_, FieldMemOperand(the_heap_number_,
+                                   HeapNumber::kMantissaOffset));
+  __ Ret();
+
+  __ bind(&max_negative_int);
+  // The max negative int32 is stored as a positive number in the mantissa of
+  // a double because it uses a sign bit instead of using two's complement.
+  // The actual mantissa bits stored are all 0 because the implicit most
+  // significant 1 bit is not stored.
+  non_smi_exponent += 1 << HeapNumber::kExponentShift;
+  __ mov(ip, Operand(HeapNumber::kSignMask | non_smi_exponent));
+  __ str(ip, FieldMemOperand(the_heap_number_, HeapNumber::kExponentOffset));
+  __ mov(ip, Operand(0));
+  __ str(ip, FieldMemOperand(the_heap_number_, HeapNumber::kMantissaOffset));
+  __ Ret();
+}
+
+
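The key observation, restated in C++ (a host-side sketch, not the stub
itself; valid only for int32s outside Smi range, as the class comment
says): the magnitude is 1.xxx * 2^30, so the exponent word is a constant
and the implicit mantissa bit lands harmlessly on that constant's lowest
set bit.

  #include <cstdint>

  static void NonSmiInt32ToDoubleWords(int32_t n, uint32_t* hi, uint32_t* lo) {
    const uint32_t kSignMask = 0x80000000u;
    const int kExponentShift = 20;
    const uint32_t non_smi_exponent = (1023 + 30) << kExponentShift;
    if (static_cast<uint32_t>(n) == 0x80000000u) {
      // Most negative int32: stored mantissa is all zeros and the exponent
      // is one larger (the value is exactly -2^31).
      *hi = kSignMask | (non_smi_exponent + (1u << kExponentShift));
      *lo = 0;
      return;
    }
    uint32_t sign = (n < 0) ? kSignMask : 0;
    uint32_t mag =
        (n < 0) ? 0u - static_cast<uint32_t>(n) : static_cast<uint32_t>(n);
    const int shift_distance = 12 - 2;  // kNonMantissaBitsInTopWord - 2.
    // The magnitude's top bit (the implicit 1) ORs into the exponent's low
    // bit, which is already 1, so no masking is needed.
    *hi = sign | non_smi_exponent | (mag >> shift_distance);
    *lo = mag << (32 - shift_distance);
  }
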
+// Allocates a heap number or jumps to the label if the young space is full and
+// a scavenge is needed.
  static void AllocateHeapNumber(
      MacroAssembler* masm,
      Label* need_gc,       // Jump here if young space is full.
@@ -4372,58 +4593,121 @@
  }


+// Checks that the object register (which is assumed not to be a Smi) points to
+// a heap number.  Jumps to the label if it is not.
+void CheckForHeapNumber(MacroAssembler* masm,
+                        Register object,
+                        Register scratch,
+                        Label* slow) {
+  // Get map of object into scratch.
+  __ ldr(scratch, FieldMemOperand(object, HeapObject::kMapOffset));
+  // Get type of object into scratch.
+  __ ldrb(scratch, FieldMemOperand(scratch, Map::kInstanceTypeOffset));
+  __ cmp(scratch, Operand(HEAP_NUMBER_TYPE));
+  __ b(ne, slow);
+}
+
+
  // We fall into this code if the operands were Smis, but the result was
  // not (eg. overflow).  We branch into this code (to the not_smi label) if
-// the operands were not both Smi.
+// the operands were not both Smi.  The operands are in r0 and r1.  In order
+// to call the C-implemented binary fp operation routines we need to end up
+// with the double precision floating point operands in r0 and r1 (for the
+// value in r1) and r2 and r3 (for the value in r0).
  static void HandleBinaryOpSlowCases(MacroAssembler* masm,
                                      Label* not_smi,
                                      const Builtins::JavaScript& builtin,
                                      Token::Value operation,
                                      OverwriteMode mode) {
-  Label slow;
+  Label slow, slow_pop_2_first, do_the_call;
+  Label r0_is_smi, r1_is_smi, finished_loading_r0, finished_loading_r1;
+  // Smi-smi case (overflow).
+  // Since both are Smis there is no heap number to overwrite, so allocate.
+  // The new heap number is in r5.  r6 and r7 are scratch.
+  AllocateHeapNumber(masm, &slow, r5, r6, r7);
+  // Write Smi from r0 to r3 and r2 in double format.  r6 is scratch.
+  ConvertToDoubleStub stub1(r3, r2, r0, r6);
+  __ push(lr);
+  __ Call(stub1.GetCode(), RelocInfo::CODE_TARGET);
+  // Write Smi from r1 to r1 and r0 in double format.  r6 is scratch.
+  __ mov(r7, Operand(r1));
+  ConvertToDoubleStub stub2(r1, r0, r7, r6);
+  __ Call(stub2.GetCode(), RelocInfo::CODE_TARGET);
+  __ pop(lr);
+  __ jmp(&do_the_call);  // Tail call.  No return.
+
+  // We jump to here if something goes wrong (one param is not a number of any
+  // sort or new-space allocation fails).
    __ bind(&slow);
    __ push(r1);
    __ push(r0);
    __ mov(r0, Operand(1));  // Set number of arguments.
-  __ InvokeBuiltin(builtin, JUMP_JS);  // Tail call.
+  __ InvokeBuiltin(builtin, JUMP_JS);  // Tail call.  No return.

+  // We branch here if at least one of r0 and r1 is not a Smi.
    __ bind(not_smi);
+  if (mode == NO_OVERWRITE) {
+    // In the case where there is no chance of an overwritable float we may as
+    // well do the allocation immediately while r0 and r1 are untouched.
+    AllocateHeapNumber(masm, &slow, r5, r6, r7);
+  }
+
+  // Move r0 to a double in r2-r3.
    __ tst(r0, Operand(kSmiTagMask));
-  __ b(eq, &slow);  // We can't handle a Smi-double combination yet.
-  __ tst(r1, Operand(kSmiTagMask));
-  __ b(eq, &slow);  // We can't handle a Smi-double combination yet.
-  // Get map of r0 into r2.
-  __ ldr(r2, FieldMemOperand(r0, HeapObject::kMapOffset));
-  // Get type of r0 into r3.
-  __ ldrb(r3, FieldMemOperand(r2, Map::kInstanceTypeOffset));
-  __ cmp(r3, Operand(HEAP_NUMBER_TYPE));
-  __ b(ne, &slow);
-  // Get type of r1 into r3.
-  __ ldr(r3, FieldMemOperand(r1, HeapObject::kMapOffset));
-  // Check they are both the same map (heap number map).
-  __ cmp(r2, r3);
-  __ b(ne, &slow);
-  // Both are doubles.
+  __ b(eq, &r0_is_smi);  // It's a Smi so don't check it's a heap number.
+  CheckForHeapNumber(masm, r0, r4, &slow);
+  if (mode == OVERWRITE_RIGHT) {
+    __ mov(r5, Operand(r0));  // Overwrite this heap number.
+  }
    // Calling convention says that second double is in r2 and r3.
-  __ ldr(r2, FieldMemOperand(r0, HeapNumber::kValueOffset));
-  __ ldr(r3, FieldMemOperand(r0, HeapNumber::kValueOffset + kPointerSize));
-
-  if (mode == NO_OVERWRITE) {
-    // Get address of new heap number into r5.
+  __ ldr(r2, FieldMemOperand(r0, HeapNumber::kMantissaOffset));
+  __ ldr(r3, FieldMemOperand(r0, HeapNumber::kExponentOffset));
+  __ jmp(&finished_loading_r0);
+  __ bind(&r0_is_smi);
+  if (mode == OVERWRITE_RIGHT) {
+    // We can't overwrite a Smi so get address of new heap number into r5.
      AllocateHeapNumber(masm, &slow, r5, r6, r7);
-    __ push(lr);
-    __ push(r5);
-  } else if (mode == OVERWRITE_LEFT) {
-    __ push(lr);
-    __ push(r1);
-  } else {
-    ASSERT(mode == OVERWRITE_RIGHT);
-    __ push(lr);
-    __ push(r0);
+  }
+  // Write Smi from r0 to r3 and r2 in double format.
+  __ mov(r7, Operand(r0));
+  ConvertToDoubleStub stub3(r3, r2, r7, r6);
+  __ push(lr);
+  __ Call(stub3.GetCode(), RelocInfo::CODE_TARGET);
+  __ pop(lr);
+  __ bind(&finished_loading_r0);
+
+  // Move r1 to a double in r0-r1.
+  __ tst(r1, Operand(kSmiTagMask));
+  __ b(eq, &r1_is_smi);  // It's a Smi so don't check it's a heap number.
+  CheckForHeapNumber(masm, r1, r4, &slow);
+  if (mode == OVERWRITE_LEFT) {
+    __ mov(r5, Operand(r1));  // Overwrite this heap number.
    }
    // Calling convention says that first double is in r0 and r1.
-  __ ldr(r0, FieldMemOperand(r1, HeapNumber::kValueOffset));
-  __ ldr(r1, FieldMemOperand(r1, HeapNumber::kValueOffset + kPointerSize));
+  __ ldr(r0, FieldMemOperand(r1, HeapNumber::kMantissaOffset));
+  __ ldr(r1, FieldMemOperand(r1, HeapNumber::kExponentOffset));
+  __ jmp(&finished_loading_r1);
+  __ bind(&r1_is_smi);
+  if (mode == OVERWRITE_LEFT) {
+    // We can't overwrite a Smi so get address of new heap number into r5.
+    AllocateHeapNumber(masm, &slow, r5, r6, r7);
+  }
+  // Write Smi from r1 to r1 and r0 in double format.
+  __ mov(r7, Operand(r1));
+  ConvertToDoubleStub stub4(r1, r0, r7, r6);
+  __ push(lr);
+  __ Call(stub4.GetCode(), RelocInfo::CODE_TARGET);
+  __ pop(lr);
+  __ bind(&finished_loading_r1);
+
+  __ bind(&do_the_call);
+  // r0: Left value (least significant part of mantissa).
+  // r1: Left value (sign, exponent, top of mantissa).
+  // r2: Right value (least significant part of mantissa).
+  // r3: Right value (sign, exponent, top of mantissa).
+  // r5: Address of heap number for result.
+  __ push(lr);   // For later.
+  __ push(r5);   // Address of heap number that is answer.
    // Call C routine that may not cause GC or other trouble.
    __ mov(r5, Operand(ExternalReference::double_fp_operation(operation)));
    __ Call(r5);
@@ -4437,8 +4721,8 @@
    __ stc(p1, cr8, MemOperand(r5, HeapNumber::kValueOffset));
  #else
    // Double returned in registers 0 and 1.
-  __ str(r0, FieldMemOperand(r4, HeapNumber::kValueOffset));
-  __ str(r1, FieldMemOperand(r4, HeapNumber::kValueOffset + kPointerSize));
+  __ str(r0, FieldMemOperand(r4, HeapNumber::kMantissaOffset));
+  __ str(r1, FieldMemOperand(r4, HeapNumber::kExponentOffset));
  #endif
    __ mov(r0, Operand(r4));
    // And we are done.
@@ -4446,6 +4730,183 @@
  }


+// Tries to get a signed int32 out of a double precision floating point heap
+// number.  Rounds towards 0.  Only succeeds for doubles that are in the ranges
+// -0x7fffffff to -0x40000000 or 0x40000000 to 0x7fffffff.  This corresponds
+// almost to the range of signed int32 values that are not Smis.  Jumps to the
+// label if the double isn't in the range it can cope with.
+static void GetInt32(MacroAssembler* masm,
+                     Register source,
+                     Register dest,
+                     Register scratch,
+                     Label* slow) {
+  Register scratch2 = dest;
+  // Get exponent word.
+  __ ldr(scratch, FieldMemOperand(source, HeapNumber::kExponentOffset));
+  // Get exponent alone in scratch2.
+  __ and_(scratch2, scratch, Operand(HeapNumber::kExponentMask));
+  // Check whether the exponent matches a 32 bit signed int that is not a Smi.
+  // A non-Smi integer is 1.xxx * 2^30 so the exponent is 30 (biased).
+  const uint32_t non_smi_exponent =
+      (HeapNumber::kExponentBias + 30) << HeapNumber::kExponentShift;
+  __ cmp(scratch2, Operand(non_smi_exponent));
+  // If not, then we go slow.
+  __ b(ne, slow);
+  // Get the top bits of the mantissa.
+  __ and_(scratch2, scratch, Operand(HeapNumber::kMantissaMask));
+  // Put back the implicit 1.
+  __ orr(scratch2, scratch2, Operand(1 << HeapNumber::kExponentShift));
+  // Shift up the mantissa bits to take up the space the exponent used to take.
+  // We just orred in the implicit bit so that took care of one and we want to
+  // leave the sign bit 0 so we subtract 2 bits from the shift distance.
+  const int shift_distance = HeapNumber::kNonMantissaBitsInTopWord - 2;
+  __ mov(scratch2, Operand(scratch2, LSL, shift_distance));
+  // Put sign in zero flag.
+  __ tst(scratch, Operand(HeapNumber::kSignMask));
+  // Get the second half of the double.
+  __ ldr(scratch, FieldMemOperand(source, HeapNumber::kMantissaOffset));
+  // Shift down 22 bits to get the last 10 bits.
+  __ orr(dest, scratch2, Operand(scratch, LSR, 32 - shift_distance));
+  // Fix sign if sign bit was set.
+  __ rsb(dest, dest, Operand(0), LeaveCC, ne);
+}
+
+
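The reverse direction of the conversion stubs above, as a host-side C++
sketch (hypothetical helper, not the generated code): accept only doubles
whose biased exponent is exactly 1023 + 30, i.e. magnitudes in
[2^30, 2^31), and rebuild the int32 by undoing the shift.

  #include <cstdint>

  static bool DoubleWordsToInt32(uint32_t hi, uint32_t lo, int32_t* result) {
    const uint32_t kExponentMask = 0x7ff00000u;
    const uint32_t kMantissaMask = 0x000fffffu;
    const int kExponentShift = 20;
    if ((hi & kExponentMask) != ((1023u + 30u) << kExponentShift)) {
      return false;  // Outside the non-Smi int32 bucket: take the slow path.
    }
    // Put back the implicit 1 on top of the stored mantissa bits.
    uint32_t mantissa = (hi & kMantissaMask) | (1u << kExponentShift);
    const int shift_distance = 12 - 2;  // kNonMantissaBitsInTopWord - 2.
    // The low 22 bits of lo hold the fraction; dropping them truncates
    // towards zero, as the ES spec requires.
    uint32_t magnitude =
        (mantissa << shift_distance) | (lo >> (32 - shift_distance));
    *result = (hi & 0x80000000u) ? -static_cast<int32_t>(magnitude)
                                 : static_cast<int32_t>(magnitude);
    return true;
  }
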
+// For bitwise ops where the inputs are not both Smis we here try to determine
+// whether both inputs are either Smis or at least heap numbers that can be
+// represented by a 32 bit signed value.  We truncate towards zero as required
+// by the ES spec.  If this is the case we do the bitwise op and see if the
+// result is a Smi.  If so, great, otherwise we try to find a heap number to
+// write the answer into (either by allocating or by overwriting).
+// On entry the operands are in r0 and r1.  On exit the answer is in r0.
+void GenericBinaryOpStub::HandleNonSmiBitwiseOp(MacroAssembler* masm) {
+  Label slow, result_not_a_smi;
+  Label r0_is_smi, r1_is_smi;
+  Label done_checking_r0, done_checking_r1;
+
+  __ tst(r1, Operand(kSmiTagMask));
+  __ b(eq, &r1_is_smi);  // It's a Smi so don't check it's a heap number.
+  CheckForHeapNumber(masm, r1, r4, &slow);
+  GetInt32(masm, r1, r3, r4, &slow);
+  __ jmp(&done_checking_r1);
+  __ bind(&r1_is_smi);
+  __ mov(r3, Operand(r1, ASR, 1));
+  __ bind(&done_checking_r1);
+
+  __ tst(r0, Operand(kSmiTagMask));
+  __ b(eq, &r0_is_smi);  // It's a Smi so don't check it's a heap number.
+  CheckForHeapNumber(masm, r0, r4, &slow);
+  GetInt32(masm, r0, r2, r4, &slow);
+  __ jmp(&done_checking_r0);
+  __ bind(&r0_is_smi);
+  __ mov(r2, Operand(r0, ASR, 1));
+  __ bind(&done_checking_r0);
+
+  // r0 and r1: Original operands (Smi or heap numbers).
+  // r2 and r3: Signed int32 operands.
+  switch (op_) {
+    case Token::BIT_OR:  __ orr(r2, r2, Operand(r3)); break;
+    case Token::BIT_XOR: __ eor(r2, r2, Operand(r3)); break;
+    case Token::BIT_AND: __ and_(r2, r2, Operand(r3)); break;
+    case Token::SAR:
+      // Use only the 5 least significant bits of the shift count.
+      __ and_(r2, r2, Operand(0x1f));
+      __ mov(r2, Operand(r3, ASR, r2));
+      break;
+    case Token::SHR:
+      // Use only the 5 least significant bits of the shift count.
+      __ and_(r2, r2, Operand(0x1f));
+      __ mov(r2, Operand(r3, LSR, r2), SetCC);
+      // SHR is special because it is required to produce a positive answer.
+      // The code below for writing into heap numbers isn't capable of writing
+      // the register as an unsigned int so we go to slow case if we hit this
+      // case.
+      __ b(mi, &slow);
+      break;
+    case Token::SHL:
+      // Use only the 5 least significant bits of the shift count.
+      __ and_(r2, r2, Operand(0x1f));
+      __ mov(r2, Operand(r3, LSL, r2));
+      break;
+    default: UNREACHABLE();
+  }
+  // Check that the signed result fits in a Smi.
+  __ add(r3, r2, Operand(0x40000000), SetCC);
+  __ b(mi, &result_not_a_smi);
+  __ mov(r0, Operand(r2, LSL, kSmiTagSize));
+  __ Ret();
+
+  Label have_to_allocate, got_a_heap_number;
+  __ bind(&result_not_a_smi);
+  switch (mode_) {
+    case OVERWRITE_RIGHT: {
+      __ tst(r0, Operand(kSmiTagMask));
+      __ b(eq, &have_to_allocate);
+      __ mov(r5, Operand(r0));
+      break;
+    }
+    case OVERWRITE_LEFT: {
+      __ tst(r1, Operand(kSmiTagMask));
+      __ b(eq, &have_to_allocate);
+      __ mov(r5, Operand(r1));
+      break;
+    }
+    case NO_OVERWRITE: {
+      // Get a new heap number in r5.  r6 and r7 are scratch.
+      AllocateHeapNumber(masm, &slow, r5, r6, r7);
+    }
+    default: break;
+  }
+  __ bind(&got_a_heap_number);
+  // r2: Answer as signed int32.
+  // r5: Heap number to write answer into.
+
+  // Nothing can go wrong now, so move the heap number to r0, which is the
+  // result.
+  __ mov(r0, Operand(r5));
+
+  // Tail call that writes the int32 in r2 to the heap number in r0, using
+  // r3 as scratch.  r0 is preserved and returned.
+  WriteInt32ToHeapNumberStub stub(r2, r0, r3);
+  __ Jump(stub.GetCode(), RelocInfo::CODE_TARGET);
+
+  if (mode_ != NO_OVERWRITE) {
+    __ bind(&have_to_allocate);
+    // Get a new heap number in r5.  r6 and r7 are scratch.
+    AllocateHeapNumber(masm, &slow, r5, r6, r7);
+    __ jmp(&got_a_heap_number);
+  }
+
+  // If all else failed then we go to the runtime system.
+  __ bind(&slow);
+  __ push(r1);  // restore stack
+  __ push(r0);
+  __ mov(r0, Operand(1));  // 1 argument (not counting receiver).
+  switch (op_) {
+    case Token::BIT_OR:
+      __ InvokeBuiltin(Builtins::BIT_OR, JUMP_JS);
+      break;
+    case Token::BIT_AND:
+      __ InvokeBuiltin(Builtins::BIT_AND, JUMP_JS);
+      break;
+    case Token::BIT_XOR:
+      __ InvokeBuiltin(Builtins::BIT_XOR, JUMP_JS);
+      break;
+    case Token::SAR:
+      __ InvokeBuiltin(Builtins::SAR, JUMP_JS);
+      break;
+    case Token::SHR:
+      __ InvokeBuiltin(Builtins::SHR, JUMP_JS);
+      break;
+    case Token::SHL:
+      __ InvokeBuiltin(Builtins::SHL, JUMP_JS);
+      break;
+    default:
+      UNREACHABLE();
+  }
+}
+
+
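A one-liner illustration of the SHR corner case (plain C++, not part of
the patch): JavaScript's >>> is unsigned, so a result can exceed INT32_MAX
even though the write-back path only handles signed int32s.

  #include <cstdint>
  #include <cstdio>

  int main() {
    int32_t x = -1;
    uint32_t shifted = static_cast<uint32_t>(x) >> 0;  // JS: x >>> 0
    // 4294967295 does not fit in a signed int32, so the generated code
    // would take the slow case (the 'mi' branch) here.
    std::printf("%u fits int32: %d\n", shifted, shifted <= 0x7fffffffu);
    return 0;
  }
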
  void GenericBinaryOpStub::Generate(MacroAssembler* masm) {
    // r1 : x
    // r0 : y
@@ -4518,13 +4979,16 @@
                                &not_smi,
                                Builtins::MUL,
                                Token::MUL,
-                              mode_);
+                                mode_);
        break;
      }

      case Token::BIT_OR:
      case Token::BIT_AND:
-    case Token::BIT_XOR: {
+    case Token::BIT_XOR:
+    case Token::SAR:
+    case Token::SHR:
+    case Token::SHL: {
        Label slow;
        ASSERT(kSmiTag == 0);  // adjust code below
        __ tst(r2, Operand(kSmiTagMask));
@@ -4533,84 +4997,47 @@
          case Token::BIT_OR:  __ orr(r0, r0, Operand(r1)); break;
          case Token::BIT_AND: __ and_(r0, r0, Operand(r1)); break;
          case Token::BIT_XOR: __ eor(r0, r0, Operand(r1)); break;
-        default: UNREACHABLE();
-      }
-      __ Ret();
-      __ bind(&slow);
-      __ push(r1);  // restore stack
-      __ push(r0);
-      __ mov(r0, Operand(1));  // 1 argument (not counting receiver).
-      switch (op_) {
-        case Token::BIT_OR:
-          __ InvokeBuiltin(Builtins::BIT_OR, JUMP_JS);
-          break;
-        case Token::BIT_AND:
-          __ InvokeBuiltin(Builtins::BIT_AND, JUMP_JS);
-          break;
-        case Token::BIT_XOR:
-          __ InvokeBuiltin(Builtins::BIT_XOR, JUMP_JS);
-          break;
-        default:
-          UNREACHABLE();
-      }
-      break;
-    }
-
-    case Token::SHL:
-    case Token::SHR:
-    case Token::SAR: {
-      Label slow;
-      ASSERT(kSmiTag == 0);  // adjust code below
-      __ tst(r2, Operand(kSmiTagMask));
-      __ b(ne, &slow);
-      // remove tags from operands (but keep sign)
-      __ mov(r3, Operand(r1, ASR, kSmiTagSize));  // x
-      __ mov(r2, Operand(r0, ASR, kSmiTagSize));  // y
-      // use only the 5 least significant bits of the shift count
-      __ and_(r2, r2, Operand(0x1f));
-      // perform operation
-      switch (op_) {
          case Token::SAR:
-          __ mov(r3, Operand(r3, ASR, r2));
-          // no checks of result necessary
+          // Remove tags from right operand.
+          __ mov(r2, Operand(r0, ASR, kSmiTagSize));  // y
+          // Use only the 5 least significant bits of the shift count.
+          __ and_(r2, r2, Operand(0x1f));
+          __ mov(r0, Operand(r1, ASR, r2));
+          // Smi tag result.
+          __ and_(r0, r0, Operand(~kSmiTagMask));
            break;
-
          case Token::SHR:
+          // Remove tags from operands.  We can't do this on a 31 bit number
+          // because then the 0s get shifted into bit 30 instead of bit 31.
+          __ mov(r3, Operand(r1, ASR, kSmiTagSize));  // x
+          __ mov(r2, Operand(r0, ASR, kSmiTagSize));  // y
+          // Use only the 5 least significant bits of the shift count.
+          __ and_(r2, r2, Operand(0x1f));
            __ mov(r3, Operand(r3, LSR, r2));
-          // check that the *unsigned* result fits in a smi
-          // neither of the two high-order bits can be set:
-          // - 0x80000000: high bit would be lost when smi tagging
-          // - 0x40000000: this number would convert to negative when
-          // smi tagging these two cases can only happen with shifts
-          // by 0 or 1 when handed a valid smi
-          __ and_(r2, r3, Operand(0xc0000000), SetCC);
+          // Unsigned shift is not allowed to produce a negative number, so
+          // check the sign bit and the sign bit after Smi tagging.
+          __ tst(r3, Operand(0xc0000000));
            __ b(ne, &slow);
+          // Smi tag result.
+          __ mov(r0, Operand(r3, LSL, kSmiTagSize));
            break;
-
          case Token::SHL:
+          // Remove tags from operands.
+          __ mov(r3, Operand(r1, ASR, kSmiTagSize));  // x
+          __ mov(r2, Operand(r0, ASR, kSmiTagSize));  // y
+          // Use only the 5 least significant bits of the shift count.
+          __ and_(r2, r2, Operand(0x1f));
            __ mov(r3, Operand(r3, LSL, r2));
-          // check that the *signed* result fits in a smi
+          // Check that the signed result fits in a Smi.
            __ add(r2, r3, Operand(0x40000000), SetCC);
            __ b(mi, &slow);
+          __ mov(r0, Operand(r3, LSL, kSmiTagSize));
            break;
-
          default: UNREACHABLE();
        }
-      // tag result and store it in r0
-      ASSERT(kSmiTag == 0);  // adjust code below
-      __ mov(r0, Operand(r3, LSL, kSmiTagSize));
        __ Ret();
-      // slow case
        __ bind(&slow);
-      __ push(r1);  // restore stack
-      __ push(r0);
-      __ mov(r0, Operand(1));  // 1 argument (not counting receiver).
-      switch (op_) {
-        case Token::SAR: __ InvokeBuiltin(Builtins::SAR, JUMP_JS); break;
-        case Token::SHR: __ InvokeBuiltin(Builtins::SHR, JUMP_JS); break;
-        case Token::SHL: __ InvokeBuiltin(Builtins::SHL, JUMP_JS); break;
-        default: UNREACHABLE();
-      }
+      HandleNonSmiBitwiseOp(masm);
        break;
      }

@@ -4642,10 +5069,11 @@
    Label undo;
    Label slow;
    Label done;
+  Label not_smi;

    // Enter runtime system if the value is not a smi.
    __ tst(r0, Operand(kSmiTagMask));
-  __ b(ne, &slow);
+  __ b(ne, &not_smi);

    // Enter runtime system if the value of the expression is zero
    // to make sure that we switch between 0 and -0.
@@ -4657,18 +5085,34 @@
    __ rsb(r1, r0, Operand(0), SetCC);
    __ b(vs, &slow);

-  // If result is a smi we are done.
-  __ tst(r1, Operand(kSmiTagMask));
-  __ mov(r0, Operand(r1), LeaveCC, eq);  // conditionally set r0 to result
-  __ b(eq, &done);
+  __ mov(r0, Operand(r1));  // Set r0 to result.
+  __ StubReturn(1);

    // Enter runtime system.
    __ bind(&slow);
    __ push(r0);
-  __ mov(r0, Operand(0));  // set number of arguments
+  __ mov(r0, Operand(0));  // Set number of arguments.
    __ InvokeBuiltin(Builtins::UNARY_MINUS, JUMP_JS);

    __ bind(&done);
+  __ StubReturn(1);
+
+  __ bind(&not_smi);
+  CheckForHeapNumber(masm, r0, r1, &slow);
+  // r0 is a heap number.  Get a new heap number in r1.
+  if (overwrite_) {
+    __ ldr(r2, FieldMemOperand(r0, HeapNumber::kExponentOffset));
+    __ eor(r2, r2, Operand(HeapNumber::kSignMask));  // Flip sign.
+    __ str(r2, FieldMemOperand(r0, HeapNumber::kExponentOffset));
+  } else {
+    AllocateHeapNumber(masm, &slow, r1, r2, r3);
+    __ ldr(r2, FieldMemOperand(r0, HeapNumber::kMantissaOffset));
+    __ str(r2, FieldMemOperand(r1, HeapNumber::kMantissaOffset));
+    __ ldr(r2, FieldMemOperand(r0, HeapNumber::kExponentOffset));
+    __ eor(r2, r2, Operand(HeapNumber::kSignMask));  // Flip sign.
+    __ str(r2, FieldMemOperand(r1, HeapNumber::kExponentOffset));
+    __ mov(r0, Operand(r1));
+  }
    __ StubReturn(1);
  }


Modified: branches/bleeding_edge/src/code-stubs.h
==============================================================================
--- branches/bleeding_edge/src/code-stubs.h     (original)
+++ branches/bleeding_edge/src/code-stubs.h     Wed Jun 10 03:20:37 2009
@@ -41,6 +41,8 @@
      SmiOp,
      Compare,
      RecordWrite,  // Last stub that allows stub calls inside.
+    ConvertToDouble,
+    WriteInt32ToHeapNumber,
      StackCheck,
      UnarySub,
      RevertToNumber,

Modified: branches/bleeding_edge/src/codegen.h
==============================================================================
--- branches/bleeding_edge/src/codegen.h        (original)
+++ branches/bleeding_edge/src/codegen.h        Wed Jun 10 03:20:37 2009
@@ -230,11 +230,13 @@

  class UnarySubStub : public CodeStub {
   public:
-  UnarySubStub() { }
+  explicit UnarySubStub(bool overwrite)
+      : overwrite_(overwrite) { }

   private:
+  bool overwrite_;
    Major MajorKey() { return UnarySub; }
-  int MinorKey() { return 0; }
+  int MinorKey() { return overwrite_ ? 1 : 0; }
    void Generate(MacroAssembler* masm);

    const char* GetName() { return "UnarySubStub"; }

Modified: branches/bleeding_edge/src/d8.js
==============================================================================
--- branches/bleeding_edge/src/d8.js    (original)
+++ branches/bleeding_edge/src/d8.js    Wed Jun 10 03:20:37 2009
@@ -25,8 +25,6 @@
  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-// How crappy is it that I have to implement completely basic stuff
-// like this myself?  Answer: very.
  String.prototype.startsWith = function (str) {
    if (str.length > this.length)
      return false;

Modified: branches/bleeding_edge/src/ia32/codegen-ia32.cc
==============================================================================
--- branches/bleeding_edge/src/ia32/codegen-ia32.cc     (original)
+++ branches/bleeding_edge/src/ia32/codegen-ia32.cc     Wed Jun 10 03:20:37 2009
@@ -5057,7 +5057,10 @@
          break;

        case Token::SUB: {
-        UnarySubStub stub;
+        bool overwrite =
+            (node->AsBinaryOperation() != NULL &&
+             node->AsBinaryOperation()->ResultOverwriteAllowed());
+        UnarySubStub stub(overwrite);
          // TODO(1222589): remove dependency of TOS being cached inside stub
          Result operand = frame_->Pop();
          Result answer = frame_->CallStub(&stub, &operand);
@@ -6594,13 +6597,21 @@
    __ mov(edx, FieldOperand(eax, HeapObject::kMapOffset));
    __ cmp(edx, Factory::heap_number_map());
    __ j(not_equal, &slow);
-  __ mov(edx, Operand(eax));
-  // edx: operand
-  FloatingPointHelper::AllocateHeapNumber(masm, &undo, ebx, ecx);
-  // eax: allocated 'empty' number
-  __ fld_d(FieldOperand(edx, HeapNumber::kValueOffset));
-  __ fchs();
-  __ fstp_d(FieldOperand(eax, HeapNumber::kValueOffset));
+  if (overwrite_) {
+    __ mov(edx, FieldOperand(eax, HeapNumber::kExponentOffset));
+    __ xor_(edx, HeapNumber::kSignMask);  // Flip sign.
+    __ mov(FieldOperand(eax, HeapNumber::kExponentOffset), edx);
+  } else {
+    __ mov(edx, Operand(eax));
+    // edx: operand
+    FloatingPointHelper::AllocateHeapNumber(masm, &undo, ebx, ecx);
+    // eax: allocated 'empty' number
+    __ mov(ecx, FieldOperand(edx, HeapNumber::kExponentOffset));
+    __ xor_(ecx, HeapNumber::kSignMask);  // Flip sign.
+    __ mov(FieldOperand(eax, HeapNumber::kExponentOffset), ecx);
+    __ mov(ecx, FieldOperand(edx, HeapNumber::kMantissaOffset));
+    __ mov(FieldOperand(eax, HeapNumber::kMantissaOffset), ecx);
+  }

    __ bind(&done);

@@ -6744,7 +6755,7 @@
      // The representation of NaN values has all exponent bits (52..62) set,
        // and not all mantissa bits (0..51) clear.
        // Read top bits of double representation (second word of value).
-      __ mov(eax, FieldOperand(edx, HeapNumber::kValueOffset + kPointerSize));
+      __ mov(eax, FieldOperand(edx, HeapNumber::kExponentOffset));
        // Test that exponent bits are all set.
        __ not_(eax);
        __ test(eax, Immediate(0x7ff00000));
@@ -6754,7 +6765,7 @@
        // Shift out flag and all exponent bits, retaining only mantissa.
        __ shl(eax, 12);
        // Or with all low-bits of mantissa.
-      __ or_(eax, FieldOperand(edx, HeapNumber::kValueOffset));
+      __ or_(eax, FieldOperand(edx, HeapNumber::kMantissaOffset));
      // Return zero equal if all bits in mantissa is zero (it's an Infinity)
        // and non-zero if not (it's a NaN).
        __ ret(0);

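Both ports now negate a heap number by flipping one bit instead of going
through the FPU.  The same idea as host-side C++ (a sketch, assuming a
little endian double layout as on these targets):

  #include <cstdint>
  #include <cstring>

  static double FlipSign(double d) {
    uint32_t words[2];
    std::memcpy(words, &d, sizeof(d));
    words[1] ^= 0x80000000u;  // XOR kSignMask into the exponent word.
    std::memcpy(&d, words, sizeof(d));
    return d;
  }
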
Modified: branches/bleeding_edge/src/objects.h
==============================================================================
--- branches/bleeding_edge/src/objects.h        (original)
+++ branches/bleeding_edge/src/objects.h        Wed Jun 10 03:20:37 2009
@@ -1162,7 +1162,20 @@

    // Layout description.
    static const int kValueOffset = HeapObject::kHeaderSize;
+  // IEEE doubles are two 32 bit words.  The first is just mantissa, the second
+  // is a mixture of sign, exponent and mantissa.  This is the ordering on a
+  // little endian machine with little endian double word ordering.
+  static const int kMantissaOffset = kValueOffset;
+  static const int kExponentOffset = kValueOffset + 4;
    static const int kSize = kValueOffset + kDoubleSize;
+
+  static const uint32_t kSignMask = 0x80000000u;
+  static const uint32_t kExponentMask = 0x7ff00000u;
+  static const uint32_t kMantissaMask = 0xfffffu;
+  static const int kExponentBias = 1023;
+  static const int kExponentShift = 20;
+  static const int kMantissaBitsInTopWord = 20;
+  static const int kNonMantissaBitsInTopWord = 12;

   private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(HeapNumber);

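A quick host-side check of these offsets (a sketch assuming a little
endian build, per the comment above):

  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  int main() {
    double d = -2.0;  // Sign bit set, biased exponent 1024, mantissa 0.
    uint32_t words[2];
    std::memcpy(words, &d, sizeof(d));
    std::printf("mantissa word: 0x%08x\n", words[0]);  // 0x00000000
    std::printf("exponent word: 0x%08x\n", words[1]);  // 0xc0000000
    return 0;
  }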