[v8-dev] [v8] r12646 committed - Support for SDIV and MLS ARM instructions, and implement DoModI using ...

codesite-noreply Mon, 01 Oct 2012 14:28:03 -0700

Revision: 12646
Author:   [email protected]
Date:     Mon Oct  1 14:27:33 2012

Log: Support for SDIV and MLS ARM instructions, and implement DoModI using them. Also added support for the runtime detection to check if hardware supports SDIV/UDIV. Other new opportunities to exploit SDIV/UDIV will be done in separate issues.


Review URL: https://chromiumcodereview.appspot.com/10977051
Patch from Subrato K De <[email protected]>.
http://code.google.com/p/v8/source/detail?r=12646

Modified:
 /branches/bleeding_edge/src/arm/assembler-arm.cc
 /branches/bleeding_edge/src/arm/assembler-arm.h
 /branches/bleeding_edge/src/arm/disasm-arm.cc
 /branches/bleeding_edge/src/arm/lithium-codegen-arm.cc
 /branches/bleeding_edge/src/arm/simulator-arm.cc
 /branches/bleeding_edge/src/flag-definitions.h
 /branches/bleeding_edge/src/platform-linux.cc
 /branches/bleeding_edge/src/v8globals.h

=======================================

--- /branches/bleeding_edge/src/arm/assembler-arm.cc     Tue Sep 25 07:32:07 2012
+++ /branches/bleeding_edge/src/arm/assembler-arm.cc     Mon Oct  1 14:27:33 2012

@@ -110,6 +110,10 @@
   if (FLAG_enable_armv7) {
     supported_ |= 1u << ARMv7;
   }
+
+  if (FLAG_enable_sudiv) {
+    supported_ |= 1u << SUDIV;
+  }
 #else  // __arm__
   // Probe for additional features not already known to be available.
   if (!IsSupported(VFP3) && OS::ArmCpuHasFeature(VFP3)) {
@@ -124,6 +128,10 @@
   if (!IsSupported(ARMv7) && OS::ArmCpuHasFeature(ARMv7)) {
     found_by_runtime_probing_ |= 1u << ARMv7;
   }
+
+  if (!IsSupported(SUDIV) && OS::ArmCpuHasFeature(SUDIV)) {
+    found_by_runtime_probing_ |= 1u << SUDIV;
+  }

   supported_ |= found_by_runtime_probing_;
 #endif
@@ -1205,6 +1213,22 @@
   emit(cond | A | s | dst.code()*B16 | srcA.code()*B12 |
        src2.code()*B8 | B7 | B4 | src1.code());
 }
+
+

+void Assembler::mls(Register dst, Register src1, Register src2, Register srcA,
+                    Condition cond) {
+  ASSERT(!dst.is(pc) && !src1.is(pc) && !src2.is(pc) && !srcA.is(pc));
+  emit(cond | B22 | B21 | dst.code()*B16 | srcA.code()*B12 |
+       src2.code()*B8 | B7 | B4 | src1.code());
+}
+
+
+void Assembler::sdiv(Register dst, Register src1, Register src2,
+                     Condition cond) {
+  ASSERT(!dst.is(pc) && !src1.is(pc) && !src2.is(pc));
+  emit(cond | B26 | B25| B24 | B20 | dst.code()*B16 | 0xf * B12 |
+       src2.code()*B8 | B4 | src1.code());
+}


 void Assembler::mul(Register dst, Register src1, Register src2,
=======================================
--- /branches/bleeding_edge/src/arm/assembler-arm.h     Tue Sep 25 07:32:07 2012
+++ /branches/bleeding_edge/src/arm/assembler-arm.h     Mon Oct  1 14:27:33 2012
@@ -511,6 +511,7 @@
     ASSERT(initialized_);
     if (f == VFP3 && !FLAG_enable_vfp3) return false;
     if (f == VFP2 && !FLAG_enable_vfp2) return false;
+    if (f == SUDIV && !FLAG_enable_sudiv) return false;
     return (supported_ & (1u << f)) != 0;
   }

@@ -869,6 +870,12 @@
   void mla(Register dst, Register src1, Register src2, Register srcA,
            SBit s = LeaveCC, Condition cond = al);

+  void mls(Register dst, Register src1, Register src2, Register srcA,
+           Condition cond = al);
+
+  void sdiv(Register dst, Register src1, Register src2,
+            Condition cond = al);
+
   void mul(Register dst, Register src1, Register src2,
            SBit s = LeaveCC, Condition cond = al);

=======================================
--- /branches/bleeding_edge/src/arm/disasm-arm.cc       Wed Jan 25 04:43:32 2012
+++ /branches/bleeding_edge/src/arm/disasm-arm.cc       Mon Oct  1 14:27:33 2012
@@ -692,11 +692,19 @@
             // Rn field to encode it.
             Format(instr, "mul'cond's 'rn, 'rm, 'rs");
           } else {

-            // The MLA instruction description (A 4.1.28) refers to the order
-            // of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the
-            // Rn field to encode the Rd register and the Rd field to encode

-            // the Rn register.
-            Format(instr, "mla'cond's 'rn, 'rm, 'rs, 'rd");
+            if (instr->Bit(22) == 0) {

+              // The MLA instruction description (A 4.1.28) refers to the order
+              // of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the
+              // Rn field to encode the Rd register and the Rd field to encode

+              // the Rn register.
+              Format(instr, "mla'cond's 'rn, 'rm, 'rs, 'rd");
+            } else {

+              // The MLS instruction description (A 4.1.29) refers to the order
+              // of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the
+              // Rn field to encode the Rd register and the Rd field to encode

+              // the Rn register.
+              Format(instr, "mls'cond's 'rn, 'rm, 'rs, 'rd");
+            }
           }
         } else {

           // The signed/long multiply instructions use the terms RdHi and RdLo

@@ -974,6 +982,17 @@
       break;
     }
     case db_x: {
+      if (FLAG_enable_sudiv) {
+        if (!instr->HasW()) {
+          if (instr->Bits(5, 4) == 0x1) {
+            if ((instr->Bit(22) == 0x0) && (instr->Bit(20) == 0x1)) {
+              // SDIV (in V8 notation matching ARM ISA format) rn = rm/rs
+              Format(instr, "sdiv'cond'b 'rn, 'rm, 'rs");
+              break;
+            }
+          }
+        }
+      }
       Format(instr, "'memop'cond'b 'rd, ['rn, -'shift_rm]'w");
       break;
     }
=======================================

--- /branches/bleeding_edge/src/arm/lithium-codegen-arm.cc     Tue Sep 25 07:32:07 2012
+++ /branches/bleeding_edge/src/arm/lithium-codegen-arm.cc     Mon Oct  1 14:27:33 2012

@@ -979,109 +979,132 @@
   Register left = ToRegister(instr->left());
   Register right = ToRegister(instr->right());
   Register result = ToRegister(instr->result());
+  Label done;

-  Register scratch = scratch0();
-  Register scratch2 = ToRegister(instr->temp());
-  DwVfpRegister dividend = ToDoubleRegister(instr->temp2());
-  DwVfpRegister divisor = ToDoubleRegister(instr->temp3());
-  DwVfpRegister quotient = double_scratch0();
+  if (CpuFeatures::IsSupported(SUDIV)) {
+    CpuFeatures::Scope scope(SUDIV);
+    // Check for x % 0.
+    if (instr->hydrogen()->CheckFlag(HValue::kCanBeDivByZero)) {
+      __ cmp(right, Operand(0));
+      DeoptimizeIf(eq, instr->environment());
+    }

-  ASSERT(!dividend.is(divisor));
-  ASSERT(!dividend.is(quotient));
-  ASSERT(!divisor.is(quotient));
-  ASSERT(!scratch.is(left));
-  ASSERT(!scratch.is(right));
-  ASSERT(!scratch.is(result));
+    // For  r3 = r1 % r2; we can have the following ARM code
+    // sdiv r3, r1, r2
+    // mls r3, r3, r2, r1

-  Label done, vfp_modulo, both_positive, right_negative;
+    __ sdiv(result, left, right);
+    __ mls(result, result, right, left);
+    __ cmp(result, Operand(0));
+    __ b(ne, &done);

-  // Check for x % 0.
-  if (instr->hydrogen()->CheckFlag(HValue::kCanBeDivByZero)) {
-    __ cmp(right, Operand(0));
-    DeoptimizeIf(eq, instr->environment());
-  }
+    if (instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {
+       __ cmp(left, Operand(0));
+       DeoptimizeIf(lt, instr->environment());
+    }
+  } else {
+    Register scratch = scratch0();
+    Register scratch2 = ToRegister(instr->temp());
+    DwVfpRegister dividend = ToDoubleRegister(instr->temp2());
+    DwVfpRegister divisor = ToDoubleRegister(instr->temp3());
+    DwVfpRegister quotient = double_scratch0();

-  __ Move(result, left);
+    ASSERT(!dividend.is(divisor));
+    ASSERT(!dividend.is(quotient));
+    ASSERT(!divisor.is(quotient));
+    ASSERT(!scratch.is(left));
+    ASSERT(!scratch.is(right));
+    ASSERT(!scratch.is(result));

-  // (0 % x) must yield 0 (if x is finite, which is the case here).
-  __ cmp(left, Operand(0));
-  __ b(eq, &done);
-  // Preload right in a vfp register.
-  __ vmov(divisor.low(), right);
-  __ b(lt, &vfp_modulo);
+    Label done, vfp_modulo, both_positive, right_negative;

-  __ cmp(left, Operand(right));
-  __ b(lt, &done);
+    // Check for x % 0.
+    if (instr->hydrogen()->CheckFlag(HValue::kCanBeDivByZero)) {
+      __ cmp(right, Operand(0));
+      DeoptimizeIf(eq, instr->environment());
+    }

-  // Check for (positive) power of two on the right hand side.
-  __ JumpIfNotPowerOfTwoOrZeroAndNeg(right,
-                                     scratch,
-                                     &right_negative,
-                                     &both_positive);
-  // Perform modulo operation (scratch contains right - 1).
-  __ and_(result, scratch, Operand(left));
-  __ b(&done);
+    __ Move(result, left);

-  __ bind(&right_negative);
-  // Negate right. The sign of the divisor does not matter.
-  __ rsb(right, right, Operand(0));
+    // (0 % x) must yield 0 (if x is finite, which is the case here).
+    __ cmp(left, Operand(0));
+    __ b(eq, &done);
+    // Preload right in a vfp register.
+    __ vmov(divisor.low(), right);
+    __ b(lt, &vfp_modulo);

-  __ bind(&both_positive);
-  const int kUnfolds = 3;
-  // If the right hand side is smaller than the (nonnegative)
-  // left hand side, the left hand side is the result.
-  // Else try a few subtractions of the left hand side.
-  __ mov(scratch, left);
-  for (int i = 0; i < kUnfolds; i++) {
-    // Check if the left hand side is less or equal than the
-    // the right hand side.
-    __ cmp(scratch, Operand(right));
-    __ mov(result, scratch, LeaveCC, lt);
+    __ cmp(left, Operand(right));
     __ b(lt, &done);
-    // If not, reduce the left hand side by the right hand
-    // side and check again.
-    if (i < kUnfolds - 1) __ sub(scratch, scratch, right);
-  }

-  __ bind(&vfp_modulo);
-  // Load the arguments in VFP registers.

-  // The divisor value is preloaded before. Be careful that 'right' is only live

-  // on entry.
-  __ vmov(dividend.low(), left);

-  // From here on don't use right as it may have been reallocated (for example

-  // to scratch2).
-  right = no_reg;
+    // Check for (positive) power of two on the right hand side.
+    __ JumpIfNotPowerOfTwoOrZeroAndNeg(right,
+                                       scratch,
+                                       &right_negative,
+                                       &both_positive);
+    // Perform modulo operation (scratch contains right - 1).
+    __ and_(result, scratch, Operand(left));
+    __ b(&done);

-  __ vcvt_f64_s32(dividend, dividend.low());
-  __ vcvt_f64_s32(divisor, divisor.low());
+    __ bind(&right_negative);
+    // Negate right. The sign of the divisor does not matter.
+    __ rsb(right, right, Operand(0));

-  // We do not care about the sign of the divisor.
-  __ vabs(divisor, divisor);
-  // Compute the quotient and round it to a 32bit integer.
-  __ vdiv(quotient, dividend, divisor);
-  __ vcvt_s32_f64(quotient.low(), quotient);
-  __ vcvt_f64_s32(quotient, quotient.low());
+    __ bind(&both_positive);
+    const int kUnfolds = 3;
+    // If the right hand side is smaller than the (nonnegative)
+    // left hand side, the left hand side is the result.
+    // Else try a few subtractions of the left hand side.
+    __ mov(scratch, left);
+    for (int i = 0; i < kUnfolds; i++) {
+      // Check if the left hand side is less or equal than the
+      // the right hand side.
+      __ cmp(scratch, Operand(right));
+      __ mov(result, scratch, LeaveCC, lt);
+      __ b(lt, &done);
+      // If not, reduce the left hand side by the right hand
+      // side and check again.
+      if (i < kUnfolds - 1) __ sub(scratch, scratch, right);
+    }

-  // Compute the remainder in result.
-  DwVfpRegister double_scratch = dividend;
-  __ vmul(double_scratch, divisor, quotient);
-  __ vcvt_s32_f64(double_scratch.low(), double_scratch);
-  __ vmov(scratch, double_scratch.low());
+    __ bind(&vfp_modulo);
+    // Load the arguments in VFP registers.
+    // The divisor value is preloaded before. Be careful that 'right'
+    // is only live on entry.
+    __ vmov(dividend.low(), left);
+    // From here on don't use right as it may have been reallocated
+    // (for example to scratch2).
+    right = no_reg;

-  if (!instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {
-    __ sub(result, left, scratch);
-  } else {
-    Label ok;
-    // Check for -0.
-    __ sub(scratch2, left, scratch, SetCC);
-    __ b(ne, &ok);
-    __ cmp(left, Operand(0));
-    DeoptimizeIf(mi, instr->environment());
-    __ bind(&ok);
-    // Load the result and we are done.
-    __ mov(result, scratch2);
-  }
+    __ vcvt_f64_s32(dividend, dividend.low());
+    __ vcvt_f64_s32(divisor, divisor.low());

+    // We do not care about the sign of the divisor.
+    __ vabs(divisor, divisor);
+    // Compute the quotient and round it to a 32bit integer.
+    __ vdiv(quotient, dividend, divisor);
+    __ vcvt_s32_f64(quotient.low(), quotient);
+    __ vcvt_f64_s32(quotient, quotient.low());
+
+    // Compute the remainder in result.
+    DwVfpRegister double_scratch = dividend;
+    __ vmul(double_scratch, divisor, quotient);
+    __ vcvt_s32_f64(double_scratch.low(), double_scratch);
+    __ vmov(scratch, double_scratch.low());
+
+    if (!instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {
+      __ sub(result, left, scratch);
+    } else {
+      Label ok;
+      // Check for -0.
+      __ sub(scratch2, left, scratch, SetCC);
+      __ b(ne, &ok);
+      __ cmp(left, Operand(0));
+      DeoptimizeIf(mi, instr->environment());
+      __ bind(&ok);
+      // Load the result and we are done.
+      __ mov(result, scratch2);
+    }
+  }
   __ bind(&done);
 }

=======================================

--- /branches/bleeding_edge/src/arm/simulator-arm.cc     Wed Aug 22 08:44:17 2012
+++ /branches/bleeding_edge/src/arm/simulator-arm.cc     Mon Oct  1 14:27:33 2012

@@ -1986,11 +1986,23 @@
               SetNZFlags(alu_out);
             }
           } else {

-            // The MLA instruction description (A 4.1.28) refers to the order
-            // of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the
-            // Rn field to encode the Rd register and the Rd field to encode

-            // the Rn register.
-            Format(instr, "mla'cond's 'rn, 'rm, 'rs, 'rd");
+            int rd = instr->RdValue();
+            int32_t acc_value = get_register(rd);
+            if (instr->Bit(22) == 0) {

+              // The MLA instruction description (A 4.1.28) refers to the order
+              // of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the
+              // Rn field to encode the Rd register and the Rd field to encode

+              // the Rn register.
+              // Format(instr, "mla'cond's 'rn, 'rm, 'rs, 'rd");
+              int32_t mul_out = rm_val * rs_val;
+              int32_t result = acc_value + mul_out;
+              set_register(rn, result);
+            } else {
+              // Format(instr, "mls'cond's 'rn, 'rm, 'rs, 'rd");
+              int32_t mul_out = rm_val * rs_val;
+              int32_t result = acc_value - mul_out;
+              set_register(rn, result);
+            }
           }
         } else {

           // The signed/long multiply instructions use the terms RdHi and RdLo

@@ -2546,6 +2558,25 @@
       break;
     }
     case db_x: {
+      if (FLAG_enable_sudiv) {
+        if (!instr->HasW()) {
+          if (instr->Bits(5, 4) == 0x1) {
+             if ((instr->Bit(22) == 0x0) && (instr->Bit(20) == 0x1)) {
+               // sdiv (in V8 notation matching ARM ISA format) rn = rm/rs
+               // Format(instr, "'sdiv'cond'b 'rn, 'rm, 'rs);
+               int rm = instr->RmValue();
+               int32_t rm_val = get_register(rm);
+               int rs = instr->RsValue();
+               int32_t rs_val = get_register(rs);
+               int32_t ret_val = 0;
+               ASSERT(rs_val != 0);
+               ret_val = rm_val/rs_val;
+               set_register(rn, ret_val);
+               return;
+             }
+           }
+         }
+       }
       // Format(instr, "'memop'cond'b 'rd, ['rn, -'shift_rm]'w");
       addr = rn_val - shifter_operand;
       if (instr->HasW()) {
=======================================
--- /branches/bleeding_edge/src/flag-definitions.h      Fri Sep 14 04:38:45 2012
+++ /branches/bleeding_edge/src/flag-definitions.h      Mon Oct  1 14:27:33 2012
@@ -284,6 +284,8 @@
             "enable use of VFP2 instructions if available")
 DEFINE_bool(enable_armv7, true,
             "enable use of ARMv7 instructions if available (ARM only)")
+DEFINE_bool(enable_sudiv, true,

+            "enable use of SDIV and UDIV instructions if available (ARM only)")

 DEFINE_bool(enable_fpu, true,
             "enable use of MIPS FPU instructions if available (MIPS only)")

=======================================
--- /branches/bleeding_edge/src/platform-linux.cc       Mon Oct  1 05:11:06 2012
+++ /branches/bleeding_edge/src/platform-linux.cc       Mon Oct  1 14:27:33 2012
@@ -148,6 +148,9 @@
     case ARMv7:
       search_string = "ARMv7";
       break;
+    case SUDIV:
+      search_string = "idiva";
+      break;
     default:
       UNREACHABLE();
   }
=======================================
--- /branches/bleeding_edge/src/v8globals.h     Wed Aug 29 02:19:53 2012
+++ /branches/bleeding_edge/src/v8globals.h     Mon Oct  1 14:27:33 2012
@@ -438,6 +438,7 @@
                   VFP3 = 1,    // ARM
                   ARMv7 = 2,   // ARM
                   VFP2 = 3,    // ARM
+                  SUDIV = 4,   // ARM
                   SAHF = 0,    // x86
                   FPU = 1};    // MIPS

--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

[v8-dev] [v8] r12646 committed - Support for SDIV and MLS ARM instructions, and implement DoModI using ...

Reply via email to