[v8-dev] Re: r10210 committed - Port Math.pow inlining to ARM....

Alexandre Rames Fri, 09 Dec 2011 02:20:58 -0800

Hello,

This commit breaks the v8 raytrace benchmark on ARM.
Could you have a look? I'll have a look as well.


Thanks,

Alexandre

On Wed, Dec 7, 2011 at 5:09 PM,  <[email protected]> wrote:
> Revision: 10210
> Author:   [email protected]
> Date:     Wed Dec  7 08:55:00 2011
> Log:      Port Math.pow inlining to ARM.
>
> TEST=math-pow.js
>
> Review URL: http://codereview.chromium.org/8840008
> http://code.google.com/p/v8/source/detail?r=10210
>
> Modified:
>  /branches/bleeding_edge/src/arm/code-stubs-arm.cc
>  /branches/bleeding_edge/src/arm/full-codegen-arm.cc
>  /branches/bleeding_edge/src/arm/lithium-arm.cc
>  /branches/bleeding_edge/src/arm/lithium-codegen-arm.cc
>
> =======================================
> --- /branches/bleeding_edge/src/arm/code-stubs-arm.cc   Tue Nov 29 04:09:06
> 2011
> +++ /branches/bleeding_edge/src/arm/code-stubs-arm.cc   Wed Dec  7 08:55:00
> 2011
> @@ -3455,110 +3455,198 @@
>
>
>  void MathPowStub::Generate(MacroAssembler* masm) {
> -  Label call_runtime;
> -
> -  if (CpuFeatures::IsSupported(VFP3)) {
> -    CpuFeatures::Scope scope(VFP3);
> -
> -    Label base_not_smi;
> -    Label exponent_not_smi;
> -    Label convert_exponent;
> -
> -    const Register base = r0;
> -    const Register exponent = r1;
> -    const Register heapnumbermap = r5;
> -    const Register heapnumber = r6;
> -    const DoubleRegister double_base = d0;
> -    const DoubleRegister double_exponent = d1;
> -    const DoubleRegister double_result = d2;
> -    const SwVfpRegister single_scratch = s0;
> -    const Register scratch = r9;
> -    const Register scratch2 = r7;
> -
> -    __ LoadRoot(heapnumbermap, Heap::kHeapNumberMapRootIndex);
> +  CpuFeatures::Scope vfp3_scope(VFP3);
> +  const Register base = r1;
> +  const Register exponent = r2;
> +  const Register heapnumbermap = r5;
> +  const Register heapnumber = r0;
> +  const DoubleRegister double_base = d1;
> +  const DoubleRegister double_exponent = d2;
> +  const DoubleRegister double_result = d3;
> +  const DoubleRegister double_scratch = d0;
> +  const SwVfpRegister single_scratch = s0;
> +  const Register scratch = r9;
> +  const Register scratch2 = r7;
> +
> +  Label call_runtime, done, exponent_not_smi, int_exponent;
> +  if (exponent_type_ == ON_STACK) {
> +    Label base_is_smi, unpack_exponent;
> +    // The exponent and base are supplied as arguments on the stack.
> +    // This can only happen if the stub is called from non-optimized code.
> +    // Load input parameters from stack to double registers.
>     __ ldr(base, MemOperand(sp, 1 * kPointerSize));
>     __ ldr(exponent, MemOperand(sp, 0 * kPointerSize));
>
> -    // Convert base to double value and store it in d0.
> -    __ JumpIfNotSmi(base, &base_not_smi);
> -    // Base is a Smi. Untag and convert it.
> -    __ SmiUntag(base);
> -    __ vmov(single_scratch, base);
> -    __ vcvt_f64_s32(double_base, single_scratch);
> -    __ b(&convert_exponent);
> -
> -    __ bind(&base_not_smi);
> +    __ LoadRoot(heapnumbermap, Heap::kHeapNumberMapRootIndex);
> +
> +    __ JumpIfSmi(base, &base_is_smi);
>     __ ldr(scratch, FieldMemOperand(base, JSObject::kMapOffset));
>     __ cmp(scratch, heapnumbermap);
>     __ b(ne, &call_runtime);
> -    // Base is a heapnumber. Load it into double register.
> +
>     __ vldr(double_base, FieldMemOperand(base, HeapNumber::kValueOffset));
> -
> -    __ bind(&convert_exponent);
> +    __ jmp(&unpack_exponent);
> +
> +    __ bind(&base_is_smi);
> +    __ SmiUntag(base);
> +    __ vmov(single_scratch, base);
> +    __ vcvt_f64_s32(double_base, single_scratch);
> +    __ bind(&unpack_exponent);
> +
>     __ JumpIfNotSmi(exponent, &exponent_not_smi);
>     __ SmiUntag(exponent);
> -
> -    // The base is in a double register and the exponent is
> -    // an untagged smi. Allocate a heap number and call a
> -    // C function for integer exponents. The register containing
> -    // the heap number is callee-saved.
> -    __ AllocateHeapNumber(heapnumber,
> -                          scratch,
> -                          scratch2,
> -                          heapnumbermap,
> -                          &call_runtime);
> -    __ push(lr);
> -    __ PrepareCallCFunction(1, 1, scratch);
> -    __ SetCallCDoubleArguments(double_base, exponent);
> -    {
> -      AllowExternalCallThatCantCauseGC scope(masm);
> -      __ CallCFunction(
> -          ExternalReference::power_double_int_function(masm->isolate()),
> -          1, 1);
> -      __ pop(lr);
> -      __ GetCFunctionDoubleResult(double_result);
> -    }
> -    __ vstr(double_result,
> -            FieldMemOperand(heapnumber, HeapNumber::kValueOffset));
> -    __ mov(r0, heapnumber);
> -    __ Ret(2 * kPointerSize);
> +    __ jmp(&int_exponent);
>
>     __ bind(&exponent_not_smi);
>     __ ldr(scratch, FieldMemOperand(exponent, JSObject::kMapOffset));
>     __ cmp(scratch, heapnumbermap);
>     __ b(ne, &call_runtime);
> -    // Exponent is a heapnumber. Load it into double register.
>     __ vldr(double_exponent,
>             FieldMemOperand(exponent, HeapNumber::kValueOffset));
> -
> -    // The base and the exponent are in double registers.
> -    // Allocate a heap number and call a C function for
> -    // double exponents. The register containing
> -    // the heap number is callee-saved.
> -    __ AllocateHeapNumber(heapnumber,
> -                          scratch,
> -                          scratch2,
> -                          heapnumbermap,
> -                          &call_runtime);
> +  } else if (exponent_type_ == TAGGED) {
> +    // Base is already in double_base.
> +    __ JumpIfNotSmi(exponent, &exponent_not_smi);
> +    __ SmiUntag(exponent);
> +    __ jmp(&int_exponent);
> +
> +    __ bind(&exponent_not_smi);
> +    __ vldr(double_exponent,
> +            FieldMemOperand(exponent, HeapNumber::kValueOffset));
> +  }
> +
> +  if (exponent_type_ != INTEGER) {
> +    // Detect integer exponents stored as double.
> +    __ vcvt_u32_f64(single_scratch, double_exponent);
> +    // We do not check for NaN or Infinity here because comparing numbers
> on
> +    // ARM correctly distinguishes NaNs.  We end up calling the built-in.
> +    __ vcvt_f64_u32(double_scratch, single_scratch);
> +    __ VFPCompareAndSetFlags(double_scratch, double_exponent);
> +    __ vmov(exponent, single_scratch, eq);
> +    __ b(eq, &int_exponent);
> +
> +    if (exponent_type_ == ON_STACK) {
> +      // Detect square root case.  Crankshaft detects constant +/-0.5 at
> +      // compile time and uses DoMathPowHalf instead.  We then skip this
> check
> +      // for non-constant cases of +/-0.5 as these hardly occur.
> +      Label not_plus_half;
> +
> +      // Test for 0.5.
> +      __ vmov(double_scratch, 0.5);
> +      __ VFPCompareAndSetFlags(double_exponent, double_scratch);
> +      __ b(ne, &not_plus_half);
> +
> +      // Calculates square root of base.  Check for the special case of
> +      // Math.pow(-Infinity, 0.5) == Infinity (ECMA spec, 15.8.2.13).
> +      __ vmov(double_scratch, -V8_INFINITY);
> +      __ VFPCompareAndSetFlags(double_base, double_scratch);
> +      __ vneg(double_result, double_scratch, eq);
> +      __ b(eq, &done);
> +
> +      // Add +0 to convert -0 to +0.
> +      __ vadd(double_scratch, double_base, kDoubleRegZero);
> +      __ vsqrt(double_result, double_scratch);
> +      __ jmp(&done);
> +
> +      __ bind(&not_plus_half);
> +      __ vmov(double_scratch, -0.5);
> +      __ VFPCompareAndSetFlags(double_exponent, double_scratch);
> +      __ b(ne, &call_runtime);
> +
> +      // Calculates square root of base.  Check for the special case of
> +      // Math.pow(-Infinity, -0.5) == 0 (ECMA spec, 15.8.2.13).
> +      __ vmov(double_scratch, -V8_INFINITY);
> +      __ VFPCompareAndSetFlags(double_base, double_scratch);
> +      __ vmov(double_result, kDoubleRegZero, eq);
> +      __ b(eq, &done);
> +
> +      // Add +0 to convert -0 to +0.
> +      __ vadd(double_scratch, double_base, kDoubleRegZero);
> +      __ vmov(double_result, 1);
> +      __ vsqrt(double_scratch, double_scratch);
> +      __ vdiv(double_result, double_result, double_scratch);
> +      __ jmp(&done);
> +    }
> +
>     __ push(lr);
> -    __ PrepareCallCFunction(0, 2, scratch);
> -    __ SetCallCDoubleArguments(double_base, double_exponent);
>     {
>       AllowExternalCallThatCantCauseGC scope(masm);
> +      __ PrepareCallCFunction(0, 2, scratch);
> +      __ SetCallCDoubleArguments(double_base, double_exponent);
>       __ CallCFunction(
>           ExternalReference::power_double_double_function(masm->isolate()),
>           0, 2);
> -      __ pop(lr);
> -      __ GetCFunctionDoubleResult(double_result);
> -    }
> +    }
> +    __ pop(lr);
> +    __ GetCFunctionDoubleResult(double_result);
> +    __ jmp(&done);
> +  }
> +
> +  // Calculate power with integer exponent.
> +  __ bind(&int_exponent);
> +
> +  __ mov(scratch, exponent);  // Back up exponent.
> +  __ vmov(double_scratch, double_base);  // Back up base.
> +  __ vmov(double_result, 1.0);
> +
> +  // Get absolute value of exponent.
> +  __ cmp(scratch, Operand(0));
> +  __ mov(scratch2, Operand(0), LeaveCC, mi);
> +  __ sub(scratch, scratch2, scratch, LeaveCC, mi);
> +
> +  Label while_true;
> +  __ bind(&while_true);
> +  __ mov(scratch, Operand(scratch, ASR, 1), SetCC);
> +  __ vmul(double_result, double_result, double_scratch, cs);
> +  __ vmul(double_scratch, double_scratch, double_scratch, ne);
> +  __ b(ne, &while_true);
> +
> +  __ cmp(exponent, Operand(0));
> +  __ b(ge, &done);
> +  __ vmov(double_scratch, 1.0);
> +  __ vdiv(double_result, double_scratch, double_result);
> +  // Test whether result is zero.  Bail out to check for subnormal result.
> +  // Due to subnormals, x^-y == (1/x)^y does not hold in all cases.
> +  __ VFPCompareAndSetFlags(double_result, 0.0);
> +  __ b(ne, &done);
> +  // double_exponent may not containe the exponent value if the input was a
> +  // smi.  We set it with exponent value before bailing out.
> +  __ vmov(single_scratch, exponent);
> +  __ vcvt_f64_s32(double_exponent, single_scratch);
> +
> +  // Returning or bailing out.
> +  Counters* counters = masm->isolate()->counters();
> +  if (exponent_type_ == ON_STACK) {
> +    // The arguments are still on the stack.
> +    __ bind(&call_runtime);
> +    __ TailCallRuntime(Runtime::kMath_pow_cfunction, 2, 1);
> +
> +    // The stub is called from non-optimized code, which expects the result
> +    // as heap number in exponent.
> +    __ bind(&done);
> +    __ AllocateHeapNumber(
> +        heapnumber, scratch, scratch2, heapnumbermap, &call_runtime);
>     __ vstr(double_result,
>             FieldMemOperand(heapnumber, HeapNumber::kValueOffset));
> -    __ mov(r0, heapnumber);
> +    ASSERT(heapnumber.is(r0));
> +    __ IncrementCounter(counters->math_pow(), 1, scratch, scratch2);
>     __ Ret(2 * kPointerSize);
> -  }
> -
> -  __ bind(&call_runtime);
> -  __ TailCallRuntime(Runtime::kMath_pow_cfunction, 2, 1);
> +  } else {
> +    __ push(lr);
> +    {
> +      AllowExternalCallThatCantCauseGC scope(masm);
> +      __ PrepareCallCFunction(0, 2, scratch);
> +      __ SetCallCDoubleArguments(double_base, double_exponent);
> +      __ CallCFunction(
> +          ExternalReference::power_double_double_function(masm->isolate()),
> +          0, 2);
> +    }
> +    __ pop(lr);
> +    __ GetCFunctionDoubleResult(double_result);
> +
> +    __ bind(&done);
> +    __ IncrementCounter(counters->math_pow(), 1, scratch, scratch2);
> +    __ Ret();
> +  }
>  }
>
>
> =======================================
> --- /branches/bleeding_edge/src/arm/full-codegen-arm.cc Fri Dec  2 00:06:37
> 2011
> +++ /branches/bleeding_edge/src/arm/full-codegen-arm.cc Wed Dec  7 08:55:00
> 2011
> @@ -2938,8 +2938,12 @@
>   ASSERT(args->length() == 2);
>   VisitForStackValue(args->at(0));
>   VisitForStackValue(args->at(1));
> -  MathPowStub stub(MathPowStub::ON_STACK);
> -  __ CallStub(&stub);
> +  if (CpuFeatures::IsSupported(VFP3)) {
> +    MathPowStub stub(MathPowStub::ON_STACK);
> +    __ CallStub(&stub);
> +  } else {
> +    __ CallRuntime(Runtime::kMath_pow, 2);
> +  }
>   context()->Plug(r0);
>  }
>
> =======================================
> --- /branches/bleeding_edge/src/arm/lithium-arm.cc      Tue Dec  6 01:37:50
> 2011
> +++ /branches/bleeding_edge/src/arm/lithium-arm.cc      Wed Dec  7 08:55:00
> 2011
> @@ -1405,7 +1405,7 @@
>   LOperand* left = UseFixedDouble(instr->left(), d1);
>   LOperand* right = exponent_type.IsDouble() ?
>       UseFixedDouble(instr->right(), d2) :
> -      UseFixed(instr->right(), r0);
> +      UseFixed(instr->right(), r2);
>   LPower* result = new LPower(left, right);
>   return MarkAsCall(DefineFixedDouble(result, d3),
>                     instr,
> =======================================
> --- /branches/bleeding_edge/src/arm/lithium-codegen-arm.cc      Wed Dec  7
> 02:13:46 2011
> +++ /branches/bleeding_edge/src/arm/lithium-codegen-arm.cc      Wed Dec  7
> 08:55:00 2011
> @@ -3122,61 +3122,34 @@
>
>
>  void LCodeGen::DoPower(LPower* instr) {
> -  LOperand* left = instr->InputAt(0);
> -  LOperand* right = instr->InputAt(1);
> -  Register scratch = scratch0();
> -  DoubleRegister result_reg = ToDoubleRegister(instr->result());
>   Representation exponent_type =
> instr->hydrogen()->right()->representation();
> -  if (exponent_type.IsDouble()) {
> -    // Prepare arguments and call C function.
> -    __ PrepareCallCFunction(0, 2, scratch);
> -    __ SetCallCDoubleArguments(ToDoubleRegister(left),
> -                               ToDoubleRegister(right));
> -    __ CallCFunction(
> -        ExternalReference::power_double_double_function(isolate()), 0, 2);
> -  } else if (exponent_type.IsInteger32()) {
> -    ASSERT(ToRegister(right).is(r0));
> -    // Prepare arguments and call C function.
> -    __ PrepareCallCFunction(1, 1, scratch);
> -    __ SetCallCDoubleArguments(ToDoubleRegister(left), ToRegister(right));
> -    __ CallCFunction(
> -        ExternalReference::power_double_int_function(isolate()), 1, 1);
> -  } else {
> -    ASSERT(exponent_type.IsTagged());
> -    ASSERT(instr->hydrogen()->left()->representation().IsDouble());
> -
> -    Register right_reg = ToRegister(right);
> -
> -    // Check for smi on the right hand side.
> -    Label non_smi, call;
> -    __ JumpIfNotSmi(right_reg, &non_smi);
> -
> -    // Untag smi and convert it to a double.
> -    __ SmiUntag(right_reg);
> -    SwVfpRegister single_scratch = double_scratch0().low();
> -    __ vmov(single_scratch, right_reg);
> -    __ vcvt_f64_s32(result_reg, single_scratch);
> -    __ jmp(&call);
> -
> -    // Heap number map check.
> -    __ bind(&non_smi);
> -    __ ldr(scratch, FieldMemOperand(right_reg, HeapObject::kMapOffset));
> +  // Having marked this as a call, we can use any registers.
> +  // Just make sure that the input/output registers are the expected ones.
> +  ASSERT(!instr->InputAt(1)->IsDoubleRegister() ||
> +         ToDoubleRegister(instr->InputAt(1)).is(d2));
> +  ASSERT(!instr->InputAt(1)->IsRegister() ||
> +         ToRegister(instr->InputAt(1)).is(r2));
> +  ASSERT(ToDoubleRegister(instr->InputAt(0)).is(d1));
> +  ASSERT(ToDoubleRegister(instr->result()).is(d3));
> +
> +  if (exponent_type.IsTagged()) {
> +    Label no_deopt;
> +    __ JumpIfSmi(r2, &no_deopt);
> +    __ ldr(r7, FieldMemOperand(r2, HeapObject::kMapOffset));
>     __ LoadRoot(ip, Heap::kHeapNumberMapRootIndex);
> -    __ cmp(scratch, Operand(ip));
> +    __ cmp(r7, Operand(ip));
>     DeoptimizeIf(ne, instr->environment());
> -    int32_t value_offset = HeapNumber::kValueOffset - kHeapObjectTag;
> -    __ add(scratch, right_reg, Operand(value_offset));
> -    __ vldr(result_reg, scratch, 0);
> -
> -    // Prepare arguments and call C function.
> -    __ bind(&call);
> -    __ PrepareCallCFunction(0, 2, scratch);
> -    __ SetCallCDoubleArguments(ToDoubleRegister(left), result_reg);
> -    __ CallCFunction(
> -        ExternalReference::power_double_double_function(isolate()), 0, 2);
> -  }
> -  // Store the result in the result register.
> -  __ GetCFunctionDoubleResult(result_reg);
> +    __ bind(&no_deopt);
> +    MathPowStub stub(MathPowStub::TAGGED);
> +    __ CallStub(&stub);
> +  } else if (exponent_type.IsInteger32()) {
> +    MathPowStub stub(MathPowStub::INTEGER);
> +    __ CallStub(&stub);
> +  } else {
> +    ASSERT(exponent_type.IsDouble());
> +    MathPowStub stub(MathPowStub::DOUBLE);
> +    __ CallStub(&stub);
> +  }
>  }
>
>
> --
> v8-dev mailing list
> [email protected]
> http://groups.google.com/group/v8-dev

-- 
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

[v8-dev] Re: r10210 committed - Port Math.pow inlining to ARM....

Reply via email to