Hello, This commit breaks the v8 raytrace benchmark on ARM. Could you have a look? I'll have a look as well.
Thanks, Alexandre On Wed, Dec 7, 2011 at 5:09 PM, <[email protected]> wrote: > Revision: 10210 > Author: [email protected] > Date: Wed Dec 7 08:55:00 2011 > Log: Port Math.pow inlining to ARM. > > TEST=math-pow.js > > Review URL: http://codereview.chromium.org/8840008 > http://code.google.com/p/v8/source/detail?r=10210 > > Modified: > /branches/bleeding_edge/src/arm/code-stubs-arm.cc > /branches/bleeding_edge/src/arm/full-codegen-arm.cc > /branches/bleeding_edge/src/arm/lithium-arm.cc > /branches/bleeding_edge/src/arm/lithium-codegen-arm.cc > > ======================================= > --- /branches/bleeding_edge/src/arm/code-stubs-arm.cc Tue Nov 29 04:09:06 > 2011 > +++ /branches/bleeding_edge/src/arm/code-stubs-arm.cc Wed Dec 7 08:55:00 > 2011 > @@ -3455,110 +3455,198 @@ > > > void MathPowStub::Generate(MacroAssembler* masm) { > - Label call_runtime; > - > - if (CpuFeatures::IsSupported(VFP3)) { > - CpuFeatures::Scope scope(VFP3); > - > - Label base_not_smi; > - Label exponent_not_smi; > - Label convert_exponent; > - > - const Register base = r0; > - const Register exponent = r1; > - const Register heapnumbermap = r5; > - const Register heapnumber = r6; > - const DoubleRegister double_base = d0; > - const DoubleRegister double_exponent = d1; > - const DoubleRegister double_result = d2; > - const SwVfpRegister single_scratch = s0; > - const Register scratch = r9; > - const Register scratch2 = r7; > - > - __ LoadRoot(heapnumbermap, Heap::kHeapNumberMapRootIndex); > + CpuFeatures::Scope vfp3_scope(VFP3); > + const Register base = r1; > + const Register exponent = r2; > + const Register heapnumbermap = r5; > + const Register heapnumber = r0; > + const DoubleRegister double_base = d1; > + const DoubleRegister double_exponent = d2; > + const DoubleRegister double_result = d3; > + const DoubleRegister double_scratch = d0; > + const SwVfpRegister single_scratch = s0; > + const Register scratch = r9; > + const Register scratch2 = r7; > + > + Label call_runtime, done, exponent_not_smi, int_exponent; > + if (exponent_type_ == ON_STACK) { > + Label base_is_smi, unpack_exponent; > + // The exponent and base are supplied as arguments on the stack. > + // This can only happen if the stub is called from non-optimized code. > + // Load input parameters from stack to double registers. > __ ldr(base, MemOperand(sp, 1 * kPointerSize)); > __ ldr(exponent, MemOperand(sp, 0 * kPointerSize)); > > - // Convert base to double value and store it in d0. > - __ JumpIfNotSmi(base, &base_not_smi); > - // Base is a Smi. Untag and convert it. > - __ SmiUntag(base); > - __ vmov(single_scratch, base); > - __ vcvt_f64_s32(double_base, single_scratch); > - __ b(&convert_exponent); > - > - __ bind(&base_not_smi); > + __ LoadRoot(heapnumbermap, Heap::kHeapNumberMapRootIndex); > + > + __ JumpIfSmi(base, &base_is_smi); > __ ldr(scratch, FieldMemOperand(base, JSObject::kMapOffset)); > __ cmp(scratch, heapnumbermap); > __ b(ne, &call_runtime); > - // Base is a heapnumber. Load it into double register. > + > __ vldr(double_base, FieldMemOperand(base, HeapNumber::kValueOffset)); > - > - __ bind(&convert_exponent); > + __ jmp(&unpack_exponent); > + > + __ bind(&base_is_smi); > + __ SmiUntag(base); > + __ vmov(single_scratch, base); > + __ vcvt_f64_s32(double_base, single_scratch); > + __ bind(&unpack_exponent); > + > __ JumpIfNotSmi(exponent, &exponent_not_smi); > __ SmiUntag(exponent); > - > - // The base is in a double register and the exponent is > - // an untagged smi. Allocate a heap number and call a > - // C function for integer exponents. The register containing > - // the heap number is callee-saved. > - __ AllocateHeapNumber(heapnumber, > - scratch, > - scratch2, > - heapnumbermap, > - &call_runtime); > - __ push(lr); > - __ PrepareCallCFunction(1, 1, scratch); > - __ SetCallCDoubleArguments(double_base, exponent); > - { > - AllowExternalCallThatCantCauseGC scope(masm); > - __ CallCFunction( > - ExternalReference::power_double_int_function(masm->isolate()), > - 1, 1); > - __ pop(lr); > - __ GetCFunctionDoubleResult(double_result); > - } > - __ vstr(double_result, > - FieldMemOperand(heapnumber, HeapNumber::kValueOffset)); > - __ mov(r0, heapnumber); > - __ Ret(2 * kPointerSize); > + __ jmp(&int_exponent); > > __ bind(&exponent_not_smi); > __ ldr(scratch, FieldMemOperand(exponent, JSObject::kMapOffset)); > __ cmp(scratch, heapnumbermap); > __ b(ne, &call_runtime); > - // Exponent is a heapnumber. Load it into double register. > __ vldr(double_exponent, > FieldMemOperand(exponent, HeapNumber::kValueOffset)); > - > - // The base and the exponent are in double registers. > - // Allocate a heap number and call a C function for > - // double exponents. The register containing > - // the heap number is callee-saved. > - __ AllocateHeapNumber(heapnumber, > - scratch, > - scratch2, > - heapnumbermap, > - &call_runtime); > + } else if (exponent_type_ == TAGGED) { > + // Base is already in double_base. > + __ JumpIfNotSmi(exponent, &exponent_not_smi); > + __ SmiUntag(exponent); > + __ jmp(&int_exponent); > + > + __ bind(&exponent_not_smi); > + __ vldr(double_exponent, > + FieldMemOperand(exponent, HeapNumber::kValueOffset)); > + } > + > + if (exponent_type_ != INTEGER) { > + // Detect integer exponents stored as double. > + __ vcvt_u32_f64(single_scratch, double_exponent); > + // We do not check for NaN or Infinity here because comparing numbers > on > + // ARM correctly distinguishes NaNs. We end up calling the built-in. > + __ vcvt_f64_u32(double_scratch, single_scratch); > + __ VFPCompareAndSetFlags(double_scratch, double_exponent); > + __ vmov(exponent, single_scratch, eq); > + __ b(eq, &int_exponent); > + > + if (exponent_type_ == ON_STACK) { > + // Detect square root case. Crankshaft detects constant +/-0.5 at > + // compile time and uses DoMathPowHalf instead. We then skip this > check > + // for non-constant cases of +/-0.5 as these hardly occur. > + Label not_plus_half; > + > + // Test for 0.5. > + __ vmov(double_scratch, 0.5); > + __ VFPCompareAndSetFlags(double_exponent, double_scratch); > + __ b(ne, ¬_plus_half); > + > + // Calculates square root of base. Check for the special case of > + // Math.pow(-Infinity, 0.5) == Infinity (ECMA spec, 15.8.2.13). > + __ vmov(double_scratch, -V8_INFINITY); > + __ VFPCompareAndSetFlags(double_base, double_scratch); > + __ vneg(double_result, double_scratch, eq); > + __ b(eq, &done); > + > + // Add +0 to convert -0 to +0. > + __ vadd(double_scratch, double_base, kDoubleRegZero); > + __ vsqrt(double_result, double_scratch); > + __ jmp(&done); > + > + __ bind(¬_plus_half); > + __ vmov(double_scratch, -0.5); > + __ VFPCompareAndSetFlags(double_exponent, double_scratch); > + __ b(ne, &call_runtime); > + > + // Calculates square root of base. Check for the special case of > + // Math.pow(-Infinity, -0.5) == 0 (ECMA spec, 15.8.2.13). > + __ vmov(double_scratch, -V8_INFINITY); > + __ VFPCompareAndSetFlags(double_base, double_scratch); > + __ vmov(double_result, kDoubleRegZero, eq); > + __ b(eq, &done); > + > + // Add +0 to convert -0 to +0. > + __ vadd(double_scratch, double_base, kDoubleRegZero); > + __ vmov(double_result, 1); > + __ vsqrt(double_scratch, double_scratch); > + __ vdiv(double_result, double_result, double_scratch); > + __ jmp(&done); > + } > + > __ push(lr); > - __ PrepareCallCFunction(0, 2, scratch); > - __ SetCallCDoubleArguments(double_base, double_exponent); > { > AllowExternalCallThatCantCauseGC scope(masm); > + __ PrepareCallCFunction(0, 2, scratch); > + __ SetCallCDoubleArguments(double_base, double_exponent); > __ CallCFunction( > ExternalReference::power_double_double_function(masm->isolate()), > 0, 2); > - __ pop(lr); > - __ GetCFunctionDoubleResult(double_result); > - } > + } > + __ pop(lr); > + __ GetCFunctionDoubleResult(double_result); > + __ jmp(&done); > + } > + > + // Calculate power with integer exponent. > + __ bind(&int_exponent); > + > + __ mov(scratch, exponent); // Back up exponent. > + __ vmov(double_scratch, double_base); // Back up base. > + __ vmov(double_result, 1.0); > + > + // Get absolute value of exponent. > + __ cmp(scratch, Operand(0)); > + __ mov(scratch2, Operand(0), LeaveCC, mi); > + __ sub(scratch, scratch2, scratch, LeaveCC, mi); > + > + Label while_true; > + __ bind(&while_true); > + __ mov(scratch, Operand(scratch, ASR, 1), SetCC); > + __ vmul(double_result, double_result, double_scratch, cs); > + __ vmul(double_scratch, double_scratch, double_scratch, ne); > + __ b(ne, &while_true); > + > + __ cmp(exponent, Operand(0)); > + __ b(ge, &done); > + __ vmov(double_scratch, 1.0); > + __ vdiv(double_result, double_scratch, double_result); > + // Test whether result is zero. Bail out to check for subnormal result. > + // Due to subnormals, x^-y == (1/x)^y does not hold in all cases. > + __ VFPCompareAndSetFlags(double_result, 0.0); > + __ b(ne, &done); > + // double_exponent may not containe the exponent value if the input was a > + // smi. We set it with exponent value before bailing out. > + __ vmov(single_scratch, exponent); > + __ vcvt_f64_s32(double_exponent, single_scratch); > + > + // Returning or bailing out. > + Counters* counters = masm->isolate()->counters(); > + if (exponent_type_ == ON_STACK) { > + // The arguments are still on the stack. > + __ bind(&call_runtime); > + __ TailCallRuntime(Runtime::kMath_pow_cfunction, 2, 1); > + > + // The stub is called from non-optimized code, which expects the result > + // as heap number in exponent. > + __ bind(&done); > + __ AllocateHeapNumber( > + heapnumber, scratch, scratch2, heapnumbermap, &call_runtime); > __ vstr(double_result, > FieldMemOperand(heapnumber, HeapNumber::kValueOffset)); > - __ mov(r0, heapnumber); > + ASSERT(heapnumber.is(r0)); > + __ IncrementCounter(counters->math_pow(), 1, scratch, scratch2); > __ Ret(2 * kPointerSize); > - } > - > - __ bind(&call_runtime); > - __ TailCallRuntime(Runtime::kMath_pow_cfunction, 2, 1); > + } else { > + __ push(lr); > + { > + AllowExternalCallThatCantCauseGC scope(masm); > + __ PrepareCallCFunction(0, 2, scratch); > + __ SetCallCDoubleArguments(double_base, double_exponent); > + __ CallCFunction( > + ExternalReference::power_double_double_function(masm->isolate()), > + 0, 2); > + } > + __ pop(lr); > + __ GetCFunctionDoubleResult(double_result); > + > + __ bind(&done); > + __ IncrementCounter(counters->math_pow(), 1, scratch, scratch2); > + __ Ret(); > + } > } > > > ======================================= > --- /branches/bleeding_edge/src/arm/full-codegen-arm.cc Fri Dec 2 00:06:37 > 2011 > +++ /branches/bleeding_edge/src/arm/full-codegen-arm.cc Wed Dec 7 08:55:00 > 2011 > @@ -2938,8 +2938,12 @@ > ASSERT(args->length() == 2); > VisitForStackValue(args->at(0)); > VisitForStackValue(args->at(1)); > - MathPowStub stub(MathPowStub::ON_STACK); > - __ CallStub(&stub); > + if (CpuFeatures::IsSupported(VFP3)) { > + MathPowStub stub(MathPowStub::ON_STACK); > + __ CallStub(&stub); > + } else { > + __ CallRuntime(Runtime::kMath_pow, 2); > + } > context()->Plug(r0); > } > > ======================================= > --- /branches/bleeding_edge/src/arm/lithium-arm.cc Tue Dec 6 01:37:50 > 2011 > +++ /branches/bleeding_edge/src/arm/lithium-arm.cc Wed Dec 7 08:55:00 > 2011 > @@ -1405,7 +1405,7 @@ > LOperand* left = UseFixedDouble(instr->left(), d1); > LOperand* right = exponent_type.IsDouble() ? > UseFixedDouble(instr->right(), d2) : > - UseFixed(instr->right(), r0); > + UseFixed(instr->right(), r2); > LPower* result = new LPower(left, right); > return MarkAsCall(DefineFixedDouble(result, d3), > instr, > ======================================= > --- /branches/bleeding_edge/src/arm/lithium-codegen-arm.cc Wed Dec 7 > 02:13:46 2011 > +++ /branches/bleeding_edge/src/arm/lithium-codegen-arm.cc Wed Dec 7 > 08:55:00 2011 > @@ -3122,61 +3122,34 @@ > > > void LCodeGen::DoPower(LPower* instr) { > - LOperand* left = instr->InputAt(0); > - LOperand* right = instr->InputAt(1); > - Register scratch = scratch0(); > - DoubleRegister result_reg = ToDoubleRegister(instr->result()); > Representation exponent_type = > instr->hydrogen()->right()->representation(); > - if (exponent_type.IsDouble()) { > - // Prepare arguments and call C function. > - __ PrepareCallCFunction(0, 2, scratch); > - __ SetCallCDoubleArguments(ToDoubleRegister(left), > - ToDoubleRegister(right)); > - __ CallCFunction( > - ExternalReference::power_double_double_function(isolate()), 0, 2); > - } else if (exponent_type.IsInteger32()) { > - ASSERT(ToRegister(right).is(r0)); > - // Prepare arguments and call C function. > - __ PrepareCallCFunction(1, 1, scratch); > - __ SetCallCDoubleArguments(ToDoubleRegister(left), ToRegister(right)); > - __ CallCFunction( > - ExternalReference::power_double_int_function(isolate()), 1, 1); > - } else { > - ASSERT(exponent_type.IsTagged()); > - ASSERT(instr->hydrogen()->left()->representation().IsDouble()); > - > - Register right_reg = ToRegister(right); > - > - // Check for smi on the right hand side. > - Label non_smi, call; > - __ JumpIfNotSmi(right_reg, &non_smi); > - > - // Untag smi and convert it to a double. > - __ SmiUntag(right_reg); > - SwVfpRegister single_scratch = double_scratch0().low(); > - __ vmov(single_scratch, right_reg); > - __ vcvt_f64_s32(result_reg, single_scratch); > - __ jmp(&call); > - > - // Heap number map check. > - __ bind(&non_smi); > - __ ldr(scratch, FieldMemOperand(right_reg, HeapObject::kMapOffset)); > + // Having marked this as a call, we can use any registers. > + // Just make sure that the input/output registers are the expected ones. > + ASSERT(!instr->InputAt(1)->IsDoubleRegister() || > + ToDoubleRegister(instr->InputAt(1)).is(d2)); > + ASSERT(!instr->InputAt(1)->IsRegister() || > + ToRegister(instr->InputAt(1)).is(r2)); > + ASSERT(ToDoubleRegister(instr->InputAt(0)).is(d1)); > + ASSERT(ToDoubleRegister(instr->result()).is(d3)); > + > + if (exponent_type.IsTagged()) { > + Label no_deopt; > + __ JumpIfSmi(r2, &no_deopt); > + __ ldr(r7, FieldMemOperand(r2, HeapObject::kMapOffset)); > __ LoadRoot(ip, Heap::kHeapNumberMapRootIndex); > - __ cmp(scratch, Operand(ip)); > + __ cmp(r7, Operand(ip)); > DeoptimizeIf(ne, instr->environment()); > - int32_t value_offset = HeapNumber::kValueOffset - kHeapObjectTag; > - __ add(scratch, right_reg, Operand(value_offset)); > - __ vldr(result_reg, scratch, 0); > - > - // Prepare arguments and call C function. > - __ bind(&call); > - __ PrepareCallCFunction(0, 2, scratch); > - __ SetCallCDoubleArguments(ToDoubleRegister(left), result_reg); > - __ CallCFunction( > - ExternalReference::power_double_double_function(isolate()), 0, 2); > - } > - // Store the result in the result register. > - __ GetCFunctionDoubleResult(result_reg); > + __ bind(&no_deopt); > + MathPowStub stub(MathPowStub::TAGGED); > + __ CallStub(&stub); > + } else if (exponent_type.IsInteger32()) { > + MathPowStub stub(MathPowStub::INTEGER); > + __ CallStub(&stub); > + } else { > + ASSERT(exponent_type.IsDouble()); > + MathPowStub stub(MathPowStub::DOUBLE); > + __ CallStub(&stub); > + } > } > > > -- > v8-dev mailing list > [email protected] > http://groups.google.com/group/v8-dev -- v8-dev mailing list [email protected] http://groups.google.com/group/v8-dev
