On Wed, Jun 07, 2017 at 12:38:37PM +0100, Tamar Christina wrote:
> Hi All,
>
>
> This patch adds support for creating floating point constants
> using mov immediate instructions. The movi SIMD instruction can
> be used for HFmode and SFmode constants, eg. for -0.0f we generate:
>
> 	movi	v0.2s, 0x80, lsl 24
>
> More complex constants can be generated using an integer MOV or
> MOV+MOVK:
>
> 	mov	w0, 48128
> 	movk	w0, 0x47f0, lsl 16
> 	fmov	s0, w0
>
> We allow up to 3 instructions as this allows all HF, SF and most DF
> constants to be generated without a literal load, and is overall best
> for codesize.
>
>
> Regression tested on aarch64-none-linux-gnu and no regressions.
>
> OK for trunk?
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 5adc5edb8dde9c30450b04932a37c41f84cc5ed1..7f107672882b13809be01355ffafbc2807cc5adb 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1167,66 +1167,120 @@
>  }
>  )
>
> -(define_insn "*movhf_aarch64"
> -  [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w,m,r,m ,r")
> -	(match_operand:HF 1 "general_operand"      "Y ,?rY, w,w,m,w,m,rY,r"))]
> +(define_insn_and_split "*movhf_aarch64"
> +  [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r")
> +	(match_operand:HF 1 "general_operand"      "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r"))]
>    "TARGET_FLOAT && (register_operand (operands[0], HFmode)
> -    || aarch64_reg_or_fp_zero (operands[1], HFmode))"
> +    || aarch64_reg_or_fp_float (operands[1], HFmode))"
>    "@
>     movi\\t%0.4h, #0
> -   mov\\t%0.h[0], %w1
> +   fmov\\t%s0, %w1

Should this not be %h0?

>     umov\\t%w0, %1.h[0]
>     mov\\t%0.h[0], %1.h[0]
> +   fmov\\t%s0, %1

Likewise, and much more important for correctness as it changes the way
the bit pattern ends up in the register (see table C2-1 in release B.a
of the ARM Architecture Reference Manual for ARMv8-A), here.

> +   * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
>     ldr\\t%h0, %1
>     str\\t%h1, %0
>     ldrh\\t%w0, %1
>     strh\\t%w1, %0
>     mov\\t%w0, %w1"
> -  [(set_attr "type" "neon_move,neon_from_gp,neon_to_gp,neon_move,\
> -                     f_loads,f_stores,load1,store1,mov_reg")
> -   (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")]
> +  "&& can_create_pseudo_p ()
> +     && !aarch64_can_const_movi_rtx_p (operands[1], HFmode)
> +     && !aarch64_float_const_representable_p (operands[1])
> +     && aarch64_float_const_rtx_p (operands[1])"
> +  [(const_int 0)]
> +  "{
> +    unsigned HOST_WIDE_INT ival;
> +    if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> +      FAIL;
> +
> +    rtx tmp = gen_reg_rtx (SImode);
> +    aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
> +    tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
> +    emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
> +    DONE;
> +  }"
> +  [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \
> +                     neon_move,f_loads,f_stores,load1,store1,mov_reg")
> +   (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")]
> )

Thanks,
James
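
As an aside (not part of the patch or the review above): a minimal,
self-contained C sketch of how the integer immediates in the examples
from the patch description relate to the single-precision bit patterns
that the movi and mov/movk/fmov sequences materialise. The constant
values are taken from the examples above; the file name and everything
else is purely illustrative.

  /* bitpattern.c -- hypothetical example; build with: cc bitpattern.c && ./a.out  */
  #include <stdio.h>
  #include <stdint.h>
  #include <string.h>

  int main (void)
  {
    /* -0.0f has only the sign bit set, so a single
       "movi v0.2s, 0x80, lsl 24" (0x80 << 24 == 0x80000000) can
       materialise it without a literal load.  */
    float neg_zero = -0.0f;
    uint32_t bits;
    memcpy (&bits, &neg_zero, sizeof bits);
    printf ("-0.0f        -> 0x%08x\n", (unsigned) bits);	/* 0x80000000  */

    /* The mov/movk pair in the second example builds
       48128 | (0x47f0 << 16) == 0x47f0bc00 in w0; the trailing
       "fmov s0, w0" then moves that bit pattern unchanged into the
       FP/SIMD register.  */
    uint32_t imm = 48128u | (0x47f0u << 16);
    float f;
    memcpy (&f, &imm, sizeof f);
    printf ("mov+movk imm -> 0x%08x (%g as float)\n", (unsigned) imm, f);

    return 0;
  }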