Hi,

Here's a revised version of the patch that addresses Segher's comments. Bootstrapped and tested on powerpc64le-unknown-linux-gnu. OK for trunk?
Thanks, Bill [gcc] 2014-10-02 Bill Schmidt <wschm...@linux.vnet.ibm.com> * altivec.md (altivec_lvsl): New define_expand. (altivec_lvsl_direct): Rename define_insn from altivec_lvsl. (altivec_lvsr): New define_expand. (altivec_lvsr_direct): Rename define_insn from altivec_lvsr. * rs6000.c (rs6000_expand_builtin): Change to use altivec_lvs[lr]_direct; remove commented-out code. [gcc/testsuite] 2014-10-02 Bill Schmidt <wschm...@linux.vnet.ibm.com> * gcc.target/powerpc/lvsl-lvsr.c: New test. Index: gcc/config/rs6000/altivec.md =================================================================== --- gcc/config/rs6000/altivec.md (revision 215689) +++ gcc/config/rs6000/altivec.md (working copy) @@ -2297,7 +2297,31 @@ "dststt %0,%1,%2" [(set_attr "type" "vecsimple")]) -(define_insn "altivec_lvsl" +(define_expand "altivec_lvsl" + [(use (match_operand:V16QI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "memory_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_lvsl_direct (operands[0], operands[1])); + else + { + int i; + rtx mask, perm[16], constv, vperm; + mask = gen_reg_rtx (V16QImode); + emit_insn (gen_altivec_lvsl_direct (mask, operands[1])); + for (i = 0; i < 16; ++i) + perm[i] = GEN_INT (i); + constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)); + constv = force_reg (V16QImode, constv); + vperm = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv), + UNSPEC_VPERM); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], vperm)); + } + DONE; +}) + +(define_insn "altivec_lvsl_direct" [(set (match_operand:V16QI 0 "register_operand" "=v") (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")] UNSPEC_LVSL))] @@ -2305,7 +2329,31 @@ "lvsl %0,%y1" [(set_attr "type" "vecload")]) -(define_insn "altivec_lvsr" +(define_expand "altivec_lvsr" + [(use (match_operand:V16QI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "memory_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn 
(gen_altivec_lvsr_direct (operands[0], operands[1])); + else + { + int i; + rtx mask, perm[16], constv, vperm; + mask = gen_reg_rtx (V16QImode); + emit_insn (gen_altivec_lvsr_direct (mask, operands[1])); + for (i = 0; i < 16; ++i) + perm[i] = GEN_INT (i); + constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)); + constv = force_reg (V16QImode, constv); + vperm = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv), + UNSPEC_VPERM); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], vperm)); + } + DONE; +}) + +(define_insn "altivec_lvsr_direct" [(set (match_operand:V16QI 0 "register_operand" "=v") (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")] UNSPEC_LVSR))] Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (revision 215689) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -13898,8 +13898,8 @@ rs6000_expand_builtin (tree exp, rtx target, rtx s case ALTIVEC_BUILTIN_MASK_FOR_LOAD: case ALTIVEC_BUILTIN_MASK_FOR_STORE: { - int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr - : (int) CODE_FOR_altivec_lvsl); + int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct + : (int) CODE_FOR_altivec_lvsl_direct); enum machine_mode tmode = insn_data[icode].operand[0].mode; enum machine_mode mode = insn_data[icode].operand[1].mode; tree arg; @@ -13927,7 +13927,6 @@ rs6000_expand_builtin (tree exp, rtx target, rtx s || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) target = gen_reg_rtx (tmode); - /*pat = gen_altivec_lvsr (target, op);*/ pat = GEN_FCN (icode) (target, op); if (!pat) return 0; Index: gcc/testsuite/gcc.target/powerpc/lvsl-lvsr.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/lvsl-lvsr.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/lvsl-lvsr.c (working copy) @@ -0,0 +1,21 @@ +/* Test expected code generation for lvsl and lvsr on little endian. 
+ Note that lvsl and lvsr are each produced once, but the filename + causes them to appear twice in the file. */ + +/* { dg-do compile { target { powerpc64le-*-* } } } */ +/* { dg-options "-O0 -Wno-deprecated" } */ +/* { dg-final { scan-assembler-times "lvsl" 2 } } */ +/* { dg-final { scan-assembler-times "lvsr" 2 } } */ +/* { dg-final { scan-assembler-times "lxvd2x" 2 } } */ +/* { dg-final { scan-assembler-times "vperm" 2 } } */ + + +#include <altivec.h> + +float f[20]; + +void foo () +{ + vector unsigned char a = vec_lvsl (4, f); + vector unsigned char b = vec_lvsr (8, f); +}