Hi, The patch fixes an ICE in gcc/config/arm/arm.c:arm_expand_neon_args (). When the destination address for vst1q_lane_u64 is not aligned, calling expand_normal returns a REG, which arm_expand_neon_args does not expect, resulting in an assertion failure. The patch now calls expand_expr with EXPAND_MEMORY to tell the expander that we really want a MEM in the case of NEON_ARG_MEMORY.
OK for the trunk and 4.8 branch? Thanks, Yufeng gcc/ * config/arm/arm.c (arm_expand_neon_args): Call expand_expr with EXPAND_MEMORY for NEON_ARG_MEMORY; check if the returned rtx is const0_rtx or not. gcc/testsuite/ * gcc.target/arm/neon/vst1Q_laneu64-1.c: New test.
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 8fea2a6..a3b2796 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -24526,7 +24526,11 @@ arm_expand_neon_args (rtx target, int icode, int have_retval, type_mode); } - op[argc] = expand_normal (arg[argc]); + /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure a MEM_P + be returned. */ + op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode, + (thisarg == NEON_ARG_MEMORY + ? EXPAND_MEMORY : EXPAND_NORMAL)); switch (thisarg) { @@ -24545,6 +24549,9 @@ arm_expand_neon_args (rtx target, int icode, int have_retval, break; case NEON_ARG_MEMORY: + /* Check if expand failed. */ + if (op[argc] == const0_rtx) + return 0; gcc_assert (MEM_P (op[argc])); PUT_MODE (op[argc], mode[argc]); /* ??? arm_neon.h uses the same built-in functions for signed diff --git a/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu64-1.c b/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu64-1.c new file mode 100644 index 0000000..5f4c927 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu64-1.c @@ -0,0 +1,25 @@ +/* Test the `vst1Q_laneu64' ARM Neon intrinsic. */ + +/* Detect ICE in the case of unaligned memory address. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" + +unsigned char dummy_store[1000]; + +void +foo (char* addr) +{ + uint8x16_t vdata = vld1q_u8 (addr); + vst1q_lane_u64 ((uint64_t*) &dummy_store, vreinterpretq_u64_u8 (vdata), 0); +} + +uint64_t +bar (uint64x2_t vdata) +{ + vdata = vld1q_lane_u64 ((uint64_t*) &dummy_store, vdata, 0); + return vgetq_lane_u64 (vdata, 0); +}