+(define_expand "aarch64_ld2_lane<VQ:mode>"
+  [(match_operand:OI 0 "register_operand" "=w")
+       (match_operand:DI 1 "register_operand" "w")
+       (match_operand:OI 2 "register_operand" "0")
+       (match_operand:SI 3 "immediate_operand" "i")
+       (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_SIMD"
+{
+  enum machine_mode mode = <V_TWO_ELEM>mode;
+  rtx mem = gen_rtx_MEM (mode, operands[1]);
+  operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
+

The endianness lane correction breaks this for big-endian (BE).

You don't need the endianness lane correction here - we always call NEON intrinsics with the architectural lane number, irrespective of endianness. The exception, of course, would be if you flipped the lane number somewhere to make it part of RTL vec_select lane patterns, which you don't do here.

You could also do some lane-bounds checking here in the expander.

+  emit_insn (gen_vec_load_lanesoi_lane<VQ:mode> (operands[0],
+                                                 mem,
+                                                 operands[2],
+                                                 operands[3]));
+  DONE;
+})
+
+(define_expand "aarch64_ld3_lane<VQ:mode>"
+  [(match_operand:CI 0 "register_operand" "=w")
+       (match_operand:DI 1 "register_operand" "w")
+       (match_operand:CI 2 "register_operand" "0")
+       (match_operand:SI 3 "immediate_operand" "i")
+       (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_SIMD"
+{
+  enum machine_mode mode = <V_THREE_ELEM>mode;
+  rtx mem = gen_rtx_MEM (mode, operands[1]);
+  operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
+

No endianness correction for lanes necessary.

+  emit_insn (gen_vec_load_lanesci_lane<VQ:mode> (operands[0],
+                                                 mem,
+                                                 operands[2],
+                                                 operands[3]));
+  DONE;
+})
+
+(define_expand "aarch64_ld4_lane<VQ:mode>"
+  [(match_operand:XI 0 "register_operand" "=w")
+       (match_operand:DI 1 "register_operand" "w")
+       (match_operand:XI 2 "register_operand" "0")
+       (match_operand:SI 3 "immediate_operand" "i")
+       (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_SIMD"
+{
+  enum machine_mode mode = <V_FOUR_ELEM>mode;
+  rtx mem = gen_rtx_MEM (mode, operands[1]);
+  operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
+

Same here: no endianness correction for lanes is necessary.

+  emit_insn (gen_vec_load_lanesxi_lane<VQ:mode> (operands[0],
+                                                 mem,
+                                                 operands[2],
+                                                 operands[3]));
+  DONE;
+})
+
+
+
  ;; Expanders for builtins to extract vector registers from large
  ;; opaque integer modes.

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c60038a..ea924ab 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -92,6 +92,9 @@
      UNSPEC_LD2
      UNSPEC_LD3
      UNSPEC_LD4
+    UNSPEC_LD2_LANE
+    UNSPEC_LD3_LANE
+    UNSPEC_LD4_LANE
      UNSPEC_MB
      UNSPEC_NOP
      UNSPEC_PRLG_STK



Thanks,
Tejas.

Reply via email to