Hi t.p.northover,

Hi Tim and reviewers,

Some ACLE pair builtin functions are only defined in arm_neon.h but not handled 
except that. So we can't generate coresponding IR and code when calling such 
intrinsics. 
For example:
    "int32_t vminv_s32(int32x2_t a)" should be matched to an instruction "SMINP 
Vd.2S,Vn.2S,Vm.2S"
But we only have "vminv_s32" defined in arm_neon.h and missing other process to 
compile it. Tests about vminv_s32 will be failed.

This patch implements such intrinsics.
Review. Please.

Thanks,
-Hao



http://llvm-reviews.chandlerc.com/D2348

Files:
  lib/Target/AArch64/AArch64InstrNEON.td
  test/CodeGen/AArch64/neon-add-pairwise.ll
  test/CodeGen/AArch64/neon-max-min-pairwise.ll
  test/CodeGen/AArch64/neon-misc.ll
  tools/clang/lib/CodeGen/CGBuiltin.cpp
  tools/clang/test/CodeGen/aarch64-neon-intrinsics.c
Index: lib/Target/AArch64/AArch64InstrNEON.td
===================================================================
--- lib/Target/AArch64/AArch64InstrNEON.td
+++ lib/Target/AArch64/AArch64InstrNEON.td
@@ -978,6 +978,20 @@
                                       int_aarch64_neon_vmulx,
                                       v2f32, v4f32, v2f64, 1>;
 
+// Patterns to match llvm.aarch64.* intrinsic for 
+// ADDP, SMINP, UMINP, SMAXP, UMAXP having i32 as output
+class Neon_VectorPair_v2i32_pattern<SDPatternOperator opnode, Instruction INST>
+  : Pat<(v1i32 (opnode (v2i32 VPR64:$Rn))),
+        (EXTRACT_SUBREG
+             (v2i32 (INST (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rn))),
+             sub_32)>;
+
+def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_sminv, SMINPvvv_2S>;
+def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_uminv, UMINPvvv_2S>;
+def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_smaxv, SMAXPvvv_2S>;
+def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_umaxv, UMAXPvvv_2S>;
+def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_vaddv, ADDP_2S>;
+
 // Vector Immediate Instructions
 
 multiclass neon_mov_imm_shift_asmoperands<string PREFIX>
@@ -7695,6 +7709,11 @@
 defm UADDLP : NeonI_PairwiseAdd<"uaddlp", 0b1, 0b00010,
                                 int_arm_neon_vpaddlu>;
 
+def : Pat<(v1i64 (int_aarch64_neon_saddlv (v2i32 VPR64:$Rn))),
+          (SADDLP2s1d $Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_uaddlv (v2i32 VPR64:$Rn))),
+          (UADDLP2s1d $Rn)>;
+
 multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
                              SDPatternOperator Neon_Padd> {
   let Constraints = "$src = $Rd" in {
Index: test/CodeGen/AArch64/neon-add-pairwise.ll
===================================================================
--- test/CodeGen/AArch64/neon-add-pairwise.ll
+++ test/CodeGen/AArch64/neon-add-pairwise.ll
@@ -90,3 +90,12 @@
         ret <2 x double> %val
 }
 
+define i32 @test_vaddv.v2i32(<2 x i32> %a) {
+; CHECK-LABEL: test_vaddv.v2i32
+; CHECK: addp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %1 = tail call <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v2i32(<2 x i32> %a)
+  %2 = extractelement <1 x i32> %1, i32 0
+  ret i32 %2
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v2i32(<2 x i32>)
\ No newline at end of file
Index: test/CodeGen/AArch64/neon-max-min-pairwise.ll
===================================================================
--- test/CodeGen/AArch64/neon-max-min-pairwise.ll
+++ test/CodeGen/AArch64/neon-max-min-pairwise.ll
@@ -308,3 +308,39 @@
         ret <2 x double> %val
 }
 
+define i32 @test_vminv_s32(<2 x i32> %a) {
+; CHECK-LABEL: test_vminv_s32
+; CHECK: sminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %1 = tail call <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v2i32(<2 x i32> %a)
+  %2 = extractelement <1 x i32> %1, i32 0
+  ret i32 %2
+}
+
+define i32 @test_vminv_u32(<2 x i32> %a) {
+; CHECK-LABEL: test_vminv_u32
+; CHECK: uminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %1 = tail call <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v2i32(<2 x i32> %a)
+  %2 = extractelement <1 x i32> %1, i32 0
+  ret i32 %2
+}
+
+define i32 @test_vmaxv_s32(<2 x i32> %a) {
+; CHECK-LABEL: test_vmaxv_s32
+; CHECK: smaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %1 = tail call <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v2i32(<2 x i32> %a)
+  %2 = extractelement <1 x i32> %1, i32 0
+  ret i32 %2
+}
+
+define i32 @test_vmaxv_u32(<2 x i32> %a) {
+; CHECK-LABEL: test_vmaxv_u32
+; CHECK: umaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %1 = tail call <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v2i32(<2 x i32> %a)
+  %2 = extractelement <1 x i32> %1, i32 0
+  ret i32 %2
+}
+
+declare <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v2i32(<2 x i32>)
+declare <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v2i32(<2 x i32>)
+declare <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v2i32(<2 x i32>)
+declare <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v2i32(<2 x i32>)
\ No newline at end of file
Index: test/CodeGen/AArch64/neon-misc.ll
===================================================================
--- test/CodeGen/AArch64/neon-misc.ll
+++ test/CodeGen/AArch64/neon-misc.ll
@@ -1796,4 +1796,23 @@
 declare <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double>, <1 x double>)
 declare <1 x double> @llvm.sqrt.v1f64(<1 x double>)
 declare <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double>)
-declare <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double>)
\ No newline at end of file
+declare <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double>)
+
+define i64 @test_vaddlv_s32(<2 x i32> %a) {
+; CHECK-LABEL: test_vaddlv_s32
+; CHECK: saddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s
+  %1 = tail call <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v2i32(<2 x i32> %a)
+  %2 = extractelement <1 x i64> %1, i32 0
+  ret i64 %2
+}
+
+define i64 @test_vaddlv_u32(<2 x i32> %a) {
+; CHECK-LABEL: test_vaddlv_u32
+; CHECK: uaddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s
+  %1 = tail call <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v2i32(<2 x i32> %a)
+  %2 = extractelement <1 x i64> %1, i32 0
+  ret i64 %2
+}
+
+declare <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v2i32(<2 x i32>)
+declare <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v2i32(<2 x i32>)
\ No newline at end of file
Index: tools/clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- tools/clang/lib/CodeGen/CGBuiltin.cpp
+++ tools/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2026,6 +2026,7 @@
   // The followings are intrinsics with scalar results generated AcrossVec vectors
   case AArch64::BI__builtin_neon_vaddlv_s8:
   case AArch64::BI__builtin_neon_vaddlv_s16:
+  case AArch64::BI__builtin_neon_vaddlv_s32:
   case AArch64::BI__builtin_neon_vaddlvq_s8:
   case AArch64::BI__builtin_neon_vaddlvq_s16:
   case AArch64::BI__builtin_neon_vaddlvq_s32:
@@ -2033,6 +2034,7 @@
     AcrossVec = true; ExtendEle = true; s = "saddlv"; break;
   case AArch64::BI__builtin_neon_vaddlv_u8:
   case AArch64::BI__builtin_neon_vaddlv_u16:
+  case AArch64::BI__builtin_neon_vaddlv_u32:
   case AArch64::BI__builtin_neon_vaddlvq_u8:
   case AArch64::BI__builtin_neon_vaddlvq_u16:
   case AArch64::BI__builtin_neon_vaddlvq_u32:
@@ -2040,6 +2042,7 @@
     AcrossVec = true; ExtendEle = true; s = "uaddlv"; break;
   case AArch64::BI__builtin_neon_vmaxv_s8:
   case AArch64::BI__builtin_neon_vmaxv_s16:
+  case AArch64::BI__builtin_neon_vmaxv_s32:
   case AArch64::BI__builtin_neon_vmaxvq_s8:
   case AArch64::BI__builtin_neon_vmaxvq_s16:
   case AArch64::BI__builtin_neon_vmaxvq_s32:
@@ -2047,6 +2050,7 @@
     AcrossVec = true; ExtendEle = false; s = "smaxv"; break;
   case AArch64::BI__builtin_neon_vmaxv_u8:
   case AArch64::BI__builtin_neon_vmaxv_u16:
+  case AArch64::BI__builtin_neon_vmaxv_u32:
   case AArch64::BI__builtin_neon_vmaxvq_u8:
   case AArch64::BI__builtin_neon_vmaxvq_u16:
   case AArch64::BI__builtin_neon_vmaxvq_u32:
@@ -2054,6 +2058,7 @@
     AcrossVec = true; ExtendEle = false; s = "umaxv"; break;
   case AArch64::BI__builtin_neon_vminv_s8:
   case AArch64::BI__builtin_neon_vminv_s16:
+  case AArch64::BI__builtin_neon_vminv_s32:
   case AArch64::BI__builtin_neon_vminvq_s8:
   case AArch64::BI__builtin_neon_vminvq_s16:
   case AArch64::BI__builtin_neon_vminvq_s32:
@@ -2061,6 +2066,7 @@
     AcrossVec = true; ExtendEle = false; s = "sminv"; break;
   case AArch64::BI__builtin_neon_vminv_u8:
   case AArch64::BI__builtin_neon_vminv_u16:
+  case AArch64::BI__builtin_neon_vminv_u32:
   case AArch64::BI__builtin_neon_vminvq_u8:
   case AArch64::BI__builtin_neon_vminvq_u16:
   case AArch64::BI__builtin_neon_vminvq_u32:
@@ -2068,12 +2074,14 @@
     AcrossVec = true; ExtendEle = false; s = "uminv"; break;
   case AArch64::BI__builtin_neon_vaddv_s8:
   case AArch64::BI__builtin_neon_vaddv_s16:
+  case AArch64::BI__builtin_neon_vaddv_s32:
   case AArch64::BI__builtin_neon_vaddvq_s8:
   case AArch64::BI__builtin_neon_vaddvq_s16:
   case AArch64::BI__builtin_neon_vaddvq_s32:
   case AArch64::BI__builtin_neon_vaddvq_s64:
   case AArch64::BI__builtin_neon_vaddv_u8:
   case AArch64::BI__builtin_neon_vaddv_u16:
+  case AArch64::BI__builtin_neon_vaddv_u32:
   case AArch64::BI__builtin_neon_vaddvq_u8:
   case AArch64::BI__builtin_neon_vaddvq_u16:
   case AArch64::BI__builtin_neon_vaddvq_u32:
Index: tools/clang/test/CodeGen/aarch64-neon-intrinsics.c
===================================================================
--- tools/clang/test/CodeGen/aarch64-neon-intrinsics.c
+++ tools/clang/test/CodeGen/aarch64-neon-intrinsics.c
@@ -11723,3 +11723,51 @@
   return vrsqrts_f64(a, b);
   // CHECK: frsqrts d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
 }
+
+int32_t test_vminv_s32(int32x2_t a) {
+  // CHECK-LABEL: test_vminv_s32
+  return vminv_s32(a);
+  // CHECK: sminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint32_t test_vminv_u32(uint32x2_t a) {
+  // CHECK-LABEL: test_vminv_u32
+  return vminv_u32(a);
+  // CHECK: uminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int32_t test_vmaxv_s32(int32x2_t a) {
+  // CHECK-LABEL: test_vmaxv_s32
+  return vmaxv_s32(a);
+  // CHECK: smaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint32_t test_vmaxv_u32(uint32x2_t a) {
+  // CHECK-LABEL: test_vmaxv_u32
+  return vmaxv_u32(a);
+  // CHECK: umaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int32_t test_vaddv_s32(int32x2_t a) {
+  // CHECK-LABEL: test_vaddv_s32
+  return vaddv_s32(a);
+  // CHECK: addp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint32_t test_vaddv_u32(uint32x2_t a) {
+  // CHECK-LABEL: test_vaddv_u32
+  return vaddv_u32(a);
+  // CHECK: addp {{{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int64_t test_vaddlv_s32(int32x2_t a) {
+  // CHECK-LABEL: test_vaddlv_s32
+  return vaddlv_s32(a);
+  // CHECK: saddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s
+}
+
+uint64_t test_vaddlv_u32(uint32x2_t a) {
+  // CHECK-LABEL: test_vaddlv_u32
+  return vaddlv_u32(a);
+  // CHECK: uaddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s
+}
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Reply via email to