[PATCH] D96825: [AArch64] Adding Neon Polynomial vadd Intrinsics
This revision was automatically updated to reflect the committed changes. Closed by commit rG55448ab540de: [AArch64] Adding Neon Polynomial vadd Intrinsics (authored by ctetreau). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D96825/new/ https://reviews.llvm.org/D96825 Files: clang/include/clang/Basic/arm_neon.td clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/aarch64-poly-add.c Index: clang/test/CodeGen/aarch64-poly-add.c === --- /dev/null +++ clang/test/CodeGen/aarch64-poly-add.c @@ -0,0 +1,85 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \ +// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg \ +// RUN: | FileCheck %s + +#include + +// CHECK-LABEL: @test_vadd_p8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = xor <8 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:ret <8 x i8> [[TMP0]] +// +poly8x8_t test_vadd_p8(poly8x8_t a, poly8x8_t b) { + return vadd_p8 (a, b); +} + +// CHECK-LABEL: @test_vadd_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> +// CHECK-NEXT:ret <4 x i16> [[TMP3]] +// +poly16x4_t test_vadd_p16(poly16x4_t a, poly16x4_t b) { + return vadd_p16 (a, b); +} + +// CHECK-LABEL: @test_vadd_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64> +// CHECK-NEXT:ret <1 x i64> [[TMP3]] +// +poly64x1_t test_vadd_p64(poly64x1_t a, poly64x1_t b) { + return vadd_p64(a, b); +} + +// CHECK-LABEL: @test_vaddq_p8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:ret <16 x i8> [[TMP0]] +// +poly8x16_t test_vaddq_p8(poly8x16_t a, poly8x16_t b){ + return vaddq_p8(a, b); +} + +// CHECK-LABEL: @test_vaddq_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <8 x i16> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> +// CHECK-NEXT:ret <8 x i16> [[TMP3]] +// +poly16x8_t test_vaddq_p16(poly16x8_t a, poly16x8_t b){ + return vaddq_p16(a, b); +} + +// CHECK-LABEL: @test_vaddq_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +// CHECK-NEXT:ret <2 x i64> [[TMP3]] +// +poly64x2_t test_vaddq_p64(poly64x2_t a, poly64x2_t b){ + return vaddq_p64(a, b); +} + +// CHECK-LABEL: @test_vaddq_p128( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast i128 [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT:ret i128 [[TMP3]] +// +poly128_t test_vaddq_p128 (poly128_t a, poly128_t b){ + return vaddq_p128(a, b); +} Index: clang/lib/CodeGen/CGBuiltin.cpp === --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -5371,7 +5371,10 @@ NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), NEONMAP1(vabs_v, arm_neon_vabs, 0), NEONMAP1(vabsq_v, arm_neon_vabs, 0), + NEONMAP0(vadd_v), NEONMAP0(vaddhn_v), + NEONMAP0(vaddq_p128), + NEONMAP0(vaddq_v), NEONMAP1(vaesdq_v, arm_neon_aesd, 0), NEONMAP1(vaeseq_v, arm_neon_aese, 0), NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), @@ -5665,7 +5668,10 @@ NEONMAP0(splatq_laneq_v), NEONMAP1(vabs_v, aarch64_neon_abs, 0), NEONMAP1(vabsq_v, aarch64_neon_abs, 0), + NEONMAP0(vadd_v), NEONMAP0(vaddhn_v), + NEONMAP0(vaddq_p128), + NEONMAP0(vaddq_v), NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0), NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0), NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0), @@ -6302,6 +6308,14 @@ if (VTy->getElementType()->isFloatingPointTy()) return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs"); return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops,
[PATCH] D96825: [AArch64] Adding Neon Polynomial vadd Intrinsics
ctetreau accepted this revision. ctetreau added a comment. LGTM. I'll land it for you. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D96825/new/ https://reviews.llvm.org/D96825 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D96825: [AArch64] Adding Neon Polynomial vadd Intrinsics
rsanthir.quic updated this revision to Diff 325034. rsanthir.quic added a comment. rebased on main CHANGES SINCE LAST ACTION https://reviews.llvm.org/D96825/new/ https://reviews.llvm.org/D96825 Files: clang/include/clang/Basic/arm_neon.td clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/aarch64-poly-add.c Index: clang/test/CodeGen/aarch64-poly-add.c === --- /dev/null +++ clang/test/CodeGen/aarch64-poly-add.c @@ -0,0 +1,85 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \ +// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg \ +// RUN: | FileCheck %s + +#include + +// CHECK-LABEL: @test_vadd_p8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = xor <8 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:ret <8 x i8> [[TMP0]] +// +poly8x8_t test_vadd_p8(poly8x8_t a, poly8x8_t b) { + return vadd_p8 (a, b); +} + +// CHECK-LABEL: @test_vadd_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> +// CHECK-NEXT:ret <4 x i16> [[TMP3]] +// +poly16x4_t test_vadd_p16(poly16x4_t a, poly16x4_t b) { + return vadd_p16 (a, b); +} + +// CHECK-LABEL: @test_vadd_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64> +// CHECK-NEXT:ret <1 x i64> [[TMP3]] +// +poly64x1_t test_vadd_p64(poly64x1_t a, poly64x1_t b) { + return vadd_p64(a, b); +} + +// CHECK-LABEL: @test_vaddq_p8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:ret <16 x i8> [[TMP0]] +// +poly8x16_t test_vaddq_p8(poly8x16_t a, poly8x16_t b){ + return vaddq_p8(a, b); +} + +// CHECK-LABEL: @test_vaddq_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <8 x i16> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> +// CHECK-NEXT:ret <8 x i16> [[TMP3]] +// +poly16x8_t test_vaddq_p16(poly16x8_t a, poly16x8_t b){ + return vaddq_p16(a, b); +} + +// CHECK-LABEL: @test_vaddq_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +// CHECK-NEXT:ret <2 x i64> [[TMP3]] +// +poly64x2_t test_vaddq_p64(poly64x2_t a, poly64x2_t b){ + return vaddq_p64(a, b); +} + +// CHECK-LABEL: @test_vaddq_p128( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast i128 [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT:ret i128 [[TMP3]] +// +poly128_t test_vaddq_p128 (poly128_t a, poly128_t b){ + return vaddq_p128(a, b); +} Index: clang/lib/CodeGen/CGBuiltin.cpp === --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -5371,7 +5371,10 @@ NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), NEONMAP1(vabs_v, arm_neon_vabs, 0), NEONMAP1(vabsq_v, arm_neon_vabs, 0), + NEONMAP0(vadd_v), NEONMAP0(vaddhn_v), + NEONMAP0(vaddq_p128), + NEONMAP0(vaddq_v), NEONMAP1(vaesdq_v, arm_neon_aesd, 0), NEONMAP1(vaeseq_v, arm_neon_aese, 0), NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), @@ -5665,7 +5668,10 @@ NEONMAP0(splatq_laneq_v), NEONMAP1(vabs_v, aarch64_neon_abs, 0), NEONMAP1(vabsq_v, aarch64_neon_abs, 0), + NEONMAP0(vadd_v), NEONMAP0(vaddhn_v), + NEONMAP0(vaddq_p128), + NEONMAP0(vaddq_v), NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0), NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0), NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0), @@ -6302,6 +6308,14 @@ if (VTy->getElementType()->isFloatingPointTy()) return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs"); return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs"); + case NEON::BI__builtin_neon_vadd_v: + case NEON::BI__builtin_neon_vaddq_v: { +llvm::Type *VTy =
[PATCH] D96825: [AArch64] Adding Neon Polynomial vadd Intrinsics
rsanthir.quic updated this revision to Diff 325027. rsanthir.quic added a comment. Windows builds were failing due to missing builtins in Intrinsics map CHANGES SINCE LAST ACTION https://reviews.llvm.org/D96825/new/ https://reviews.llvm.org/D96825 Files: clang/include/clang/Basic/arm_neon.td clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/aarch64-poly-add.c Index: clang/test/CodeGen/aarch64-poly-add.c === --- /dev/null +++ clang/test/CodeGen/aarch64-poly-add.c @@ -0,0 +1,85 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \ +// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg \ +// RUN: | FileCheck %s + +#include + +// CHECK-LABEL: @test_vadd_p8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = xor <8 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:ret <8 x i8> [[TMP0]] +// +poly8x8_t test_vadd_p8(poly8x8_t a, poly8x8_t b) { + return vadd_p8 (a, b); +} + +// CHECK-LABEL: @test_vadd_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> +// CHECK-NEXT:ret <4 x i16> [[TMP3]] +// +poly16x4_t test_vadd_p16(poly16x4_t a, poly16x4_t b) { + return vadd_p16 (a, b); +} + +// CHECK-LABEL: @test_vadd_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64> +// CHECK-NEXT:ret <1 x i64> [[TMP3]] +// +poly64x1_t test_vadd_p64(poly64x1_t a, poly64x1_t b) { + return vadd_p64(a, b); +} + +// CHECK-LABEL: @test_vaddq_p8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:ret <16 x i8> [[TMP0]] +// +poly8x16_t test_vaddq_p8(poly8x16_t a, poly8x16_t b){ + return vaddq_p8(a, b); +} + +// CHECK-LABEL: @test_vaddq_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <8 x i16> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> +// CHECK-NEXT:ret <8 x i16> [[TMP3]] +// +poly16x8_t test_vaddq_p16(poly16x8_t a, poly16x8_t b){ + return vaddq_p16(a, b); +} + +// CHECK-LABEL: @test_vaddq_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +// CHECK-NEXT:ret <2 x i64> [[TMP3]] +// +poly64x2_t test_vaddq_p64(poly64x2_t a, poly64x2_t b){ + return vaddq_p64(a, b); +} + +// CHECK-LABEL: @test_vaddq_p128( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast i128 [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT:ret i128 [[TMP3]] +// +poly128_t test_vaddq_p128 (poly128_t a, poly128_t b){ + return vaddq_p128(a, b); +} Index: clang/lib/CodeGen/CGBuiltin.cpp === --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -5368,7 +5368,10 @@ NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), NEONMAP1(vabs_v, arm_neon_vabs, 0), NEONMAP1(vabsq_v, arm_neon_vabs, 0), + NEONMAP0(vadd_v), NEONMAP0(vaddhn_v), + NEONMAP0(vaddq_p128), + NEONMAP0(vaddq_v), NEONMAP1(vaesdq_v, arm_neon_aesd, 0), NEONMAP1(vaeseq_v, arm_neon_aese, 0), NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), @@ -5662,7 +5665,10 @@ NEONMAP0(splatq_laneq_v), NEONMAP1(vabs_v, aarch64_neon_abs, 0), NEONMAP1(vabsq_v, aarch64_neon_abs, 0), + NEONMAP0(vadd_v), NEONMAP0(vaddhn_v), + NEONMAP0(vaddq_p128), + NEONMAP0(vaddq_v), NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0), NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0), NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0), @@ -6290,6 +6296,14 @@ if (VTy->getElementType()->isFloatingPointTy()) return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs"); return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs"); + case NEON::BI__builtin_neon_vadd_v: + case
[PATCH] D96825: [AArch64] Adding Neon Polynomial vadd Intrinsics
thakis added a comment. Thanks for the revert! FWIW the failure repros for me locally if I just do `ninja check-clang` (as long as I have the aarch64 target enabled, which it is by default in the cmake build). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D96825/new/ https://reviews.llvm.org/D96825 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D96825: [AArch64] Adding Neon Polynomial vadd Intrinsics
thakis added a comment. Looks like this breaks check-clang: http://45.33.8.238/win/33493/step_7.txt Please take a look, and revert for now if it takes a while to fix. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D96825/new/ https://reviews.llvm.org/D96825 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D96825: [AArch64] Adding Neon Polynomial vadd Intrinsics
This revision was automatically updated to reflect the committed changes. Closed by commit rGd9645059c5de: [AArch64] Adding Neon Polynomial vadd Intrinsics (authored by pzheng). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D96825/new/ https://reviews.llvm.org/D96825 Files: clang/include/clang/Basic/arm_neon.td clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/aarch64-poly-add.c Index: clang/test/CodeGen/aarch64-poly-add.c === --- /dev/null +++ clang/test/CodeGen/aarch64-poly-add.c @@ -0,0 +1,85 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ +// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg \ +// RUN: | FileCheck %s + +#include + +// CHECK-LABEL: @test_vadd_p8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = xor <8 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:ret <8 x i8> [[TMP0]] +// +poly8x8_t test_vadd_p8(poly8x8_t a, poly8x8_t b) { + return vadd_p8 (a, b); +} + +// CHECK-LABEL: @test_vadd_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> +// CHECK-NEXT:ret <4 x i16> [[TMP3]] +// +poly16x4_t test_vadd_p16(poly16x4_t a, poly16x4_t b) { + return vadd_p16 (a, b); +} + +// CHECK-LABEL: @test_vadd_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64> +// CHECK-NEXT:ret <1 x i64> [[TMP3]] +// +poly64x1_t test_vadd_p64(poly64x1_t a, poly64x1_t b) { + return vadd_p64(a, b); +} + +// CHECK-LABEL: @test_vaddq_p8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:ret <16 x i8> [[TMP0]] +// +poly8x16_t test_vaddq_p8(poly8x16_t a, poly8x16_t b){ + return vaddq_p8(a, b); +} + +// CHECK-LABEL: @test_vaddq_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <8 x i16> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> +// CHECK-NEXT:ret <8 x i16> [[TMP3]] +// +poly16x8_t test_vaddq_p16(poly16x8_t a, poly16x8_t b){ + return vaddq_p16(a, b); +} + +// CHECK-LABEL: @test_vaddq_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +// CHECK-NEXT:ret <2 x i64> [[TMP3]] +// +poly64x2_t test_vaddq_p64(poly64x2_t a, poly64x2_t b){ + return vaddq_p64(a, b); +} + +// CHECK-LABEL: @test_vaddq_p128( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast i128 [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT:ret i128 [[TMP3]] +// +poly128_t test_vaddq_p128 (poly128_t a, poly128_t b){ + return vaddq_p128(a, b); +} Index: clang/lib/CodeGen/CGBuiltin.cpp === --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -5371,7 +5371,10 @@ NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), NEONMAP1(vabs_v, arm_neon_vabs, 0), NEONMAP1(vabsq_v, arm_neon_vabs, 0), + NEONMAP0(vadd_v), NEONMAP0(vaddhn_v), + NEONMAP0(vaddq_p128), + NEONMAP0(vaddq_v), NEONMAP1(vaesdq_v, arm_neon_aesd, 0), NEONMAP1(vaeseq_v, arm_neon_aese, 0), NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), @@ -6302,6 +6305,14 @@ if (VTy->getElementType()->isFloatingPointTy()) return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs"); return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs"); + case NEON::BI__builtin_neon_vadd_v: + case NEON::BI__builtin_neon_vaddq_v: { +llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8); +Ops[0] = Builder.CreateBitCast(Ops[0], VTy); +Ops[1] = Builder.CreateBitCast(Ops[1], VTy); +Ops[0] = Builder.CreateXor(Ops[0], Ops[1]); +return Builder.CreateBitCast(Ops[0], Ty); + } case
[PATCH] D96825: [AArch64] Adding Neon Polynomial vadd Intrinsics
DavidSpickett accepted this revision. DavidSpickett added a comment. LGTM CHANGES SINCE LAST ACTION https://reviews.llvm.org/D96825/new/ https://reviews.llvm.org/D96825 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D96825: [AArch64] Adding Neon Polynomial vadd Intrinsics
rsanthir.quic updated this revision to Diff 324336. rsanthir.quic marked an inline comment as done. rsanthir.quic retitled this revision from "[AArch64] Adding Polynomial vadd Intrinsics support" to "[AArch64] Adding Neon Polynomial vadd Intrinsics". rsanthir.quic added a comment. removed unnecessary flag in test and changed commit message. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D96825/new/ https://reviews.llvm.org/D96825 Files: clang/include/clang/Basic/arm_neon.td clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/aarch64-poly-add.c Index: clang/test/CodeGen/aarch64-poly-add.c === --- /dev/null +++ clang/test/CodeGen/aarch64-poly-add.c @@ -0,0 +1,85 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ +// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg \ +// RUN: | FileCheck %s + +#include + +// CHECK-LABEL: @test_vadd_p8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = xor <8 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:ret <8 x i8> [[TMP0]] +// +poly8x8_t test_vadd_p8(poly8x8_t a, poly8x8_t b) { + return vadd_p8 (a, b); +} + +// CHECK-LABEL: @test_vadd_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> +// CHECK-NEXT:ret <4 x i16> [[TMP3]] +// +poly16x4_t test_vadd_p16(poly16x4_t a, poly16x4_t b) { + return vadd_p16 (a, b); +} + +// CHECK-LABEL: @test_vadd_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64> +// CHECK-NEXT:ret <1 x i64> [[TMP3]] +// +poly64x1_t test_vadd_p64(poly64x1_t a, poly64x1_t b) { + return vadd_p64(a, b); +} + +// CHECK-LABEL: @test_vaddq_p8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:ret <16 x i8> [[TMP0]] +// +poly8x16_t test_vaddq_p8(poly8x16_t a, poly8x16_t b){ + return vaddq_p8(a, b); +} + +// CHECK-LABEL: @test_vaddq_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <8 x i16> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> +// CHECK-NEXT:ret <8 x i16> [[TMP3]] +// +poly16x8_t test_vaddq_p16(poly16x8_t a, poly16x8_t b){ + return vaddq_p16(a, b); +} + +// CHECK-LABEL: @test_vaddq_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +// CHECK-NEXT:ret <2 x i64> [[TMP3]] +// +poly64x2_t test_vaddq_p64(poly64x2_t a, poly64x2_t b){ + return vaddq_p64(a, b); +} + +// CHECK-LABEL: @test_vaddq_p128( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast i128 [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT:ret i128 [[TMP3]] +// +poly128_t test_vaddq_p128 (poly128_t a, poly128_t b){ + return vaddq_p128(a, b); +} Index: clang/lib/CodeGen/CGBuiltin.cpp === --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -5368,7 +5368,10 @@ NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), NEONMAP1(vabs_v, arm_neon_vabs, 0), NEONMAP1(vabsq_v, arm_neon_vabs, 0), + NEONMAP0(vadd_v), NEONMAP0(vaddhn_v), + NEONMAP0(vaddq_p128), + NEONMAP0(vaddq_v), NEONMAP1(vaesdq_v, arm_neon_aesd, 0), NEONMAP1(vaeseq_v, arm_neon_aese, 0), NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), @@ -6299,6 +6302,14 @@ if (VTy->getElementType()->isFloatingPointTy()) return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs"); return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs"); + case NEON::BI__builtin_neon_vadd_v: + case NEON::BI__builtin_neon_vaddq_v: { +llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8); +Ops[0] = Builder.CreateBitCast(Ops[0], VTy); +Ops[1] =