llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clangir Author: Jiahao Guo (E00N777) <details> <summary>Changes</summary> ### summary part of : https://github.com/llvm/llvm-project/issues/185382 Adds ClangIR support for all AArch64 NEON pairwise-maximum intrinsics (vpmax*) in https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#pairwise-maximum Also enables the scalar pairwise reductions vpmaxs_f32, vpmaxqd_f64, vpmaxnms_f32, and vpmaxnmqd_f64 by routing them through the common NEON SISD path (fmaxv / fmaxnmv). --- Patch is 27.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/201495.diff 3 Files Affected: - (modified) clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp (+8-4) - (modified) clang/test/CodeGen/AArch64/neon-intrinsics.c (-240) - (modified) clang/test/CodeGen/AArch64/neon/intrinsics.c (+264) ``````````diff diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 0b4581e13f0a1..cd3b15db9a23a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -436,6 +436,10 @@ static mlir::Value emitCommonNeonSISDBuiltinExpr( case NEON::BI__builtin_neon_vqrshrnd_n_u64: case NEON::BI__builtin_neon_vsrid_n_s64: case NEON::BI__builtin_neon_vsrid_n_u64: + case NEON::BI__builtin_neon_vpmaxs_f32: + case NEON::BI__builtin_neon_vpmaxqd_f64: + case NEON::BI__builtin_neon_vpmaxnms_f32: + case NEON::BI__builtin_neon_vpmaxnmqd_f64: break; } @@ -2734,10 +2738,10 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc); case NEON::BI__builtin_neon_vpmax_v: case NEON::BI__builtin_neon_vpmaxq_v: - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented AArch64 builtin call: ") + - getContext().BuiltinInfo.getName(builtinID)); - return mlir::Value{}; + intrName = usgn ? "aarch64.neon.umaxp" : "aarch64.neon.smaxp"; + if (cir::isFPOrVectorOfFPType(ty)) + intrName = "aarch64.neon.fmaxp"; + return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc); case NEON::BI__builtin_neon_vminnm_v: case NEON::BI__builtin_neon_vminnmq_v: intrName = "aarch64.neon.fminnm"; diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c index 6c514cee8fdc2..b5cfc96196b85 100644 --- a/clang/test/CodeGen/AArch64/neon-intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c @@ -4458,206 +4458,6 @@ uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) { return vqrshlq_u64(a, b); } -// CHECK-LABEL: define dso_local <8 x i8> @test_vpmax_s8( -// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> [[A]], <8 x i8> [[B]]) -// CHECK-NEXT: ret <8 x i8> [[VPMAX_I]] -// -int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) { - return vpmax_s8(a, b); -} - -// CHECK-LABEL: define dso_local <4 x i16> @test_vpmax_s16( -// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8> -// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]]) -// CHECK-NEXT: ret <4 x i16> [[VPMAX2_I]] -// -int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) { - return vpmax_s16(a, b); -} - -// CHECK-LABEL: define dso_local <2 x i32> @test_vpmax_s32( -// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8> -// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]]) -// CHECK-NEXT: ret <2 x i32> [[VPMAX2_I]] -// -int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) { - return vpmax_s32(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vpmax_u8( -// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> [[A]], <8 x i8> [[B]]) -// CHECK-NEXT: ret <8 x i8> [[VPMAX_I]] -// -uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) { - return vpmax_u8(a, b); -} - -// CHECK-LABEL: define dso_local <4 x i16> @test_vpmax_u16( -// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8> -// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]]) -// CHECK-NEXT: ret <4 x i16> [[VPMAX2_I]] -// -uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) { - return vpmax_u16(a, b); -} - -// CHECK-LABEL: define dso_local <2 x i32> @test_vpmax_u32( -// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8> -// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]]) -// CHECK-NEXT: ret <2 x i32> [[VPMAX2_I]] -// -uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) { - return vpmax_u32(a, b); -} - -// CHECK-LABEL: define dso_local <2 x float> @test_vpmax_f32( -// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8> -// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> -// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float> -// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> [[VPMAX_I]], <2 x float> [[VPMAX1_I]]) -// CHECK-NEXT: ret <2 x float> [[VPMAX2_I]] -// -float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) { - return vpmax_f32(a, b); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vpmaxq_s8( -// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> [[A]], <16 x i8> [[B]]) -// CHECK-NEXT: ret <16 x i8> [[VPMAX_I]] -// -int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) { - return vpmaxq_s8(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vpmaxq_s16( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8> -// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> [[VPMAX_I]], <8 x i16> [[VPMAX1_I]]) -// CHECK-NEXT: ret <8 x i16> [[VPMAX2_I]] -// -int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) { - return vpmaxq_s16(a, b); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vpmaxq_s32( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> -// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> [[VPMAX_I]], <4 x i32> [[VPMAX1_I]]) -// CHECK-NEXT: ret <4 x i32> [[VPMAX2_I]] -// -int32x4_t test_vpmaxq_s32(int32x4_t a, int32x4_t b) { - return vpmaxq_s32(a, b); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vpmaxq_u8( -// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> [[A]], <16 x i8> [[B]]) -// CHECK-NEXT: ret <16 x i8> [[VPMAX_I]] -// -uint8x16_t test_vpmaxq_u8(uint8x16_t a, uint8x16_t b) { - return vpmaxq_u8(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vpmaxq_u16( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8> -// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> [[VPMAX_I]], <8 x i16> [[VPMAX1_I]]) -// CHECK-NEXT: ret <8 x i16> [[VPMAX2_I]] -// -uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) { - return vpmaxq_u16(a, b); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vpmaxq_u32( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> -// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> [[VPMAX_I]], <4 x i32> [[VPMAX1_I]]) -// CHECK-NEXT: ret <4 x i32> [[VPMAX2_I]] -// -uint32x4_t test_vpmaxq_u32(uint32x4_t a, uint32x4_t b) { - return vpmaxq_u32(a, b); -} - -// CHECK-LABEL: define dso_local <4 x float> @test_vpmaxq_f32( -// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8> -// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> -// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float> -// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> [[VPMAX_I]], <4 x float> [[VPMAX1_I]]) -// CHECK-NEXT: ret <4 x float> [[VPMAX2_I]] -// -float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) { - return vpmaxq_f32(a, b); -} - -// CHECK-LABEL: define dso_local <2 x double> @test_vpmaxq_f64( -// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B]] to <2 x i64> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8> -// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> -// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double> -// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> [[VPMAX_I]], <2 x double> [[VPMAX1_I]]) -// CHECK-NEXT: ret <2 x double> [[VPMAX2_I]] -// -float64x2_t test_vpmaxq_f64(float64x2_t a, float64x2_t b) { - return vpmaxq_f64(a, b); -} - // CHECK-LABEL: define dso_local <2 x float> @test_vpmaxnm_f32( // CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -9096,46 +8896,6 @@ float64_t test_vpaddd_f64(float64x2_t a) { return vpaddd_f64(a); } -// CHECK-LABEL: define dso_local float @test_vpmaxnms_f32( -// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> [[A]]) -// CHECK-NEXT: ret float [[VPMAXNMS_F32_I]] -// -float32_t test_vpmaxnms_f32(float32x2_t a) { - return vpmaxnms_f32(a); -} - -// CHECK-LABEL: define dso_local double @test_vpmaxnmqd_f64( -// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> [[A]]) -// CHECK-NEXT: ret double [[VPMAXNMQD_F64_I]] -// -float64_t test_vpmaxnmqd_f64(float64x2_t a) { - return vpmaxnmqd_f64(a); -} - -// CHECK-LABEL: define dso_local float @test_vpmaxs_f32( -// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> [[A]]) -// CHECK-NEXT: ret float [[VPMAXS_F32_I]] -// -float32_t test_vpmaxs_f32(float32x2_t a) { - return vpmaxs_f32(a); -} - -// CHECK-LABEL: define dso_local double @test_vpmaxqd_f64( -// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> [[A]]) -// CHECK-NEXT: ret double [[VPMAXQD_F64_I]] -// -float64_t test_vpmaxqd_f64(float64x2_t a) { - return vpmaxqd_f64(a); -} - // CHECK-LABEL: define dso_local i16 @test_vqdmulhh_s16( // CHECK-SAME: i16 noundef [[A:%.*]], i16 noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] diff --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c b/clang/test/CodeGen/AArch64/neon/intrinsics.c index c783ea57deff0..423fd275ade7c 100644 --- a/clang/test/CodeGen/AArch64/neon/intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon/intrinsics.c @@ -6129,3 +6129,267 @@ poly64x2_t test_vsriq_n_p64(poly64x2_t a, poly64x2_t b) { // LLVM: ret <2 x i64> [[VSRI_N2]] return vsriq_n_p64(a, b, 64); } + +//===----------------------------------------------------------------------===// +// 2.1.1.12.3. Pairwise maximum +// https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#pairwise-maximum +//===----------------------------------------------------------------------===// + +// LLVM-LABEL: @test_vpmax_s8( +// CIR-LABEL: @vpmax_s8( +int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) { +// CIR: cir.call_llvm_intrinsic "aarch64.neon.smaxp" %{{.*}}, %{{.*}} : (!cir.vector<8 x !s8i>, !cir.vector<8 x !s8i>) -> !cir.vector<8 x !s8i> + +// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]], <8 x i8> {{.*}} [[B:%.*]]) +// LLVM: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> [[A]], <8 x i8> [[B]]) +// LLVM-NEXT: ret <8 x i8> [[VPMAX_I]] + return vpmax_s8(a, b); +} + +// LLVM-LABEL: @test_vpmax_s16( +// CIR-LABEL: @vpmax_s16( +int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) { +// CIR: cir.call_llvm_intrinsic "aarch64.neon.smaxp" %{{.*}}, %{{.*}} : (!cir.vector<4 x !s16i>, !cir.vector<4 x !s16i>) -> !cir.vector<4 x !s16i> + +// LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]], <4 x i16> {{.*}} [[B:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8> +// LLVM-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// LLVM-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// LLVM-NEXT: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]]) +// LLVM-NEXT: ret <4 x i16> [[VPMAX2_I]] + return vpmax_s16(a, b); +} + +// LLVM-LABEL: @test_vpmax_s32( +// CIR-LABEL: @vpmax_s32( +int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) { +// CIR: cir.call_llvm_intrinsic "aarch64.neon.smaxp" %{{.*}}, %{{.*}} : (!cir.vector<2 x !s32i>, !cir.vector<2 x !s32i>) -> !cir.vector<2 x !s32i> + +// LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]], <2 x i32> {{.*}} [[B:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8> +// LLVM-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// LLVM-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> +// LLVM-NEXT: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]]) +// LLVM-NEXT: ret <2 x i32> [[VPMAX2_I]] + return vpmax_s32(a, b); +} + +// LLVM-LABEL: @test_vpmax_u8( +// CIR-LABEL: @vpmax_u8( +uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) { +// CIR: cir.call_llvm_intrinsic "aarch64.neon.umaxp" %{{.*}}, %{{.*}} : (!cir.vector<8 x !u8i>, !cir.vector<8 x !u8i>) -> !cir.vector<8 x !u8i> + +// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]], <8 x i8> {{.*}} [[B:%.*]]) +// LLVM: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> [[A]], <8 x i8> [[B]]) +// LLVM-NEXT: ret <8 x i8> [[VPMAX_I]] + return vpmax_u8(a, b); +} + +// LLVM-LABEL: @test_vpmax_u16( +// CIR-LABEL: @vpmax_u16( +uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) { +// CIR: cir.call_llvm_intrinsic "aarch64.neon.umaxp" %{{.*}}, %{{.*}} : (!cir.vector<4 x !u16i>, !cir.vector<4 x !u16i>) -> !cir.vector<4 x !u16i> + +// LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]], <4 x i16> {{.*}} [[B:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8> +// LLVM-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// LLVM-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// LLVM-NEXT: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]]) +// LLVM-NEXT: ret <4 x i16> [[VPMAX2_I]] + return vpmax_u16(a, b); +} + +// LLVM-LABEL: @test_vpmax_u32( +// CIR-LABEL: @vpmax_u32( +uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) { +// CIR: cir.call_llvm_intrinsic "aarch64.neon.umaxp" %{{.*}}, %{{.*}} : (!cir.vector<2 x !u32i>, !cir.vector<2 x !u32i>) -> !cir.vector<2 x !u32i> + +// LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]], <2 x i32> {{.*}} [[B:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8> +// LLVM-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// LLVM-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> +// LLVM-NEXT: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]]) +// LLVM-NEXT: ret <2 x i32> [[VPMAX2_I]] + return vpmax_u32(a, b); +} + +// LLVM-LABEL: @test_vpmax_f32( +// CIR-LABEL: @vpmax_f32( +float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) { +// CIR: cir.call_llvm_intrinsic "aarch64.neon.fmaxp" %{{.*}}, %{{.*}} : (!cir.vector<2 x !cir.float>, !cir.vector<2 x !cir.float>) -> !cir.vector<2 x !cir.float> + +// LLVM-SAME: <2 x float> {{.*}} [[A:%.*]], <2 x float> {{.*}} [[B:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32> +// LLVM-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> +// LLVM-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8> +// LLVM-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> +// LLVM-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float> +// LLVM-NEXT: [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> [[VPMAX_I]], <2 x float> [[VPMAX1_I]]) +// LLVM-NEXT: ret <2 x float> [[VPMAX2_I]] + return vpmax_f32(a, b); +} + +// LLVM-LABEL: @test_vpmaxq_s8( +// CIR-LABEL: @vpmaxq_s8( +int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) { +// CIR: cir.call_llvm_intrinsic "aarch64.neon.smaxp" %{{.*}}, %{... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/201495 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
