[PATCH] D71469: [AArch64] Add IR intrinsics for sq(r)dmulh_lane(q)
This revision was automatically updated to reflect the committed changes. Closed by commit rG2939fc13c8f6: [AArch64] Add IR intrinsics for sq(r)dmulh_lane(q) (authored by sanwou01). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D71469/new/ https://reviews.llvm.org/D71469 Files: clang/include/clang/Basic/arm_neon.td clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/aarch64-neon-2velem.c llvm/include/llvm/IR/IntrinsicsAArch64.td llvm/lib/Target/AArch64/AArch64InstrFormats.td llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp llvm/lib/Target/AArch64/AArch64RegisterInfo.td llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll Index: llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll === --- llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll +++ llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll @@ -9,20 +9,36 @@ declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>) declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.lane.v4i32.v2i32(<4 x i32>, <2 x i32>, i32) +declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v4i32.v4i32(<4 x i32>, <4 x i32>, i32) declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) +declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.lane.v2i32.v2i32(<2 x i32>, <2 x i32>, i32) +declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v2i32.v4i32(<2 x i32>, <4 x i32>, i32) declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.lane.v8i16.v4i16(<8 x i16>, <4 x i16>, i32) +declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v8i16.v8i16(<8 x i16>, <8 x i16>, i32) declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) +declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.lane.v4i16.v4i16(<4 x i16>, <4 x i16>, i32) +declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v4i16.v8i16(<4 x i16>, <8 x i16>, i32) declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sqdmulh.lane.v4i32.v2i32(<4 x i32>, <2 x i32>, i32) +declare <4 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v4i32.v4i32(<4 x i32>, <4 x i32>, i32) declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) +declare <2 x i32> @llvm.aarch64.neon.sqdmulh.lane.v2i32.v2i32(<2 x i32>, <2 x i32>, i32) +declare <2 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v2i32.v4i32(<2 x i32>, <4 x i32>, i32) declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.aarch64.neon.sqdmulh.lane.v8i16.v4i16(<8 x i16>, <4 x i16>, i32) +declare <8 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v8i16.v8i16(<8 x i16>, <8 x i16>, i32) declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>) +declare <4 x i16> @llvm.aarch64.neon.sqdmulh.lane.v4i16.v4i16(<4 x i16>, <4 x i16>, i32) +declare <4 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v4i16.v8i16(<4 x i16>, <8 x i16>, i32) declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) @@ -1515,6 +1531,37 @@ ret <4 x i16> %vqdmulh2.i } +define <4 x i16> @test_vqdmulh_lane_s16_intrinsic(<4 x i16> %a, <4 x i16> %v) { +; CHECK-LABEL: test_vqdmulh_lane_s16_intrinsic: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:// kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT:sqdmulh v0.4h, v0.4h, v1.h[3] +; CHECK-NEXT:ret +entry: + %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.lane.v4i16.v4i16(<4 x i16> %a, <4 x i16> %v, i32 3) + ret <4 x i16> %vqdmulh2.i +} + +define <4 x i16> @test_vqdmulh_laneq_s16_intrinsic_lo(<4 x i16> %a, <8 x i16> %v) { +; CHECK-LABEL: test_vqdmulh_laneq_s16_intrinsic_lo: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sqdmulh v0.4h, v0.4h, v1.h[3] +; CHECK-NEXT:ret +entry: + %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v4i16.v8i16(<4 x i16> %a, <8 x i16> %v, i32 3) + ret <4 x i16> %vqdmulh2.i +} + +define <4 x i16> @test_vqdmulh_laneq_s16_intrinsic_hi(<4 x i16> %a, <8 x i16> %v) { +; CHECK-LABEL: test_vqdmulh_laneq_s16_intrinsic_hi: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sqdmulh v0.4h, v0.4h, v1.h[7] +; CHECK-NEXT:ret +entry: + %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v4i16.v8i16(<4 x i16> %a, <8 x i16> %v, i32 7) + ret <4 x i16> %vqdmulh2.i +} + define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmulhq_lane_s16: ; CHECK: // %bb.0: // %entry @@ -1527,6 +1574,37 @@ ret <8 x i16> %vqdmulh2.i } +define <8 x i16> @test_vqdmulhq_lane_s16_intrinsic(<8 x i16> %a, <4 x i16> %v) { +; CHECK-LABEL: test_vqdmulhq_lane_s16_intrinsic: +; CHECK: // %bb.0: // %entry +;
[PATCH] D71469: [AArch64] Add IR intrinsics for sq(r)dmulh_lane(q)
efriedma accepted this revision. efriedma added a comment. This revision is now accepted and ready to land. LGTM Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D71469/new/ https://reviews.llvm.org/D71469 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D71469: [AArch64] Add IR intrinsics for sq(r)dmulh_lane(q)
sanwou01 updated this revision to Diff 240902. sanwou01 retitled this revision from "[AArch64] Add sq(r)dmulh_lane(q) LLVM IR intrinsics" to "[AArch64] Add IR intrinsics for sq(r)dmulh_lane(q)". sanwou01 edited the summary of this revision. sanwou01 added a comment. Address Eli's feedback; clarified commit message. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D71469/new/ https://reviews.llvm.org/D71469 Files: clang/include/clang/Basic/arm_neon.td clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/aarch64-neon-2velem.c llvm/include/llvm/IR/IntrinsicsAArch64.td llvm/lib/Target/AArch64/AArch64InstrFormats.td llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp llvm/lib/Target/AArch64/AArch64RegisterInfo.td llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll Index: llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll === --- llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll +++ llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll @@ -9,20 +9,36 @@ declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>) declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.lane.v4i32.v2i32(<4 x i32>, <2 x i32>, i32) +declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v4i32.v4i32(<4 x i32>, <4 x i32>, i32) declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) +declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.lane.v2i32.v2i32(<2 x i32>, <2 x i32>, i32) +declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v2i32.v4i32(<2 x i32>, <4 x i32>, i32) declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.lane.v8i16.v4i16(<8 x i16>, <4 x i16>, i32) +declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v8i16.v8i16(<8 x i16>, <8 x i16>, i32) declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) +declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.lane.v4i16.v4i16(<4 x i16>, <4 x i16>, i32) +declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v4i16.v8i16(<4 x i16>, <8 x i16>, i32) declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sqdmulh.lane.v4i32.v2i32(<4 x i32>, <2 x i32>, i32) +declare <4 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v4i32.v4i32(<4 x i32>, <4 x i32>, i32) declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) +declare <2 x i32> @llvm.aarch64.neon.sqdmulh.lane.v2i32.v2i32(<2 x i32>, <2 x i32>, i32) +declare <2 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v2i32.v4i32(<2 x i32>, <4 x i32>, i32) declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.aarch64.neon.sqdmulh.lane.v8i16.v4i16(<8 x i16>, <4 x i16>, i32) +declare <8 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v8i16.v8i16(<8 x i16>, <8 x i16>, i32) declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>) +declare <4 x i16> @llvm.aarch64.neon.sqdmulh.lane.v4i16.v4i16(<4 x i16>, <4 x i16>, i32) +declare <4 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v4i16.v8i16(<4 x i16>, <8 x i16>, i32) declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) @@ -1515,6 +1531,37 @@ ret <4 x i16> %vqdmulh2.i } +define <4 x i16> @test_vqdmulh_lane_s16_intrinsic(<4 x i16> %a, <4 x i16> %v) { +; CHECK-LABEL: test_vqdmulh_lane_s16_intrinsic: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:// kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT:sqdmulh v0.4h, v0.4h, v1.h[3] +; CHECK-NEXT:ret +entry: + %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.lane.v4i16.v4i16(<4 x i16> %a, <4 x i16> %v, i32 3) + ret <4 x i16> %vqdmulh2.i +} + +define <4 x i16> @test_vqdmulh_laneq_s16_intrinsic_lo(<4 x i16> %a, <8 x i16> %v) { +; CHECK-LABEL: test_vqdmulh_laneq_s16_intrinsic_lo: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sqdmulh v0.4h, v0.4h, v1.h[3] +; CHECK-NEXT:ret +entry: + %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v4i16.v8i16(<4 x i16> %a, <8 x i16> %v, i32 3) + ret <4 x i16> %vqdmulh2.i +} + +define <4 x i16> @test_vqdmulh_laneq_s16_intrinsic_hi(<4 x i16> %a, <8 x i16> %v) { +; CHECK-LABEL: test_vqdmulh_laneq_s16_intrinsic_hi: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sqdmulh v0.4h, v0.4h, v1.h[7] +; CHECK-NEXT:ret +entry: + %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v4i16.v8i16(<4 x i16> %a, <8 x i16> %v, i32 7) + ret <4 x i16> %vqdmulh2.i +} + define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmulhq_lane_s16: ; CHECK: // %bb.0: // %entry @@ -1527,6 +1574,37 @@ ret <8 x i16> %vqdmulh2.i } +define <8 x i16>