Author: Sander de Smalen Date: 2023-07-24T14:29:45Z New Revision: a8cbd27d1f238e104a5d5ca345d93bc1f4d4ab1f
URL: https://github.com/llvm/llvm-project/commit/a8cbd27d1f238e104a5d5ca345d93bc1f4d4ab1f DIFF: https://github.com/llvm/llvm-project/commit/a8cbd27d1f238e104a5d5ca345d93bc1f4d4ab1f.diff LOG: [Clang][AArch64] svldr_vnum/svstr_vnum should use cntsb iso vscale for the offset The specification for LDR/STR says that: "The ZA array vector is selected by the sum of the vector select register and immediate offset, modulo the number of bytes in a Streaming SVE vector. [...] This instruction does not require the PE to be in Streaming SVE mode." When the instruction is used outside of streaming mode, 'vscale' will result in the wrong value being used for the offset because LLVM's code-generator will emit the non-streaming 'RDVL/ADDVL' instead of the 'RDSVL/ADDSVL' instructions which are used to get the Streaming-SVE vector length. Reviewed By: bryanpkc Differential Revision: https://reviews.llvm.org/D156121 Added: Modified: clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c Removed: ################################################################################ diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index b7fbafda0e5336..d5ad35c0c93602 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9508,11 +9508,11 @@ Value *CodeGenFunction::EmitSMEZero(SVETypeFlags TypeFlags, Value *CodeGenFunction::EmitSMELdrStr(SVETypeFlags TypeFlags, SmallVectorImpl<Value *> &Ops, unsigned IntID) { - Function *Vscale = CGM.getIntrinsic(Intrinsic::vscale, Int64Ty); - llvm::Value *VscaleCall = Builder.CreateCall(Vscale, {}, "vscale"); + Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); + llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb"); llvm::Value *MulVL = Builder.CreateMul( - VscaleCall, - Builder.getInt64(16 * cast<llvm::ConstantInt>(Ops[1])->getZExtValue()), + CntsbCall, + 
Builder.getInt64(cast<llvm::ConstantInt>(Ops[1])->getZExtValue()), "mulvl"); Ops[2] = Builder.CreateGEP(Int8Ty, Ops[2], MulVL); Ops[0] = EmitTileslice(Ops[1], Ops[0]); diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c index c96df003da6efc..7efa8b1556857c 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c @@ -18,8 +18,8 @@ void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) { // CHECK-C-LABEL: @test_svldr_vnum_za_1( // CHECK-CXX-LABEL: @_Z20test_svldr_vnum_za_1jPKv( // CHECK-NEXT: entry: -// CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[MULVL:%.*]] = mul nuw nsw i64 [[VSCALE]], 240 +// CHECK-NEXT: [[SVLB:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[SVLB]], 15 // CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]] // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 // CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[TILESLICE]], ptr [[TMP0]]) diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c index d4b1b9fa908acb..12aa298858a18e 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c @@ -18,8 +18,8 @@ void test_svstr_vnum_za(uint32_t slice_base, void *ptr) { // CHECK-C-LABEL: @test_svstr_vnum_za_1( // CHECK-CXX-LABEL: @_Z20test_svstr_vnum_za_1jPv( // CHECK-NEXT: entry: -// CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[MULVL:%.*]] = mul nuw nsw i64 [[VSCALE]], 240 +// CHECK-NEXT: [[SVLB:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[SVLB]], 15 // CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]] // CHECK-NEXT: 
[[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 // CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[TILESLICE]], ptr [[TMP0]]) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits