llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Fabian Ritter (ritter-x2a) <details> <summary>Changes</summary> When we know that one operand of an addition is a constant, we might as well put it on the right-hand side and avoid the work to canonicalize it in a later pass. --- Full diff: https://github.com/llvm/llvm-project/pull/157810.diff 4 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll (+4-4) - (modified) llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep-of-gep.ll (+3-3) ``````````diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index bb77cdff778c0..7dbe1235a98b5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -478,7 +478,7 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca, ConstantInt *ConstIndex = ConstantInt::get(OffsetType, IndexQuot.getSExtValue()); - Value *IndexAdd = Builder.CreateAdd(ConstIndex, Offset); + Value *IndexAdd = Builder.CreateAdd(Offset, ConstIndex); if (Instruction *NewInst = dyn_cast<Instruction>(IndexAdd)) NewInsts.push_back(NewInst); return IndexAdd; diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll index d72f158763c61..63622e67e7d0b 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll @@ -312,7 +312,7 @@ define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset_index(ptr %out) ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <6 x i64> [[TMP14]], i64 4, i32 4 ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <6 x i64> [[TMP15]], i64 5, i32 5 ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[SEL3]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 6, [[TMP1]] +; 
CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 6 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <6 x i64> [[TMP16]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <3 x i64> poison, i64 [[TMP3]], i64 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1 @@ -464,7 +464,7 @@ define amdgpu_kernel void @i16_2d_load_store(ptr %out, i32 %sel) { ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <6 x i16> [[TMP3]], i16 3, i32 3 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <6 x i16> [[TMP4]], i16 4, i32 4 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <6 x i16> [[TMP5]], i16 5, i32 5 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 3, [[SEL]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SEL]], 3 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <6 x i16> [[TMP6]], i32 [[TMP1]] ; CHECK-NEXT: store i16 [[TMP2]], ptr [[OUT]], align 2 ; CHECK-NEXT: ret void @@ -498,7 +498,7 @@ define amdgpu_kernel void @float_2d_load_store(ptr %out, i32 %sel) { ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <6 x float> [[TMP3]], float 3.000000e+00, i32 3 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <6 x float> [[TMP4]], float 4.000000e+00, i32 4 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <6 x float> [[TMP5]], float 5.000000e+00, i32 5 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 3, [[SEL]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SEL]], 3 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <6 x float> [[TMP6]], i32 [[TMP1]] ; CHECK-NEXT: store float [[TMP2]], ptr [[OUT]], align 4 ; CHECK-NEXT: ret void @@ -538,7 +538,7 @@ define amdgpu_kernel void @ptr_2d_load_store(ptr %out, i32 %sel) { ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <6 x ptr> [[TMP3]], ptr [[PTR_3]], i32 3 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <6 x ptr> [[TMP4]], ptr [[PTR_4]], i32 4 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <6 x ptr> [[TMP5]], ptr [[PTR_5]], i32 5 -; CHECK-NEXT: [[TMP7:%.*]] = add i32 3, [[SEL]] +; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[SEL]], 3 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <6 x ptr> [[TMP6]], i32 [[TMP7]] ; CHECK-NEXT: store ptr [[TMP8]], ptr 
[[OUT]], align 8 ; CHECK-NEXT: ret void diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll index 1b6ac0bd93c19..a865bf5058d6a 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll @@ -11,7 +11,7 @@ define amdgpu_kernel void @negative_index_byte(ptr %out, i64 %offset) { ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> [[TMP1]], i8 1, i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 2, i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i8> [[TMP3]], i8 3, i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = add i64 -1, [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET:%.*]], -1 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i8> [[TMP4]], i64 [[TMP5]] ; CHECK-NEXT: store i8 [[TMP6]], ptr [[OUT:%.*]], align 1 ; CHECK-NEXT: ret void @@ -39,7 +39,7 @@ define amdgpu_kernel void @negative_index_word(ptr %out, i64 %offset) { ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 1, i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 2, i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 3, i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = add i64 -1, [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET:%.*]], -1 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i64 [[TMP5]] ; CHECK-NEXT: store i32 [[TMP6]], ptr [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep-of-gep.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep-of-gep.ll index a24f041a17857..f95a6a8ec9b45 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep-of-gep.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep-of-gep.ll @@ -10,7 +10,7 @@ define amdgpu_ps void @scalar_alloca_ptr_with_vector_gep_of_gep(i32 %idx, ptr ad ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 1 ; CHECK-NEXT: 
[[TMP3:%.*]] = insertelement <20 x i32> [[TMP1]], i32 2, i32 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[IDX]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = add i32 1, [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <20 x i32> [[TMP3]], i32 [[TMP5]] ; CHECK-NEXT: store i32 [[TMP6]], ptr addrspace(1) [[OUTPUT]], align 4 ; CHECK-NEXT: ret void @@ -31,12 +31,12 @@ define amdgpu_ps void @scalar_alloca_ptr_with_vector_gep_of_gep3(i32 %idx, ptr a ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <16 x i32> poison ; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[IDX]], 2 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 8, [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 8 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i32> [[ALLOCA]], i32 10, i32 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP2]], i32 20, i32 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[IDX]], 2 -; CHECK-NEXT: [[TMP6:%.*]] = add i32 9, [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 9 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <16 x i32> [[TMP4]], i32 [[TMP6]] ; CHECK-NEXT: store i32 [[TMP7]], ptr addrspace(1) [[OUTPUT]], align 4 ; CHECK-NEXT: ret void `````````` </details> https://github.com/llvm/llvm-project/pull/157810 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits