================ @@ -203,17 +203,17 @@ define i32 @reverse_store_with_partial_reduction(ptr noalias %dst, ptr noalias % ; CHECK-NEXT: [[N_MOD_VF10:%.*]] = urem i64 [[TMP0]], 4 ; CHECK-NEXT: [[N_VEC11:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF10]] ; CHECK-NEXT: [[TMP32:%.*]] = sub i64 [[N]], [[N_VEC11]] -; CHECK-NEXT: [[TMP33:%.*]] = insertelement <2 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT18:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI13:%.*]] = phi <2 x i32> [ [[TMP33]], %[[VEC_EPILOG_PH]] ], [ [[PARTIAL_REDUCE16:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI13:%.*]] = phi <4 x i32> [ [[TMP26]], %[[VEC_EPILOG_PH]] ], [ [[TMP33:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[N]], [[INDEX12]] ; CHECK-NEXT: [[TMP34:%.*]] = load i16, ptr [[SRC]], align 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x i16> poison, i16 [[TMP34]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT14]], <4 x i16> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP35:%.*]] = sext <4 x i16> [[BROADCAST_SPLAT15]] to <4 x i32> -; CHECK-NEXT: [[PARTIAL_REDUCE16]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v4i32(<2 x i32> [[VEC_PHI13]], <4 x i32> [[TMP35]]) +; CHECK-NEXT: [[TMP33]] = add <4 x i32> [[VEC_PHI13]], [[TMP35]] ---------------- fhahn wrote:
What makes the partial reduction here not profitable? https://github.com/llvm/llvm-project/pull/181706 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
