llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-powerpc Author: None (Himadhith) <details> <summary>Changes</summary> NFC patch to clean up extra lines of code in the file `llvm/test/CodeGen/PowerPC/check-zero-vector.ll` as the current one has loop unrolled. Also removing the file `clang/test/CodeGen/PowerPC/check-zero-vector.c` as the patch affects only the backend. --- Patch is 24.07 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151971.diff 2 Files Affected: - (removed) clang/test/CodeGen/PowerPC/check-zero-vector.c (-143) - (modified) llvm/test/CodeGen/PowerPC/check-zero-vector.ll (+74-226) ``````````diff diff --git a/clang/test/CodeGen/PowerPC/check-zero-vector.c b/clang/test/CodeGen/PowerPC/check-zero-vector.c deleted file mode 100644 index cb6c826641366..0000000000000 --- a/clang/test/CodeGen/PowerPC/check-zero-vector.c +++ /dev/null @@ -1,143 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 -// RUN: %clang_cc1 -triple powerpc64-ibm-aix -emit-llvm %s -o - | FileCheck %s --check-prefix=POWERPC_64 -// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=POWERPC_64LE -// RUN: %clang_cc1 -triple powerpc-ibm-aix -emit-llvm %s -o - | FileCheck %s --check-prefix=POWERPC_32 - -// POWERPC_64-LABEL: define signext i32 @test_Greater_than( -// POWERPC_64-SAME: ptr noundef [[COLAUTHS:%.*]]) #[[ATTR0:[0-9]+]] { -// POWERPC_64-NEXT: [[ENTRY:.*:]] -// POWERPC_64-NEXT: [[COLAUTHS_ADDR:%.*]] = alloca ptr, align 8 -// POWERPC_64-NEXT: [[RESULT:%.*]] = alloca i16, align 2 -// POWERPC_64-NEXT: [[I:%.*]] = alloca i32, align 4 -// POWERPC_64-NEXT: store ptr [[COLAUTHS]], ptr [[COLAUTHS_ADDR]], align 8 -// POWERPC_64-NEXT: store i16 0, ptr [[RESULT]], align 2 -// POWERPC_64-NEXT: store i32 0, ptr [[I]], align 4 -// POWERPC_64-NEXT: br label %[[FOR_COND:.*]] -// POWERPC_64: [[FOR_COND]]: -// POWERPC_64-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4 -// POWERPC_64-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4 -// POWERPC_64-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] -// POWERPC_64: [[FOR_BODY]]: -// POWERPC_64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[COLAUTHS_ADDR]], align 8 -// POWERPC_64-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 -// POWERPC_64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64 -// POWERPC_64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 [[IDXPROM]] -// POWERPC_64-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 -// POWERPC_64-NEXT: [[CONV:%.*]] = zext i16 [[TMP3]] to i32 -// POWERPC_64-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0 -// POWERPC_64-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] -// POWERPC_64: [[IF_THEN]]: -// POWERPC_64-NEXT: [[TMP4:%.*]] = load i16, ptr [[RESULT]], align 2 -// POWERPC_64-NEXT: [[INC:%.*]] = add i16 [[TMP4]], 1 -// POWERPC_64-NEXT: store i16 [[INC]], ptr [[RESULT]], align 2 -// POWERPC_64-NEXT: br label %[[IF_END]] -// POWERPC_64: [[IF_END]]: -// POWERPC_64-NEXT: br label %[[FOR_INC:.*]] -// POWERPC_64: [[FOR_INC]]: -// POWERPC_64-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 -// POWERPC_64-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP5]], 1 -// POWERPC_64-NEXT: store i32 [[INC3]], ptr [[I]], align 4 -// POWERPC_64-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] -// POWERPC_64: [[FOR_END]]: -// POWERPC_64-NEXT: [[TMP6:%.*]] = load i16, ptr [[RESULT]], align 2 -// POWERPC_64-NEXT: [[CONV4:%.*]] = zext i16 [[TMP6]] to i32 -// POWERPC_64-NEXT: ret i32 [[CONV4]] -// -// POWERPC_64LE-LABEL: define dso_local signext i32 @test_Greater_than( -// POWERPC_64LE-SAME: ptr noundef [[COLAUTHS:%.*]]) #[[ATTR0:[0-9]+]] { -// POWERPC_64LE-NEXT: [[ENTRY:.*:]] -// POWERPC_64LE-NEXT: [[COLAUTHS_ADDR:%.*]] = alloca ptr, align 8 -// POWERPC_64LE-NEXT: [[RESULT:%.*]] = alloca i16, align 2 -// POWERPC_64LE-NEXT: [[I:%.*]] = alloca i32, align 4 -// POWERPC_64LE-NEXT: store ptr [[COLAUTHS]], ptr [[COLAUTHS_ADDR]], align 8 -// POWERPC_64LE-NEXT: store i16 0, ptr [[RESULT]], align 2 -// POWERPC_64LE-NEXT: store i32 0, ptr [[I]], align 4 -// POWERPC_64LE-NEXT: br label %[[FOR_COND:.*]] -// POWERPC_64LE: [[FOR_COND]]: -// POWERPC_64LE-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4 -// POWERPC_64LE-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4 -// POWERPC_64LE-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] -// POWERPC_64LE: [[FOR_BODY]]: -// POWERPC_64LE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[COLAUTHS_ADDR]], align 8 -// POWERPC_64LE-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 -// POWERPC_64LE-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64 -// POWERPC_64LE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 [[IDXPROM]] -// POWERPC_64LE-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 -// POWERPC_64LE-NEXT: [[CONV:%.*]] = zext i16 [[TMP3]] to i32 -// POWERPC_64LE-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0 -// POWERPC_64LE-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] -// POWERPC_64LE: [[IF_THEN]]: -// POWERPC_64LE-NEXT: [[TMP4:%.*]] = load i16, ptr [[RESULT]], align 2 -// POWERPC_64LE-NEXT: [[INC:%.*]] = add i16 [[TMP4]], 1 -// POWERPC_64LE-NEXT: store i16 [[INC]], ptr [[RESULT]], align 2 -// POWERPC_64LE-NEXT: br label %[[IF_END]] -// POWERPC_64LE: [[IF_END]]: -// POWERPC_64LE-NEXT: br label %[[FOR_INC:.*]] -// POWERPC_64LE: [[FOR_INC]]: -// POWERPC_64LE-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 -// POWERPC_64LE-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP5]], 1 -// POWERPC_64LE-NEXT: store i32 [[INC3]], ptr [[I]], align 4 -// POWERPC_64LE-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] -// POWERPC_64LE: [[FOR_END]]: -// POWERPC_64LE-NEXT: [[TMP6:%.*]] = load i16, ptr [[RESULT]], align 2 -// POWERPC_64LE-NEXT: [[CONV4:%.*]] = zext i16 [[TMP6]] to i32 -// POWERPC_64LE-NEXT: ret i32 [[CONV4]] -// -// POWERPC_32-LABEL: define i32 @test_Greater_than( -// POWERPC_32-SAME: ptr noundef [[COLAUTHS:%.*]]) #[[ATTR0:[0-9]+]] { -// POWERPC_32-NEXT: [[ENTRY:.*:]] -// POWERPC_32-NEXT: [[COLAUTHS_ADDR:%.*]] = alloca ptr, align 4 -// POWERPC_32-NEXT: [[RESULT:%.*]] = alloca i16, align 2 -// POWERPC_32-NEXT: [[I:%.*]] = alloca i32, align 4 -// POWERPC_32-NEXT: store ptr [[COLAUTHS]], ptr [[COLAUTHS_ADDR]], align 4 -// POWERPC_32-NEXT: store i16 0, ptr [[RESULT]], align 2 -// POWERPC_32-NEXT: store i32 0, ptr [[I]], align 4 -// POWERPC_32-NEXT: br label %[[FOR_COND:.*]] -// POWERPC_32: [[FOR_COND]]: -// POWERPC_32-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4 -// POWERPC_32-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4 -// POWERPC_32-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] -// POWERPC_32: [[FOR_BODY]]: -// POWERPC_32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[COLAUTHS_ADDR]], align 4 -// POWERPC_32-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 -// POWERPC_32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 [[TMP2]] -// POWERPC_32-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 -// POWERPC_32-NEXT: [[CONV:%.*]] = zext i16 [[TMP3]] to i32 -// POWERPC_32-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0 -// POWERPC_32-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] -// POWERPC_32: [[IF_THEN]]: -// POWERPC_32-NEXT: [[TMP4:%.*]] = load i16, ptr [[RESULT]], align 2 -// POWERPC_32-NEXT: [[INC:%.*]] = add i16 [[TMP4]], 1 -// POWERPC_32-NEXT: store i16 [[INC]], ptr [[RESULT]], align 2 -// POWERPC_32-NEXT: br label %[[IF_END]] -// POWERPC_32: [[IF_END]]: -// POWERPC_32-NEXT: br label %[[FOR_INC:.*]] -// POWERPC_32: [[FOR_INC]]: -// POWERPC_32-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 -// POWERPC_32-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP5]], 1 -// POWERPC_32-NEXT: store i32 [[INC3]], ptr [[I]], align 4 -// POWERPC_32-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] -// POWERPC_32: [[FOR_END]]: -// POWERPC_32-NEXT: [[TMP6:%.*]] = load i16, ptr [[RESULT]], align 2 -// POWERPC_32-NEXT: [[CONV4:%.*]] = zext i16 [[TMP6]] to i32 -// POWERPC_32-NEXT: ret i32 [[CONV4]] -// -int test_Greater_than(unsigned short *colauths) { - unsigned short result = 0; - for (int i = 0; i < 4; i++) { - if (colauths[i] > 0) { - result++; - } - } - return result; -} -//. -// POWERPC_64: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]} -// POWERPC_64: [[META3]] = !{!"llvm.loop.mustprogress"} -//. -// POWERPC_64LE: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]} -// POWERPC_64LE: [[META3]] = !{!"llvm.loop.mustprogress"} -//. -// POWERPC_32: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]} -// POWERPC_32: [[META3]] = !{!"llvm.loop.mustprogress"} -//. diff --git a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll b/llvm/test/CodeGen/PowerPC/check-zero-vector.ll index 59173e22edf26..23371535edadd 100644 --- a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll +++ b/llvm/test/CodeGen/PowerPC/check-zero-vector.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: < %s | FileCheck %s --check-prefix=POWERPC_64LE @@ -7,240 +8,87 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc-ibm-aix \ ; RUN: < %s | FileCheck %s --check-prefix=POWERPC_32 -define i32 @test_Greater_than(ptr %colauths, i32 signext %ncols) { -; This testcase is manually reduced to isolate the critical code blocks. -; It is designed to check for vector comparison specifically for zero vectors. -; In the vector.body section, we are expecting a comparison instruction (vcmpequh), -; merge instructions (vmrghh and vmrglh) which use exactly 2 vectors. -; The output of the merge instruction is being used by xxland and finally -; accumulated by vadduwm instruction. - +define dso_local signext range(i32 0, 5) i32 @test_Greater_than(ptr noundef readonly captures(none) %colauths) { ; POWERPC_64LE-LABEL: test_Greater_than: -; POWERPC_64LE: .LBB0_6: # %vector.body -; POWERPC_64LE-NEXT: # -; POWERPC_64LE-NEXT: lxv [[R1:[0-9]+]], -64(4) -; POWERPC_64LE-NEXT: vcmpequh [[R2:[0-9]+]], [[R2]], [[R3:[0-9]+]] -; POWERPC_64LE-NEXT: xxlnor [[R1]], [[R1]], [[R1]] -; POWERPC_64LE-NEXT: vmrghh [[R4:[0-9]+]], [[R2]], [[R2]] -; POWERPC_64LE-NEXT: vmrglh [[R2]], [[R2]], [[R2]] -; POWERPC_64LE-NEXT: xxland [[R5:[0-9]+]], [[R5]], [[R6:[0-9]+]] -; POWERPC_64LE-NEXT: xxland [[R1]], [[R1]], [[R6]] -; POWERPC_64LE-NEXT: vadduwm [[R7:[0-9]+]], [[R7]], [[R4]] -; POWERPC_64LE: .LBB0_10: # %vec.epilog.vector.body -; POWERPC_64LE-NEXT: # -; POWERPC_64LE-NEXT: lxv [[R8:[0-9]+]], 0(4) -; POWERPC_64LE-NEXT: addi 4, 4, 16 -; POWERPC_64LE-NEXT: vcmpequh [[R9:[0-9]+]], [[R9]], [[R10:[0-9]+]] -; POWERPC_64LE-NEXT: xxlnor [[R8]], [[R8]], [[R8]] -; POWERPC_64LE-NEXT: vmrglh [[R11:[0-9]+]], [[R9]], [[R9]] -; POWERPC_64LE-NEXT: vmrghh [[R9]], [[R9]], [[R9]] -; POWERPC_64LE-NEXT: xxland [[R12:[0-9]+]], [[R12]], [[R6]] -; POWERPC_64LE-NEXT: xxland [[R8]], [[R8]], [[R6]] -; POWERPC_64LE-NEXT: vadduwm [[R7]], [[R7]], [[R9]] -; POWERPC_64LE-NEXT: vadduwm [[R3]], [[R3]], [[R11]] -; POWERPC_64LE-NEXT: bdnz .LBB0_10 -; POWERPC_64LE: blr +; POWERPC_64LE: # %bb.0: # %entry +; POWERPC_64LE-NEXT: lfd 0, 0(3) +; POWERPC_64LE-NEXT: xxlxor 35, 35, 35 +; POWERPC_64LE-NEXT: li 4, 0 +; POWERPC_64LE-NEXT: li 3, 4 +; POWERPC_64LE-NEXT: xxswapd 34, 0 +; POWERPC_64LE-NEXT: vcmpequh 2, 2, 3 +; POWERPC_64LE-NEXT: xxlnor 34, 34, 34 +; POWERPC_64LE-NEXT: vmrglh 3, 2, 2 +; POWERPC_64LE-NEXT: vextuwrx 4, 4, 2 +; POWERPC_64LE-NEXT: vextuwrx 3, 3, 3 +; POWERPC_64LE-NEXT: clrlwi 4, 4, 31 +; POWERPC_64LE-NEXT: rlwimi 4, 3, 1, 30, 30 +; POWERPC_64LE-NEXT: mfvsrwz 3, 35 +; POWERPC_64LE-NEXT: rlwimi 4, 3, 2, 29, 29 +; POWERPC_64LE-NEXT: li 3, 12 +; POWERPC_64LE-NEXT: vextuwrx 3, 3, 3 +; POWERPC_64LE-NEXT: rlwimi 4, 3, 3, 28, 28 +; POWERPC_64LE-NEXT: stb 4, -1(1) +; POWERPC_64LE-NEXT: lbz 3, -1(1) +; POWERPC_64LE-NEXT: popcntd 3, 3 +; POWERPC_64LE-NEXT: blr ; ; POWERPC_64-LABEL: test_Greater_than: -; POWERPC_64: L..BB0_6: # %vector.body -; POWERPC_64-NEXT: # -; POWERPC_64-NEXT: lxv [[R1:[0-9]+]], -64(4) -; POWERPC_64-NEXT: vcmpequh [[R2:[0-9]+]], [[R2]], [[R3:[0-9]+]] -; POWERPC_64-NEXT: xxlnor [[R1]], [[R1]], [[R1]] -; POWERPC_64-NEXT: vmrglh [[R4:[0-9]+]], [[R2]], [[R2]] -; POWERPC_64-NEXT: vmrghh [[R2]], [[R2]], [[R2]] -; POWERPC_64-NEXT: xxland [[R5:[0-9]+]], [[R5]], [[R6:[0-9]+]] -; POWERPC_64-NEXT: xxland [[R1]], [[R1]], [[R6]] -; POWERPC_64-NEXT: vadduwm [[R7:[0-9]+]], [[R7]], [[R4]] -; POWERPC_64: L..BB0_10: # %vec.epilog.vector.body -; POWERPC_64-NEXT: # -; POWERPC_64-NEXT: lxv [[R8:[0-9]+]], 0(4) -; POWERPC_64-NEXT: addi 4, 4, 16 -; POWERPC_64-NEXT: vcmpequh [[R9:[0-9]+]], [[R9]], [[R10:[0-9]+]] -; POWERPC_64-NEXT: xxlnor [[R8]], [[R8]], [[R8]] -; POWERPC_64-NEXT: vmrghh [[R11:[0-9]+]], [[R9]], [[R9]] -; POWERPC_64-NEXT: vmrglh [[R9]], [[R9]], [[R9]] -; POWERPC_64-NEXT: xxland [[R12:[0-9]+]], [[R12]], [[R6]] -; POWERPC_64-NEXT: xxland [[R8]], [[R8]], [[R6]] -; POWERPC_64-NEXT: vadduwm [[R7]], [[R7]], [[R9]] -; POWERPC_64-NEXT: vadduwm [[R3]], [[R3]], [[R11]] -; POWERPC_64-NEXT: bdnz L..BB0_10 -; POWERPC_64: blr +; POWERPC_64: # %bb.0: # %entry +; POWERPC_64-NEXT: lxsd 2, 0(3) +; POWERPC_64-NEXT: xxlxor 35, 35, 35 +; POWERPC_64-NEXT: li 4, 12 +; POWERPC_64-NEXT: li 3, 8 +; POWERPC_64-NEXT: vcmpequh 2, 2, 3 +; POWERPC_64-NEXT: xxlnor 34, 34, 34 +; POWERPC_64-NEXT: vmrghh 2, 2, 2 +; POWERPC_64-NEXT: vextuwlx 4, 4, 2 +; POWERPC_64-NEXT: vextuwlx 3, 3, 2 +; POWERPC_64-NEXT: clrlwi 4, 4, 31 +; POWERPC_64-NEXT: rlwimi 4, 3, 1, 30, 30 +; POWERPC_64-NEXT: mfvsrwz 3, 34 +; POWERPC_64-NEXT: rlwimi 4, 3, 2, 29, 29 +; POWERPC_64-NEXT: li 3, 0 +; POWERPC_64-NEXT: vextuwlx 3, 3, 2 +; POWERPC_64-NEXT: rlwimi 4, 3, 3, 28, 28 +; POWERPC_64-NEXT: stb 4, -1(1) +; POWERPC_64-NEXT: lbz 3, -1(1) +; POWERPC_64-NEXT: popcntd 3, 3 +; POWERPC_64-NEXT: blr ; ; POWERPC_32-LABEL: test_Greater_than: -; POWERPC_32: L..BB0_7: # %vector.body -; POWERPC_32-NEXT: # -; POWERPC_32-NEXT: lxv [[R1:[0-9]+]], 0(10) -; POWERPC_32-NEXT: addic [[R13:[0-9]+]], [[R13]], 64 -; POWERPC_32-NEXT: addze [[R14:[0-9]+]], [[R14]] -; POWERPC_32-NEXT: xor [[R15:[0-9]+]], [[R13]], [[R16:[0-9]+]] -; POWERPC_32-NEXT: or. [[R15]], [[R15]], [[R14]] -; POWERPC_32-NEXT: vcmpequh [[R2:[0-9]+]], [[R2]], [[R3:[0-9]+]] -; POWERPC_32-NEXT: xxlnor [[R1]], [[R1]], [[R1]] -; POWERPC_32-NEXT: vmrglh [[R4:[0-9]+]], [[R2]], [[R2]] -; POWERPC_32-NEXT: vmrghh [[R2]], [[R2]], [[R2]] -; POWERPC_32-NEXT: xxland [[R5:[0-9]+]], [[R5]], [[R6:[0-9]+]] -; POWERPC_32-NEXT: xxland [[R1]], [[R1]], [[R6]] -; POWERPC_32-NEXT: vadduwm [[R7:[0-9]+]], [[R7]], [[R4]] -; POWERPC_32: L..BB0_11: # %vec.epilog.vector.body -; POWERPC_32-NEXT: # -; POWERPC_32-NEXT: slwi [[R14]], [[R13]], 1 -; POWERPC_32-NEXT: addic [[R13]], [[R13]], 8 -; POWERPC_32-NEXT: addze [[R17:[0-9]+]], [[R17]] -; POWERPC_32-NEXT: lxvx [[R8:[0-9]+]], [[R18:[0-9]+]], [[R14]] -; POWERPC_32-NEXT: xor [[R14]], [[R13]], [[R16]] -; POWERPC_32-NEXT: or. [[R14]], [[R14]], [[R17]] -; POWERPC_32-NEXT: vcmpequh [[R9:[0-9]+]], [[R9]], [[R3]] -; POWERPC_32-NEXT: xxlnor [[R8]], [[R8]], [[R8]] -; POWERPC_32-NEXT: vmrghh [[R11:[0-9]+]], [[R9]], [[R9]] -; POWERPC_32-NEXT: vmrglh [[R9]], [[R9]], [[R9]] -; POWERPC_32-NEXT: xxland [[R12:[0-9]+]], [[R12]], [[R6]] -; POWERPC_32-NEXT: xxland [[R8]], [[R8]], [[R6]] -; POWERPC_32-NEXT: vadduwm [[R7]], [[R7]], [[R9]] -; POWERPC_32-NEXT: vadduwm [[R19:[0-9]+]], [[R19]], [[R11]] -; POWERPC_32-NEXT: bne 0, L..BB0_11 -; POWERPC_32: blr - entry: - %cmp5 = icmp sgt i32 %ncols, 0 - br i1 %cmp5, label %iter.check, label %for.cond.cleanup - -iter.check: ; preds = %entry - %wide.trip.count = zext nneg i32 %ncols to i64 - %min.iters.check = icmp ult i32 %ncols, 8 - br i1 %min.iters.check, label %for.body.preheader, label %vector.main.loop.iter.check - -for.body.preheader: ; preds = %vec.epilog.iter.check, %vec.epilog.middle.block, %iter.check - %indvars.iv.ph = phi i64 [ 0, %iter.check ], [ %n.vec, %vec.epilog.iter.check ], [ %n.vec31, %vec.epilog.middle.block ] - %num_cols_needed.06.ph = phi i32 [ 0, %iter.check ], [ %33, %vec.epilog.iter.check ], [ %40, %vec.epilog.middle.block ] - br label %for.body - -vector.main.loop.iter.check: ; preds = %iter.check - %min.iters.check9 = icmp ult i32 %ncols, 64 - br i1 %min.iters.check9, label %vec.epilog.ph, label %vector.ph - -vector.ph: ; preds = %vector.main.loop.iter.check - %n.vec = and i64 %wide.trip.count, 2147483584 - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %vec.phi = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %24, %vector.body ] - %vec.phi10 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %25, %vector.body ] - %vec.phi11 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %26, %vector.body ] - %vec.phi12 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %27, %vector.body ] - %vec.phi13 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %28, %vector.body ] - %vec.phi14 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %29, %vector.body ] - %vec.phi15 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %30, %vector.body ] - %vec.phi16 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %31, %vector.body ] - %0 = getelementptr inbounds nuw i16, ptr %colauths, i64 %index - %1 = getelementptr inbounds nuw i8, ptr %0, i64 16 - %2 = getelementptr inbounds nuw i8, ptr %0, i64 32 - %3 = getelementptr inbounds nuw i8, ptr %0, i64 48 - %4 = getelementptr inbounds nuw i8, ptr %0, i64 64 - %5 = getelementptr inbounds nuw i8, ptr %0, i64 80 - %6 = getelementptr inbounds nuw i8, ptr %0, i64 96 - %7 = getelementptr inbounds nuw i8, ptr %0, i64 112 - %wide.load = load <8 x i16>, ptr %0, align 2, !tbaa !5 - %wide.load17 = load <8 x i16>, ptr %1, align 2, !tbaa !5 - %wide.load18 = load <8 x i16>, ptr %2, align 2, !tbaa !5 - %wide.load19 = load <8 x i16>, ptr %3, align 2, !tbaa !5 - %wide.load20 = load <8 x i16>, ptr %4, align 2, !tbaa !5 - %wide.load21 = load <8 x i16>, ptr %5, align 2, !tbaa !5 - %wide.load22 = load <8 x i16>, ptr %6, align 2, !tbaa !5 - %wide.load23 = load <8 x i16>, ptr %7, align 2, !tbaa !5 - %8 = icmp ne <8 x i16> %wide.load, zeroinitializer - %9 = icmp ne <8 x i16> %wide.load17, zeroinitializer - %10 = icmp ne <8 x i16> %wide.load18, zeroinitializer - %11 = icmp ne <8 x i16> %wide.load19, zeroinitializer - %12 = icmp ne <8 x i16> %wide.load20, zeroinitializer - %13 = icmp ne <8 x i16> %wide.load21, zeroinitializer - %14 = icmp ne <8 x i16> %wide.load22, zeroinitializer - %15 = icmp ne <8 x i16> %wide.load23, zeroinitializer - %16 = zext <8 x i1> %8 to <8 x i32> - %17 = zext <8 x i1> %9 to <8 x i32> - %18 = zext <8 x i1> %10 to <8 x i32> - %19 = zext <8 x i1> %11 to <8 x i32> - %20 = zext <8 x i1> %12 to <8 x i32> - %21 = zext <8 x i1> %13 to <8 x i32> - %22 = zext <8 x i1> %14 to <8 x i32> - %23 = zext <8 x i1> %15 to <8 x i32> - %24 = add <8 x i32> %vec.phi, %16 - %25 = add <8 x i32> %vec.phi10, %17 - %26 = add <8 x i32> %vec.phi11, %18 - %27 = add <8 x i32> %vec.phi12, %19 - %28 = add <8 x i32> %vec.phi13, %20 - %29 = add <8 x i32> %vec.phi14, %21 - %30 = add <8 x i32> %vec.phi15, %22 - %31 = add <8 x i32> %vec.phi16, %23 - %index.next = add nuw i64 %index, 64 - %32 = icmp eq i64 %index.next, %n.vec - br i1 %32, label %middle.block, label %vector.body, !llvm.loop !9 - -middle.block: ; preds = %vector.body - %bin.rdx = add <8 x i32> %25, %24 - %bin.rdx24 = add <8 x i32> %26, %bin.rdx - %bin.rdx25 = add <8 x i32> %27, %bin.rdx24 - %bin.rdx26 = add <8 x i32> %28, %bin.rdx25 - %bin.rdx27 = add <8 x i32> %29, %bin.rdx26 - %bin.rdx28 = add <8 x i32> %30, %bin.rdx27 - %bin.rdx29 = add <8 x i32> %31, %bin.rdx28 - %33 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %bin.rdx29) - %cmp.n = icmp eq i64 %n.vec, %wide.trip.count - br i1 %cmp.n, label %for.cond.cleanup, label %vec.epilog.iter.check - -vec.epilog.iter.check: ; preds = %middle.block - %n.vec.remaining = and i64 %wide.trip.count, 56 - %min... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/151971 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits