llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-powerpc Author: None (llvmbot) <details> <summary>Changes</summary> Backport 994a6a39e13dcc335247a127a5da05905d1ac541 Requested by: @<!-- -->RKSimon --- Full diff: https://github.com/llvm/llvm-project/pull/159286.diff 4 Files Affected: - (modified) llvm/lib/Transforms/Vectorize/VectorCombine.cpp (+9-2) - (added) llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll (+36) - (added) llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg (+2) - (added) llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll (+22) ``````````diff diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 639f8686a271e..ea9cbed0117b9 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -1829,12 +1829,19 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) { IntegerType::get(SrcTy->getContext(), DL->getTypeSizeInBits(SrcTy))); uint64_t SrcEltSizeInBits = DL->getTypeSizeInBits(SrcTy->getElementType()); uint64_t EltBitMask = (1ull << SrcEltSizeInBits) - 1; + uint64_t TotalBits = DL->getTypeSizeInBits(SrcTy); + Type *PackedTy = IntegerType::get(SrcTy->getContext(), TotalBits); + Value *Mask = ConstantInt::get(PackedTy, EltBitMask); for (User *U : Ext->users()) { auto *Extract = cast<ExtractElementInst>(U); uint64_t Idx = cast<ConstantInt>(Extract->getIndexOperand())->getZExtValue(); - Value *LShr = Builder.CreateLShr(ScalarV, Idx * SrcEltSizeInBits); - Value *And = Builder.CreateAnd(LShr, EltBitMask); + uint64_t ShiftAmt = + DL->isBigEndian() + ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits) + : (Idx * SrcEltSizeInBits); + Value *LShr = Builder.CreateLShr(ScalarV, ShiftAmt); + Value *And = Builder.CreateAnd(LShr, Mask); U->replaceAllUsesWith(And); } return true; diff --git a/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll b/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll new file mode 100644 index 0000000000000..9796faf2e6feb --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes='vector-combine' -S -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=LE +; RUN: opt -passes='vector-combine' -S -mtriple=aarch64_be-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=BE + +define i64 @g(<8 x i8> %v) { +; LE-LABEL: @g( +; LE-NEXT: [[TMP1:%.*]] = freeze <8 x i8> [[V:%.*]] +; LE-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +; LE-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 56 +; LE-NEXT: [[TMP4:%.*]] = and i64 [[TMP2]], 255 +; LE-NEXT: [[Z:%.*]] = zext <8 x i8> [[V]] to <8 x i64> +; LE-NEXT: [[E0:%.*]] = extractelement <8 x i64> [[Z]], i32 0 +; LE-NEXT: [[E7:%.*]] = extractelement <8 x i64> [[Z]], i32 7 +; LE-NEXT: [[SUM:%.*]] = add i64 [[TMP4]], [[TMP3]] +; LE-NEXT: ret i64 [[SUM]] +; +; BE-LABEL: @g( +; BE-NEXT: [[TMP1:%.*]] = freeze <8 x i8> [[V:%.*]] +; BE-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +; BE-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 255 +; BE-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP2]], 56 +; BE-NEXT: [[Z:%.*]] = zext <8 x i8> [[V]] to <8 x i64> +; BE-NEXT: [[E0:%.*]] = extractelement <8 x i64> [[Z]], i32 0 +; BE-NEXT: [[E7:%.*]] = extractelement <8 x i64> [[Z]], i32 7 +; BE-NEXT: [[SUM:%.*]] = add i64 [[TMP4]], [[TMP3]] +; BE-NEXT: ret i64 [[SUM]] +; + %z = zext <8 x i8> %v to <8 x i64> + %e0 = extractelement <8 x i64> %z, i32 0 + %e7 = extractelement <8 x i64> %z, i32 7 + %sum = add i64 %e0, %e7 + ret i64 %sum +} + + + diff --git a/llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg b/llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg new file mode 100644 index 0000000000000..15af315f104fc --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg @@ -0,0 +1,2 @@ +if 'PowerPC' not in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll b/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll new file mode 100644 index 0000000000000..a9b719920c341 --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes='vector-combine' -S -mtriple=powerpc64-ibm-aix-xcoff %s -o - | FileCheck %s --check-prefix=BE + +define i64 @g(<8 x i8> %v) { +; BE-LABEL: @g( +; BE-NEXT: [[TMP1:%.*]] = freeze <8 x i8> [[V:%.*]] +; BE-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +; BE-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 255 +; BE-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP2]], 56 +; BE-NEXT: [[Z:%.*]] = zext <8 x i8> [[V]] to <8 x i64> +; BE-NEXT: [[E0:%.*]] = extractelement <8 x i64> [[Z]], i32 0 +; BE-NEXT: [[E7:%.*]] = extractelement <8 x i64> [[Z]], i32 7 +; BE-NEXT: [[SUM:%.*]] = add i64 [[TMP4]], [[TMP3]] +; BE-NEXT: ret i64 [[SUM]] +; + %z = zext <8 x i8> %v to <8 x i64> + %e0 = extractelement <8 x i64> %z, i32 0 + %e7 = extractelement <8 x i64> %z, i32 7 + %sum = add i64 %e0, %e7 + ret i64 %sum +} + `````````` </details> https://github.com/llvm/llvm-project/pull/159286 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits