https://github.com/AmrDeveloper updated https://github.com/llvm/llvm-project/pull/141411
>From 8e662a8ccbde1fce77399af5e7b0c24249c1bd96 Mon Sep 17 00:00:00 2001 From: AmrDeveloper <am...@programmer.net> Date: Sun, 25 May 2025 17:21:34 +0200 Subject: [PATCH 1/4] [CIR] Upstream ShuffleDynamicOp for VectorType --- clang/include/clang/CIR/Dialect/IR/CIROps.td | 33 ++++++++++++ clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp | 14 +++++ clang/lib/CIR/Dialect/IR/CIRDialect.cpp | 14 +++++ .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 51 ++++++++++++++++++- .../CIR/Lowering/DirectToLLVM/LowerToLLVM.h | 11 ++++ clang/test/CIR/CodeGen/vector-ext.cpp | 42 +++++++++++++++ clang/test/CIR/CodeGen/vector.cpp | 43 ++++++++++++++++ clang/test/CIR/IR/vector.cir | 22 ++++++++ 8 files changed, 229 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 5ce03c19369cb..a8229d4c45308 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -2141,4 +2141,37 @@ def VecCmpOp : CIR_Op<"vec.cmp", [Pure, SameTypeOperands]> { }]; } +//===----------------------------------------------------------------------===// +// VecShuffleDynamicOp +//===----------------------------------------------------------------------===// + +def VecShuffleDynamicOp : CIR_Op<"vec.shuffle.dynamic", + [Pure, AllTypesMatch<["vec", "result"]>]> { + let summary = "Shuffle a vector using indices in another vector"; + let description = [{ + The `cir.vec.shuffle.dynamic` operation implements the undocumented form of + Clang's __builtin_shufflevector, where the indices of the shuffled result + can be runtime values. + + There are two input vectors, which must have the same number of elements. + The second input vector must have an integral element type. The elements of + the second vector are interpreted as indices into the first vector. 
The + result vector is constructed by taking the elements from the first input + vector from the indices indicated by the elements of the second vector. + + ```mlir + %new_vec = cir.vec.shuffle.dynamic %vec : !cir.vector<4 x !s32i>, %indices : !cir.vector<4 x !s32i> + ``` + }]; + + let arguments = (ins CIR_VectorType:$vec, IntegerVector:$indices); + let results = (outs CIR_VectorType:$result); + let assemblyFormat = [{ + $vec `:` qualified(type($vec)) `,` $indices `:` qualified(type($indices)) + attr-dict + }]; + + let hasVerifier = 1; +} + #endif // CLANG_CIR_DIALECT_IR_CIROPS_TD diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index 058015ca55729..bdb12bf86d1bf 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -171,6 +171,20 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> { return emitLoadOfLValue(e); } + mlir::Value VisitShuffleVectorExpr(ShuffleVectorExpr *e) { + if (e->getNumSubExprs() == 2) { + // The undocumented form of __builtin_shufflevector. 
+ mlir::Value inputVec = Visit(e->getExpr(0)); + mlir::Value indexVec = Visit(e->getExpr(1)); + return cgf.builder.create<cir::VecShuffleDynamicOp>( + cgf.getLoc(e->getSourceRange()), inputVec, indexVec); + } + + cgf.getCIRGenModule().errorNYI(e->getSourceRange(), + "ShuffleVectorExpr with indices"); + return {}; + } + mlir::Value VisitMemberExpr(MemberExpr *e); mlir::Value VisitInitListExpr(InitListExpr *e); diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index 9e2b2908b22d8..c7cc27561c87c 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -1564,6 +1564,20 @@ OpFoldResult cir::VecExtractOp::fold(FoldAdaptor adaptor) { return elements[index]; } +//===----------------------------------------------------------------------===// +// VecShuffleDynamicOp +//===----------------------------------------------------------------------===// + +LogicalResult cir::VecShuffleDynamicOp::verify() { + // The number of elements in the two input vectors must match. 
+ if (getVec().getType().getSize() != + mlir::cast<cir::VectorType>(getIndices().getType()).getSize()) { + return emitOpError() << ": the number of elements in " << getVec().getType() + << " and " << getIndices().getType() << " don't match"; + } + return success(); +} + //===----------------------------------------------------------------------===// // TableGen'd op method definitions //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 8e82af7e62bc0..67590b90b0325 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -1717,7 +1717,8 @@ void ConvertCIRToLLVMPass::runOnOperation() { CIRToLLVMVecCreateOpLowering, CIRToLLVMVecExtractOpLowering, CIRToLLVMVecInsertOpLowering, - CIRToLLVMVecCmpOpLowering + CIRToLLVMVecCmpOpLowering, + CIRToLLVMVecShuffleDynamicOpLowering // clang-format on >(converter, patterns.getContext()); @@ -1871,6 +1872,54 @@ mlir::LogicalResult CIRToLLVMVecCmpOpLowering::matchAndRewrite( return mlir::success(); } +mlir::LogicalResult CIRToLLVMVecShuffleDynamicOpLowering::matchAndRewrite( + cir::VecShuffleDynamicOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + // LLVM IR does not have an operation that corresponds to this form of + // the built-in. 
+ // __builtin_shufflevector(V, I) + // is implemented as this pseudocode, where the for loop is unrolled + // and N is the number of elements: + // masked = I & (N-1) + // for (i in 0 <= i < N) + // result[i] = V[masked[i]] + mlir::Location loc = op.getLoc(); + mlir::Value input = adaptor.getVec(); + mlir::Type llvmIndexVecType = + getTypeConverter()->convertType(op.getIndices().getType()); + mlir::Type llvmIndexType = getTypeConverter()->convertType( + elementTypeIfVector(op.getIndices().getType())); + uint64_t numElements = + mlir::cast<cir::VectorType>(op.getVec().getType()).getSize(); + mlir::Value maskValue = rewriter.create<mlir::LLVM::ConstantOp>( + loc, llvmIndexType, + mlir::IntegerAttr::get(llvmIndexType, numElements - 1)); + mlir::Value maskVector = + rewriter.create<mlir::LLVM::UndefOp>(loc, llvmIndexVecType); + for (uint64_t i = 0; i < numElements; ++i) { + mlir::Value iValue = + rewriter.create<mlir::LLVM::ConstantOp>(loc, rewriter.getI64Type(), i); + maskVector = rewriter.create<mlir::LLVM::InsertElementOp>( + loc, maskVector, maskValue, iValue); + } + mlir::Value maskedIndices = rewriter.create<mlir::LLVM::AndOp>( + loc, llvmIndexVecType, adaptor.getIndices(), maskVector); + mlir::Value result = rewriter.create<mlir::LLVM::UndefOp>( + loc, getTypeConverter()->convertType(op.getVec().getType())); + for (uint64_t i = 0; i < numElements; ++i) { + mlir::Value iValue = + rewriter.create<mlir::LLVM::ConstantOp>(loc, rewriter.getI64Type(), i); + mlir::Value indexValue = rewriter.create<mlir::LLVM::ExtractElementOp>( + loc, maskedIndices, iValue); + mlir::Value valueAtIndex = + rewriter.create<mlir::LLVM::ExtractElementOp>(loc, input, indexValue); + result = rewriter.create<mlir::LLVM::InsertElementOp>(loc, result, + valueAtIndex, iValue); + } + rewriter.replaceOp(op, result); + return mlir::success(); +} + std::unique_ptr<mlir::Pass> createConvertCIRToLLVMPass() { return std::make_unique<ConvertCIRToLLVMPass>(); } diff --git 
a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h index 053e77f03648e..6b8862db2c8be 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h @@ -352,6 +352,17 @@ class CIRToLLVMVecCmpOpLowering mlir::ConversionPatternRewriter &) const override; }; +class CIRToLLVMVecShuffleDynamicOpLowering + : public mlir::OpConversionPattern<cir::VecShuffleDynamicOp> { +public: + using mlir::OpConversionPattern< + cir::VecShuffleDynamicOp>::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(cir::VecShuffleDynamicOp op, OpAdaptor, + mlir::ConversionPatternRewriter &) const override; +}; + } // namespace direct } // namespace cir diff --git a/clang/test/CIR/CodeGen/vector-ext.cpp b/clang/test/CIR/CodeGen/vector-ext.cpp index aab723f041edf..4ff14b12b43cf 100644 --- a/clang/test/CIR/CodeGen/vector-ext.cpp +++ b/clang/test/CIR/CodeGen/vector-ext.cpp @@ -988,3 +988,45 @@ void foo14() { // OGCG: %[[TMP_B:.*]] = load <4 x float>, ptr %[[VEC_B]], align 16 // OGCG: %[[GE:.*]] = fcmp oge <4 x float> %[[TMP_A]], %[[TMP_B]] // OGCG: %[[RES:.*]] = sext <4 x i1> %[[GE]] to <4 x i32> + +void foo15() { + vi4 a; + vi4 b; + vi4 r = __builtin_shufflevector(a, b); +} + +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CIR: %[[TMP_B:.*]] = cir.load{{>*}} {{.*}} : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CIR: %[[NEW_VEC:.*]] = cir.vec.shuffle.dynamic %[[TMP_A]] : !cir.vector<4 x !s32i>, %[[TMP_B]] : !cir.vector<4 x !s32i> + +// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr {{.*}}, align 16 +// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr {{.*}}, align 16 +// LLVM: %[[MASK:.*]] = and <4 x i32> %[[TMP_B]], splat (i32 3) +// LLVM: %[[SHUF_IDX_0:.*]] = extractelement <4 x i32> %[[MASK]], i64 0 +// LLVM: %[[SHUF_ELE_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]] +// LLVM: %[[SHUF_INS_0:.*]] 
= insertelement <4 x i32> undef, i32 %[[SHUF_ELE_0]], i64 0 +// LLVM: %[[SHUF_IDX_1:.*]] = extractelement <4 x i32> %[[MASK]], i64 1 +// LLVM: %[[SHUF_ELE_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]] +// LLVM: %[[SHUF_INS_1:.*]] = insertelement <4 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1 +// LLVM: %[[SHUF_IDX_2:.*]] = extractelement <4 x i32> %[[MASK]], i64 2 +// LLVM: %[[SHUF_ELE_2:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]] +// LLVM: %[[SHUF_INS_2:.*]] = insertelement <4 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2 +// LLVM: %[[SHUF_IDX_3:.*]] = extractelement <4 x i32> %[[MASK]], i64 3 +// LLVM: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]] +// LLVM: %[[SHUF_INS_3:.*]] = insertelement <4 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3 + +// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr {{.*}}, align 16 +// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr {{.*}}, align 16 +// OGCG: %[[MASK:.*]] = and <4 x i32> %[[TMP_B]], splat (i32 3) +// OGCG: %[[SHUF_IDX_0:.*]] = extractelement <4 x i32> %[[MASK]], i64 0 +// OGCG: %[[SHUF_ELE_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]] +// OGCG: %[[SHUF_INS_0:.*]] = insertelement <4 x i32> poison, i32 %[[SHUF_ELE_0]], i64 0 +// OGCG: %[[SHUF_IDX_1:.*]] = extractelement <4 x i32> %[[MASK]], i64 1 +// OGCG: %[[SHUF_ELE_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]] +// OGCG: %[[SHUF_INS_1:.*]] = insertelement <4 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1 +// OGCG: %[[SHUF_IDX_2:.*]] = extractelement <4 x i32> %[[MASK]], i64 2 +// OGCG: %[[SHUF_ELE_2:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]] +// OGCG: %[[SHUF_INS_2:.*]] = insertelement <4 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2 +// OGCG: %[[SHUF_IDX_3:.*]] = extractelement <4 x i32> %[[MASK]], i64 3 +// OGCG: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]] +// OGCG: %[[SHUF_INS_3:.*]] = 
insertelement <4 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3 diff --git a/clang/test/CIR/CodeGen/vector.cpp b/clang/test/CIR/CodeGen/vector.cpp index f5a4fcacac4d4..27e669915ee60 100644 --- a/clang/test/CIR/CodeGen/vector.cpp +++ b/clang/test/CIR/CodeGen/vector.cpp @@ -967,3 +967,46 @@ void foo14() { // OGCG: %[[GE:.*]] = fcmp oge <4 x float> %[[TMP_A]], %[[TMP_B]] // OGCG: %[[RES:.*]] = sext <4 x i1> %[[GE]] to <4 x i32> // OGCG: store <4 x i32> %[[RES]], ptr {{.*}}, align 16 + +void foo15() { + vi4 a; + vi4 b; + vi4 r = __builtin_shufflevector(a, b); +} + +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CIR: %[[TMP_B:.*]] = cir.load{{>*}} {{.*}} : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CIR: %[[NEW_VEC:.*]] = cir.vec.shuffle.dynamic %[[TMP_A]] : !cir.vector<4 x !s32i>, %[[TMP_B]] : !cir.vector<4 x !s32i> + +// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr {{.*}}, align 16 +// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr {{.*}}, align 16 +// LLVM: %[[MASK:.*]] = and <4 x i32> %[[TMP_B]], splat (i32 3) +// LLVM: %[[SHUF_IDX_0:.*]] = extractelement <4 x i32> %[[MASK]], i64 0 +// LLVM: %[[SHUF_ELE_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]] +// LLVM: %[[SHUF_INS_0:.*]] = insertelement <4 x i32> undef, i32 %[[SHUF_ELE_0]], i64 0 +// LLVM: %[[SHUF_IDX_1:.*]] = extractelement <4 x i32> %[[MASK]], i64 1 +// LLVM: %[[SHUF_ELE_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]] +// LLVM: %[[SHUF_INS_1:.*]] = insertelement <4 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1 +// LLVM: %[[SHUF_IDX_2:.*]] = extractelement <4 x i32> %[[MASK]], i64 2 +// LLVM: %[[SHUF_ELE_2:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]] +// LLVM: %[[SHUF_INS_2:.*]] = insertelement <4 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2 +// LLVM: %[[SHUF_IDX_3:.*]] = extractelement <4 x i32> %[[MASK]], i64 3 +// LLVM: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> 
%[[TMP_A]], i32 %[[SHUF_IDX_3]] +// LLVM: %[[SHUF_INS_3:.*]] = insertelement <4 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3 + +// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr {{.*}}, align 16 +// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr {{.*}}, align 16 +// OGCG: %[[MASK:.*]] = and <4 x i32> %[[TMP_B]], splat (i32 3) +// OGCG: %[[SHUF_IDX_0:.*]] = extractelement <4 x i32> %[[MASK]], i64 0 +// OGCG: %[[SHUF_ELE_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]] +// OGCG: %[[SHUF_INS_0:.*]] = insertelement <4 x i32> poison, i32 %[[SHUF_ELE_0]], i64 0 +// OGCG: %[[SHUF_IDX_1:.*]] = extractelement <4 x i32> %[[MASK]], i64 1 +// OGCG: %[[SHUF_ELE_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]] +// OGCG: %[[SHUF_INS_1:.*]] = insertelement <4 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1 +// OGCG: %[[SHUF_IDX_2:.*]] = extractelement <4 x i32> %[[MASK]], i64 2 +// OGCG: %[[SHUF_ELE_2:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]] +// OGCG: %[[SHUF_INS_2:.*]] = insertelement <4 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2 +// OGCG: %[[SHUF_IDX_3:.*]] = extractelement <4 x i32> %[[MASK]], i64 3 +// OGCG: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]] +// OGCG: %[[SHUF_INS_3:.*]] = insertelement <4 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3 + diff --git a/clang/test/CIR/IR/vector.cir b/clang/test/CIR/IR/vector.cir index 6ad008e8d0e9f..a455acf92ab6f 100644 --- a/clang/test/CIR/IR/vector.cir +++ b/clang/test/CIR/IR/vector.cir @@ -165,4 +165,26 @@ cir.func @vector_compare_test() { // CHECK: cir.return // CHECK: } +cir.func @vector_shuffle_dynamic_test() { + %0 = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"] + %1 = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b"] + %2 = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["r", init] + %3 = cir.load align(16) %0 : !cir.ptr<!cir.vector<4 x !s32i>>, 
!cir.vector<4 x !s32i> + %4 = cir.load align(16) %1 : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> + %5 = cir.vec.shuffle.dynamic %3 : !cir.vector<4 x !s32i>, %4 : !cir.vector<4 x !s32i> + cir.store align(16) %5, %2 : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>> + cir.return +} + +// CHECK: cir.func @vector_shuffle_dynamic_test() { +// CHECK: %[[VEC_A:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"] +// CHECK: %[[VEC_B:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b"] +// CHECK: %[[RES:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["r", init] +// CHECK: %[[TMP_A:.*]] = cir.load{{.*}} %[[VEC_A]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CHECK: %[[TMP_B:.*]] = cir.load{{.*}} %[[VEC_B]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CHECK: %[[VEC_SHUF:.*]] = cir.vec.shuffle.dynamic %[[TMP_A]] : !cir.vector<4 x !s32i>, %[[TMP_B]] : !cir.vector<4 x !s32i> +// CHECK: cir.store{{.*}} %[[VEC_SHUF]], %[[RES]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>> +// CHECK: cir.return +// CHECK: } + } >From 77916a6311b43751be33739b7441696ecaba5b39 Mon Sep 17 00:00:00 2001 From: AmrDeveloper <am...@programmer.net> Date: Wed, 28 May 2025 19:44:40 +0200 Subject: [PATCH 2/4] Add test for the verifier --- ...alid-vector-shuffle-dyn-wrong-operands.cir | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 clang/test/CIR/IR/invalid-vector-shuffle-dyn-wrong-operands.cir diff --git a/clang/test/CIR/IR/invalid-vector-shuffle-dyn-wrong-operands.cir b/clang/test/CIR/IR/invalid-vector-shuffle-dyn-wrong-operands.cir new file mode 100644 index 0000000000000..2115e769e1773 --- /dev/null +++ b/clang/test/CIR/IR/invalid-vector-shuffle-dyn-wrong-operands.cir @@ -0,0 +1,19 @@ +// RUN: cir-opt %s -verify-diagnostics -split-input-file + +!s32i = !cir.int<s, 32> +!s64i = !cir.int<s, 64> + +module { + cir.func @foo() { 
+ %1 = cir.const #cir.int<1> : !s32i + %2 = cir.const #cir.int<2> : !s32i + %3 = cir.const #cir.int<3> : !s32i + %4 = cir.const #cir.int<4> : !s32i + %vec = cir.vec.create(%1, %2, %3, %4 : !s32i, !s32i, !s32i, !s32i) : !cir.vector<4 x !s32i> + %indices = cir.vec.create(%1, %2 : !s32i, !s32i) : !cir.vector<2 x !s32i> + + // expected-error @below {{the number of elements in '!cir.vector<4 x !cir.int<s, 32>>' and '!cir.vector<2 x !cir.int<s, 32>>' don't match}} + %new_vec = cir.vec.shuffle.dynamic %vec : !cir.vector<4 x !s32i>, %indices : !cir.vector<2 x !s32i> + cir.return + } +} >From bcb66cb722c43bf3584b28f6b8b91d2113f417b2 Mon Sep 17 00:00:00 2001 From: AmrDeveloper <am...@programmer.net> Date: Thu, 29 May 2025 00:19:45 +0200 Subject: [PATCH 3/4] Address code review comments --- clang/include/clang/CIR/Dialect/IR/CIROps.td | 3 ++- clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index a8229d4c45308..cfa766105812f 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -2160,7 +2160,8 @@ def VecShuffleDynamicOp : CIR_Op<"vec.shuffle.dynamic", vector from the indices indicated by the elements of the second vector. 
```mlir - %new_vec = cir.vec.shuffle.dynamic %vec : !cir.vector<4 x !s32i>, %indices : !cir.vector<4 x !s32i> + %new_vec = cir.vec.shuffle.dynamic %vec : !cir.vector<4 x !s32i>, %indices + : !cir.vector<4 x !s32i> ``` }]; diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 67590b90b0325..3340969275f0e 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -1891,6 +1891,11 @@ mlir::LogicalResult CIRToLLVMVecShuffleDynamicOpLowering::matchAndRewrite( elementTypeIfVector(op.getIndices().getType())); uint64_t numElements = mlir::cast<cir::VectorType>(op.getVec().getType()).getSize(); + + if (!llvm::isPowerOf2_64(numElements)) + return op.emitError() << "unsupported VecShuffleDynamic for VectorType " + "with size not power of 2"; + mlir::Value maskValue = rewriter.create<mlir::LLVM::ConstantOp>( loc, llvmIndexType, mlir::IntegerAttr::get(llvmIndexType, numElements - 1)); >From f6552364831f03ba0e431e916ffd6105105fcdc4 Mon Sep 17 00:00:00 2001 From: AmrDeveloper <am...@programmer.net> Date: Thu, 29 May 2025 12:31:40 +0200 Subject: [PATCH 4/4] Update VecShuffleDynamicOp maskBits logic --- .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 23 +++++----- clang/test/CIR/CodeGen/vector-ext.cpp | 43 +++++++++++++++++++ clang/test/CIR/CodeGen/vector.cpp | 42 ++++++++++++++++++ 3 files changed, 97 insertions(+), 11 deletions(-) diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 3340969275f0e..efa5ed5961bdd 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -1880,9 +1880,12 @@ mlir::LogicalResult CIRToLLVMVecShuffleDynamicOpLowering::matchAndRewrite( // __builtin_shufflevector(V, I) // is implemented as this pseudocode, where the for loop is unrolled // and N is the number of elements: - // 
masked = I & (N-1) - // for (i in 0 <= i < N) - // result[i] = V[masked[i]] + // + // result = undef + // maskbits = NextPowerOf2(N - 1) - 1 + // masked = I & maskbits + // for (i in 0 <= i < N) + // result[i] = V[masked[i]] mlir::Location loc = op.getLoc(); mlir::Value input = adaptor.getVec(); mlir::Type llvmIndexVecType = @@ -1892,21 +1895,19 @@ mlir::LogicalResult CIRToLLVMVecShuffleDynamicOpLowering::matchAndRewrite( uint64_t numElements = mlir::cast<cir::VectorType>(op.getVec().getType()).getSize(); - if (!llvm::isPowerOf2_64(numElements)) - return op.emitError() << "unsupported VecShuffleDynamic for VectorType " - "with size not power of 2"; - + uint64_t maskBits = llvm::NextPowerOf2(numElements - 1) - 1; mlir::Value maskValue = rewriter.create<mlir::LLVM::ConstantOp>( - loc, llvmIndexType, - mlir::IntegerAttr::get(llvmIndexType, numElements - 1)); + loc, llvmIndexType, rewriter.getIntegerAttr(llvmIndexType, maskBits)); mlir::Value maskVector = rewriter.create<mlir::LLVM::UndefOp>(loc, llvmIndexVecType); + for (uint64_t i = 0; i < numElements; ++i) { - mlir::Value iValue = + mlir::Value idxValue = rewriter.create<mlir::LLVM::ConstantOp>(loc, rewriter.getI64Type(), i); maskVector = rewriter.create<mlir::LLVM::InsertElementOp>( - loc, maskVector, maskValue, iValue); + loc, maskVector, maskValue, idxValue); } + mlir::Value maskedIndices = rewriter.create<mlir::LLVM::AndOp>( loc, llvmIndexVecType, adaptor.getIndices(), maskVector); mlir::Value result = rewriter.create<mlir::LLVM::UndefOp>( diff --git a/clang/test/CIR/CodeGen/vector-ext.cpp b/clang/test/CIR/CodeGen/vector-ext.cpp index 4ff14b12b43cf..9316c0c2c61eb 100644 --- a/clang/test/CIR/CodeGen/vector-ext.cpp +++ b/clang/test/CIR/CodeGen/vector-ext.cpp @@ -6,6 +6,7 @@ // RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG typedef int vi4 __attribute__((ext_vector_type(4))); +typedef int vi6 __attribute__((ext_vector_type(6))); typedef unsigned int uvi4 __attribute__((ext_vector_type(4))); typedef int vi3 
__attribute__((ext_vector_type(3))); typedef int vi2 __attribute__((ext_vector_type(2))); @@ -1030,3 +1031,45 @@ void foo15() { // OGCG: %[[SHUF_IDX_3:.*]] = extractelement <4 x i32> %[[MASK]], i64 3 // OGCG: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]] // OGCG: %[[SHUF_INS_3:.*]] = insertelement <4 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3 + +void foo16() { + vi6 a; + vi6 b; + vi6 r = __builtin_shufflevector(a, b); +} + +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<6 x !s32i>>, !cir.vector<6 x !s32i> +// CIR: %[[TMP_B:.*]] = cir.load{{>*}} {{.*}} : !cir.ptr<!cir.vector<6 x !s32i>>, !cir.vector<6 x !s32i> +// CIR: %[[NEW_VEC:.*]] = cir.vec.shuffle.dynamic %[[TMP_A]] : !cir.vector<6 x !s32i>, %[[TMP_B]] : !cir.vector<6 x !s32i> + +// LLVM: %[[TMP_A:.*]] = load <6 x i32>, ptr {{.*}}, align 32 +// LLVM: %[[TMP_B:.*]] = load <6 x i32>, ptr {{.*}}, align 32 +// LLVM: %[[MASK:.*]] = and <6 x i32> %[[TMP_B]], splat (i32 7) +// LLVM: %[[SHUF_IDX_0:.*]] = extractelement <6 x i32> %[[MASK]], i64 0 +// LLVM: %[[SHUF_ELE_0:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]] +// LLVM: %[[SHUF_INS_0:.*]] = insertelement <6 x i32> undef, i32 %[[SHUF_ELE_0]], i64 0 +// LLVM: %[[SHUF_IDX_1:.*]] = extractelement <6 x i32> %[[MASK]], i64 1 +// LLVM: %[[SHUF_ELE_1:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]] +// LLVM: %[[SHUF_INS_1:.*]] = insertelement <6 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1 +// LLVM: %[[SHUF_IDX_2:.*]] = extractelement <6 x i32> %[[MASK]], i64 2 +// LLVM: %[[SHUF_ELE_2:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]] +// LLVM: %[[SHUF_INS_2:.*]] = insertelement <6 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2 +// LLVM: %[[SHUF_IDX_3:.*]] = extractelement <6 x i32> %[[MASK]], i64 3 +// LLVM: %[[SHUF_ELE_3:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]] +// LLVM: %[[SHUF_INS_3:.*]] = insertelement <6 x i32> %[[SHUF_INS_2]], 
i32 %[[SHUF_ELE_3]], i64 3 + +// OGCG: %[[TMP_A:.*]] = load <6 x i32>, ptr {{.*}}, align 32 +// OGCG: %[[TMP_B:.*]] = load <6 x i32>, ptr {{.*}}, align 32 +// OGCG: %[[MASK:.*]] = and <6 x i32> %[[TMP_B]], splat (i32 7) +// OGCG: %[[SHUF_IDX_0:.*]] = extractelement <6 x i32> %[[MASK]], i64 0 +// OGCG: %[[SHUF_ELE_0:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]] +// OGCG: %[[SHUF_INS_0:.*]] = insertelement <6 x i32> poison, i32 %[[SHUF_ELE_0]], i64 0 +// OGCG: %[[SHUF_IDX_1:.*]] = extractelement <6 x i32> %[[MASK]], i64 1 +// OGCG: %[[SHUF_ELE_1:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]] +// OGCG: %[[SHUF_INS_1:.*]] = insertelement <6 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1 +// OGCG: %[[SHUF_IDX_2:.*]] = extractelement <6 x i32> %[[MASK]], i64 2 +// OGCG: %[[SHUF_ELE_2:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]] +// OGCG: %[[SHUF_INS_2:.*]] = insertelement <6 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2 +// OGCG: %[[SHUF_IDX_3:.*]] = extractelement <6 x i32> %[[MASK]], i64 3 +// OGCG: %[[SHUF_ELE_3:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]] +// OGCG: %[[SHUF_INS_3:.*]] = insertelement <6 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3 diff --git a/clang/test/CIR/CodeGen/vector.cpp b/clang/test/CIR/CodeGen/vector.cpp index 27e669915ee60..24a30171d59c4 100644 --- a/clang/test/CIR/CodeGen/vector.cpp +++ b/clang/test/CIR/CodeGen/vector.cpp @@ -6,6 +6,7 @@ // RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG typedef int vi4 __attribute__((vector_size(16))); +typedef int vi6 __attribute__((vector_size(24))); typedef unsigned int uvi4 __attribute__((vector_size(16))); typedef float vf4 __attribute__((vector_size(16))); typedef double vd2 __attribute__((vector_size(16))); @@ -1010,3 +1011,44 @@ void foo15() { // OGCG: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]] // OGCG: %[[SHUF_INS_3:.*]] = insertelement <4 x i32> %[[SHUF_INS_2]], i32 
%[[SHUF_ELE_3]], i64 3 +void foo16() { + vi6 a; + vi6 b; + vi6 r = __builtin_shufflevector(a, b); +} + +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<6 x !s32i>>, !cir.vector<6 x !s32i> +// CIR: %[[TMP_B:.*]] = cir.load{{>*}} {{.*}} : !cir.ptr<!cir.vector<6 x !s32i>>, !cir.vector<6 x !s32i> +// CIR: %[[NEW_VEC:.*]] = cir.vec.shuffle.dynamic %[[TMP_A]] : !cir.vector<6 x !s32i>, %[[TMP_B]] : !cir.vector<6 x !s32i> + +// LLVM: %[[TMP_A:.*]] = load <6 x i32>, ptr {{.*}}, align 32 +// LLVM: %[[TMP_B:.*]] = load <6 x i32>, ptr {{.*}}, align 32 +// LLVM: %[[MASK:.*]] = and <6 x i32> %[[TMP_B]], splat (i32 7) +// LLVM: %[[SHUF_IDX_0:.*]] = extractelement <6 x i32> %[[MASK]], i64 0 +// LLVM: %[[SHUF_ELE_0:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]] +// LLVM: %[[SHUF_INS_0:.*]] = insertelement <6 x i32> undef, i32 %[[SHUF_ELE_0]], i64 0 +// LLVM: %[[SHUF_IDX_1:.*]] = extractelement <6 x i32> %[[MASK]], i64 1 +// LLVM: %[[SHUF_ELE_1:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]] +// LLVM: %[[SHUF_INS_1:.*]] = insertelement <6 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1 +// LLVM: %[[SHUF_IDX_2:.*]] = extractelement <6 x i32> %[[MASK]], i64 2 +// LLVM: %[[SHUF_ELE_2:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]] +// LLVM: %[[SHUF_INS_2:.*]] = insertelement <6 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2 +// LLVM: %[[SHUF_IDX_3:.*]] = extractelement <6 x i32> %[[MASK]], i64 3 +// LLVM: %[[SHUF_ELE_3:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]] +// LLVM: %[[SHUF_INS_3:.*]] = insertelement <6 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3 + +// OGCG: %[[TMP_A:.*]] = load <6 x i32>, ptr {{.*}}, align 32 +// OGCG: %[[TMP_B:.*]] = load <6 x i32>, ptr {{.*}}, align 32 +// OGCG: %[[MASK:.*]] = and <6 x i32> %[[TMP_B]], splat (i32 7) +// OGCG: %[[SHUF_IDX_0:.*]] = extractelement <6 x i32> %[[MASK]], i64 0 +// OGCG: %[[SHUF_ELE_0:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 
%[[SHUF_IDX_0]] +// OGCG: %[[SHUF_INS_0:.*]] = insertelement <6 x i32> poison, i32 %[[SHUF_ELE_0]], i64 0 +// OGCG: %[[SHUF_IDX_1:.*]] = extractelement <6 x i32> %[[MASK]], i64 1 +// OGCG: %[[SHUF_ELE_1:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]] +// OGCG: %[[SHUF_INS_1:.*]] = insertelement <6 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1 +// OGCG: %[[SHUF_IDX_2:.*]] = extractelement <6 x i32> %[[MASK]], i64 2 +// OGCG: %[[SHUF_ELE_2:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]] +// OGCG: %[[SHUF_INS_2:.*]] = insertelement <6 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2 +// OGCG: %[[SHUF_IDX_3:.*]] = extractelement <6 x i32> %[[MASK]], i64 3 +// OGCG: %[[SHUF_ELE_3:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]] +// OGCG: %[[SHUF_INS_3:.*]] = insertelement <6 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits