https://github.com/AmrDeveloper created https://github.com/llvm/llvm-project/pull/141411
This change adds support for the Dynamic Shuffle op for VectorType Issue https://github.com/llvm/llvm-project/issues/136487 >From 12295ee3a5613d7473ab33a4579015a2217617a0 Mon Sep 17 00:00:00 2001 From: AmrDeveloper <am...@programmer.net> Date: Sun, 25 May 2025 17:21:34 +0200 Subject: [PATCH] [CIR] Upstream ShuffleDynamicOp for VectorType --- clang/include/clang/CIR/Dialect/IR/CIROps.td | 33 ++++++++++++ clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp | 14 +++++ clang/lib/CIR/Dialect/IR/CIRDialect.cpp | 14 +++++ .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 51 ++++++++++++++++++- .../CIR/Lowering/DirectToLLVM/LowerToLLVM.h | 11 ++++ clang/test/CIR/CodeGen/vector-ext.cpp | 41 +++++++++++++++ clang/test/CIR/CodeGen/vector.cpp | 43 ++++++++++++++++ clang/test/CIR/IR/vector.cir | 22 ++++++++ 8 files changed, 228 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 5ce03c19369cb..a8229d4c45308 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -2141,4 +2141,37 @@ def VecCmpOp : CIR_Op<"vec.cmp", [Pure, SameTypeOperands]> { }]; } +//===----------------------------------------------------------------------===// +// VecShuffleDynamicOp +//===----------------------------------------------------------------------===// + +def VecShuffleDynamicOp : CIR_Op<"vec.shuffle.dynamic", + [Pure, AllTypesMatch<["vec", "result"]>]> { + let summary = "Shuffle a vector using indices in another vector"; + let description = [{ + The `cir.vec.shuffle.dynamic` operation implements the undocumented form of + Clang's __builtin_shufflevector, where the indices of the shuffled result + can be runtime values. + + There are two input vectors, which must have the same number of elements. + The second input vector must have an integral element type. The elements of + the second vector are interpreted as indices into the first vector. The + result vector is constructed by taking the elements from the first input + vector from the indices indicated by the elements of the second vector. + + ```mlir + %new_vec = cir.vec.shuffle.dynamic %vec : !cir.vector<4 x !s32i>, %indices : !cir.vector<4 x !s32i> + ``` + }]; + + let arguments = (ins CIR_VectorType:$vec, IntegerVector:$indices); + let results = (outs CIR_VectorType:$result); + let assemblyFormat = [{ + $vec `:` qualified(type($vec)) `,` $indices `:` qualified(type($indices)) + attr-dict + }]; + + let hasVerifier = 1; +} + #endif // CLANG_CIR_DIALECT_IR_CIROPS_TD diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index 058015ca55729..bdb12bf86d1bf 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -171,6 +171,20 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> { return emitLoadOfLValue(e); } + mlir::Value VisitShuffleVectorExpr(ShuffleVectorExpr *e) { + if (e->getNumSubExprs() == 2) { + // The undocumented form of __builtin_shufflevector. + mlir::Value inputVec = Visit(e->getExpr(0)); + mlir::Value indexVec = Visit(e->getExpr(1)); + return cgf.builder.create<cir::VecShuffleDynamicOp>( + cgf.getLoc(e->getSourceRange()), inputVec, indexVec); + } + + cgf.getCIRGenModule().errorNYI(e->getSourceRange(), + "ShuffleVectorExpr with indices"); + return {}; + } + mlir::Value VisitMemberExpr(MemberExpr *e); mlir::Value VisitInitListExpr(InitListExpr *e); diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index 36dcbc6a4be4a..627066755fe97 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -1548,6 +1548,20 @@ OpFoldResult cir::VecExtractOp::fold(FoldAdaptor adaptor) { return elements[index]; } +//===----------------------------------------------------------------------===// +// VecShuffleDynamicOp +//===----------------------------------------------------------------------===// + +LogicalResult cir::VecShuffleDynamicOp::verify() { + // The number of elements in the two input vectors must match. + if (getVec().getType().getSize() != + mlir::cast<cir::VectorType>(getIndices().getType()).getSize()) { + return emitOpError() << ": the number of elements in " << getVec().getType() + << " and " << getIndices().getType() << " don't match"; + } + return success(); +} + //===----------------------------------------------------------------------===// // TableGen'd op method definitions //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 8e82af7e62bc0..67590b90b0325 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -1717,7 +1717,8 @@ void ConvertCIRToLLVMPass::runOnOperation() { CIRToLLVMVecCreateOpLowering, CIRToLLVMVecExtractOpLowering, CIRToLLVMVecInsertOpLowering, - CIRToLLVMVecCmpOpLowering + CIRToLLVMVecCmpOpLowering, + CIRToLLVMVecShuffleDynamicOpLowering // clang-format on >(converter, patterns.getContext()); @@ -1871,6 +1872,54 @@ mlir::LogicalResult CIRToLLVMVecCmpOpLowering::matchAndRewrite( return mlir::success(); } +mlir::LogicalResult CIRToLLVMVecShuffleDynamicOpLowering::matchAndRewrite( + cir::VecShuffleDynamicOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + // LLVM IR does not have an operation that corresponds to this form of + // the built-in. + // __builtin_shufflevector(V, I) + // is implemented as this pseudocode, where the for loop is unrolled + // and N is the number of elements: + // masked = I & (N-1) + // for (i in 0 <= i < N) + // result[i] = V[masked[i]] + mlir::Location loc = op.getLoc(); + mlir::Value input = adaptor.getVec(); + mlir::Type llvmIndexVecType = + getTypeConverter()->convertType(op.getIndices().getType()); + mlir::Type llvmIndexType = getTypeConverter()->convertType( + elementTypeIfVector(op.getIndices().getType())); + uint64_t numElements = + mlir::cast<cir::VectorType>(op.getVec().getType()).getSize(); + mlir::Value maskValue = rewriter.create<mlir::LLVM::ConstantOp>( + loc, llvmIndexType, + mlir::IntegerAttr::get(llvmIndexType, numElements - 1)); + mlir::Value maskVector = + rewriter.create<mlir::LLVM::UndefOp>(loc, llvmIndexVecType); + for (uint64_t i = 0; i < numElements; ++i) { + mlir::Value iValue = + rewriter.create<mlir::LLVM::ConstantOp>(loc, rewriter.getI64Type(), i); + maskVector = rewriter.create<mlir::LLVM::InsertElementOp>( + loc, maskVector, maskValue, iValue); + } + mlir::Value maskedIndices = rewriter.create<mlir::LLVM::AndOp>( + loc, llvmIndexVecType, adaptor.getIndices(), maskVector); + mlir::Value result = rewriter.create<mlir::LLVM::UndefOp>( + loc, getTypeConverter()->convertType(op.getVec().getType())); + for (uint64_t i = 0; i < numElements; ++i) { + mlir::Value iValue = + rewriter.create<mlir::LLVM::ConstantOp>(loc, rewriter.getI64Type(), i); + mlir::Value indexValue = rewriter.create<mlir::LLVM::ExtractElementOp>( + loc, maskedIndices, iValue); + mlir::Value valueAtIndex = + rewriter.create<mlir::LLVM::ExtractElementOp>(loc, input, indexValue); + result = rewriter.create<mlir::LLVM::InsertElementOp>(loc, result, + valueAtIndex, iValue); + } + rewriter.replaceOp(op, result); + return mlir::success(); +} + std::unique_ptr<mlir::Pass> createConvertCIRToLLVMPass() { return std::make_unique<ConvertCIRToLLVMPass>(); } diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h index 053e77f03648e..6b8862db2c8be 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h @@ -352,6 +352,17 @@ class CIRToLLVMVecCmpOpLowering mlir::ConversionPatternRewriter &) const override; }; +class CIRToLLVMVecShuffleDynamicOpLowering + : public mlir::OpConversionPattern<cir::VecShuffleDynamicOp> { +public: + using mlir::OpConversionPattern< + cir::VecShuffleDynamicOp>::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(cir::VecShuffleDynamicOp op, OpAdaptor, + mlir::ConversionPatternRewriter &) const override; +}; + } // namespace direct } // namespace cir diff --git a/clang/test/CIR/CodeGen/vector-ext.cpp b/clang/test/CIR/CodeGen/vector-ext.cpp index aabb5a43370e5..5d246c9670163 100644 --- a/clang/test/CIR/CodeGen/vector-ext.cpp +++ b/clang/test/CIR/CodeGen/vector-ext.cpp @@ -934,3 +934,44 @@ void foo14() { // OGCG: %[[GE:.*]] = fcmp oge <4 x float> %[[TMP_A]], %[[TMP_B]] // OGCG: %[[RES:.*]] = sext <4 x i1> %[[GE]] to <4 x i32> +void foo15() { + vi4 a; + vi4 b; + vi4 r = __builtin_shufflevector(a, b); +} + +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CIR: %[[TMP_B:.*]] = cir.load{{>*}} {{.*}} : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CIR: %[[NEW_VEC:.*]] = cir.vec.shuffle.dynamic %[[TMP_A]] : !cir.vector<4 x !s32i>, %[[TMP_B]] : !cir.vector<4 x !s32i> + +// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr {{.*}}, align 16 +// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr {{.*}}, align 16 +// LLVM: %[[MASK:.*]] = and <4 x i32> %[[TMP_B]], splat (i32 3) +// LLVM: %[[SHUF_IDX_0:.*]] = extractelement <4 x i32> %[[MASK]], i64 0 +// LLVM: %[[SHUF_ELE_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]] +// LLVM: %[[SHUF_INS_0:.*]] = insertelement <4 x i32> undef, i32 %[[SHUF_ELE_0]], i64 0 +// LLVM: %[[SHUF_IDX_1:.*]] = extractelement <4 x i32> %[[MASK]], i64 1 +// LLVM: %[[SHUF_ELE_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]] +// LLVM: %[[SHUF_INS_1:.*]] = insertelement <4 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1 +// LLVM: %[[SHUF_IDX_2:.*]] = extractelement <4 x i32> %[[MASK]], i64 2 +// LLVM: %[[SHUF_ELE_2:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]] +// LLVM: %[[SHUF_INS_2:.*]] = insertelement <4 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2 +// LLVM: %[[SHUF_IDX_3:.*]] = extractelement <4 x i32> %[[MASK]], i64 3 +// LLVM: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]] +// LLVM: %[[SHUF_INS_3:.*]] = insertelement <4 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3 + +// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr {{.*}}, align 16 +// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr {{.*}}, align 16 +// OGCG: %[[MASK:.*]] = and <4 x i32> %[[TMP_B]], splat (i32 3) +// OGCG: %[[SHUF_IDX_0:.*]] = extractelement <4 x i32> %[[MASK]], i64 0 +// OGCG: %[[SHUF_ELE_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]] +// OGCG: %[[SHUF_INS_0:.*]] = insertelement <4 x i32> poison, i32 %[[SHUF_ELE_0]], i64 0 +// OGCG: %[[SHUF_IDX_1:.*]] = extractelement <4 x i32> %[[MASK]], i64 1 +// OGCG: %[[SHUF_ELE_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]] +// OGCG: %[[SHUF_INS_1:.*]] = insertelement <4 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1 +// OGCG: %[[SHUF_IDX_2:.*]] = extractelement <4 x i32> %[[MASK]], i64 2 +// OGCG: %[[SHUF_ELE_2:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]] +// OGCG: %[[SHUF_INS_2:.*]] = insertelement <4 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2 +// OGCG: %[[SHUF_IDX_3:.*]] = extractelement <4 x i32> %[[MASK]], i64 3 +// OGCG: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]] +// OGCG: %[[SHUF_INS_3:.*]] = insertelement <4 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3 diff --git a/clang/test/CIR/CodeGen/vector.cpp b/clang/test/CIR/CodeGen/vector.cpp index cdfd0b05deddb..ae769d09e3981 100644 --- a/clang/test/CIR/CodeGen/vector.cpp +++ b/clang/test/CIR/CodeGen/vector.cpp @@ -922,3 +922,46 @@ void foo14() { // OGCG: %[[GE:.*]] = fcmp oge <4 x float> %[[TMP_A]], %[[TMP_B]] // OGCG: %[[RES:.*]] = sext <4 x i1> %[[GE]] to <4 x i32> // OGCG: store <4 x i32> %[[RES]], ptr {{.*}}, align 16 + +void foo15() { + vi4 a; + vi4 b; + vi4 r = __builtin_shufflevector(a, b); +} + +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CIR: %[[TMP_B:.*]] = cir.load{{>*}} {{.*}} : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CIR: %[[NEW_VEC:.*]] = cir.vec.shuffle.dynamic %[[TMP_A]] : !cir.vector<4 x !s32i>, %[[TMP_B]] : !cir.vector<4 x !s32i> + +// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr {{.*}}, align 16 +// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr {{.*}}, align 16 +// LLVM: %[[MASK:.*]] = and <4 x i32> %[[TMP_B]], splat (i32 3) +// LLVM: %[[SHUF_IDX_0:.*]] = extractelement <4 x i32> %[[MASK]], i64 0 +// LLVM: %[[SHUF_ELE_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]] +// LLVM: %[[SHUF_INS_0:.*]] = insertelement <4 x i32> undef, i32 %[[SHUF_ELE_0]], i64 0 +// LLVM: %[[SHUF_IDX_1:.*]] = extractelement <4 x i32> %[[MASK]], i64 1 +// LLVM: %[[SHUF_ELE_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]] +// LLVM: %[[SHUF_INS_1:.*]] = insertelement <4 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1 +// LLVM: %[[SHUF_IDX_2:.*]] = extractelement <4 x i32> %[[MASK]], i64 2 +// LLVM: %[[SHUF_ELE_2:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]] +// LLVM: %[[SHUF_INS_2:.*]] = insertelement <4 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2 +// LLVM: %[[SHUF_IDX_3:.*]] = extractelement <4 x i32> %[[MASK]], i64 3 +// LLVM: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]] +// LLVM: %[[SHUF_INS_3:.*]] = insertelement <4 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3 + +// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr {{.*}}, align 16 +// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr {{.*}}, align 16 +// OGCG: %[[MASK:.*]] = and <4 x i32> %[[TMP_B]], splat (i32 3) +// OGCG: %[[SHUF_IDX_0:.*]] = extractelement <4 x i32> %[[MASK]], i64 0 +// OGCG: %[[SHUF_ELE_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]] +// OGCG: %[[SHUF_INS_0:.*]] = insertelement <4 x i32> poison, i32 %[[SHUF_ELE_0]], i64 0 +// OGCG: %[[SHUF_IDX_1:.*]] = extractelement <4 x i32> %[[MASK]], i64 1 +// OGCG: %[[SHUF_ELE_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]] +// OGCG: %[[SHUF_INS_1:.*]] = insertelement <4 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1 +// OGCG: %[[SHUF_IDX_2:.*]] = extractelement <4 x i32> %[[MASK]], i64 2 +// OGCG: %[[SHUF_ELE_2:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]] +// OGCG: %[[SHUF_INS_2:.*]] = insertelement <4 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2 +// OGCG: %[[SHUF_IDX_3:.*]] = extractelement <4 x i32> %[[MASK]], i64 3 +// OGCG: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]] +// OGCG: %[[SHUF_INS_3:.*]] = insertelement <4 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3 + diff --git a/clang/test/CIR/IR/vector.cir b/clang/test/CIR/IR/vector.cir index 6ad008e8d0e9f..a455acf92ab6f 100644 --- a/clang/test/CIR/IR/vector.cir +++ b/clang/test/CIR/IR/vector.cir @@ -165,4 +165,26 @@ cir.func @vector_compare_test() { // CHECK: cir.return // CHECK: } +cir.func @vector_shuffle_dynamic_test() { + %0 = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"] + %1 = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b"] + %2 = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["r", init] + %3 = cir.load align(16) %0 : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> + %4 = cir.load align(16) %1 : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> + %5 = cir.vec.shuffle.dynamic %3 : !cir.vector<4 x !s32i>, %4 : !cir.vector<4 x !s32i> + cir.store align(16) %5, %2 : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>> + cir.return +} + +// CHECK: cir.func @vector_shuffle_dynamic_test() { +// CHECK: %[[VEC_A:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"] +// CHECK: %[[VEC_B:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b"] +// CHECK: %[[RES:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["r", init] +// CHECK: %[[TMP_A:.*]] = cir.load{{.*}} %[[VEC_A]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CHECK: %[[TMP_B:.*]] = cir.load{{.*}} %[[VEC_B]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CHECK: %[[VEC_SHUF:.*]] = cir.vec.shuffle.dynamic %[[TMP_A]] : !cir.vector<4 x !s32i>, %[[TMP_B]] : !cir.vector<4 x !s32i> +// CHECK: cir.store{{.*}} %[[VEC_SHUF]], %[[RES]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>> +// CHECK: cir.return +// CHECK: } + } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits