Author: Matthias Springer Date: 2024-01-31T09:32:54+01:00 New Revision: 23734917da16bdfbadf4d75e657f7ff6deabd618
URL: https://github.com/llvm/llvm-project/commit/23734917da16bdfbadf4d75e657f7ff6deabd618 DIFF: https://github.com/llvm/llvm-project/commit/23734917da16bdfbadf4d75e657f7ff6deabd618.diff LOG: Revert "[mlir][memref] `memref.subview`: Verify result strides" Added: Modified: mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp mlir/test/Dialect/GPU/decompose-memrefs.mlir mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir mlir/test/Dialect/MemRef/invalid.mlir mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort_coo.mlir Removed: ################################################################################ diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp index f43217f6f27ae..8b5765b7f8dba 100644 --- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp +++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp @@ -917,7 +917,7 @@ static std::map<int64_t, unsigned> getNumOccurences(ArrayRef<int64_t> vals) { /// This accounts for cases where there are multiple unit-dims, but only a /// subset of those are dropped. For MemRefTypes these can be disambiguated /// using the strides. If a dimension is dropped the stride must be dropped too. -static FailureOr<llvm::SmallBitVector> +static std::optional<llvm::SmallBitVector> computeMemRefRankReductionMask(MemRefType originalType, MemRefType reducedType, ArrayRef<OpFoldResult> sizes) { llvm::SmallBitVector unusedDims(originalType.getRank()); @@ -941,7 +941,7 @@ computeMemRefRankReductionMask(MemRefType originalType, MemRefType reducedType, getStridesAndOffset(originalType, originalStrides, originalOffset)) || failed( getStridesAndOffset(reducedType, candidateStrides, candidateOffset))) - return failure(); + return std::nullopt; // For memrefs, a dimension is truly dropped if its corresponding stride is // also dropped. This is particularly important when more than one of the dims @@ -976,22 +976,22 @@ computeMemRefRankReductionMask(MemRefType originalType, MemRefType reducedType, candidateStridesNumOccurences[originalStride]) { // This should never happen. Cant have a stride in the reduced rank type // that wasnt in the original one. - return failure(); + return std::nullopt; } } if ((int64_t)unusedDims.count() + reducedType.getRank() != originalType.getRank()) - return failure(); + return std::nullopt; return unusedDims; } llvm::SmallBitVector SubViewOp::getDroppedDims() { MemRefType sourceType = getSourceType(); MemRefType resultType = getType(); - FailureOr<llvm::SmallBitVector> unusedDims = + std::optional<llvm::SmallBitVector> unusedDims = computeMemRefRankReductionMask(sourceType, resultType, getMixedSizes()); - assert(succeeded(unusedDims) && "unable to find unused dims of subview"); + assert(unusedDims && "unable to find unused dims of subview"); return *unusedDims; } @@ -2745,7 +2745,7 @@ void SubViewOp::build(OpBuilder &b, OperationState &result, Value source, /// For ViewLikeOpInterface. Value SubViewOp::getViewSource() { return getSource(); } -/// Return true if `t1` and `t2` have equal offsets (both dynamic or of same +/// Return true if t1 and t2 have equal offsets (both dynamic or of same /// static value). static bool haveCompatibleOffsets(MemRefType t1, MemRefType t2) { int64_t t1Offset, t2Offset; @@ -2755,41 +2755,56 @@ static bool haveCompatibleOffsets(MemRefType t1, MemRefType t2) { return succeeded(res1) && succeeded(res2) && t1Offset == t2Offset; } -/// Return true if `t1` and `t2` have equal strides (both dynamic or of same -/// static value). -static bool haveCompatibleStrides(MemRefType t1, MemRefType t2) { - int64_t t1Offset, t2Offset; - SmallVector<int64_t> t1Strides, t2Strides; - auto res1 = getStridesAndOffset(t1, t1Strides, t1Offset); - auto res2 = getStridesAndOffset(t2, t2Strides, t2Offset); - if (failed(res1) || failed(res2)) - return false; - for (auto [s1, s2] : llvm::zip_equal(t1Strides, t2Strides)) - if (s1 != s2) - return false; - return true; +/// Checks if `original` Type type can be rank reduced to `reduced` type. +/// This function is slight variant of `is subsequence` algorithm where +/// not matching dimension must be 1. +static SliceVerificationResult +isRankReducedMemRefType(MemRefType originalType, + MemRefType candidateRankReducedType, + ArrayRef<OpFoldResult> sizes) { + auto partialRes = isRankReducedType(originalType, candidateRankReducedType); + if (partialRes != SliceVerificationResult::Success) + return partialRes; + + auto optionalUnusedDimsMask = computeMemRefRankReductionMask( + originalType, candidateRankReducedType, sizes); + + // Sizes cannot be matched in case empty vector is returned. + if (!optionalUnusedDimsMask) + return SliceVerificationResult::LayoutMismatch; + + if (originalType.getMemorySpace() != + candidateRankReducedType.getMemorySpace()) + return SliceVerificationResult::MemSpaceMismatch; + + // No amount of stride dropping can reconcile incompatible offsets. + if (!haveCompatibleOffsets(originalType, candidateRankReducedType)) + return SliceVerificationResult::LayoutMismatch; + + return SliceVerificationResult::Success; } +template <typename OpTy> static LogicalResult produceSubViewErrorMsg(SliceVerificationResult result, - Operation *op, Type expectedType) { + OpTy op, Type expectedType) { auto memrefType = llvm::cast<ShapedType>(expectedType); switch (result) { case SliceVerificationResult::Success: return success(); case SliceVerificationResult::RankTooLarge: - return op->emitError("expected result rank to be smaller or equal to ") + return op.emitError("expected result rank to be smaller or equal to ") << "the source rank. "; case SliceVerificationResult::SizeMismatch: - return op->emitError("expected result type to be ") + return op.emitError("expected result type to be ") << expectedType << " or a rank-reduced version. (mismatch of result sizes) "; case SliceVerificationResult::ElemTypeMismatch: - return op->emitError("expected result element type to be ") + return op.emitError("expected result element type to be ") << memrefType.getElementType(); case SliceVerificationResult::MemSpaceMismatch: - return op->emitError("expected result and source memory spaces to match."); + return op.emitError("expected result and source memory spaces to match."); case SliceVerificationResult::LayoutMismatch: - return op->emitError("expected result type to be ") + return op.emitError("expected result type to be ") << expectedType << " or a rank-reduced version. (mismatch of result layout) "; } @@ -2811,46 +2826,13 @@ LogicalResult SubViewOp::verify() { if (!isStrided(baseType)) return emitError("base type ") << baseType << " is not strided"; - // Compute the expected result type, assuming that there are no rank - // reductions. - auto expectedType = cast<MemRefType>(SubViewOp::inferResultType( - baseType, getStaticOffsets(), getStaticSizes(), getStaticStrides())); - - // Verify all properties of a shaped type: rank, element type and dimension - // sizes. This takes into account potential rank reductions. - auto shapedTypeVerification = isRankReducedType( - /*originalType=*/expectedType, /*candidateReducedType=*/subViewType); - if (shapedTypeVerification != SliceVerificationResult::Success) - return produceSubViewErrorMsg(shapedTypeVerification, *this, expectedType); - - // Make sure that the memory space did not change. - if (expectedType.getMemorySpace() != subViewType.getMemorySpace()) - return produceSubViewErrorMsg(SliceVerificationResult::MemSpaceMismatch, - *this, expectedType); - - // Verify the offset of the layout map. - if (!haveCompatibleOffsets(expectedType, subViewType)) - return produceSubViewErrorMsg(SliceVerificationResult::LayoutMismatch, - *this, expectedType); - - // The only thing that's left to verify now are the strides. First, compute - // the unused dimensions due to rank reductions. We have to look at sizes and - // strides to decide which dimensions were dropped. This function also - // partially verifies strides in case of rank reductions. - auto unusedDims = computeMemRefRankReductionMask(expectedType, subViewType, - getMixedSizes()); - if (failed(unusedDims)) - return produceSubViewErrorMsg(SliceVerificationResult::LayoutMismatch, - *this, expectedType); - - // Strides must match if there are no rank reductions. - // TODO: Verify strides when there are rank reductions. Strides are partially - // checked in `computeMemRefRankReductionMask`. - if (unusedDims->none() && !haveCompatibleStrides(expectedType, subViewType)) - return produceSubViewErrorMsg(SliceVerificationResult::LayoutMismatch, - *this, expectedType); + // Verify result type against inferred type. + auto expectedType = SubViewOp::inferResultType( + baseType, getStaticOffsets(), getStaticSizes(), getStaticStrides()); - return success(); + auto result = isRankReducedMemRefType(llvm::cast<MemRefType>(expectedType), + subViewType, getMixedSizes()); + return produceSubViewErrorMsg(result, *this, expectedType); } raw_ostream &mlir::operator<<(raw_ostream &os, const Range &range) { @@ -2900,9 +2882,11 @@ static MemRefType getCanonicalSubViewResultType( ArrayRef<OpFoldResult> mixedSizes, ArrayRef<OpFoldResult> mixedStrides) { auto nonRankReducedType = llvm::cast<MemRefType>(SubViewOp::inferResultType( sourceType, mixedOffsets, mixedSizes, mixedStrides)); - FailureOr<llvm::SmallBitVector> unusedDims = computeMemRefRankReductionMask( - currentSourceType, currentResultType, mixedSizes); - if (failed(unusedDims)) + std::optional<llvm::SmallBitVector> unusedDims = + computeMemRefRankReductionMask(currentSourceType, currentResultType, + mixedSizes); + // Return nullptr as failure mode. + if (!unusedDims) return nullptr; auto layout = llvm::cast<StridedLayoutAttr>(nonRankReducedType.getLayout()); diff --git a/mlir/test/Dialect/GPU/decompose-memrefs.mlir b/mlir/test/Dialect/GPU/decompose-memrefs.mlir index 56fc9a66b7ace..d714010d0f254 100644 --- a/mlir/test/Dialect/GPU/decompose-memrefs.mlir +++ b/mlir/test/Dialect/GPU/decompose-memrefs.mlir @@ -119,7 +119,7 @@ func.func @decompose_subview(%arg0 : memref<?x?x?xf32>) { // CHECK: %[[IDX1:.*]] = affine.apply #[[MAP1]]()[%[[STRIDES]]#1] // CHECK: %[[IDX2:.*]] = affine.apply #[[MAP2]]()[%[[TX]], %[[STRIDES]]#0, %[[TY]], %[[STRIDES]]#1, %[[TZ]]] // CHECK: %[[PTR:.*]] = memref.reinterpret_cast %[[BASE]] to offset: [%[[IDX2]]], sizes: [%{{.*}}, %{{.*}}, %{{.*}}], strides: [%[[IDX]], %[[IDX1]], 4] -// CHECK: "test.test"(%[[PTR]]) : (memref<?x?x?xf32, strided<[?, ?, 4], offset: ?>>) -> () +// CHECK: "test.test"(%[[PTR]]) : (memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>) -> () func.func @decompose_subview_strided(%arg0 : memref<?x?x?xf32>) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -129,8 +129,8 @@ func.func @decompose_subview_strided(%arg0 : memref<?x?x?xf32>) { %block_dim2 = memref.dim %arg0, %c2 : memref<?x?x?xf32> gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1) threads(%tx, %ty, %tz) in (%block_x = %block_dim0, %block_y = %block_dim1, %block_z = %block_dim2) { - %res = memref.subview %arg0[%tx, %ty, %tz] [%c2, %c2, %c2] [2, 3, 4] : memref<?x?x?xf32> to memref<?x?x?xf32, strided<[?, ?, 4], offset: ?>> - "test.test"(%res) : (memref<?x?x?xf32, strided<[?, ?, 4], offset: ?>>) -> () + %res = memref.subview %arg0[%tx, %ty, %tz] [%c2, %c2, %c2] [2, 3, 4] : memref<?x?x?xf32> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>> + "test.test"(%res) : (memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>) -> () gpu.terminator } return diff --git a/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir b/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir index 3407bdbc7c8f9..96b72e042b9e0 100644 --- a/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir +++ b/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir @@ -595,9 +595,9 @@ func.func @subview_of_subview(%m: memref<1x1024xf32, 3>, %pos: index) { %0 = memref.subview %m[3, %pos] [1, 2] [1, 1] : memref<1x1024xf32, 3> - to memref<1x2xf32, strided<[1024, 1], offset: ?>, 3> + to memref<1x2xf32, strided<[1024, 2], offset: ?>, 3> %1 = memref.subview %0[1, 2] [1, 1] [1, 1] - : memref<1x2xf32, strided<[1024, 1], offset: ?>, 3> + : memref<1x2xf32, strided<[1024, 2], offset: ?>, 3> to memref<f32, strided<[], offset: ?>, 3> return %1 : memref<f32, strided<[], offset: ?>, 3> } @@ -675,9 +675,9 @@ func.func @fold_gpu_subgroup_mma_store_matrix_1d(%dst: memref<?xvector<4xf32>>, // CHECK-LABEL: func.func @fold_gpu_subgroup_mma_load_matrix_2d // CHECK-SAME: %[[SRC:.+]]: memref<128x128xf32> func.func @fold_gpu_subgroup_mma_load_matrix_2d(%arg0 : memref<128x128xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index) -> !gpu.mma_matrix<16x16xf16, "COp"> { - %subview = memref.subview %arg0[%arg1, %arg2][64, 32][2, 1] : memref<128x128xf32> to memref<64x32xf32, strided<[256, 1], offset: ?>> + %subview = memref.subview %arg0[%arg1, %arg2][64, 32][2, 1] : memref<128x128xf32> to memref<64x32xf32, strided<[64, 1], offset: ?>> // CHECK: gpu.subgroup_mma_load_matrix %[[SRC]][{{.+}}] {leadDimension = 32 : index} : memref<128x128xf32> -> !gpu.mma_matrix<16x16xf16, "COp"> - %matrix = gpu.subgroup_mma_load_matrix %subview[%arg3, %arg4] {leadDimension = 32 : index} : memref<64x32xf32, strided<[256, 1], offset: ?>> -> !gpu.mma_matrix<16x16xf16, "COp"> + %matrix = gpu.subgroup_mma_load_matrix %subview[%arg3, %arg4] {leadDimension = 32 : index} : memref<64x32xf32, strided<[64, 1], offset: ?>> -> !gpu.mma_matrix<16x16xf16, "COp"> return %matrix : !gpu.mma_matrix<16x16xf16, "COp"> } @@ -686,9 +686,9 @@ func.func @fold_gpu_subgroup_mma_load_matrix_2d(%arg0 : memref<128x128xf32>, %ar // CHECK-LABEL: func.func @fold_gpu_subgroup_mma_load_matrix_2d // CHECK-SAME: %[[DST:.+]]: memref<128x128xf32> func.func @fold_gpu_subgroup_mma_load_matrix_2d(%arg0 : memref<128x128xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, %matrix: !gpu.mma_matrix<16x16xf16, "COp">) { - %subview = memref.subview %arg0[%arg1, %arg2][64, 32][2, 1] : memref<128x128xf32> to memref<64x32xf32, strided<[256, 1], offset: ?>> + %subview = memref.subview %arg0[%arg1, %arg2][64, 32][2, 1] : memref<128x128xf32> to memref<64x32xf32, strided<[64, 1], offset: ?>> // CHECK: gpu.subgroup_mma_store_matrix %{{.+}}, %[[DST]][{{.+}}] {leadDimension = 32 : index} : !gpu.mma_matrix<16x16xf16, "COp">, memref<128x128xf32> - gpu.subgroup_mma_store_matrix %matrix, %subview[%arg3, %arg4] {leadDimension = 32 : index} : !gpu.mma_matrix<16x16xf16, "COp">, memref<64x32xf32, strided<[256, 1], offset: ?>> + gpu.subgroup_mma_store_matrix %matrix, %subview[%arg3, %arg4] {leadDimension = 32 : index} : !gpu.mma_matrix<16x16xf16, "COp">, memref<64x32xf32, strided<[64, 1], offset: ?>> return } diff --git a/mlir/test/Dialect/MemRef/invalid.mlir b/mlir/test/Dialect/MemRef/invalid.mlir index be60a3dcb1b20..7bb7a2affcbd1 100644 --- a/mlir/test/Dialect/MemRef/invalid.mlir +++ b/mlir/test/Dialect/MemRef/invalid.mlir @@ -1073,12 +1073,3 @@ func.func @dim_0_ranked(%arg : memref<f32>, %arg1 : index) { memref.dim %arg, %arg1 : memref<f32> // expected-error {{'memref.dim' op operand #0 must be unranked.memref of any type values or non-0-ranked.memref of any type values, but got 'memref<f32>'}} return } - -// ----- - -func.func @subview_invalid_strides(%m: memref<7x22x333x4444xi32>) { - // expected-error @below{{expected result type to be 'memref<7x11x333x4444xi32, strided<[32556744, 2959704, 4444, 1]>>' or a rank-reduced version. (mismatch of result layout)}} - %subview = memref.subview %m[0, 0, 0, 0] [7, 11, 333, 4444] [1, 2, 1, 1] - : memref<7x22x333x4444xi32> to memref<7x11x333x4444xi32> - return -} diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort_coo.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort_coo.mlir index e7dd0ad32a243..3773cca9c8d69 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort_coo.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort_coo.mlir @@ -88,10 +88,10 @@ module { // Prepare a buffer for x0, x1, x2, y0 and a buffer for y1. %xys = memref.alloc() : memref<20xi32> %xy = memref.cast %xys : memref<20xi32> to memref<?xi32> - %x0 = memref.subview %xy[%i0][%i5][4] : memref<?xi32> to memref<?xi32, strided<[4], offset: ?>> - %x1 = memref.subview %xy[%i1][%i5][4] : memref<?xi32> to memref<?xi32, strided<[4], offset: ?>> - %x2 = memref.subview %xy[%i2][%i5][4] : memref<?xi32> to memref<?xi32, strided<[4], offset: ?>> - %y0 = memref.subview %xy[%i3][%i5][4] : memref<?xi32> to memref<?xi32, strided<[4], offset: ?>> + %x0 = memref.subview %xy[%i0][%i5][%i4] : memref<?xi32> to memref<?xi32, strided<[4], offset: ?>> + %x1 = memref.subview %xy[%i1][%i5][%i4] : memref<?xi32> to memref<?xi32, strided<[4], offset: ?>> + %x2 = memref.subview %xy[%i2][%i5][%i4] : memref<?xi32> to memref<?xi32, strided<[4], offset: ?>> + %y0 = memref.subview %xy[%i3][%i5][%i4] : memref<?xi32> to memref<?xi32, strided<[4], offset: ?>> %y1s = memref.alloc() : memref<7xi32> %y1 = memref.cast %y1s : memref<7xi32> to memref<?xi32> _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits