https://github.com/NexMing commented:
My goal really is for Fortran to be able to perform affine transformation optimizations, and I’ve been working on this myself. I now have some experimental results: in the [dev/flang-affine-opt](https://github.com/NexMing/llvm-project/tree/dev/flang-affine-opt) branch of my repository, flang can currently lower the following program to the affine dialect.

```
subroutine gemm(A, B, C)
  implicit none
  real, intent(in) :: A(100,200), B(200,100)
  real, intent(out) :: C(100,100)
  integer :: i, j, k
  C = 0.0
  do i = 1, 100
    do j = 1, 100
      do k = 1, 200
        C(i, j) = C(i, j) + A(i,k) * B(k,j)
      end do
    end do
  end do
end subroutine gemm
```

You can see the result in the IR dump printed when compiling with `flang -S -O1 -emit-llvm -flang-experimental-lower-through-core-mlir -mmlir -mlir-print-ir-after-all`:

```
// -----// IR Dump After RaiseMemrefDialect (affine-raise-from-memref) //----- //
func.func @gemm_(%arg0: !fir.ref<!fir.array<100x200xf32>> {fir.bindc_name = "a", llvm.noalias, llvm.nocapture}, %arg1: !fir.ref<!fir.array<200x100xf32>> {fir.bindc_name = "b", llvm.noalias, llvm.nocapture}, %arg2: !fir.ref<!fir.array<100x100xf32>> {fir.bindc_name = "c", llvm.noalias, llvm.nocapture}) attributes {fir.internal_name = "_QPgemm", target_cpu = "x86-64"} {
  %cst = arith.constant 0.000000e+00 : f32
  %0 = builtin.unrealized_conversion_cast %arg1 : !fir.ref<!fir.array<200x100xf32>> to memref<f32, strided<[], offset: ?>>
  %1 = builtin.unrealized_conversion_cast %arg0 : !fir.ref<!fir.array<100x200xf32>> to memref<f32, strided<[], offset: ?>>
  %2 = builtin.unrealized_conversion_cast %arg2 : !fir.ref<!fir.array<100x100xf32>> to memref<f32, strided<[], offset: ?>>
  %base_buffer, %offset = memref.extract_strided_metadata %2 : memref<f32, strided<[], offset: ?>> -> memref<f32>, index
  %reinterpret_cast = memref.reinterpret_cast %2 to offset: [%offset], sizes: [10000], strides: [1] : memref<f32, strided<[], offset: ?>> to memref<10000xf32, strided<[1], offset: ?>>
  affine.for %arg3 = 0 to 10000 {
    affine.store %cst, %reinterpret_cast[%arg3] : memref<10000xf32, strided<[1], offset: ?>>
  }
  %reinterpret_cast_0 = memref.reinterpret_cast %2 to offset: [%offset], sizes: [100, 100], strides: [100, 1] : memref<f32, strided<[], offset: ?>> to memref<100x100xf32, strided<[100, 1], offset: ?>>
  %base_buffer_1, %offset_2 = memref.extract_strided_metadata %1 : memref<f32, strided<[], offset: ?>> -> memref<f32>, index
  %reinterpret_cast_3 = memref.reinterpret_cast %1 to offset: [%offset_2], sizes: [200, 100], strides: [100, 1] : memref<f32, strided<[], offset: ?>> to memref<200x100xf32, strided<[100, 1], offset: ?>>
  %base_buffer_4, %offset_5 = memref.extract_strided_metadata %0 : memref<f32, strided<[], offset: ?>> -> memref<f32>, index
  %reinterpret_cast_6 = memref.reinterpret_cast %0 to offset: [%offset_5], sizes: [100, 200], strides: [200, 1] : memref<f32, strided<[], offset: ?>> to memref<100x200xf32, strided<[200, 1], offset: ?>>
  affine.for %arg3 = 0 to 100 {
    affine.for %arg4 = 0 to 100 {
      affine.for %arg5 = 0 to 200 {
        %3 = affine.load %reinterpret_cast_0[%arg4, %arg3] : memref<100x100xf32, strided<[100, 1], offset: ?>>
        %4 = affine.load %reinterpret_cast_3[%arg5, %arg3] : memref<200x100xf32, strided<[100, 1], offset: ?>>
        %5 = affine.load %reinterpret_cast_6[%arg4, %arg5] : memref<100x200xf32, strided<[200, 1], offset: ?>>
        %6 = arith.mulf %4, %5 fastmath<contract> : f32
        %7 = arith.addf %3, %6 fastmath<contract> : f32
        affine.store %7, %reinterpret_cast_0[%arg4, %arg3] : memref<100x100xf32, strided<[100, 1], offset: ?>>
      }
    }
  }
  return
}
```

https://github.com/llvm/llvm-project/pull/168703
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
