https://github.com/NexMing commented:

My real goal is to enable affine transformation optimizations for Fortran, 
and I’ve been working on this myself. I now have some 
experimental results: in the 
[dev/flang-affine-opt](https://github.com/NexMing/llvm-project/tree/dev/flang-affine-opt)
 branch of my repository, flang can currently lower the following program to 
the affine dialect:
```
subroutine gemm(A, B, C)
  implicit none
  real, intent(in)  :: A(100,200), B(200,100)
  real, intent(out) :: C(100,100)
  integer :: i, j, k

  C = 0.0

  do i = 1, 100
    do j = 1, 100
      do k = 1, 200
        C(i, j) = C(i, j) + A(i,k) * B(k,j)
      end do
    end do
  end do

end subroutine gemm
```
You can see the result by compiling with `flang -S -O1 -emit-llvm 
-flang-experimental-lower-through-core-mlir -mmlir -mlir-print-ir-after-all` 
and looking at the IR dump after the `affine-raise-from-memref` pass:
```
// -----// IR Dump After RaiseMemrefDialect (affine-raise-from-memref) //----- 
//
func.func @gemm_(%arg0: !fir.ref<!fir.array<100x200xf32>> {fir.bindc_name = 
"a", llvm.noalias, llvm.nocapture}, %arg1: !fir.ref<!fir.array<200x100xf32>> 
{fir.bindc_name = "b", llvm.noalias, llvm.nocapture}, %arg2: 
!fir.ref<!fir.array<100x100xf32>> {fir.bindc_name = "c", llvm.noalias, 
llvm.nocapture}) attributes {fir.internal_name = "_QPgemm", target_cpu = 
"x86-64"} {
  %cst = arith.constant 0.000000e+00 : f32
  %0 = builtin.unrealized_conversion_cast %arg1 : 
!fir.ref<!fir.array<200x100xf32>> to memref<f32, strided<[], offset: ?>>
  %1 = builtin.unrealized_conversion_cast %arg0 : 
!fir.ref<!fir.array<100x200xf32>> to memref<f32, strided<[], offset: ?>>
  %2 = builtin.unrealized_conversion_cast %arg2 : 
!fir.ref<!fir.array<100x100xf32>> to memref<f32, strided<[], offset: ?>>
  %base_buffer, %offset = memref.extract_strided_metadata %2 : memref<f32, 
strided<[], offset: ?>> -> memref<f32>, index
  %reinterpret_cast = memref.reinterpret_cast %2 to offset: [%offset], sizes: 
[10000], strides: [1] : memref<f32, strided<[], offset: ?>> to 
memref<10000xf32, strided<[1], offset: ?>>
  affine.for %arg3 = 0 to 10000 {
    affine.store %cst, %reinterpret_cast[%arg3] : memref<10000xf32, 
strided<[1], offset: ?>>
  }
  %reinterpret_cast_0 = memref.reinterpret_cast %2 to offset: [%offset], sizes: 
[100, 100], strides: [100, 1] : memref<f32, strided<[], offset: ?>> to 
memref<100x100xf32, strided<[100, 1], offset: ?>>
  %base_buffer_1, %offset_2 = memref.extract_strided_metadata %1 : memref<f32, 
strided<[], offset: ?>> -> memref<f32>, index
  %reinterpret_cast_3 = memref.reinterpret_cast %1 to offset: [%offset_2], 
sizes: [200, 100], strides: [100, 1] : memref<f32, strided<[], offset: ?>> to 
memref<200x100xf32, strided<[100, 1], offset: ?>>
  %base_buffer_4, %offset_5 = memref.extract_strided_metadata %0 : memref<f32, 
strided<[], offset: ?>> -> memref<f32>, index
  %reinterpret_cast_6 = memref.reinterpret_cast %0 to offset: [%offset_5], 
sizes: [100, 200], strides: [200, 1] : memref<f32, strided<[], offset: ?>> to 
memref<100x200xf32, strided<[200, 1], offset: ?>>
  affine.for %arg3 = 0 to 100 {
    affine.for %arg4 = 0 to 100 {
      affine.for %arg5 = 0 to 200 {
        %3 = affine.load %reinterpret_cast_0[%arg4, %arg3] : 
memref<100x100xf32, strided<[100, 1], offset: ?>>
        %4 = affine.load %reinterpret_cast_3[%arg5, %arg3] : 
memref<200x100xf32, strided<[100, 1], offset: ?>>
        %5 = affine.load %reinterpret_cast_6[%arg4, %arg5] : 
memref<100x200xf32, strided<[200, 1], offset: ?>>
        %6 = arith.mulf %4, %5 fastmath<contract> : f32
        %7 = arith.addf %3, %6 fastmath<contract> : f32
        affine.store %7, %reinterpret_cast_0[%arg4, %arg3] : 
memref<100x100xf32, strided<[100, 1], offset: ?>>
      }
    }
  }
  return
}
```

https://github.com/llvm/llvm-project/pull/168703
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to