================
@@ -658,21 +658,21 @@ func.func @transpose_load(%idx1 : index, %idx2 : index,
%mem : memref<128x32xf16
// CHECK-LABEL: func @gather_to_lds
func.func @gather_to_lds(%idx1 : index, %idx2 : index, %mem1 : memref<32xf16>,
%mem2 : memref<32x32xf16>, %smem1 : memref<32xf16,
#gpu.address_space<workgroup>>, %smem2 : memref<32x32xf16,
#gpu.address_space<workgroup>>, %smem3 : memref<?x?xf16, strided<[?, 1]>,
#gpu.address_space<workgroup>>) {
- // CHECK: amdgpu.gather_to_lds %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}},
%{{.*}}]
+ // CHECK: amdgpu.gather_to_lds async %{{.*}}[%{{.*}}, %{{.*}}],
%{{.*}}[%{{.*}}, %{{.*}}]
// CHECK: amdgpu.gather_to_lds %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}]
+ // CHECK: amdgpu.gather_to_lds async %{{.*}}[%{{.*}}],
%{{.*}}[%{{.*}}, %{{.*}}]
// CHECK: amdgpu.gather_to_lds %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}},
%{{.*}}]
- // CHECK: amdgpu.gather_to_lds %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}},
%{{.*}}]
- amdgpu.gather_to_lds %mem2[%idx1, %idx2], %smem2[%idx1, %idx2] :
vector<2xf16>, memref<32x32xf16>, memref<32x32xf16,
#gpu.address_space<workgroup>>
+ amdgpu.gather_to_lds async %mem2[%idx1, %idx2], %smem2[%idx1, %idx2] :
vector<2xf16>, memref<32x32xf16>, memref<32x32xf16,
#gpu.address_space<workgroup>>
amdgpu.gather_to_lds %mem2[%idx1, %idx2], %smem1[%idx1] :
vector<2xf16>, memref<32x32xf16>, memref<32xf16,
#gpu.address_space<workgroup>>
- amdgpu.gather_to_lds %mem1[%idx1], %smem2[%idx1, %idx2] :
vector<2xf16>, memref<32xf16>, memref<32x32xf16,
#gpu.address_space<workgroup>>
+ amdgpu.gather_to_lds async %mem1[%idx1], %smem2[%idx1, %idx2] :
vector<2xf16>, memref<32xf16>, memref<32x32xf16,
#gpu.address_space<workgroup>>
amdgpu.gather_to_lds %mem1[%idx1], %smem3[%idx1, %idx2] :
vector<2xf16>, memref<32xf16>, memref<?x?xf16, strided<[?, 1]>,
#gpu.address_space<workgroup>>
func.return
}
// CHECK-LABEL: func @gather_to_lds_0d
func.func @gather_to_lds_0d(%mem1 : memref<f16>, %smem1 : memref<f16,
#gpu.address_space<workgroup>>) {
- // CHECK: amdgpu.gather_to_lds %{{.*}}[], %{{.*}}[]
- amdgpu.gather_to_lds %mem1[], %smem1[] : vector<2xf16>, memref<f16>,
memref<f16, #gpu.address_space<workgroup>>
+ // CHECK: amdgpu.gather_to_lds async %{{.*}}[], %{{.*}}[]
+ amdgpu.gather_to_lds async %mem1[], %smem1[] : vector<2xf16>, memref<f16>,
memref<f16, #gpu.address_space<workgroup>>
----------------
lialan wrote:
nit: the test above covers both the async and sync variants, but here we only have the async one.
https://github.com/llvm/llvm-project/pull/181082
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits