================
@@ -658,21 +658,21 @@ func.func @transpose_load(%idx1 : index, %idx2 : index, 
%mem : memref<128x32xf16
 
 // CHECK-LABEL: func @gather_to_lds
 func.func @gather_to_lds(%idx1 : index, %idx2 : index, %mem1 : memref<32xf16>, 
%mem2 : memref<32x32xf16>, %smem1 : memref<32xf16, 
#gpu.address_space<workgroup>>, %smem2 : memref<32x32xf16, 
#gpu.address_space<workgroup>>, %smem3 : memref<?x?xf16, strided<[?, 1]>, 
#gpu.address_space<workgroup>>) {
-  // CHECK: amdgpu.gather_to_lds %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}, 
%{{.*}}]
+  // CHECK: amdgpu.gather_to_lds async %{{.*}}[%{{.*}}, %{{.*}}], 
%{{.*}}[%{{.*}}, %{{.*}}]
   // CHECK: amdgpu.gather_to_lds %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}]
+  // CHECK: amdgpu.gather_to_lds async %{{.*}}[%{{.*}}],          
%{{.*}}[%{{.*}}, %{{.*}}]
   // CHECK: amdgpu.gather_to_lds %{{.*}}[%{{.*}}],          %{{.*}}[%{{.*}}, 
%{{.*}}]
-  // CHECK: amdgpu.gather_to_lds %{{.*}}[%{{.*}}],          %{{.*}}[%{{.*}}, 
%{{.*}}]
-  amdgpu.gather_to_lds %mem2[%idx1, %idx2], %smem2[%idx1, %idx2] : 
vector<2xf16>, memref<32x32xf16>, memref<32x32xf16, 
#gpu.address_space<workgroup>>
+  amdgpu.gather_to_lds async %mem2[%idx1, %idx2], %smem2[%idx1, %idx2] : 
vector<2xf16>, memref<32x32xf16>, memref<32x32xf16, 
#gpu.address_space<workgroup>>
   amdgpu.gather_to_lds %mem2[%idx1, %idx2], %smem1[%idx1]        : 
vector<2xf16>, memref<32x32xf16>, memref<32xf16,    
#gpu.address_space<workgroup>>
-  amdgpu.gather_to_lds %mem1[%idx1],        %smem2[%idx1, %idx2] : 
vector<2xf16>, memref<32xf16>,    memref<32x32xf16, 
#gpu.address_space<workgroup>>
+  amdgpu.gather_to_lds async %mem1[%idx1],        %smem2[%idx1, %idx2] : 
vector<2xf16>, memref<32xf16>,    memref<32x32xf16, 
#gpu.address_space<workgroup>>
   amdgpu.gather_to_lds %mem1[%idx1],        %smem3[%idx1, %idx2] : 
vector<2xf16>, memref<32xf16>,   memref<?x?xf16, strided<[?, 1]>, 
#gpu.address_space<workgroup>>
   func.return
 }
 
 // CHECK-LABEL: func @gather_to_lds_0d
 func.func @gather_to_lds_0d(%mem1 : memref<f16>, %smem1 : memref<f16, 
#gpu.address_space<workgroup>>) {
-  // CHECK: amdgpu.gather_to_lds %{{.*}}[], %{{.*}}[]
-  amdgpu.gather_to_lds %mem1[], %smem1[] : vector<2xf16>, memref<f16>, 
memref<f16, #gpu.address_space<workgroup>>
+  // CHECK: amdgpu.gather_to_lds async %{{.*}}[], %{{.*}}[]
+  amdgpu.gather_to_lds async %mem1[], %smem1[] : vector<2xf16>, memref<f16>, 
memref<f16, #gpu.address_space<workgroup>>
----------------
lialan wrote:

nit: the test above covers both the async and sync variants, but here we only have the async one.

https://github.com/llvm/llvm-project/pull/181082
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to