[PATCH] D84820: [WebAssembly] Implement prototype v128.load{32,64}_zero instructions
This revision was landed with ongoing or failed builds. This revision was automatically updated to reflect the committed changes. Closed by commit rGcb327922101b: [WebAssembly] Implement prototype v128.load{32,64}_zero instructions (authored by tlively). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D84820/new/ https://reviews.llvm.org/D84820 Files: clang/include/clang/Basic/BuiltinsWebAssembly.def clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/builtins-wasm.c llvm/include/llvm/IR/IntrinsicsWebAssembly.td llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td llvm/test/CodeGen/WebAssembly/simd-load-zero-offset.ll llvm/test/MC/WebAssembly/simd-encodings.s Index: llvm/test/MC/WebAssembly/simd-encodings.s === --- llvm/test/MC/WebAssembly/simd-encodings.s +++ llvm/test/MC/WebAssembly/simd-encodings.s @@ -463,9 +463,6 @@ # CHECK: i32x4.sub # encoding: [0xfd,0xb1,0x01] i32x4.sub -# CHECK: i32x4.dot_i16x8_s # encoding: [0xfd,0xb4,0x01] -i32x4.dot_i16x8_s - # CHECK: i32x4.mul # encoding: [0xfd,0xb5,0x01] i32x4.mul @@ -481,6 +478,9 @@ # CHECK: i32x4.max_u # encoding: [0xfd,0xb9,0x01] i32x4.max_u +# CHECK: i32x4.dot_i16x8_s # encoding: [0xfd,0xba,0x01] +i32x4.dot_i16x8_s + # CHECK: i64x2.neg # encoding: [0xfd,0xc1,0x01] i64x2.neg @@ -610,10 +610,16 @@ # CHECK: f32x4.convert_i32x4_u # encoding: [0xfd,0xfb,0x01] f32x4.convert_i32x4_u -# CHECK: f32x4.qfma # encoding: [0xfd,0xfc,0x01] +# CHECK: v128.load32_zero 32 # encoding: [0xfd,0xfc,0x01,0x02,0x20] +v128.load32_zero 32 + +# CHECK: v128.load64_zero 32 # encoding: [0xfd,0xfd,0x01,0x03,0x20] +v128.load64_zero 32 + +# CHECK: f32x4.qfma # encoding: [0xfd,0xb4,0x01] f32x4.qfma -# CHECK: f32x4.qfms # encoding: [0xfd,0xfd,0x01] +# CHECK: f32x4.qfms # encoding: [0xfd,0xd4,0x01] f32x4.qfms # CHECK: f64x2.qfma # 
encoding: [0xfd,0xfe,0x01] Index: llvm/test/CodeGen/WebAssembly/simd-load-zero-offset.ll === --- /dev/null +++ llvm/test/CodeGen/WebAssembly/simd-load-zero-offset.ll @@ -0,0 +1,228 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s + +; Test SIMD v128.load{32,64}_zero instructions + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +declare <4 x i32> @llvm.wasm.load32.zero(i32*) +declare <2 x i64> @llvm.wasm.load64.zero(i64*) + +;=== +; v128.load32_zero +;=== + +define <4 x i32> @load_zero_i32_no_offset(i32* %p) { +; CHECK-LABEL: load_zero_i32_no_offset: +; CHECK: .functype load_zero_i32_no_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:v128.load32_zero 0 +; CHECK-NEXT:# fallthrough-return + %v = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %p) + ret <4 x i32> %v +} + +define <4 x i32> @load_zero_i32_with_folded_offset(i32* %p) { +; CHECK-LABEL: load_zero_i32_with_folded_offset: +; CHECK: .functype load_zero_i32_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:v128.load32_zero 24 +; CHECK-NEXT:# fallthrough-return + %q = ptrtoint i32* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s) + ret <4 x i32> %t +} + +define <4 x i32> @load_zero_i32_with_folded_gep_offset(i32* %p) { +; CHECK-LABEL: load_zero_i32_with_folded_gep_offset: +; CHECK: .functype load_zero_i32_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:v128.load32_zero 24 +; CHECK-NEXT:# fallthrough-return + %s = getelementptr inbounds i32, i32* %p, i32 6 + %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s) + ret <4 x i32> %t +} + +define <4 x i32> @load_zero_i32_with_unfolded_gep_negative_offset(i32* %p) { +; CHECK-LABEL: 
load_zero_i32_with_unfolded_gep_negative_offset: +; CHECK: .functype load_zero_i32_with_unfolded_gep_negative_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:i32.const -24 +; CHECK-NEXT:i32.add +; CHECK-NEXT:v128.load32_zero 0 +; CHECK-NEXT:# fallthrough-return + %s = getelementptr inbounds i32, i32* %p, i32 -6 + %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s) + ret <4 x i32> %t +} + +define <4 x i32>
[PATCH] D84820: [WebAssembly] Implement prototype v128.load{32,64}_zero instructions
tlively updated this revision to Diff 282680. tlively added a comment. - Renumber i32x4.dot_i16x8_s to match V8 as well Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D84820/new/ https://reviews.llvm.org/D84820 Files: clang/include/clang/Basic/BuiltinsWebAssembly.def clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/builtins-wasm.c llvm/include/llvm/IR/IntrinsicsWebAssembly.td llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td llvm/test/CodeGen/WebAssembly/simd-load-zero-offset.ll llvm/test/MC/WebAssembly/simd-encodings.s Index: llvm/test/MC/WebAssembly/simd-encodings.s === --- llvm/test/MC/WebAssembly/simd-encodings.s +++ llvm/test/MC/WebAssembly/simd-encodings.s @@ -463,9 +463,6 @@ # CHECK: i32x4.sub # encoding: [0xfd,0xb1,0x01] i32x4.sub -# CHECK: i32x4.dot_i16x8_s # encoding: [0xfd,0xb4,0x01] -i32x4.dot_i16x8_s - # CHECK: i32x4.mul # encoding: [0xfd,0xb5,0x01] i32x4.mul @@ -481,6 +478,9 @@ # CHECK: i32x4.max_u # encoding: [0xfd,0xb9,0x01] i32x4.max_u +# CHECK: i32x4.dot_i16x8_s # encoding: [0xfd,0xba,0x01] +i32x4.dot_i16x8_s + # CHECK: i64x2.neg # encoding: [0xfd,0xc1,0x01] i64x2.neg @@ -610,10 +610,16 @@ # CHECK: f32x4.convert_i32x4_u # encoding: [0xfd,0xfb,0x01] f32x4.convert_i32x4_u -# CHECK: f32x4.qfma # encoding: [0xfd,0xfc,0x01] +# CHECK: v128.load32_zero 32 # encoding: [0xfd,0xfc,0x01,0x02,0x20] +v128.load32_zero 32 + +# CHECK: v128.load64_zero 32 # encoding: [0xfd,0xfd,0x01,0x03,0x20] +v128.load64_zero 32 + +# CHECK: f32x4.qfma # encoding: [0xfd,0xb4,0x01] f32x4.qfma -# CHECK: f32x4.qfms # encoding: [0xfd,0xfd,0x01] +# CHECK: f32x4.qfms # encoding: [0xfd,0xd4,0x01] f32x4.qfms # CHECK: f64x2.qfma # encoding: [0xfd,0xfe,0x01] Index: llvm/test/CodeGen/WebAssembly/simd-load-zero-offset.ll === --- /dev/null +++ 
llvm/test/CodeGen/WebAssembly/simd-load-zero-offset.ll @@ -0,0 +1,228 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s + +; Test SIMD v128.load{32,64}_zero instructions + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +declare <4 x i32> @llvm.wasm.load32.zero(i32*) +declare <2 x i64> @llvm.wasm.load64.zero(i64*) + +;=== +; v128.load32_zero +;=== + +define <4 x i32> @load_zero_i32_no_offset(i32* %p) { +; CHECK-LABEL: load_zero_i32_no_offset: +; CHECK: .functype load_zero_i32_no_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:v128.load32_zero 0 +; CHECK-NEXT:# fallthrough-return + %v = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %p) + ret <4 x i32> %v +} + +define <4 x i32> @load_zero_i32_with_folded_offset(i32* %p) { +; CHECK-LABEL: load_zero_i32_with_folded_offset: +; CHECK: .functype load_zero_i32_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:v128.load32_zero 24 +; CHECK-NEXT:# fallthrough-return + %q = ptrtoint i32* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s) + ret <4 x i32> %t +} + +define <4 x i32> @load_zero_i32_with_folded_gep_offset(i32* %p) { +; CHECK-LABEL: load_zero_i32_with_folded_gep_offset: +; CHECK: .functype load_zero_i32_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:v128.load32_zero 24 +; CHECK-NEXT:# fallthrough-return + %s = getelementptr inbounds i32, i32* %p, i32 6 + %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s) + ret <4 x i32> %t +} + +define <4 x i32> @load_zero_i32_with_unfolded_gep_negative_offset(i32* %p) { +; CHECK-LABEL: load_zero_i32_with_unfolded_gep_negative_offset: +; CHECK: .functype load_zero_i32_with_unfolded_gep_negative_offset (i32) -> 
(v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:i32.const -24 +; CHECK-NEXT:i32.add +; CHECK-NEXT:v128.load32_zero 0 +; CHECK-NEXT:# fallthrough-return + %s = getelementptr inbounds i32, i32* %p, i32 -6 + %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s) + ret <4 x i32> %t +} + +define <4 x i32> @load_zero_i32_with_unfolded_offset(i32* %p) { +; CHECK-LABEL: load_zero_i32_with_unfolded_offset: +; CHECK: .functype load_zero_i32_with_unfolded_offset
[PATCH] D84820: [WebAssembly] Implement prototype v128.load{32,64}_zero instructions
tlively added inline comments. Comment at: llvm/include/llvm/IR/IntrinsicsWebAssembly.td:198 +[LLVMPointerType], +[IntrReadMem, IntrArgMemOnly, IntrSpeculatable], + "", [SDNPMemOperand]>; aheejin wrote: > Can memory accesses be speculatable? The below too Hmm, maybe not, and it's definitely more conservative for them not to be. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D84820/new/ https://reviews.llvm.org/D84820 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D84820: [WebAssembly] Implement prototype v128.load{32,64}_zero instructions
tlively updated this revision to Diff 282017. tlively added a comment. - Remove IntrSpeculatable Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D84820/new/ https://reviews.llvm.org/D84820 Files: clang/include/clang/Basic/BuiltinsWebAssembly.def clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/builtins-wasm.c llvm/include/llvm/IR/IntrinsicsWebAssembly.td llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td llvm/test/CodeGen/WebAssembly/simd-load-zero-offset.ll llvm/test/MC/WebAssembly/simd-encodings.s Index: llvm/test/MC/WebAssembly/simd-encodings.s === --- llvm/test/MC/WebAssembly/simd-encodings.s +++ llvm/test/MC/WebAssembly/simd-encodings.s @@ -610,10 +610,16 @@ # CHECK: f32x4.convert_i32x4_u # encoding: [0xfd,0xfb,0x01] f32x4.convert_i32x4_u -# CHECK: f32x4.qfma # encoding: [0xfd,0xfc,0x01] +# CHECK: v128.load32_zero 32 # encoding: [0xfd,0xfc,0x01,0x02,0x20] +v128.load32_zero 32 + +# CHECK: v128.load64_zero 32 # encoding: [0xfd,0xfd,0x01,0x03,0x20] +v128.load64_zero 32 + +# CHECK: f32x4.qfma # encoding: [0xfd,0xb4,0x01] f32x4.qfma -# CHECK: f32x4.qfms # encoding: [0xfd,0xfd,0x01] +# CHECK: f32x4.qfms # encoding: [0xfd,0xd4,0x01] f32x4.qfms # CHECK: f64x2.qfma # encoding: [0xfd,0xfe,0x01] Index: llvm/test/CodeGen/WebAssembly/simd-load-zero-offset.ll === --- /dev/null +++ llvm/test/CodeGen/WebAssembly/simd-load-zero-offset.ll @@ -0,0 +1,228 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s + +; Test SIMD v128.load{32,64}_zero instructions + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +declare <4 x i32> @llvm.wasm.load32.zero(i32*) +declare <2 x i64> @llvm.wasm.load64.zero(i64*) + +;=== +; 
v128.load32_zero +;=== + +define <4 x i32> @load_zero_i32_no_offset(i32* %p) { +; CHECK-LABEL: load_zero_i32_no_offset: +; CHECK: .functype load_zero_i32_no_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:v128.load32_zero 0 +; CHECK-NEXT:# fallthrough-return + %v = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %p) + ret <4 x i32> %v +} + +define <4 x i32> @load_zero_i32_with_folded_offset(i32* %p) { +; CHECK-LABEL: load_zero_i32_with_folded_offset: +; CHECK: .functype load_zero_i32_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:v128.load32_zero 24 +; CHECK-NEXT:# fallthrough-return + %q = ptrtoint i32* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s) + ret <4 x i32> %t +} + +define <4 x i32> @load_zero_i32_with_folded_gep_offset(i32* %p) { +; CHECK-LABEL: load_zero_i32_with_folded_gep_offset: +; CHECK: .functype load_zero_i32_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:v128.load32_zero 24 +; CHECK-NEXT:# fallthrough-return + %s = getelementptr inbounds i32, i32* %p, i32 6 + %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s) + ret <4 x i32> %t +} + +define <4 x i32> @load_zero_i32_with_unfolded_gep_negative_offset(i32* %p) { +; CHECK-LABEL: load_zero_i32_with_unfolded_gep_negative_offset: +; CHECK: .functype load_zero_i32_with_unfolded_gep_negative_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:i32.const -24 +; CHECK-NEXT:i32.add +; CHECK-NEXT:v128.load32_zero 0 +; CHECK-NEXT:# fallthrough-return + %s = getelementptr inbounds i32, i32* %p, i32 -6 + %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s) + ret <4 x i32> %t +} + +define <4 x i32> @load_zero_i32_with_unfolded_offset(i32* %p) { +; CHECK-LABEL: load_zero_i32_with_unfolded_offset: +; CHECK: .functype load_zero_i32_with_unfolded_offset 
(i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:i32.const 24 +; CHECK-NEXT:i32.add +; CHECK-NEXT:v128.load32_zero 0 +; CHECK-NEXT:# fallthrough-return + %q = ptrtoint i32* %p to i32 + %r = add nsw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s) + ret <4 x i32> %t +} + +define <4 x i32> @load_zero_i32_with_unfolded_gep_offset(i32* %p) { +; CHECK-LABEL: load_zero_i32_with_unfolded_gep_offset: +; CHECK:
[PATCH] D84820: [WebAssembly] Implement prototype v128.load{32,64}_zero instructions
aheejin accepted this revision. aheejin added inline comments. This revision is now accepted and ready to land. Comment at: llvm/include/llvm/IR/IntrinsicsWebAssembly.td:198 +[LLVMPointerType], +[IntrReadMem, IntrArgMemOnly, IntrSpeculatable], + "", [SDNPMemOperand]>; Can memory accesses be speculatable? The below too Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D84820/new/ https://reviews.llvm.org/D84820 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D84820: [WebAssembly] Implement prototype v128.load{32,64}_zero instructions
tlively added a comment. Since this changes opcodes, it needs to be landed in concert with the corresponding Binaryen change. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D84820/new/ https://reviews.llvm.org/D84820 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D84820: [WebAssembly] Implement prototype v128.load{32,64}_zero instructions
tlively created this revision. tlively added a reviewer: aheejin. Herald added subscribers: llvm-commits, cfe-commits, sunfish, hiraditya, jgravelle-google, sbc100, dschuff. Herald added projects: clang, LLVM. tlively requested review of this revision. Specified in https://github.com/WebAssembly/simd/pull/237, these instructions load the first vector lane from memory and zero the other lanes. Since these instructions are not officially part of the SIMD proposal, they are only available on an opt-in basis via LLVM intrinsics and clang builtin functions. If these instructions are merged to the proposal, this implementation will change so that the instructions will be generated from normal IR. At that point the intrinsics and builtin functions would be removed. This PR also changes the opcodes for the experimental f32x4.qfm{a,s} instructions because their opcodes conflicted with those of the v128.load{32,64}_zero instructions. The new opcodes were chosen to match those used in V8. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D84820 Files: clang/include/clang/Basic/BuiltinsWebAssembly.def clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/builtins-wasm.c llvm/include/llvm/IR/IntrinsicsWebAssembly.td llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td llvm/test/CodeGen/WebAssembly/simd-load-zero-offset.ll llvm/test/MC/WebAssembly/simd-encodings.s Index: llvm/test/MC/WebAssembly/simd-encodings.s === --- llvm/test/MC/WebAssembly/simd-encodings.s +++ llvm/test/MC/WebAssembly/simd-encodings.s @@ -610,10 +610,16 @@ # CHECK: f32x4.convert_i32x4_u # encoding: [0xfd,0xfb,0x01] f32x4.convert_i32x4_u -# CHECK: f32x4.qfma # encoding: [0xfd,0xfc,0x01] +# CHECK: v128.load32_zero 32 # encoding: [0xfd,0xfc,0x01,0x02,0x20] +v128.load32_zero 32 + +# CHECK: v128.load64_zero 32 # encoding: 
[0xfd,0xfd,0x01,0x03,0x20] +v128.load64_zero 32 + +# CHECK: f32x4.qfma # encoding: [0xfd,0xb4,0x01] f32x4.qfma -# CHECK: f32x4.qfms # encoding: [0xfd,0xfd,0x01] +# CHECK: f32x4.qfms # encoding: [0xfd,0xd4,0x01] f32x4.qfms # CHECK: f64x2.qfma # encoding: [0xfd,0xfe,0x01] Index: llvm/test/CodeGen/WebAssembly/simd-load-zero-offset.ll === --- /dev/null +++ llvm/test/CodeGen/WebAssembly/simd-load-zero-offset.ll @@ -0,0 +1,228 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s + +; Test SIMD v128.load{32,64}_zero instructions + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +declare <4 x i32> @llvm.wasm.load32.zero(i32*) +declare <2 x i64> @llvm.wasm.load64.zero(i64*) + +;=== +; v128.load32_zero +;=== + +define <4 x i32> @load_zero_i32_no_offset(i32* %p) { +; CHECK-LABEL: load_zero_i32_no_offset: +; CHECK: .functype load_zero_i32_no_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:v128.load32_zero 0 +; CHECK-NEXT:# fallthrough-return + %v = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %p) + ret <4 x i32> %v +} + +define <4 x i32> @load_zero_i32_with_folded_offset(i32* %p) { +; CHECK-LABEL: load_zero_i32_with_folded_offset: +; CHECK: .functype load_zero_i32_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:v128.load32_zero 24 +; CHECK-NEXT:# fallthrough-return + %q = ptrtoint i32* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s) + ret <4 x i32> %t +} + +define <4 x i32> @load_zero_i32_with_folded_gep_offset(i32* %p) { +; CHECK-LABEL: load_zero_i32_with_folded_gep_offset: +; CHECK: .functype load_zero_i32_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:v128.load32_zero 24 +; CHECK-NEXT:# 
fallthrough-return + %s = getelementptr inbounds i32, i32* %p, i32 6 + %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s) + ret <4 x i32> %t +} + +define <4 x i32> @load_zero_i32_with_unfolded_gep_negative_offset(i32* %p) { +; CHECK-LABEL: load_zero_i32_with_unfolded_gep_negative_offset: +; CHECK: .functype load_zero_i32_with_unfolded_gep_negative_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT:local.get 0 +; CHECK-NEXT:i32.const -24 +; CHECK-NEXT:i32.add +; CHECK-NEXT:v128.load32_zero 0 +; CHECK-NEXT:# fallthrough-return +