https://github.com/wizardengineer created https://github.com/llvm/llvm-project/pull/166709
None >From d55c2d69a0d48538f7376d9f06b8cbf0e2215e93 Mon Sep 17 00:00:00 2001 From: wizardengineer <[email protected]> Date: Wed, 5 Nov 2025 11:03:23 -0500 Subject: [PATCH] [ConstantTime][WebAssembly] Add comprehensive tests for ct.select --- .../ctselect-fallback-edge-cases.ll | 376 +++++++++ .../WebAssembly/ctselect-fallback-patterns.ll | 641 ++++++++++++++++ .../WebAssembly/ctselect-fallback-vector.ll | 714 ++++++++++++++++++ .../CodeGen/WebAssembly/ctselect-fallback.ll | 552 ++++++++++++++ .../WebAssembly/ctselect-side-effects.ll | 226 ++++++ 5 files changed, 2509 insertions(+) create mode 100644 llvm/test/CodeGen/WebAssembly/ctselect-fallback-edge-cases.ll create mode 100644 llvm/test/CodeGen/WebAssembly/ctselect-fallback-patterns.ll create mode 100644 llvm/test/CodeGen/WebAssembly/ctselect-fallback-vector.ll create mode 100644 llvm/test/CodeGen/WebAssembly/ctselect-fallback.ll create mode 100644 llvm/test/CodeGen/WebAssembly/ctselect-side-effects.ll diff --git a/llvm/test/CodeGen/WebAssembly/ctselect-fallback-edge-cases.ll b/llvm/test/CodeGen/WebAssembly/ctselect-fallback-edge-cases.ll new file mode 100644 index 0000000000000..b0f7f2807debd --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/ctselect-fallback-edge-cases.ll @@ -0,0 +1,376 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=wasm32-unknown-unknown -O3 -filetype=asm | FileCheck %s --check-prefix=W32 +; RUN: llc < %s -mtriple=wasm64-unknown-unknown -O3 -filetype=asm | FileCheck %s --check-prefix=W64 + +; Test with small integer types +define i1 @test_ctselect_i1(i1 %cond, i1 %a, i1 %b) { +; W32-LABEL: test_ctselect_i1: +; W32: .functype test_ctselect_i1 (i32, i32, i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: local.get 0 +; W32-NEXT: local.get 1 +; W32-NEXT: i32.and +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const 1 +; W32-NEXT: i32.xor +; W32-NEXT: local.get 2 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: # 
fallthrough-return +; +; W64-LABEL: test_ctselect_i1: +; W64: .functype test_ctselect_i1 (i32, i32, i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: local.get 0 +; W64-NEXT: local.get 1 +; W64-NEXT: i32.and +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const 1 +; W64-NEXT: i32.xor +; W64-NEXT: local.get 2 +; W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: # fallthrough-return + %result = call i1 @llvm.ct.select.i1(i1 %cond, i1 %a, i1 %b) + ret i1 %result +} + +; Test with extremal values +define i32 @test_ctselect_extremal_values(i1 %cond) { +; W32-LABEL: test_ctselect_extremal_values: +; W32: .functype test_ctselect_extremal_values (i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const 1 +; W32-NEXT: i32.and +; W32-NEXT: local.tee 0 +; W32-NEXT: i32.sub +; W32-NEXT: i32.const 2147483647 +; W32-NEXT: i32.and +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.add +; W32-NEXT: i32.const -2147483648 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_extremal_values: +; W64: .functype test_ctselect_extremal_values (i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const 1 +; W64-NEXT: i32.and +; W64-NEXT: local.tee 0 +; W64-NEXT: i32.sub +; W64-NEXT: i32.const 2147483647 +; W64-NEXT: i32.and +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.add +; W64-NEXT: i32.const -2147483648 +; W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: # fallthrough-return + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 2147483647, i32 -2147483648) + ret i32 %result +} + +; Test with null pointers +define ptr @test_ctselect_null_ptr(i1 %cond, ptr %ptr) { +; W32-LABEL: test_ctselect_null_ptr: +; W32: .functype test_ctselect_null_ptr (i32, i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const 1 +; W32-NEXT: i32.and +; W32-NEXT: i32.sub 
+; W32-NEXT: local.get 1 +; W32-NEXT: i32.and +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_null_ptr: +; W64: .functype test_ctselect_null_ptr (i32, i64) -> (i64) +; W64-NEXT: # %bb.0: +; W64-NEXT: i64.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: i64.extend_i32_u +; W64-NEXT: i64.const 1 +; W64-NEXT: i64.and +; W64-NEXT: i64.sub +; W64-NEXT: local.get 1 +; W64-NEXT: i64.and +; W64-NEXT: # fallthrough-return + %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %ptr, ptr null) + ret ptr %result +} + +; Test with function pointers +define ptr @test_ctselect_function_ptr(i1 %cond, ptr %func1, ptr %func2) { +; W32-LABEL: test_ctselect_function_ptr: +; W32: .functype test_ctselect_function_ptr (i32, i32, i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const 1 +; W32-NEXT: i32.and +; W32-NEXT: local.tee 0 +; W32-NEXT: i32.sub +; W32-NEXT: local.get 1 +; W32-NEXT: i32.and +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.add +; W32-NEXT: local.get 2 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_function_ptr: +; W64: .functype test_ctselect_function_ptr (i32, i64, i64) -> (i64) +; W64-NEXT: .local i64 +; W64-NEXT: # %bb.0: +; W64-NEXT: i64.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: i64.extend_i32_u +; W64-NEXT: i64.const 1 +; W64-NEXT: i64.and +; W64-NEXT: local.tee 3 +; W64-NEXT: i64.sub +; W64-NEXT: local.get 1 +; W64-NEXT: i64.and +; W64-NEXT: local.get 3 +; W64-NEXT: i64.const -1 +; W64-NEXT: i64.add +; W64-NEXT: local.get 2 +; W64-NEXT: i64.and +; W64-NEXT: i64.or +; W64-NEXT: # fallthrough-return + %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %func1, ptr %func2) + ret ptr %result +} + +; Test with condition from icmp on pointers +define ptr @test_ctselect_ptr_cmp(ptr %p1, ptr %p2, ptr %a, ptr %b) { +; W32-LABEL: test_ctselect_ptr_cmp: +; W32: .functype test_ctselect_ptr_cmp (i32, i32, i32, i32) -> (i32) +; 
W32-NEXT: # %bb.0: +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: local.get 1 +; W32-NEXT: i32.eq +; W32-NEXT: i32.select +; W32-NEXT: local.tee 1 +; W32-NEXT: local.get 2 +; W32-NEXT: i32.and +; W32-NEXT: local.get 1 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.xor +; W32-NEXT: local.get 3 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_ptr_cmp: +; W64: .functype test_ctselect_ptr_cmp (i64, i64, i64, i64) -> (i64) +; W64-NEXT: # %bb.0: +; W64-NEXT: i64.const -1 +; W64-NEXT: i64.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: local.get 1 +; W64-NEXT: i64.eq +; W64-NEXT: i64.select +; W64-NEXT: local.tee 1 +; W64-NEXT: local.get 2 +; W64-NEXT: i64.and +; W64-NEXT: local.get 1 +; W64-NEXT: i64.const -1 +; W64-NEXT: i64.xor +; W64-NEXT: local.get 3 +; W64-NEXT: i64.and +; W64-NEXT: i64.or +; W64-NEXT: # fallthrough-return + %cmp = icmp eq ptr %p1, %p2 + %result = call ptr @llvm.ct.select.p0(i1 %cmp, ptr %a, ptr %b) + ret ptr %result +} + +; Test with struct pointer types +%struct.pair = type { i32, i32 } + +define ptr @test_ctselect_struct_ptr(i1 %cond, ptr %a, ptr %b) { +; W32-LABEL: test_ctselect_struct_ptr: +; W32: .functype test_ctselect_struct_ptr (i32, i32, i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const 1 +; W32-NEXT: i32.and +; W32-NEXT: local.tee 0 +; W32-NEXT: i32.sub +; W32-NEXT: local.get 1 +; W32-NEXT: i32.and +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.add +; W32-NEXT: local.get 2 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_struct_ptr: +; W64: .functype test_ctselect_struct_ptr (i32, i64, i64) -> (i64) +; W64-NEXT: .local i64 +; W64-NEXT: # %bb.0: +; W64-NEXT: i64.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: i64.extend_i32_u +; W64-NEXT: i64.const 1 +; W64-NEXT: i64.and +; W64-NEXT: local.tee 3 +; W64-NEXT: i64.sub 
+; W64-NEXT: local.get 1 +; W64-NEXT: i64.and +; W64-NEXT: local.get 3 +; W64-NEXT: i64.const -1 +; W64-NEXT: i64.add +; W64-NEXT: local.get 2 +; W64-NEXT: i64.and +; W64-NEXT: i64.or +; W64-NEXT: # fallthrough-return + %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b) + ret ptr %result +} + +; Test with deeply nested conditions +define i32 @test_ctselect_deeply_nested(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) { +; W32-LABEL: test_ctselect_deeply_nested: +; W32: .functype test_ctselect_deeply_nested (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 3 +; W32-NEXT: i32.const 1 +; W32-NEXT: i32.and +; W32-NEXT: local.tee 3 +; W32-NEXT: i32.sub +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 2 +; W32-NEXT: i32.const 1 +; W32-NEXT: i32.and +; W32-NEXT: local.tee 2 +; W32-NEXT: i32.sub +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 1 +; W32-NEXT: i32.const 1 +; W32-NEXT: i32.and +; W32-NEXT: local.tee 1 +; W32-NEXT: i32.sub +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const 1 +; W32-NEXT: i32.and +; W32-NEXT: local.tee 0 +; W32-NEXT: i32.sub +; W32-NEXT: local.get 4 +; W32-NEXT: i32.and +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.add +; W32-NEXT: local.get 5 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: i32.and +; W32-NEXT: local.get 1 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.add +; W32-NEXT: local.get 6 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: i32.and +; W32-NEXT: local.get 2 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.add +; W32-NEXT: local.get 7 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: i32.and +; W32-NEXT: local.get 3 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.add +; W32-NEXT: local.get 8 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_deeply_nested: +; W64: .functype test_ctselect_deeply_nested (i32, i32, i32, 
i32, i32, i32, i32, i32, i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 3 +; W64-NEXT: i32.const 1 +; W64-NEXT: i32.and +; W64-NEXT: local.tee 3 +; W64-NEXT: i32.sub +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 2 +; W64-NEXT: i32.const 1 +; W64-NEXT: i32.and +; W64-NEXT: local.tee 2 +; W64-NEXT: i32.sub +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 1 +; W64-NEXT: i32.const 1 +; W64-NEXT: i32.and +; W64-NEXT: local.tee 1 +; W64-NEXT: i32.sub +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const 1 +; W64-NEXT: i32.and +; W64-NEXT: local.tee 0 +; W64-NEXT: i32.sub +; W64-NEXT: local.get 4 +; W64-NEXT: i32.and +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.add +; W64-NEXT: local.get 5 +; W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: i32.and +; W64-NEXT: local.get 1 +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.add +; W64-NEXT: local.get 6 +; W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: i32.and +; W64-NEXT: local.get 2 +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.add +; W64-NEXT: local.get 7 +; W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: i32.and +; W64-NEXT: local.get 3 +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.add +; W64-NEXT: local.get 8 +; W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: # fallthrough-return + %sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b) + %sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c) + %sel3 = call i32 @llvm.ct.select.i32(i1 %c3, i32 %sel2, i32 %d) + %sel4 = call i32 @llvm.ct.select.i32(i1 %c4, i32 %sel3, i32 %e) + ret i32 %sel4 +} + +; Declare the intrinsics +declare i1 @llvm.ct.select.i1(i1, i1, i1) +declare i32 @llvm.ct.select.i32(i1, i32, i32) +declare ptr @llvm.ct.select.p0(i1, ptr, ptr) diff --git a/llvm/test/CodeGen/WebAssembly/ctselect-fallback-patterns.ll b/llvm/test/CodeGen/WebAssembly/ctselect-fallback-patterns.ll new file mode 100644 index 0000000000000..040ee44addb69 --- /dev/null +++ 
b/llvm/test/CodeGen/WebAssembly/ctselect-fallback-patterns.ll @@ -0,0 +1,641 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=wasm32-unknown-unknown -O3 -filetype=asm | FileCheck %s --check-prefix=W32 +; RUN: llc < %s -mtriple=wasm64-unknown-unknown -O3 -filetype=asm | FileCheck %s --check-prefix=W64 + +; Test smin(x, 0) pattern +define i32 @test_ctselect_smin_zero(i32 %x) { +; W32-LABEL: test_ctselect_smin_zero: +; W32: .functype test_ctselect_smin_zero (i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const 31 +; W32-NEXT: i32.shr_s +; W32-NEXT: local.get 0 +; W32-NEXT: i32.and +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_smin_zero: +; W64: .functype test_ctselect_smin_zero (i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const 31 +; W64-NEXT: i32.shr_s +; W64-NEXT: local.get 0 +; W64-NEXT: i32.and +; W64-NEXT: # fallthrough-return + %cmp = icmp slt i32 %x, 0 + %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0) + ret i32 %result +} + +; Test smax(x, 0) pattern +define i32 @test_ctselect_smax_zero(i32 %x) { +; W32-LABEL: test_ctselect_smax_zero: +; W32: .functype test_ctselect_smax_zero (i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const 0 +; W32-NEXT: i32.gt_s +; W32-NEXT: i32.select +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_smax_zero: +; W64: .functype test_ctselect_smax_zero (i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const 0 +; W64-NEXT: i32.gt_s +; W64-NEXT: i32.select +; W64-NEXT: # fallthrough-return + %cmp = icmp sgt i32 %x, 0 + %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0) + ret i32 %result +} + +; Test generic smin pattern +define i32 @test_ctselect_smin_generic(i32 %x, i32 %y) { 
+; W32-LABEL: test_ctselect_smin_generic: +; W32: .functype test_ctselect_smin_generic (i32, i32) -> (i32) +; W32-NEXT: .local i32 +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: local.get 1 +; W32-NEXT: i32.lt_s +; W32-NEXT: i32.select +; W32-NEXT: local.tee 2 +; W32-NEXT: local.get 0 +; W32-NEXT: i32.and +; W32-NEXT: local.get 2 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.xor +; W32-NEXT: local.get 1 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_smin_generic: +; W64: .functype test_ctselect_smin_generic (i32, i32) -> (i32) +; W64-NEXT: .local i32 +; W64-NEXT: # %bb.0: +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: local.get 1 +; W64-NEXT: i32.lt_s +; W64-NEXT: i32.select +; W64-NEXT: local.tee 2 +; W64-NEXT: local.get 0 +; W64-NEXT: i32.and +; W64-NEXT: local.get 2 +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.xor +; W64-NEXT: local.get 1 +; W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: # fallthrough-return + %cmp = icmp slt i32 %x, %y + %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y) + ret i32 %result +} + +; Test generic smax pattern +define i32 @test_ctselect_smax_generic(i32 %x, i32 %y) { +; W32-LABEL: test_ctselect_smax_generic: +; W32: .functype test_ctselect_smax_generic (i32, i32) -> (i32) +; W32-NEXT: .local i32 +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: local.get 1 +; W32-NEXT: i32.gt_s +; W32-NEXT: i32.select +; W32-NEXT: local.tee 2 +; W32-NEXT: local.get 0 +; W32-NEXT: i32.and +; W32-NEXT: local.get 2 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.xor +; W32-NEXT: local.get 1 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_smax_generic: +; W64: .functype test_ctselect_smax_generic (i32, i32) -> (i32) +; W64-NEXT: .local i32 +; W64-NEXT: # %bb.0: +; 
W64-NEXT: i32.const -1 +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: local.get 1 +; W64-NEXT: i32.gt_s +; W64-NEXT: i32.select +; W64-NEXT: local.tee 2 +; W64-NEXT: local.get 0 +; W64-NEXT: i32.and +; W64-NEXT: local.get 2 +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.xor +; W64-NEXT: local.get 1 +; W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: # fallthrough-return + %cmp = icmp sgt i32 %x, %y + %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y) + ret i32 %result +} + +; Test umin pattern +define i32 @test_ctselect_umin_generic(i32 %x, i32 %y) { +; W32-LABEL: test_ctselect_umin_generic: +; W32: .functype test_ctselect_umin_generic (i32, i32) -> (i32) +; W32-NEXT: .local i32 +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: local.get 1 +; W32-NEXT: i32.lt_u +; W32-NEXT: i32.select +; W32-NEXT: local.tee 2 +; W32-NEXT: local.get 0 +; W32-NEXT: i32.and +; W32-NEXT: local.get 2 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.xor +; W32-NEXT: local.get 1 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_umin_generic: +; W64: .functype test_ctselect_umin_generic (i32, i32) -> (i32) +; W64-NEXT: .local i32 +; W64-NEXT: # %bb.0: +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: local.get 1 +; W64-NEXT: i32.lt_u +; W64-NEXT: i32.select +; W64-NEXT: local.tee 2 +; W64-NEXT: local.get 0 +; W64-NEXT: i32.and +; W64-NEXT: local.get 2 +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.xor +; W64-NEXT: local.get 1 +; W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: # fallthrough-return + %cmp = icmp ult i32 %x, %y + %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y) + ret i32 %result +} + +; Test umax pattern +define i32 @test_ctselect_umax_generic(i32 %x, i32 %y) { +; W32-LABEL: test_ctselect_umax_generic: +; W32: .functype test_ctselect_umax_generic (i32, i32) -> (i32) +; W32-NEXT: .local 
i32 +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: local.get 1 +; W32-NEXT: i32.gt_u +; W32-NEXT: i32.select +; W32-NEXT: local.tee 2 +; W32-NEXT: local.get 0 +; W32-NEXT: i32.and +; W32-NEXT: local.get 2 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.xor +; W32-NEXT: local.get 1 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_umax_generic: +; W64: .functype test_ctselect_umax_generic (i32, i32) -> (i32) +; W64-NEXT: .local i32 +; W64-NEXT: # %bb.0: +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: local.get 1 +; W64-NEXT: i32.gt_u +; W64-NEXT: i32.select +; W64-NEXT: local.tee 2 +; W64-NEXT: local.get 0 +; W64-NEXT: i32.and +; W64-NEXT: local.get 2 +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.xor +; W64-NEXT: local.get 1 +; W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: # fallthrough-return + %cmp = icmp ugt i32 %x, %y + %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y) + ret i32 %result +} + +; Test abs pattern +define i32 @test_ctselect_abs(i32 %x) { +; W32-LABEL: test_ctselect_abs: +; W32: .functype test_ctselect_abs (i32) -> (i32) +; W32-NEXT: .local i32 +; W32-NEXT: # %bb.0: +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const 31 +; W32-NEXT: i32.shr_s +; W32-NEXT: local.tee 1 +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: i32.sub +; W32-NEXT: i32.and +; W32-NEXT: local.get 1 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.xor +; W32-NEXT: local.get 0 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_abs: +; W64: .functype test_ctselect_abs (i32) -> (i32) +; W64-NEXT: .local i32 +; W64-NEXT: # %bb.0: +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const 31 +; W64-NEXT: i32.shr_s +; W64-NEXT: local.tee 1 +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: i32.sub +; W64-NEXT: i32.and +; W64-NEXT: local.get 1 +; W64-NEXT: 
i32.const -1 +; W64-NEXT: i32.xor +; W64-NEXT: local.get 0 +; W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: # fallthrough-return + %neg = sub i32 0, %x + %cmp = icmp slt i32 %x, 0 + %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %neg, i32 %x) + ret i32 %result +} + +; Test nabs pattern (negative abs) +define i32 @test_ctselect_nabs(i32 %x) { +; W32-LABEL: test_ctselect_nabs: +; W32: .functype test_ctselect_nabs (i32) -> (i32) +; W32-NEXT: .local i32 +; W32-NEXT: # %bb.0: +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const 31 +; W32-NEXT: i32.shr_s +; W32-NEXT: local.tee 1 +; W32-NEXT: local.get 0 +; W32-NEXT: i32.and +; W32-NEXT: local.get 1 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.xor +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: i32.sub +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_nabs: +; W64: .functype test_ctselect_nabs (i32) -> (i32) +; W64-NEXT: .local i32 +; W64-NEXT: # %bb.0: +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const 31 +; W64-NEXT: i32.shr_s +; W64-NEXT: local.tee 1 +; W64-NEXT: local.get 0 +; W64-NEXT: i32.and +; W64-NEXT: local.get 1 +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.xor +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: i32.sub +; W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: # fallthrough-return + %neg = sub i32 0, %x + %cmp = icmp slt i32 %x, 0 + %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %neg) + ret i32 %result +} + +; Test sign extension pattern +define i32 @test_ctselect_sign_extend(i32 %x) { +; W32-LABEL: test_ctselect_sign_extend: +; W32: .functype test_ctselect_sign_extend (i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const 31 +; W32-NEXT: i32.shr_s +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_sign_extend: +; W64: .functype test_ctselect_sign_extend (i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const 31 +; W64-NEXT: i32.shr_s 
+; W64-NEXT: # fallthrough-return + %cmp = icmp slt i32 %x, 0 + %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 -1, i32 0) + ret i32 %result +} + +; Test zero extension pattern +define i32 @test_ctselect_zero_extend(i32 %x) { +; W32-LABEL: test_ctselect_zero_extend: +; W32: .functype test_ctselect_zero_extend (i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const 0 +; W32-NEXT: i32.ne +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_zero_extend: +; W64: .functype test_ctselect_zero_extend (i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const 0 +; W64-NEXT: i32.ne +; W64-NEXT: # fallthrough-return + %cmp = icmp ne i32 %x, 0 + %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 1, i32 0) + ret i32 %result +} + +; Test constant folding with known condition +define i32 @test_ctselect_constant_folding_true(i32 %a, i32 %b) { +; W32-LABEL: test_ctselect_constant_folding_true: +; W32: .functype test_ctselect_constant_folding_true (i32, i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: local.get 0 +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_constant_folding_true: +; W64: .functype test_ctselect_constant_folding_true (i32, i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: local.get 0 +; W64-NEXT: # fallthrough-return + %result = call i32 @llvm.ct.select.i32(i1 true, i32 %a, i32 %b) + ret i32 %result +} + +define i32 @test_ctselect_constant_folding_false(i32 %a, i32 %b) { +; W32-LABEL: test_ctselect_constant_folding_false: +; W32: .functype test_ctselect_constant_folding_false (i32, i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 1 +; W32-NEXT: i32.or +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_constant_folding_false: +; W64: .functype test_ctselect_constant_folding_false (i32, i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 1 +; W64-NEXT: i32.or +; W64-NEXT: # fallthrough-return + 
%result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b) + ret i32 %result +} + +; Test with identical operands +define i32 @test_ctselect_identical_operands(i1 %cond, i32 %x) { +; W32-LABEL: test_ctselect_identical_operands: +; W32: .functype test_ctselect_identical_operands (i32, i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const 1 +; W32-NEXT: i32.and +; W32-NEXT: local.tee 0 +; W32-NEXT: i32.sub +; W32-NEXT: local.get 1 +; W32-NEXT: i32.and +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.add +; W32-NEXT: local.get 1 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_identical_operands: +; W64: .functype test_ctselect_identical_operands (i32, i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const 1 +; W64-NEXT: i32.and +; W64-NEXT: local.tee 0 +; W64-NEXT: i32.sub +; W64-NEXT: local.get 1 +; W64-NEXT: i32.and +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.add +; W64-NEXT: local.get 1 +; W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: # fallthrough-return + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %x, i32 %x) + ret i32 %result +} + +; Test with inverted condition +define i32 @test_ctselect_inverted_condition(i32 %x, i32 %y, i32 %a, i32 %b) { +; W32-LABEL: test_ctselect_inverted_condition: +; W32: .functype test_ctselect_inverted_condition (i32, i32, i32, i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: local.get 1 +; W32-NEXT: i32.ne +; W32-NEXT: i32.select +; W32-NEXT: local.tee 1 +; W32-NEXT: local.get 2 +; W32-NEXT: i32.and +; W32-NEXT: local.get 1 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.xor +; W32-NEXT: local.get 3 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_inverted_condition: +; W64: 
.functype test_ctselect_inverted_condition (i32, i32, i32, i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: local.get 1 +; W64-NEXT: i32.ne +; W64-NEXT: i32.select +; W64-NEXT: local.tee 1 +; W64-NEXT: local.get 2 +; W64-NEXT: i32.and +; W64-NEXT: local.get 1 +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.xor +; W64-NEXT: local.get 3 +; W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: # fallthrough-return + %cmp = icmp eq i32 %x, %y + %not_cmp = xor i1 %cmp, true + %result = call i32 @llvm.ct.select.i32(i1 %not_cmp, i32 %a, i32 %b) + ret i32 %result +} + +; Test chain of ct.select operations +define i32 @test_ctselect_chain(i1 %c1, i1 %c2, i1 %c3, i32 %a, i32 %b, i32 %c, i32 %d) { +; W32-LABEL: test_ctselect_chain: +; W32: .functype test_ctselect_chain (i32, i32, i32, i32, i32, i32, i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 2 +; W32-NEXT: i32.const 1 +; W32-NEXT: i32.and +; W32-NEXT: local.tee 2 +; W32-NEXT: i32.sub +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 1 +; W32-NEXT: i32.const 1 +; W32-NEXT: i32.and +; W32-NEXT: local.tee 1 +; W32-NEXT: i32.sub +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const 1 +; W32-NEXT: i32.and +; W32-NEXT: local.tee 0 +; W32-NEXT: i32.sub +; W32-NEXT: local.get 3 +; W32-NEXT: i32.and +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.add +; W32-NEXT: local.get 4 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: i32.and +; W32-NEXT: local.get 1 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.add +; W32-NEXT: local.get 5 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: i32.and +; W32-NEXT: local.get 2 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.add +; W32-NEXT: local.get 6 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_chain: +; W64: .functype test_ctselect_chain (i32, i32, i32, i32, i32, i32, i32) -> (i32) +; W64-NEXT: 
# %bb.0: +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 2 +; W64-NEXT: i32.const 1 +; W64-NEXT: i32.and +; W64-NEXT: local.tee 2 +; W64-NEXT: i32.sub +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 1 +; W64-NEXT: i32.const 1 +; W64-NEXT: i32.and +; W64-NEXT: local.tee 1 +; W64-NEXT: i32.sub +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const 1 +; W64-NEXT: i32.and +; W64-NEXT: local.tee 0 +; W64-NEXT: i32.sub +; W64-NEXT: local.get 3 +; W64-NEXT: i32.and +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.add +; W64-NEXT: local.get 4 +; W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: i32.and +; W64-NEXT: local.get 1 +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.add +; W64-NEXT: local.get 5 +; W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: i32.and +; W64-NEXT: local.get 2 +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.add +; W64-NEXT: local.get 6 +; W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: # fallthrough-return + %sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b) + %sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c) + %sel3 = call i32 @llvm.ct.select.i32(i1 %c3, i32 %sel2, i32 %d) + ret i32 %sel3 +} + +; Test for 64-bit operations (i64 is a native value type on both wasm32 and wasm64) +define i64 @test_ctselect_i64_smin_zero(i64 %x) { +; W32-LABEL: test_ctselect_i64_smin_zero: +; W32: .functype test_ctselect_i64_smin_zero (i64) -> (i64) +; W32-NEXT: # %bb.0: +; W32-NEXT: local.get 0 +; W32-NEXT: i64.const 63 +; W32-NEXT: i64.shr_s +; W32-NEXT: local.get 0 +; W32-NEXT: i64.and +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_i64_smin_zero: +; W64: .functype test_ctselect_i64_smin_zero (i64) -> (i64) +; W64-NEXT: # %bb.0: +; W64-NEXT: local.get 0 +; W64-NEXT: i64.const 63 +; W64-NEXT: i64.shr_s +; W64-NEXT: local.get 0 +; W64-NEXT: i64.and +; W64-NEXT: # fallthrough-return + %cmp = icmp slt i64 %x, 0 + %result = call i64 @llvm.ct.select.i64(i1 %cmp, i64 %x, i64 0) + ret i64 %result +} + +; Declare 
the intrinsics +declare i32 @llvm.ct.select.i32(i1, i32, i32) +declare i64 @llvm.ct.select.i64(i1, i64, i64) diff --git a/llvm/test/CodeGen/WebAssembly/ctselect-fallback-vector.ll b/llvm/test/CodeGen/WebAssembly/ctselect-fallback-vector.ll new file mode 100644 index 0000000000000..75e38e1856a03 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/ctselect-fallback-vector.ll @@ -0,0 +1,714 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=wasm32-unknown-unknown -O3 -mattr=+simd128 | FileCheck %s --check-prefix=WASM32 +; RUN: llc < %s -mtriple=wasm64-unknown-unknown -O3 -mattr=+simd128 | FileCheck %s --check-prefix=WASM64 + +; Test 32-bit integer vector (4 x i32 = 128-bit) +define <4 x i32> @test_ctselect_v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) { +; WASM32-LABEL: test_ctselect_v4i32: +; WASM32: .functype test_ctselect_v4i32 (i32, v128, v128) -> (v128) +; WASM32-NEXT: .local v128 +; WASM32-NEXT: # %bb.0: +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i32x4.splat +; WASM32-NEXT: i32.const 31 +; WASM32-NEXT: i32x4.shl +; WASM32-NEXT: i32.const 31 +; WASM32-NEXT: i32x4.shr_s +; WASM32-NEXT: local.tee 3 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: v128.and +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: local.get 3 +; WASM32-NEXT: v128.andnot +; WASM32-NEXT: v128.or +; WASM32-NEXT: # fallthrough-return +; +; WASM64-LABEL: test_ctselect_v4i32: +; WASM64: .functype test_ctselect_v4i32 (i32, v128, v128) -> (v128) +; WASM64-NEXT: .local v128 +; WASM64-NEXT: # %bb.0: +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i32x4.splat +; WASM64-NEXT: i32.const 31 +; WASM64-NEXT: i32x4.shl +; WASM64-NEXT: i32.const 31 +; WASM64-NEXT: i32x4.shr_s +; WASM64-NEXT: local.tee 3 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: v128.and +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: local.get 3 +; WASM64-NEXT: v128.andnot +; WASM64-NEXT: v128.or +; WASM64-NEXT: # fallthrough-return + %result = call <4 x i32> @llvm.ct.select.v4i32(i1 
%cond, <4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %result +} + +; Test 16-bit integer vector (8 x i16 = 128-bit) +define <8 x i16> @test_ctselect_v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b) { +; WASM32-LABEL: test_ctselect_v8i16: +; WASM32: .functype test_ctselect_v8i16 (i32, v128, v128) -> (v128) +; WASM32-NEXT: .local v128 +; WASM32-NEXT: # %bb.0: +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i16x8.splat +; WASM32-NEXT: i32.const 15 +; WASM32-NEXT: i16x8.shl +; WASM32-NEXT: i32.const 15 +; WASM32-NEXT: i16x8.shr_s +; WASM32-NEXT: local.tee 3 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: v128.and +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: local.get 3 +; WASM32-NEXT: v128.andnot +; WASM32-NEXT: v128.or +; WASM32-NEXT: # fallthrough-return +; +; WASM64-LABEL: test_ctselect_v8i16: +; WASM64: .functype test_ctselect_v8i16 (i32, v128, v128) -> (v128) +; WASM64-NEXT: .local v128 +; WASM64-NEXT: # %bb.0: +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i16x8.splat +; WASM64-NEXT: i32.const 15 +; WASM64-NEXT: i16x8.shl +; WASM64-NEXT: i32.const 15 +; WASM64-NEXT: i16x8.shr_s +; WASM64-NEXT: local.tee 3 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: v128.and +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: local.get 3 +; WASM64-NEXT: v128.andnot +; WASM64-NEXT: v128.or +; WASM64-NEXT: # fallthrough-return + %result = call <8 x i16> @llvm.ct.select.v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b) + ret <8 x i16> %result +} + +; Test byte vector (16 x i8 = 128-bit) +define <16 x i8> @test_ctselect_v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b) { +; WASM32-LABEL: test_ctselect_v16i8: +; WASM32: .functype test_ctselect_v16i8 (i32, v128, v128) -> (v128) +; WASM32-NEXT: .local v128 +; WASM32-NEXT: # %bb.0: +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i8x16.splat +; WASM32-NEXT: i32.const 7 +; WASM32-NEXT: i8x16.shl +; WASM32-NEXT: i32.const 7 +; WASM32-NEXT: i8x16.shr_s +; WASM32-NEXT: local.tee 3 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: v128.and +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: local.get 
3 +; WASM32-NEXT: v128.andnot +; WASM32-NEXT: v128.or +; WASM32-NEXT: # fallthrough-return +; +; WASM64-LABEL: test_ctselect_v16i8: +; WASM64: .functype test_ctselect_v16i8 (i32, v128, v128) -> (v128) +; WASM64-NEXT: .local v128 +; WASM64-NEXT: # %bb.0: +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i8x16.splat +; WASM64-NEXT: i32.const 7 +; WASM64-NEXT: i8x16.shl +; WASM64-NEXT: i32.const 7 +; WASM64-NEXT: i8x16.shr_s +; WASM64-NEXT: local.tee 3 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: v128.and +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: local.get 3 +; WASM64-NEXT: v128.andnot +; WASM64-NEXT: v128.or +; WASM64-NEXT: # fallthrough-return + %result = call <16 x i8> @llvm.ct.select.v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %result +} + +; Test 64-bit integer vector (2 x i64 = 128-bit) +define <2 x i64> @test_ctselect_v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b) { +; WASM32-LABEL: test_ctselect_v2i64: +; WASM32: .functype test_ctselect_v2i64 (i32, v128, v128) -> (v128) +; WASM32-NEXT: .local v128 +; WASM32-NEXT: # %bb.0: +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i32x4.splat +; WASM32-NEXT: i32.const 63 +; WASM32-NEXT: i64x2.shl +; WASM32-NEXT: i32.const 63 +; WASM32-NEXT: i64x2.shr_s +; WASM32-NEXT: local.tee 3 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: v128.and +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: local.get 3 +; WASM32-NEXT: v128.andnot +; WASM32-NEXT: v128.or +; WASM32-NEXT: # fallthrough-return +; +; WASM64-LABEL: test_ctselect_v2i64: +; WASM64: .functype test_ctselect_v2i64 (i32, v128, v128) -> (v128) +; WASM64-NEXT: .local v128 +; WASM64-NEXT: # %bb.0: +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i32x4.splat +; WASM64-NEXT: i32.const 63 +; WASM64-NEXT: i64x2.shl +; WASM64-NEXT: i32.const 63 +; WASM64-NEXT: i64x2.shr_s +; WASM64-NEXT: local.tee 3 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: v128.and +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: local.get 3 +; WASM64-NEXT: v128.andnot +; WASM64-NEXT: v128.or +; WASM64-NEXT: # 
fallthrough-return + %result = call <2 x i64> @llvm.ct.select.v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %result +} + +; Test single-precision float vector (4 x float = 128-bit) +define <4 x float> @test_ctselect_v4f32(i1 %cond, <4 x float> %a, <4 x float> %b) { +; WASM32-LABEL: test_ctselect_v4f32: +; WASM32: .functype test_ctselect_v4f32 (i32, v128, v128) -> (v128) +; WASM32-NEXT: .local v128 +; WASM32-NEXT: # %bb.0: +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i32x4.splat +; WASM32-NEXT: i32.const 31 +; WASM32-NEXT: i32x4.shl +; WASM32-NEXT: i32.const 31 +; WASM32-NEXT: i32x4.shr_s +; WASM32-NEXT: local.tee 3 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: v128.and +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: local.get 3 +; WASM32-NEXT: v128.andnot +; WASM32-NEXT: v128.or +; WASM32-NEXT: # fallthrough-return +; +; WASM64-LABEL: test_ctselect_v4f32: +; WASM64: .functype test_ctselect_v4f32 (i32, v128, v128) -> (v128) +; WASM64-NEXT: .local v128 +; WASM64-NEXT: # %bb.0: +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i32x4.splat +; WASM64-NEXT: i32.const 31 +; WASM64-NEXT: i32x4.shl +; WASM64-NEXT: i32.const 31 +; WASM64-NEXT: i32x4.shr_s +; WASM64-NEXT: local.tee 3 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: v128.and +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: local.get 3 +; WASM64-NEXT: v128.andnot +; WASM64-NEXT: v128.or +; WASM64-NEXT: # fallthrough-return + %result = call <4 x float> @llvm.ct.select.v4f32(i1 %cond, <4 x float> %a, <4 x float> %b) + ret <4 x float> %result +} + +; Test double-precision float vector (2 x double = 128-bit) +define <2 x double> @test_ctselect_v2f64(i1 %cond, <2 x double> %a, <2 x double> %b) { +; WASM32-LABEL: test_ctselect_v2f64: +; WASM32: .functype test_ctselect_v2f64 (i32, v128, v128) -> (v128) +; WASM32-NEXT: .local v128 +; WASM32-NEXT: # %bb.0: +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i32x4.splat +; WASM32-NEXT: i32.const 63 +; WASM32-NEXT: i64x2.shl +; WASM32-NEXT: i32.const 63 +; WASM32-NEXT: i64x2.shr_s +; 
WASM32-NEXT: local.tee 3 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: v128.and +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: local.get 3 +; WASM32-NEXT: v128.andnot +; WASM32-NEXT: v128.or +; WASM32-NEXT: # fallthrough-return +; +; WASM64-LABEL: test_ctselect_v2f64: +; WASM64: .functype test_ctselect_v2f64 (i32, v128, v128) -> (v128) +; WASM64-NEXT: .local v128 +; WASM64-NEXT: # %bb.0: +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i32x4.splat +; WASM64-NEXT: i32.const 63 +; WASM64-NEXT: i64x2.shl +; WASM64-NEXT: i32.const 63 +; WASM64-NEXT: i64x2.shr_s +; WASM64-NEXT: local.tee 3 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: v128.and +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: local.get 3 +; WASM64-NEXT: v128.andnot +; WASM64-NEXT: v128.or +; WASM64-NEXT: # fallthrough-return + %result = call <2 x double> @llvm.ct.select.v2f64(i1 %cond, <2 x double> %a, <2 x double> %b) + ret <2 x double> %result +} + +; Test with aligned loads (common case) +define <4 x i32> @test_ctselect_v4i32_aligned_load(i1 %cond, ptr %p1, ptr %p2) { +; WASM32-LABEL: test_ctselect_v4i32_aligned_load: +; WASM32: .functype test_ctselect_v4i32_aligned_load (i32, i32, i32) -> (v128) +; WASM32-NEXT: .local v128 +; WASM32-NEXT: # %bb.0: +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i32x4.splat +; WASM32-NEXT: i32.const 31 +; WASM32-NEXT: i32x4.shl +; WASM32-NEXT: i32.const 31 +; WASM32-NEXT: i32x4.shr_s +; WASM32-NEXT: local.tee 3 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: v128.load 0 +; WASM32-NEXT: v128.and +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: v128.load 0 +; WASM32-NEXT: local.get 3 +; WASM32-NEXT: v128.andnot +; WASM32-NEXT: v128.or +; WASM32-NEXT: # fallthrough-return +; +; WASM64-LABEL: test_ctselect_v4i32_aligned_load: +; WASM64: .functype test_ctselect_v4i32_aligned_load (i32, i64, i64) -> (v128) +; WASM64-NEXT: .local v128 +; WASM64-NEXT: # %bb.0: +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i32x4.splat +; WASM64-NEXT: i32.const 31 +; WASM64-NEXT: i32x4.shl +; WASM64-NEXT: i32.const 31 +; 
WASM64-NEXT: i32x4.shr_s +; WASM64-NEXT: local.tee 3 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: v128.load 0 +; WASM64-NEXT: v128.and +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: v128.load 0 +; WASM64-NEXT: local.get 3 +; WASM64-NEXT: v128.andnot +; WASM64-NEXT: v128.or +; WASM64-NEXT: # fallthrough-return + %a = load <4 x i32>, ptr %p1, align 16 + %b = load <4 x i32>, ptr %p2, align 16 + %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %result +} + +; Test with unaligned loads (stress test) +define <4 x i32> @test_ctselect_v4i32_unaligned_load(i1 %cond, ptr %p1, ptr %p2) { +; WASM32-LABEL: test_ctselect_v4i32_unaligned_load: +; WASM32: .functype test_ctselect_v4i32_unaligned_load (i32, i32, i32) -> (v128) +; WASM32-NEXT: .local v128 +; WASM32-NEXT: # %bb.0: +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i32x4.splat +; WASM32-NEXT: i32.const 31 +; WASM32-NEXT: i32x4.shl +; WASM32-NEXT: i32.const 31 +; WASM32-NEXT: i32x4.shr_s +; WASM32-NEXT: local.tee 3 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: v128.load 0:p2align=2 +; WASM32-NEXT: v128.and +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: v128.load 0:p2align=2 +; WASM32-NEXT: local.get 3 +; WASM32-NEXT: v128.andnot +; WASM32-NEXT: v128.or +; WASM32-NEXT: # fallthrough-return +; +; WASM64-LABEL: test_ctselect_v4i32_unaligned_load: +; WASM64: .functype test_ctselect_v4i32_unaligned_load (i32, i64, i64) -> (v128) +; WASM64-NEXT: .local v128 +; WASM64-NEXT: # %bb.0: +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i32x4.splat +; WASM64-NEXT: i32.const 31 +; WASM64-NEXT: i32x4.shl +; WASM64-NEXT: i32.const 31 +; WASM64-NEXT: i32x4.shr_s +; WASM64-NEXT: local.tee 3 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: v128.load 0:p2align=2 +; WASM64-NEXT: v128.and +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: v128.load 0:p2align=2 +; WASM64-NEXT: local.get 3 +; WASM64-NEXT: v128.andnot +; WASM64-NEXT: v128.or +; WASM64-NEXT: # fallthrough-return + %a = load <4 x i32>, ptr %p1, align 4 + %b = 
load <4 x i32>, ptr %p2, align 4 + %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %result +} + +; Test with stores to verify result handling +define void @test_ctselect_v4i32_store(i1 %cond, <4 x i32> %a, <4 x i32> %b, ptr %out) { +; WASM32-LABEL: test_ctselect_v4i32_store: +; WASM32: .functype test_ctselect_v4i32_store (i32, v128, v128, i32) -> () +; WASM32-NEXT: .local v128 +; WASM32-NEXT: # %bb.0: +; WASM32-NEXT: local.get 3 +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i32x4.splat +; WASM32-NEXT: i32.const 31 +; WASM32-NEXT: i32x4.shl +; WASM32-NEXT: i32.const 31 +; WASM32-NEXT: i32x4.shr_s +; WASM32-NEXT: local.tee 4 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: v128.and +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: local.get 4 +; WASM32-NEXT: v128.andnot +; WASM32-NEXT: v128.or +; WASM32-NEXT: v128.store 0 +; WASM32-NEXT: # fallthrough-return +; +; WASM64-LABEL: test_ctselect_v4i32_store: +; WASM64: .functype test_ctselect_v4i32_store (i32, v128, v128, i64) -> () +; WASM64-NEXT: .local v128 +; WASM64-NEXT: # %bb.0: +; WASM64-NEXT: local.get 3 +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i32x4.splat +; WASM64-NEXT: i32.const 31 +; WASM64-NEXT: i32x4.shl +; WASM64-NEXT: i32.const 31 +; WASM64-NEXT: i32x4.shr_s +; WASM64-NEXT: local.tee 4 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: v128.and +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: local.get 4 +; WASM64-NEXT: v128.andnot +; WASM64-NEXT: v128.or +; WASM64-NEXT: v128.store 0 +; WASM64-NEXT: # fallthrough-return + %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) + store <4 x i32> %result, ptr %out, align 16 + ret void +} + +; Test chained selects (multiple conditions) +define <4 x i32> @test_ctselect_v4i32_chain(i1 %cond1, i1 %cond2, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; WASM32-LABEL: test_ctselect_v4i32_chain: +; WASM32: .functype test_ctselect_v4i32_chain (i32, i32, v128, v128, v128) -> (v128) +; WASM32-NEXT: .local 
v128, v128 +; WASM32-NEXT: # %bb.0: +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: i32x4.splat +; WASM32-NEXT: i32.const 31 +; WASM32-NEXT: i32x4.shl +; WASM32-NEXT: i32.const 31 +; WASM32-NEXT: i32x4.shr_s +; WASM32-NEXT: local.tee 5 +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i32x4.splat +; WASM32-NEXT: i32.const 31 +; WASM32-NEXT: i32x4.shl +; WASM32-NEXT: i32.const 31 +; WASM32-NEXT: i32x4.shr_s +; WASM32-NEXT: local.tee 6 +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: v128.and +; WASM32-NEXT: local.get 3 +; WASM32-NEXT: local.get 6 +; WASM32-NEXT: v128.andnot +; WASM32-NEXT: v128.or +; WASM32-NEXT: v128.and +; WASM32-NEXT: local.get 4 +; WASM32-NEXT: local.get 5 +; WASM32-NEXT: v128.andnot +; WASM32-NEXT: v128.or +; WASM32-NEXT: # fallthrough-return +; +; WASM64-LABEL: test_ctselect_v4i32_chain: +; WASM64: .functype test_ctselect_v4i32_chain (i32, i32, v128, v128, v128) -> (v128) +; WASM64-NEXT: .local v128, v128 +; WASM64-NEXT: # %bb.0: +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: i32x4.splat +; WASM64-NEXT: i32.const 31 +; WASM64-NEXT: i32x4.shl +; WASM64-NEXT: i32.const 31 +; WASM64-NEXT: i32x4.shr_s +; WASM64-NEXT: local.tee 5 +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i32x4.splat +; WASM64-NEXT: i32.const 31 +; WASM64-NEXT: i32x4.shl +; WASM64-NEXT: i32.const 31 +; WASM64-NEXT: i32x4.shr_s +; WASM64-NEXT: local.tee 6 +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: v128.and +; WASM64-NEXT: local.get 3 +; WASM64-NEXT: local.get 6 +; WASM64-NEXT: v128.andnot +; WASM64-NEXT: v128.or +; WASM64-NEXT: v128.and +; WASM64-NEXT: local.get 4 +; WASM64-NEXT: local.get 5 +; WASM64-NEXT: v128.andnot +; WASM64-NEXT: v128.or +; WASM64-NEXT: # fallthrough-return + %tmp = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond1, <4 x i32> %a, <4 x i32> %b) + %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond2, <4 x i32> %tmp, <4 x i32> %c) + ret <4 x i32> %result +} + +; Test with arithmetic operations (ensure float vectors work with FP ops) +define <4 x float> 
@test_ctselect_v4f32_arithmetic(i1 %cond, <4 x float> %x, <4 x float> %y) { +; WASM32-LABEL: test_ctselect_v4f32_arithmetic: +; WASM32: .functype test_ctselect_v4f32_arithmetic (i32, v128, v128) -> (v128) +; WASM32-NEXT: .local v128 +; WASM32-NEXT: # %bb.0: +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i32x4.splat +; WASM32-NEXT: i32.const 31 +; WASM32-NEXT: i32x4.shl +; WASM32-NEXT: i32.const 31 +; WASM32-NEXT: i32x4.shr_s +; WASM32-NEXT: local.tee 3 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: f32x4.add +; WASM32-NEXT: v128.and +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: f32x4.sub +; WASM32-NEXT: local.get 3 +; WASM32-NEXT: v128.andnot +; WASM32-NEXT: v128.or +; WASM32-NEXT: # fallthrough-return +; +; WASM64-LABEL: test_ctselect_v4f32_arithmetic: +; WASM64: .functype test_ctselect_v4f32_arithmetic (i32, v128, v128) -> (v128) +; WASM64-NEXT: .local v128 +; WASM64-NEXT: # %bb.0: +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i32x4.splat +; WASM64-NEXT: i32.const 31 +; WASM64-NEXT: i32x4.shl +; WASM64-NEXT: i32.const 31 +; WASM64-NEXT: i32x4.shr_s +; WASM64-NEXT: local.tee 3 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: f32x4.add +; WASM64-NEXT: v128.and +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: f32x4.sub +; WASM64-NEXT: local.get 3 +; WASM64-NEXT: v128.andnot +; WASM64-NEXT: v128.or +; WASM64-NEXT: # fallthrough-return + %sum = fadd <4 x float> %x, %y + %diff = fsub <4 x float> %x, %y + %result = call <4 x float> @llvm.ct.select.v4f32(i1 %cond, <4 x float> %sum, <4 x float> %diff) + ret <4 x float> %result +} + +; Test with zero vectors +define <4 x i32> @test_ctselect_v4i32_zeros(i1 %cond, <4 x i32> %a) { +; WASM32-LABEL: test_ctselect_v4i32_zeros: +; WASM32: .functype test_ctselect_v4i32_zeros (i32, v128) -> (v128) +; WASM32-NEXT: # %bb.0: +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i32x4.splat +; WASM32-NEXT: i32.const 31 +; WASM32-NEXT: i32x4.shl +; 
WASM32-NEXT: i32.const 31 +; WASM32-NEXT: i32x4.shr_s +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: v128.and +; WASM32-NEXT: # fallthrough-return +; +; WASM64-LABEL: test_ctselect_v4i32_zeros: +; WASM64: .functype test_ctselect_v4i32_zeros (i32, v128) -> (v128) +; WASM64-NEXT: # %bb.0: +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i32x4.splat +; WASM64-NEXT: i32.const 31 +; WASM64-NEXT: i32x4.shl +; WASM64-NEXT: i32.const 31 +; WASM64-NEXT: i32x4.shr_s +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: v128.and +; WASM64-NEXT: # fallthrough-return + %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, + <4 x i32> %a, + <4 x i32> zeroinitializer) + ret <4 x i32> %result +} + +; Test with function arguments directly (no loads) +define <4 x i32> @test_ctselect_v4i32_args(i1 %cond, <4 x i32> %a, <4 x i32> %b) nounwind { +; WASM32-LABEL: test_ctselect_v4i32_args: +; WASM32: .functype test_ctselect_v4i32_args (i32, v128, v128) -> (v128) +; WASM32-NEXT: .local v128 +; WASM32-NEXT: # %bb.0: +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i32x4.splat +; WASM32-NEXT: i32.const 31 +; WASM32-NEXT: i32x4.shl +; WASM32-NEXT: i32.const 31 +; WASM32-NEXT: i32x4.shr_s +; WASM32-NEXT: local.tee 3 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: v128.and +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: local.get 3 +; WASM32-NEXT: v128.andnot +; WASM32-NEXT: v128.or +; WASM32-NEXT: # fallthrough-return +; +; WASM64-LABEL: test_ctselect_v4i32_args: +; WASM64: .functype test_ctselect_v4i32_args (i32, v128, v128) -> (v128) +; WASM64-NEXT: .local v128 +; WASM64-NEXT: # %bb.0: +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i32x4.splat +; WASM64-NEXT: i32.const 31 +; WASM64-NEXT: i32x4.shl +; WASM64-NEXT: i32.const 31 +; WASM64-NEXT: i32x4.shr_s +; WASM64-NEXT: local.tee 3 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: v128.and +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: local.get 3 +; WASM64-NEXT: v128.andnot +; WASM64-NEXT: v128.or +; WASM64-NEXT: # fallthrough-return + %result = call <4 x i32> 
@llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %result +} + +; Test with multiple uses of result +define <4 x i32> @test_ctselect_v4i32_multi_use(i1 %cond, <4 x i32> %a, <4 x i32> %b) { +; WASM32-LABEL: test_ctselect_v4i32_multi_use: +; WASM32: .functype test_ctselect_v4i32_multi_use (i32, v128, v128) -> (v128) +; WASM32-NEXT: .local v128 +; WASM32-NEXT: # %bb.0: +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i32x4.splat +; WASM32-NEXT: i32.const 31 +; WASM32-NEXT: i32x4.shl +; WASM32-NEXT: i32.const 31 +; WASM32-NEXT: i32x4.shr_s +; WASM32-NEXT: local.tee 3 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: v128.and +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: local.get 3 +; WASM32-NEXT: v128.andnot +; WASM32-NEXT: v128.or +; WASM32-NEXT: local.tee 1 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: i32x4.add +; WASM32-NEXT: # fallthrough-return +; +; WASM64-LABEL: test_ctselect_v4i32_multi_use: +; WASM64: .functype test_ctselect_v4i32_multi_use (i32, v128, v128) -> (v128) +; WASM64-NEXT: .local v128 +; WASM64-NEXT: # %bb.0: +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i32x4.splat +; WASM64-NEXT: i32.const 31 +; WASM64-NEXT: i32x4.shl +; WASM64-NEXT: i32.const 31 +; WASM64-NEXT: i32x4.shr_s +; WASM64-NEXT: local.tee 3 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: v128.and +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: local.get 3 +; WASM64-NEXT: v128.andnot +; WASM64-NEXT: v128.or +; WASM64-NEXT: local.tee 1 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: i32x4.add +; WASM64-NEXT: # fallthrough-return + %sel = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) + %add = add <4 x i32> %sel, %sel ; Use result twice + ret <4 x i32> %add +} + +; Test byte vector with operations +define <16 x i8> @test_ctselect_v16i8_ops(i1 %cond, <16 x i8> %x, <16 x i8> %y) { +; WASM32-LABEL: test_ctselect_v16i8_ops: +; WASM32: .functype test_ctselect_v16i8_ops (i32, v128, v128) -> (v128) +; WASM32-NEXT: .local v128 +; WASM32-NEXT: # %bb.0: +; WASM32-NEXT: 
local.get 0 +; WASM32-NEXT: i8x16.splat +; WASM32-NEXT: i32.const 7 +; WASM32-NEXT: i8x16.shl +; WASM32-NEXT: i32.const 7 +; WASM32-NEXT: i8x16.shr_s +; WASM32-NEXT: local.tee 3 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: v128.xor +; WASM32-NEXT: v128.and +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: v128.and +; WASM32-NEXT: local.get 3 +; WASM32-NEXT: v128.andnot +; WASM32-NEXT: v128.or +; WASM32-NEXT: # fallthrough-return +; +; WASM64-LABEL: test_ctselect_v16i8_ops: +; WASM64: .functype test_ctselect_v16i8_ops (i32, v128, v128) -> (v128) +; WASM64-NEXT: .local v128 +; WASM64-NEXT: # %bb.0: +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i8x16.splat +; WASM64-NEXT: i32.const 7 +; WASM64-NEXT: i8x16.shl +; WASM64-NEXT: i32.const 7 +; WASM64-NEXT: i8x16.shr_s +; WASM64-NEXT: local.tee 3 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: v128.xor +; WASM64-NEXT: v128.and +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: v128.and +; WASM64-NEXT: local.get 3 +; WASM64-NEXT: v128.andnot +; WASM64-NEXT: v128.or +; WASM64-NEXT: # fallthrough-return + %xor = xor <16 x i8> %x, %y + %and = and <16 x i8> %x, %y + %result = call <16 x i8> @llvm.ct.select.v16i8(i1 %cond, <16 x i8> %xor, <16 x i8> %and) + ret <16 x i8> %result +} + +declare <4 x i32> @llvm.ct.select.v4i32(i1, <4 x i32>, <4 x i32>) +declare <8 x i16> @llvm.ct.select.v8i16(i1, <8 x i16>, <8 x i16>) +declare <16 x i8> @llvm.ct.select.v16i8(i1, <16 x i8>, <16 x i8>) +declare <2 x i64> @llvm.ct.select.v2i64(i1, <2 x i64>, <2 x i64>) +declare <4 x float> @llvm.ct.select.v4f32(i1, <4 x float>, <4 x float>) +declare <2 x double> @llvm.ct.select.v2f64(i1, <2 x double>, <2 x double>) diff --git a/llvm/test/CodeGen/WebAssembly/ctselect-fallback.ll b/llvm/test/CodeGen/WebAssembly/ctselect-fallback.ll new file mode 100644 index 0000000000000..bd318960536df --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/ctselect-fallback.ll @@ -0,0 
+1,552 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=wasm32-unknown-unknown -O3 -filetype=asm | FileCheck %s --check-prefix=W32 +; RUN: llc < %s -mtriple=wasm64-unknown-unknown -O3 -filetype=asm | FileCheck %s --check-prefix=W64 + +; Test basic ct.select functionality for scalar types +define i8 @test_ctselect_i8(i1 %cond, i8 %a, i8 %b) { +; W32-LABEL: test_ctselect_i8: +; W32: .functype test_ctselect_i8 (i32, i32, i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: local.get 1 +; W32-NEXT: local.get 2 +; W32-NEXT: i32.xor +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const 1 +; W32-NEXT: i32.and +; W32-NEXT: i32.sub +; W32-NEXT: i32.and +; W32-NEXT: local.get 2 +; W32-NEXT: i32.xor +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_i8: +; W64: .functype test_ctselect_i8 (i32, i32, i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: local.get 1 +; W64-NEXT: local.get 2 +; W64-NEXT: i32.xor +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const 1 +; W64-NEXT: i32.and +; W64-NEXT: i32.sub +; W64-NEXT: i32.and +; W64-NEXT: local.get 2 +; W64-NEXT: i32.xor +; W64-NEXT: # fallthrough-return + %result = call i8 @llvm.ct.select.i8(i1 %cond, i8 %a, i8 %b) + ret i8 %result +} + +define i16 @test_ctselect_i16(i1 %cond, i16 %a, i16 %b) { +; W32-LABEL: test_ctselect_i16: +; W32: .functype test_ctselect_i16 (i32, i32, i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: local.get 1 +; W32-NEXT: local.get 2 +; W32-NEXT: i32.xor +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const 1 +; W32-NEXT: i32.and +; W32-NEXT: i32.sub +; W32-NEXT: i32.and +; W32-NEXT: local.get 2 +; W32-NEXT: i32.xor +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_i16: +; W64: .functype test_ctselect_i16 (i32, i32, i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: local.get 1 +; W64-NEXT: local.get 2 +; W64-NEXT: i32.xor +; W64-NEXT: i32.const 0 
+; W64-NEXT: local.get 0 +; W64-NEXT: i32.const 1 +; W64-NEXT: i32.and +; W64-NEXT: i32.sub +; W64-NEXT: i32.and +; W64-NEXT: local.get 2 +; W64-NEXT: i32.xor +; W64-NEXT: # fallthrough-return + %result = call i16 @llvm.ct.select.i16(i1 %cond, i16 %a, i16 %b) + ret i16 %result +} + +define i32 @test_ctselect_i32(i1 %cond, i32 %a, i32 %b) { +; W32-LABEL: test_ctselect_i32: +; W32: .functype test_ctselect_i32 (i32, i32, i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const 1 +; W32-NEXT: i32.and +; W32-NEXT: local.tee 0 +; W32-NEXT: i32.sub +; W32-NEXT: local.get 1 +; W32-NEXT: i32.and +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.add +; W32-NEXT: local.get 2 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_i32: +; W64: .functype test_ctselect_i32 (i32, i32, i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const 1 +; W64-NEXT: i32.and +; W64-NEXT: local.tee 0 +; W64-NEXT: i32.sub +; W64-NEXT: local.get 1 +; W64-NEXT: i32.and +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.add +; W64-NEXT: local.get 2 +; W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: # fallthrough-return + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) + ret i32 %result +} + +define i64 @test_ctselect_i64(i1 %cond, i64 %a, i64 %b) { +; W32-LABEL: test_ctselect_i64: +; W32: .functype test_ctselect_i64 (i32, i64, i64) -> (i64) +; W32-NEXT: .local i64 +; W32-NEXT: # %bb.0: +; W32-NEXT: i64.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: i64.extend_i32_u +; W32-NEXT: i64.const 1 +; W32-NEXT: i64.and +; W32-NEXT: local.tee 3 +; W32-NEXT: i64.sub +; W32-NEXT: local.get 1 +; W32-NEXT: i64.and +; W32-NEXT: local.get 3 +; W32-NEXT: i64.const -1 +; W32-NEXT: i64.add +; W32-NEXT: local.get 2 +; W32-NEXT: i64.and +; W32-NEXT: i64.or +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: 
test_ctselect_i64: +; W64: .functype test_ctselect_i64 (i32, i64, i64) -> (i64) +; W64-NEXT: .local i64 +; W64-NEXT: # %bb.0: +; W64-NEXT: i64.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: i64.extend_i32_u +; W64-NEXT: i64.const 1 +; W64-NEXT: i64.and +; W64-NEXT: local.tee 3 +; W64-NEXT: i64.sub +; W64-NEXT: local.get 1 +; W64-NEXT: i64.and +; W64-NEXT: local.get 3 +; W64-NEXT: i64.const -1 +; W64-NEXT: i64.add +; W64-NEXT: local.get 2 +; W64-NEXT: i64.and +; W64-NEXT: i64.or +; W64-NEXT: # fallthrough-return + %result = call i64 @llvm.ct.select.i64(i1 %cond, i64 %a, i64 %b) + ret i64 %result +} + +define ptr @test_ctselect_ptr(i1 %cond, ptr %a, ptr %b) { +; W32-LABEL: test_ctselect_ptr: +; W32: .functype test_ctselect_ptr (i32, i32, i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const 1 +; W32-NEXT: i32.and +; W32-NEXT: local.tee 0 +; W32-NEXT: i32.sub +; W32-NEXT: local.get 1 +; W32-NEXT: i32.and +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.add +; W32-NEXT: local.get 2 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_ptr: +; W64: .functype test_ctselect_ptr (i32, i64, i64) -> (i64) +; W64-NEXT: .local i64 +; W64-NEXT: # %bb.0: +; W64-NEXT: i64.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: i64.extend_i32_u +; W64-NEXT: i64.const 1 +; W64-NEXT: i64.and +; W64-NEXT: local.tee 3 +; W64-NEXT: i64.sub +; W64-NEXT: local.get 1 +; W64-NEXT: i64.and +; W64-NEXT: local.get 3 +; W64-NEXT: i64.const -1 +; W64-NEXT: i64.add +; W64-NEXT: local.get 2 +; W64-NEXT: i64.and +; W64-NEXT: i64.or +; W64-NEXT: # fallthrough-return + %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b) + ret ptr %result +} + +; Test with constant conditions +define i32 @test_ctselect_const_true(i32 %a, i32 %b) { +; W32-LABEL: test_ctselect_const_true: +; W32: .functype test_ctselect_const_true (i32, i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: local.get 
0 +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_const_true: +; W64: .functype test_ctselect_const_true (i32, i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: local.get 0 +; W64-NEXT: # fallthrough-return + %result = call i32 @llvm.ct.select.i32(i1 true, i32 %a, i32 %b) + ret i32 %result +} + +define i32 @test_ctselect_const_false(i32 %a, i32 %b) { +; W32-LABEL: test_ctselect_const_false: +; W32: .functype test_ctselect_const_false (i32, i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 1 +; W32-NEXT: i32.or +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_const_false: +; W64: .functype test_ctselect_const_false (i32, i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 1 +; W64-NEXT: i32.or +; W64-NEXT: # fallthrough-return + %result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b) + ret i32 %result +} + +; Test with comparison conditions +define i32 @test_ctselect_icmp_eq(i32 %x, i32 %y, i32 %a, i32 %b) { +; W32-LABEL: test_ctselect_icmp_eq: +; W32: .functype test_ctselect_icmp_eq (i32, i32, i32, i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: local.get 1 +; W32-NEXT: i32.eq +; W32-NEXT: i32.select +; W32-NEXT: local.tee 1 +; W32-NEXT: local.get 2 +; W32-NEXT: i32.and +; W32-NEXT: local.get 1 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.xor +; W32-NEXT: local.get 3 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_icmp_eq: +; W64: .functype test_ctselect_icmp_eq (i32, i32, i32, i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: local.get 1 +; W64-NEXT: i32.eq +; W64-NEXT: i32.select +; W64-NEXT: local.tee 1 +; W64-NEXT: local.get 2 +; W64-NEXT: i32.and +; W64-NEXT: local.get 1 +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.xor +; W64-NEXT: local.get 3 +; 
W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: # fallthrough-return + %cond = icmp eq i32 %x, %y + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) + ret i32 %result +} + +define i32 @test_ctselect_icmp_ne(i32 %x, i32 %y, i32 %a, i32 %b) { +; W32-LABEL: test_ctselect_icmp_ne: +; W32: .functype test_ctselect_icmp_ne (i32, i32, i32, i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: local.get 1 +; W32-NEXT: i32.ne +; W32-NEXT: i32.select +; W32-NEXT: local.tee 1 +; W32-NEXT: local.get 2 +; W32-NEXT: i32.and +; W32-NEXT: local.get 1 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.xor +; W32-NEXT: local.get 3 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_icmp_ne: +; W64: .functype test_ctselect_icmp_ne (i32, i32, i32, i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: local.get 1 +; W64-NEXT: i32.ne +; W64-NEXT: i32.select +; W64-NEXT: local.tee 1 +; W64-NEXT: local.get 2 +; W64-NEXT: i32.and +; W64-NEXT: local.get 1 +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.xor +; W64-NEXT: local.get 3 +; W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: # fallthrough-return + %cond = icmp ne i32 %x, %y + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) + ret i32 %result +} + +define i32 @test_ctselect_icmp_slt(i32 %x, i32 %y, i32 %a, i32 %b) { +; W32-LABEL: test_ctselect_icmp_slt: +; W32: .functype test_ctselect_icmp_slt (i32, i32, i32, i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: local.get 1 +; W32-NEXT: i32.lt_s +; W32-NEXT: i32.select +; W32-NEXT: local.tee 1 +; W32-NEXT: local.get 2 +; W32-NEXT: i32.and +; W32-NEXT: local.get 1 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.xor +; W32-NEXT: local.get 3 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: # 
fallthrough-return +; +; W64-LABEL: test_ctselect_icmp_slt: +; W64: .functype test_ctselect_icmp_slt (i32, i32, i32, i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: local.get 1 +; W64-NEXT: i32.lt_s +; W64-NEXT: i32.select +; W64-NEXT: local.tee 1 +; W64-NEXT: local.get 2 +; W64-NEXT: i32.and +; W64-NEXT: local.get 1 +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.xor +; W64-NEXT: local.get 3 +; W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: # fallthrough-return + %cond = icmp slt i32 %x, %y + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) + ret i32 %result +} + +define i32 @test_ctselect_icmp_ult(i32 %x, i32 %y, i32 %a, i32 %b) { +; W32-LABEL: test_ctselect_icmp_ult: +; W32: .functype test_ctselect_icmp_ult (i32, i32, i32, i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: local.get 1 +; W32-NEXT: i32.lt_u +; W32-NEXT: i32.select +; W32-NEXT: local.tee 1 +; W32-NEXT: local.get 2 +; W32-NEXT: i32.and +; W32-NEXT: local.get 1 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.xor +; W32-NEXT: local.get 3 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_icmp_ult: +; W64: .functype test_ctselect_icmp_ult (i32, i32, i32, i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: local.get 1 +; W64-NEXT: i32.lt_u +; W64-NEXT: i32.select +; W64-NEXT: local.tee 1 +; W64-NEXT: local.get 2 +; W64-NEXT: i32.and +; W64-NEXT: local.get 1 +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.xor +; W64-NEXT: local.get 3 +; W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: # fallthrough-return + %cond = icmp ult i32 %x, %y + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) + ret i32 %result +} + +; Test with memory operands +define i32 @test_ctselect_load(i1 %cond, ptr %p1, ptr %p2) { +; 
W32-LABEL: test_ctselect_load: +; W32: .functype test_ctselect_load (i32, i32, i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const 1 +; W32-NEXT: i32.and +; W32-NEXT: local.tee 0 +; W32-NEXT: i32.sub +; W32-NEXT: local.get 1 +; W32-NEXT: i32.load 0 +; W32-NEXT: i32.and +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.add +; W32-NEXT: local.get 2 +; W32-NEXT: i32.load 0 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_load: +; W64: .functype test_ctselect_load (i32, i64, i64) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const 1 +; W64-NEXT: i32.and +; W64-NEXT: local.tee 0 +; W64-NEXT: i32.sub +; W64-NEXT: local.get 1 +; W64-NEXT: i32.load 0 +; W64-NEXT: i32.and +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.add +; W64-NEXT: local.get 2 +; W64-NEXT: i32.load 0 +; W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: # fallthrough-return + %a = load i32, ptr %p1 + %b = load i32, ptr %p2 + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) + ret i32 %result +} + +; Test nested ctselect calls +define i32 @test_ctselect_nested(i1 %cond1, i1 %cond2, i32 %a, i32 %b, i32 %c) { +; W32-LABEL: test_ctselect_nested: +; W32: .functype test_ctselect_nested (i32, i32, i32, i32, i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const 1 +; W32-NEXT: i32.and +; W32-NEXT: local.tee 0 +; W32-NEXT: i32.sub +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 1 +; W32-NEXT: i32.const 1 +; W32-NEXT: i32.and +; W32-NEXT: local.tee 1 +; W32-NEXT: i32.sub +; W32-NEXT: local.get 2 +; W32-NEXT: i32.and +; W32-NEXT: local.get 1 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.add +; W32-NEXT: local.get 3 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: i32.and +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.add +; 
W32-NEXT: local.get 4 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_ctselect_nested: +; W64: .functype test_ctselect_nested (i32, i32, i32, i32, i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const 1 +; W64-NEXT: i32.and +; W64-NEXT: local.tee 0 +; W64-NEXT: i32.sub +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 1 +; W64-NEXT: i32.const 1 +; W64-NEXT: i32.and +; W64-NEXT: local.tee 1 +; W64-NEXT: i32.sub +; W64-NEXT: local.get 2 +; W64-NEXT: i32.and +; W64-NEXT: local.get 1 +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.add +; W64-NEXT: local.get 3 +; W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: i32.and +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.add +; W64-NEXT: local.get 4 +; W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: # fallthrough-return + %inner = call i32 @llvm.ct.select.i32(i1 %cond2, i32 %a, i32 %b) + %result = call i32 @llvm.ct.select.i32(i1 %cond1, i32 %inner, i32 %c) + ret i32 %result +} + +; Declare the intrinsics +declare i8 @llvm.ct.select.i8(i1, i8, i8) +declare i16 @llvm.ct.select.i16(i1, i16, i16) +declare i32 @llvm.ct.select.i32(i1, i32, i32) +declare i64 @llvm.ct.select.i64(i1, i64, i64) +declare ptr @llvm.ct.select.p0(i1, ptr, ptr) diff --git a/llvm/test/CodeGen/WebAssembly/ctselect-side-effects.ll b/llvm/test/CodeGen/WebAssembly/ctselect-side-effects.ll new file mode 100644 index 0000000000000..5b20e892c64d2 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/ctselect-side-effects.ll @@ -0,0 +1,226 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=wasm32-unknown-unknown -O3 -filetype=asm | FileCheck %s --check-prefix=W32 +; RUN: llc < %s -mtriple=wasm64-unknown-unknown -O3 -filetype=asm | FileCheck %s --check-prefix=W64 + +; Test 1: Basic optimizations should still work +define i32 @test_basic_opts(i32 %x) { +; W32-LABEL: 
test_basic_opts: +; W32: .functype test_basic_opts (i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: local.get 0 +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_basic_opts: +; W64: .functype test_basic_opts (i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: local.get 0 +; W64-NEXT: # fallthrough-return + %a = or i32 %x, 0 ; Should eliminate + %b = and i32 %a, -1 ; Should eliminate + %c = xor i32 %b, 0 ; Should eliminate + ret i32 %c +} + +; Test 2: Constant folding should work +define i32 @test_constant_fold() { +; W32-LABEL: test_constant_fold: +; W32: .functype test_constant_fold () -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const 0 +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_constant_fold: +; W64: .functype test_constant_fold () -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: i32.const 0 +; W64-NEXT: # fallthrough-return + %a = xor i32 -1, -1 ; Should fold to 0 + ret i32 %a +} + +; Test 3: Protected pattern should NOT have branches +define i32 @test_protected_no_branch(i1 %cond, i32 %a, i32 %b) { +; W32-LABEL: test_protected_no_branch: +; W32: .functype test_protected_no_branch (i32, i32, i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const 0 +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const 1 +; W32-NEXT: i32.and +; W32-NEXT: local.tee 0 +; W32-NEXT: i32.sub +; W32-NEXT: local.get 1 +; W32-NEXT: i32.and +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const -1 +; W32-NEXT: i32.add +; W32-NEXT: local.get 2 +; W32-NEXT: i32.and +; W32-NEXT: i32.or +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_protected_no_branch: +; W64: .functype test_protected_no_branch (i32, i32, i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: i32.const 0 +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const 1 +; W64-NEXT: i32.and +; W64-NEXT: local.tee 0 +; W64-NEXT: i32.sub +; W64-NEXT: local.get 1 +; W64-NEXT: i32.and +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const -1 +; W64-NEXT: i32.add +; W64-NEXT: local.get 2 +; W64-NEXT: i32.and +; W64-NEXT: i32.or +; W64-NEXT: # 
fallthrough-return + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) + ret i32 %result +} + +; Test 4: Explicit branch should still generate branches +define i32 @test_explicit_branch(i1 %cond, i32 %a, i32 %b) { +; W32-LABEL: test_explicit_branch: +; W32: .functype test_explicit_branch (i32, i32, i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: block +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const 1 +; W32-NEXT: i32.and +; W32-NEXT: i32.eqz +; W32-NEXT: br_if 0 # 0: down to label0 +; W32-NEXT: # %bb.1: # %true +; W32-NEXT: local.get 1 +; W32-NEXT: return +; W32-NEXT: .LBB3_2: # %false +; W32-NEXT: end_block # label0: +; W32-NEXT: local.get 2 +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_explicit_branch: +; W64: .functype test_explicit_branch (i32, i32, i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: block +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const 1 +; W64-NEXT: i32.and +; W64-NEXT: i32.eqz +; W64-NEXT: br_if 0 # 0: down to label0 +; W64-NEXT: # %bb.1: # %true +; W64-NEXT: local.get 1 +; W64-NEXT: return +; W64-NEXT: .LBB3_2: # %false +; W64-NEXT: end_block # label0: +; W64-NEXT: local.get 2 +; W64-NEXT: # fallthrough-return + br i1 %cond, label %true, label %false +true: + ret i32 %a +false: + ret i32 %b +} + +; Test 5: Regular select (not ct.select) has no constant-time guarantee, so the backend may lower it however it likes +define i32 @test_regular_select(i1 %cond, i32 %a, i32 %b) { +; W32-LABEL: test_regular_select: +; W32: .functype test_regular_select (i32, i32, i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: local.get 1 +; W32-NEXT: local.get 2 +; W32-NEXT: local.get 0 +; W32-NEXT: i32.const 1 +; W32-NEXT: i32.and +; W32-NEXT: i32.select +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_regular_select: +; W64: .functype test_regular_select (i32, i32, i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: local.get 1 +; W64-NEXT: local.get 2 +; W64-NEXT: local.get 0 +; W64-NEXT: i32.const 1 +; W64-NEXT: i32.and +; W64-NEXT: i32.select +; W64-NEXT: # fallthrough-return + 
%result = select i1 %cond, i32 %a, i32 %b + ret i32 %result +} + +; Test if XOR with all-ones still gets optimized +define i32 @test_xor_all_ones() { +; W32-LABEL: test_xor_all_ones: +; W32: .functype test_xor_all_ones () -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const 0 +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_xor_all_ones: +; W64: .functype test_xor_all_ones () -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: i32.const 0 +; W64-NEXT: # fallthrough-return + %xor1 = xor i32 -1, -1 ; Should optimize to 0 + ret i32 %xor1 +} + +define i32 @test_xor_same_value(i32 %x) { +; W32-LABEL: test_xor_same_value: +; W32: .functype test_xor_same_value (i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const 0 +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_xor_same_value: +; W64: .functype test_xor_same_value (i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: i32.const 0 +; W64-NEXT: # fallthrough-return + %xor2 = xor i32 %x, %x ; Should optimize to 0 + ret i32 %xor2 +} + +define i32 @test_normal_ops(i32 %x) { +; W32-LABEL: test_normal_ops: +; W32: .functype test_normal_ops (i32) -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: local.get 0 +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_normal_ops: +; W64: .functype test_normal_ops (i32) -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: local.get 0 +; W64-NEXT: # fallthrough-return + %or1 = or i32 %x, 0 + %and1 = and i32 %or1, -1 + %xor1 = xor i32 %and1, 0 + ret i32 %xor1 +} + +; XOR of identical constant operands, OR-ed together, should still constant-fold to 0 +define i32 @test_xor_with_const_operands() { +; W32-LABEL: test_xor_with_const_operands: +; W32: .functype test_xor_with_const_operands () -> (i32) +; W32-NEXT: # %bb.0: +; W32-NEXT: i32.const 0 +; W32-NEXT: # fallthrough-return +; +; W64-LABEL: test_xor_with_const_operands: +; W64: .functype test_xor_with_const_operands () -> (i32) +; W64-NEXT: # %bb.0: +; W64-NEXT: i32.const 0 +; W64-NEXT: # fallthrough-return + %a = xor i32 -1, -1 + %b = xor i32 0, 0 + %c = xor i32 42, 42 + 
%result = or i32 %a, %b + %final = or i32 %result, %c + ret i32 %final ; Should optimize to 0 +} + +declare i32 @llvm.ct.select.i32(i1, i32, i32) + _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
