https://github.com/wizardengineer created 
https://github.com/llvm/llvm-project/pull/166709

None

>From d55c2d69a0d48538f7376d9f06b8cbf0e2215e93 Mon Sep 17 00:00:00 2001
From: wizardengineer <[email protected]>
Date: Wed, 5 Nov 2025 11:03:23 -0500
Subject: [PATCH] [ConstantTime][WebAssembly] Add comprehensive tests for
 ct.select

---
 .../ctselect-fallback-edge-cases.ll           | 376 +++++++++
 .../WebAssembly/ctselect-fallback-patterns.ll | 641 ++++++++++++++++
 .../WebAssembly/ctselect-fallback-vector.ll   | 714 ++++++++++++++++++
 .../CodeGen/WebAssembly/ctselect-fallback.ll  | 552 ++++++++++++++
 .../WebAssembly/ctselect-side-effects.ll      | 226 ++++++
 5 files changed, 2509 insertions(+)
 create mode 100644 llvm/test/CodeGen/WebAssembly/ctselect-fallback-edge-cases.ll
 create mode 100644 llvm/test/CodeGen/WebAssembly/ctselect-fallback-patterns.ll
 create mode 100644 llvm/test/CodeGen/WebAssembly/ctselect-fallback-vector.ll
 create mode 100644 llvm/test/CodeGen/WebAssembly/ctselect-fallback.ll
 create mode 100644 llvm/test/CodeGen/WebAssembly/ctselect-side-effects.ll

diff --git a/llvm/test/CodeGen/WebAssembly/ctselect-fallback-edge-cases.ll b/llvm/test/CodeGen/WebAssembly/ctselect-fallback-edge-cases.ll
new file mode 100644
index 0000000000000..b0f7f2807debd
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/ctselect-fallback-edge-cases.ll
@@ -0,0 +1,376 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=wasm32-unknown-unknown -O3 -filetype=asm | FileCheck %s --check-prefix=W32
+; RUN: llc < %s -mtriple=wasm64-unknown-unknown -O3 -filetype=asm | FileCheck %s --check-prefix=W64
+
+; i1 operands: the expected code masks with the condition itself (and / xor 1 / and / or) rather than building a 0/-1 mask
+define i1 @test_ctselect_i1(i1 %cond, i1 %a, i1 %b) {
+; W32-LABEL: test_ctselect_i1:
+; W32:         .functype test_ctselect_i1 (i32, i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    local.get 0
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const 1
+; W32-NEXT:    i32.xor
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_i1:
+; W64:         .functype test_ctselect_i1 (i32, i32, i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    local.get 0
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const 1
+; W64-NEXT:    i32.xor
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    # fallthrough-return
+  %result = call i1 @llvm.ct.select.i1(i1 %cond, i1 %a, i1 %b)
+  ret i1 %result
+}
+
+; Extremal constants: selects between 2147483647 (i32 max) and -2147483648 (i32 min) under a runtime i1 condition
+define i32 @test_ctselect_extremal_values(i1 %cond) {
+; W32-LABEL: test_ctselect_extremal_values:
+; W32:         .functype test_ctselect_extremal_values (i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.tee 0
+; W32-NEXT:    i32.sub
+; W32-NEXT:    i32.const 2147483647
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.add
+; W32-NEXT:    i32.const -2147483648
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_extremal_values:
+; W64:         .functype test_ctselect_extremal_values (i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.tee 0
+; W64-NEXT:    i32.sub
+; W64-NEXT:    i32.const 2147483647
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.add
+; W64-NEXT:    i32.const -2147483648
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    # fallthrough-return
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 2147483647, i32 -2147483648)
+  ret i32 %result
+}
+
+; Null false operand: the expected code is one mask AND'ed with %ptr; the pointer is i32 on wasm32 and i64 on wasm64
+define ptr @test_ctselect_null_ptr(i1 %cond, ptr %ptr) {
+; W32-LABEL: test_ctselect_null_ptr:
+; W32:         .functype test_ctselect_null_ptr (i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.sub
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_null_ptr:
+; W64:         .functype test_ctselect_null_ptr (i32, i64) -> (i64)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i64.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i64.extend_i32_u
+; W64-NEXT:    i64.const 1
+; W64-NEXT:    i64.and
+; W64-NEXT:    i64.sub
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i64.and
+; W64-NEXT:    # fallthrough-return
+  %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %ptr, ptr null)
+  ret ptr %result
+}
+
+; Two pointer operands named as function pointers (the IR type is plain `ptr`); wasm64 zero-extends the i1 condition to i64 first
+define ptr @test_ctselect_function_ptr(i1 %cond, ptr %func1, ptr %func2) {
+; W32-LABEL: test_ctselect_function_ptr:
+; W32:         .functype test_ctselect_function_ptr (i32, i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.tee 0
+; W32-NEXT:    i32.sub
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.add
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_function_ptr:
+; W64:         .functype test_ctselect_function_ptr (i32, i64, i64) -> (i64)
+; W64-NEXT:    .local i64
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i64.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i64.extend_i32_u
+; W64-NEXT:    i64.const 1
+; W64-NEXT:    i64.and
+; W64-NEXT:    local.tee 3
+; W64-NEXT:    i64.sub
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i64.and
+; W64-NEXT:    local.get 3
+; W64-NEXT:    i64.const -1
+; W64-NEXT:    i64.add
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i64.and
+; W64-NEXT:    i64.or
+; W64-NEXT:    # fallthrough-return
+  %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %func1, ptr %func2)
+  ret ptr %result
+}
+
+; Condition is `icmp eq` on two pointers; the 0/-1 mask is produced by a select on the compare result (i32 on wasm32, i64 on wasm64)
+define ptr @test_ctselect_ptr_cmp(ptr %p1, ptr %p2, ptr %a, ptr %b) {
+; W32-LABEL: test_ctselect_ptr_cmp:
+; W32:         .functype test_ctselect_ptr_cmp (i32, i32, i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.eq
+; W32-NEXT:    i32.select
+; W32-NEXT:    local.tee 1
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.xor
+; W32-NEXT:    local.get 3
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_ptr_cmp:
+; W64:         .functype test_ctselect_ptr_cmp (i64, i64, i64, i64) -> (i64)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i64.const -1
+; W64-NEXT:    i64.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i64.eq
+; W64-NEXT:    i64.select
+; W64-NEXT:    local.tee 1
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i64.and
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i64.const -1
+; W64-NEXT:    i64.xor
+; W64-NEXT:    local.get 3
+; W64-NEXT:    i64.and
+; W64-NEXT:    i64.or
+; W64-NEXT:    # fallthrough-return
+  %cmp = icmp eq ptr %p1, %p2
+  %result = call ptr @llvm.ct.select.p0(i1 %cmp, ptr %a, ptr %b)
+  ret ptr %result
+}
+
+; Pointer operands intended as struct pointers; the operands are plain `ptr`, so the IR shape matches test_ctselect_function_ptr
+%struct.pair = type { i32, i32 } ; NOTE(review): not referenced by the function below
+
+define ptr @test_ctselect_struct_ptr(i1 %cond, ptr %a, ptr %b) {
+; W32-LABEL: test_ctselect_struct_ptr:
+; W32:         .functype test_ctselect_struct_ptr (i32, i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.tee 0
+; W32-NEXT:    i32.sub
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.add
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_struct_ptr:
+; W64:         .functype test_ctselect_struct_ptr (i32, i64, i64) -> (i64)
+; W64-NEXT:    .local i64
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i64.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i64.extend_i32_u
+; W64-NEXT:    i64.const 1
+; W64-NEXT:    i64.and
+; W64-NEXT:    local.tee 3
+; W64-NEXT:    i64.sub
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i64.and
+; W64-NEXT:    local.get 3
+; W64-NEXT:    i64.const -1
+; W64-NEXT:    i64.add
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i64.and
+; W64-NEXT:    i64.or
+; W64-NEXT:    # fallthrough-return
+  %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b)
+  ret ptr %result
+}
+
+; Four chained ct.selects, each result feeding the next select's true operand; the expected code builds one mask per condition
+define i32 @test_ctselect_deeply_nested(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+; W32-LABEL: test_ctselect_deeply_nested:
+; W32:         .functype test_ctselect_deeply_nested (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 3
+; W32-NEXT:    i32.const 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.tee 3
+; W32-NEXT:    i32.sub
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.const 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.tee 2
+; W32-NEXT:    i32.sub
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.const 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.tee 1
+; W32-NEXT:    i32.sub
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.tee 0
+; W32-NEXT:    i32.sub
+; W32-NEXT:    local.get 4
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.add
+; W32-NEXT:    local.get 5
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.add
+; W32-NEXT:    local.get 6
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.add
+; W32-NEXT:    local.get 7
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 3
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.add
+; W32-NEXT:    local.get 8
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_deeply_nested:
+; W64:         .functype test_ctselect_deeply_nested (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 3
+; W64-NEXT:    i32.const 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.tee 3
+; W64-NEXT:    i32.sub
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i32.const 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.tee 2
+; W64-NEXT:    i32.sub
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.const 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.tee 1
+; W64-NEXT:    i32.sub
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.tee 0
+; W64-NEXT:    i32.sub
+; W64-NEXT:    local.get 4
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.add
+; W64-NEXT:    local.get 5
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.add
+; W64-NEXT:    local.get 6
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.add
+; W64-NEXT:    local.get 7
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 3
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.add
+; W64-NEXT:    local.get 8
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    # fallthrough-return
+  %sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b)
+  %sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c)
+  %sel3 = call i32 @llvm.ct.select.i32(i1 %c3, i32 %sel2, i32 %d)
+  %sel4 = call i32 @llvm.ct.select.i32(i1 %c4, i32 %sel3, i32 %e)
+  ret i32 %sel4
+}
+
+; Declare the intrinsics
+declare i1 @llvm.ct.select.i1(i1, i1, i1)
+declare i32 @llvm.ct.select.i32(i1, i32, i32)
+declare ptr @llvm.ct.select.p0(i1, ptr, ptr)
diff --git a/llvm/test/CodeGen/WebAssembly/ctselect-fallback-patterns.ll b/llvm/test/CodeGen/WebAssembly/ctselect-fallback-patterns.ll
new file mode 100644
index 0000000000000..040ee44addb69
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/ctselect-fallback-patterns.ll
@@ -0,0 +1,641 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=wasm32-unknown-unknown -O3 -filetype=asm | FileCheck %s --check-prefix=W32
+; RUN: llc < %s -mtriple=wasm64-unknown-unknown -O3 -filetype=asm | FileCheck %s --check-prefix=W64
+
+; smin(x, 0) pattern: selects x when x < 0, else 0; the expected code is (x >> 31, arithmetic) AND x
+define i32 @test_ctselect_smin_zero(i32 %x) {
+; W32-LABEL: test_ctselect_smin_zero:
+; W32:         .functype test_ctselect_smin_zero (i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const 31
+; W32-NEXT:    i32.shr_s
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.and
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_smin_zero:
+; W64:         .functype test_ctselect_smin_zero (i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const 31
+; W64-NEXT:    i32.shr_s
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.and
+; W64-NEXT:    # fallthrough-return
+  %cmp = icmp slt i32 %x, 0
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0)
+  ret i32 %result
+}
+
+; smax(x, 0) pattern: selects x when x > 0, else 0. NOTE(review): expected code is a plain i32.select on the operands, not mask arithmetic - confirm this is intended
+define i32 @test_ctselect_smax_zero(i32 %x) {
+; W32-LABEL: test_ctselect_smax_zero:
+; W32:         .functype test_ctselect_smax_zero (i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    i32.gt_s
+; W32-NEXT:    i32.select
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_smax_zero:
+; W64:         .functype test_ctselect_smax_zero (i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    i32.gt_s
+; W64-NEXT:    i32.select
+; W64-NEXT:    # fallthrough-return
+  %cmp = icmp sgt i32 %x, 0
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0)
+  ret i32 %result
+}
+
+; Generic smin: selects x when x <s y, else y; the 0/-1 mask is produced by i32.select on the i32.lt_s result
+define i32 @test_ctselect_smin_generic(i32 %x, i32 %y) {
+; W32-LABEL: test_ctselect_smin_generic:
+; W32:         .functype test_ctselect_smin_generic (i32, i32) -> (i32)
+; W32-NEXT:    .local i32
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.lt_s
+; W32-NEXT:    i32.select
+; W32-NEXT:    local.tee 2
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.xor
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_smin_generic:
+; W64:         .functype test_ctselect_smin_generic (i32, i32) -> (i32)
+; W64-NEXT:    .local i32
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.lt_s
+; W64-NEXT:    i32.select
+; W64-NEXT:    local.tee 2
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.xor
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    # fallthrough-return
+  %cmp = icmp slt i32 %x, %y
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
+  ret i32 %result
+}
+
+; Generic smax: selects x when x >s y, else y; the 0/-1 mask is produced by i32.select on the i32.gt_s result
+define i32 @test_ctselect_smax_generic(i32 %x, i32 %y) {
+; W32-LABEL: test_ctselect_smax_generic:
+; W32:         .functype test_ctselect_smax_generic (i32, i32) -> (i32)
+; W32-NEXT:    .local i32
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.gt_s
+; W32-NEXT:    i32.select
+; W32-NEXT:    local.tee 2
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.xor
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_smax_generic:
+; W64:         .functype test_ctselect_smax_generic (i32, i32) -> (i32)
+; W64-NEXT:    .local i32
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.gt_s
+; W64-NEXT:    i32.select
+; W64-NEXT:    local.tee 2
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.xor
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    # fallthrough-return
+  %cmp = icmp sgt i32 %x, %y
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
+  ret i32 %result
+}
+
+; Generic umin: selects x when x <u y, else y; same mask sequence as the signed tests but with the unsigned compare i32.lt_u
+define i32 @test_ctselect_umin_generic(i32 %x, i32 %y) {
+; W32-LABEL: test_ctselect_umin_generic:
+; W32:         .functype test_ctselect_umin_generic (i32, i32) -> (i32)
+; W32-NEXT:    .local i32
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.lt_u
+; W32-NEXT:    i32.select
+; W32-NEXT:    local.tee 2
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.xor
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_umin_generic:
+; W64:         .functype test_ctselect_umin_generic (i32, i32) -> (i32)
+; W64-NEXT:    .local i32
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.lt_u
+; W64-NEXT:    i32.select
+; W64-NEXT:    local.tee 2
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.xor
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    # fallthrough-return
+  %cmp = icmp ult i32 %x, %y
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
+  ret i32 %result
+}
+
+; Generic umax: selects x when x >u y, else y; same mask sequence as the signed tests but with the unsigned compare i32.gt_u
+define i32 @test_ctselect_umax_generic(i32 %x, i32 %y) {
+; W32-LABEL: test_ctselect_umax_generic:
+; W32:         .functype test_ctselect_umax_generic (i32, i32) -> (i32)
+; W32-NEXT:    .local i32
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.gt_u
+; W32-NEXT:    i32.select
+; W32-NEXT:    local.tee 2
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.xor
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_umax_generic:
+; W64:         .functype test_ctselect_umax_generic (i32, i32) -> (i32)
+; W64-NEXT:    .local i32
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.gt_u
+; W64-NEXT:    i32.select
+; W64-NEXT:    local.tee 2
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.xor
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    # fallthrough-return
+  %cmp = icmp ugt i32 %x, %y
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
+  ret i32 %result
+}
+
+; abs pattern: selects (0 - x) when x < 0, else x; the expected mask is x >> 31 (arithmetic shift)
+define i32 @test_ctselect_abs(i32 %x) {
+; W32-LABEL: test_ctselect_abs:
+; W32:         .functype test_ctselect_abs (i32) -> (i32)
+; W32-NEXT:    .local i32
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const 31
+; W32-NEXT:    i32.shr_s
+; W32-NEXT:    local.tee 1
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.sub
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.xor
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_abs:
+; W64:         .functype test_ctselect_abs (i32) -> (i32)
+; W64-NEXT:    .local i32
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const 31
+; W64-NEXT:    i32.shr_s
+; W64-NEXT:    local.tee 1
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.sub
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.xor
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    # fallthrough-return
+  %neg = sub i32 0, %x
+  %cmp = icmp slt i32 %x, 0
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %neg, i32 %x)
+  ret i32 %result
+}
+
+; nabs (negated abs) pattern: selects x when x < 0, else (0 - x); operands are swapped relative to test_ctselect_abs
+define i32 @test_ctselect_nabs(i32 %x) {
+; W32-LABEL: test_ctselect_nabs:
+; W32:         .functype test_ctselect_nabs (i32) -> (i32)
+; W32-NEXT:    .local i32
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const 31
+; W32-NEXT:    i32.shr_s
+; W32-NEXT:    local.tee 1
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.xor
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.sub
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_nabs:
+; W64:         .functype test_ctselect_nabs (i32) -> (i32)
+; W64-NEXT:    .local i32
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const 31
+; W64-NEXT:    i32.shr_s
+; W64-NEXT:    local.tee 1
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.xor
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.sub
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    # fallthrough-return
+  %neg = sub i32 0, %x
+  %cmp = icmp slt i32 %x, 0
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %neg)
+  ret i32 %result
+}
+
+; Sign-extension pattern: selects constant -1 when x < 0, else 0; the expected code is just x >> 31 (arithmetic shift)
+define i32 @test_ctselect_sign_extend(i32 %x) {
+; W32-LABEL: test_ctselect_sign_extend:
+; W32:         .functype test_ctselect_sign_extend (i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const 31
+; W32-NEXT:    i32.shr_s
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_sign_extend:
+; W64:         .functype test_ctselect_sign_extend (i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const 31
+; W64-NEXT:    i32.shr_s
+; W64-NEXT:    # fallthrough-return
+  %cmp = icmp slt i32 %x, 0
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 -1, i32 0)
+  ret i32 %result
+}
+
+; Zero-extension pattern: selects constant 1 when x != 0, else 0; the expected code is just the i32.ne result
+define i32 @test_ctselect_zero_extend(i32 %x) {
+; W32-LABEL: test_ctselect_zero_extend:
+; W32:         .functype test_ctselect_zero_extend (i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    i32.ne
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_zero_extend:
+; W64:         .functype test_ctselect_zero_extend (i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    i32.ne
+; W64-NEXT:    # fallthrough-return
+  %cmp = icmp ne i32 %x, 0
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 1, i32 0)
+  ret i32 %result
+}
+
+; Constant-true condition: the expected code is just the first value operand (local.get 0)
+define i32 @test_ctselect_constant_folding_true(i32 %a, i32 %b) {
+; W32-LABEL: test_ctselect_constant_folding_true:
+; W32:         .functype test_ctselect_constant_folding_true (i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    local.get 0
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_constant_folding_true:
+; W64:         .functype test_ctselect_constant_folding_true (i32, i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    local.get 0
+; W64-NEXT:    # fallthrough-return
+  %result = call i32 @llvm.ct.select.i32(i1 true, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+define i32 @test_ctselect_constant_folding_false(i32 %a, i32 %b) {
+; W32-LABEL: test_ctselect_constant_folding_false:
+; W32:         .functype test_ctselect_constant_folding_false (i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_constant_folding_false:
+; W64:         .functype test_ctselect_constant_folding_false (i32, i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.or
+; W64-NEXT:    # fallthrough-return
+  %result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b) ; constant-false condition: checks expect %b OR'ed with a leftover constant 0
+  ret i32 %result
+}
+
+; Both value operands are %x; the expected code still performs the full mask / and / or sequence rather than folding to %x
+define i32 @test_ctselect_identical_operands(i1 %cond, i32 %x) {
+; W32-LABEL: test_ctselect_identical_operands:
+; W32:         .functype test_ctselect_identical_operands (i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.tee 0
+; W32-NEXT:    i32.sub
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.add
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_identical_operands:
+; W64:         .functype test_ctselect_identical_operands (i32, i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.tee 0
+; W64-NEXT:    i32.sub
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.add
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    # fallthrough-return
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %x, i32 %x)
+  ret i32 %result
+}
+
+; Condition is `xor (icmp eq x, y), true`; the expected code folds the inversion into a single i32.ne compare
+define i32 @test_ctselect_inverted_condition(i32 %x, i32 %y, i32 %a, i32 %b) {
+; W32-LABEL: test_ctselect_inverted_condition:
+; W32:         .functype test_ctselect_inverted_condition (i32, i32, i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.ne
+; W32-NEXT:    i32.select
+; W32-NEXT:    local.tee 1
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.xor
+; W32-NEXT:    local.get 3
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_inverted_condition:
+; W64:         .functype test_ctselect_inverted_condition (i32, i32, i32, i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.ne
+; W64-NEXT:    i32.select
+; W64-NEXT:    local.tee 1
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.xor
+; W64-NEXT:    local.get 3
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    # fallthrough-return
+  %cmp = icmp eq i32 %x, %y
+  %not_cmp = xor i1 %cmp, true
+  %result = call i32 @llvm.ct.select.i32(i1 %not_cmp, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+; Three chained ct.selects (%sel1 -> %sel2 -> %sel3), each result used as the next select's true operand
+define i32 @test_ctselect_chain(i1 %c1, i1 %c2, i1 %c3, i32 %a, i32 %b, i32 %c, i32 %d) {
+; W32-LABEL: test_ctselect_chain:
+; W32:         .functype test_ctselect_chain (i32, i32, i32, i32, i32, i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.const 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.tee 2
+; W32-NEXT:    i32.sub
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.const 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.tee 1
+; W32-NEXT:    i32.sub
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.tee 0
+; W32-NEXT:    i32.sub
+; W32-NEXT:    local.get 3
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.add
+; W32-NEXT:    local.get 4
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.add
+; W32-NEXT:    local.get 5
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.add
+; W32-NEXT:    local.get 6
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_chain:
+; W64:         .functype test_ctselect_chain (i32, i32, i32, i32, i32, i32, i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i32.const 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.tee 2
+; W64-NEXT:    i32.sub
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.const 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.tee 1
+; W64-NEXT:    i32.sub
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.tee 0
+; W64-NEXT:    i32.sub
+; W64-NEXT:    local.get 3
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.add
+; W64-NEXT:    local.get 4
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.add
+; W64-NEXT:    local.get 5
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.add
+; W64-NEXT:    local.get 6
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    # fallthrough-return
+  %sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b)
+  %sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c)
+  %sel3 = call i32 @llvm.ct.select.i32(i1 %c3, i32 %sel2, i32 %d)
+  ret i32 %sel3
+}
+
+; i64 smin(x, 0) pattern: the expected code uses native i64 instructions (i64.shr_s / i64.and) for both the wasm32 and wasm64 triples
+define i64 @test_ctselect_i64_smin_zero(i64 %x) {
+; W32-LABEL: test_ctselect_i64_smin_zero:
+; W32:         .functype test_ctselect_i64_smin_zero (i64) -> (i64)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i64.const 63
+; W32-NEXT:    i64.shr_s
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i64.and
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_i64_smin_zero:
+; W64:         .functype test_ctselect_i64_smin_zero (i64) -> (i64)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i64.const 63
+; W64-NEXT:    i64.shr_s
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i64.and
+; W64-NEXT:    # fallthrough-return
+  %cmp = icmp slt i64 %x, 0
+  %result = call i64 @llvm.ct.select.i64(i1 %cmp, i64 %x, i64 0)
+  ret i64 %result
+}
+
+; Declare the intrinsics
+declare i32 @llvm.ct.select.i32(i1, i32, i32)
+declare i64 @llvm.ct.select.i64(i1, i64, i64)
diff --git a/llvm/test/CodeGen/WebAssembly/ctselect-fallback-vector.ll b/llvm/test/CodeGen/WebAssembly/ctselect-fallback-vector.ll
new file mode 100644
index 0000000000000..75e38e1856a03
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/ctselect-fallback-vector.ll
@@ -0,0 +1,714 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=wasm32-unknown-unknown -O3 -mattr=+simd128 | 
FileCheck %s --check-prefix=WASM32
+; RUN: llc < %s -mtriple=wasm64-unknown-unknown -O3 -mattr=+simd128 | 
FileCheck %s --check-prefix=WASM64
+
+; Test 32-bit integer vector (4 x i32 = 128-bit): cond is splatted, widened to a lane mask by shl/shr_s 31, then blended with and/andnot/or
+define <4 x i32> @test_ctselect_v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) {
+; WASM32-LABEL: test_ctselect_v4i32:
+; WASM32:         .functype test_ctselect_v4i32 (i32, v128, v128) -> (v128)
+; WASM32-NEXT:    .local v128
+; WASM32-NEXT:  # %bb.0:
+; WASM32-NEXT:    local.get 0
+; WASM32-NEXT:    i32x4.splat
+; WASM32-NEXT:    i32.const 31
+; WASM32-NEXT:    i32x4.shl
+; WASM32-NEXT:    i32.const 31
+; WASM32-NEXT:    i32x4.shr_s
+; WASM32-NEXT:    local.tee 3
+; WASM32-NEXT:    local.get 1
+; WASM32-NEXT:    v128.and
+; WASM32-NEXT:    local.get 2
+; WASM32-NEXT:    local.get 3
+; WASM32-NEXT:    v128.andnot
+; WASM32-NEXT:    v128.or
+; WASM32-NEXT:    # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v4i32:
+; WASM64:         .functype test_ctselect_v4i32 (i32, v128, v128) -> (v128)
+; WASM64-NEXT:    .local v128
+; WASM64-NEXT:  # %bb.0:
+; WASM64-NEXT:    local.get 0
+; WASM64-NEXT:    i32x4.splat
+; WASM64-NEXT:    i32.const 31
+; WASM64-NEXT:    i32x4.shl
+; WASM64-NEXT:    i32.const 31
+; WASM64-NEXT:    i32x4.shr_s
+; WASM64-NEXT:    local.tee 3
+; WASM64-NEXT:    local.get 1
+; WASM64-NEXT:    v128.and
+; WASM64-NEXT:    local.get 2
+; WASM64-NEXT:    local.get 3
+; WASM64-NEXT:    v128.andnot
+; WASM64-NEXT:    v128.or
+; WASM64-NEXT:    # fallthrough-return
+  %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x 
i32> %b)
+  ret <4 x i32> %result
+}
+
+; Test 16-bit integer vector (8 x i16 = 128-bit): lane mask built with i16x8.splat and shl/shr_s by 15
+define <8 x i16> @test_ctselect_v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b) {
+; WASM32-LABEL: test_ctselect_v8i16:
+; WASM32:         .functype test_ctselect_v8i16 (i32, v128, v128) -> (v128)
+; WASM32-NEXT:    .local v128
+; WASM32-NEXT:  # %bb.0:
+; WASM32-NEXT:    local.get 0
+; WASM32-NEXT:    i16x8.splat
+; WASM32-NEXT:    i32.const 15
+; WASM32-NEXT:    i16x8.shl
+; WASM32-NEXT:    i32.const 15
+; WASM32-NEXT:    i16x8.shr_s
+; WASM32-NEXT:    local.tee 3
+; WASM32-NEXT:    local.get 1
+; WASM32-NEXT:    v128.and
+; WASM32-NEXT:    local.get 2
+; WASM32-NEXT:    local.get 3
+; WASM32-NEXT:    v128.andnot
+; WASM32-NEXT:    v128.or
+; WASM32-NEXT:    # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v8i16:
+; WASM64:         .functype test_ctselect_v8i16 (i32, v128, v128) -> (v128)
+; WASM64-NEXT:    .local v128
+; WASM64-NEXT:  # %bb.0:
+; WASM64-NEXT:    local.get 0
+; WASM64-NEXT:    i16x8.splat
+; WASM64-NEXT:    i32.const 15
+; WASM64-NEXT:    i16x8.shl
+; WASM64-NEXT:    i32.const 15
+; WASM64-NEXT:    i16x8.shr_s
+; WASM64-NEXT:    local.tee 3
+; WASM64-NEXT:    local.get 1
+; WASM64-NEXT:    v128.and
+; WASM64-NEXT:    local.get 2
+; WASM64-NEXT:    local.get 3
+; WASM64-NEXT:    v128.andnot
+; WASM64-NEXT:    v128.or
+; WASM64-NEXT:    # fallthrough-return
+  %result = call <8 x i16> @llvm.ct.select.v8i16(i1 %cond, <8 x i16> %a, <8 x 
i16> %b)
+  ret <8 x i16> %result
+}
+
+; Test byte vector (16 x i8 = 128-bit): lane mask built with i8x16.splat and shl/shr_s by 7
+define <16 x i8> @test_ctselect_v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b) {
+; WASM32-LABEL: test_ctselect_v16i8:
+; WASM32:         .functype test_ctselect_v16i8 (i32, v128, v128) -> (v128)
+; WASM32-NEXT:    .local v128
+; WASM32-NEXT:  # %bb.0:
+; WASM32-NEXT:    local.get 0
+; WASM32-NEXT:    i8x16.splat
+; WASM32-NEXT:    i32.const 7
+; WASM32-NEXT:    i8x16.shl
+; WASM32-NEXT:    i32.const 7
+; WASM32-NEXT:    i8x16.shr_s
+; WASM32-NEXT:    local.tee 3
+; WASM32-NEXT:    local.get 1
+; WASM32-NEXT:    v128.and
+; WASM32-NEXT:    local.get 2
+; WASM32-NEXT:    local.get 3
+; WASM32-NEXT:    v128.andnot
+; WASM32-NEXT:    v128.or
+; WASM32-NEXT:    # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v16i8:
+; WASM64:         .functype test_ctselect_v16i8 (i32, v128, v128) -> (v128)
+; WASM64-NEXT:    .local v128
+; WASM64-NEXT:  # %bb.0:
+; WASM64-NEXT:    local.get 0
+; WASM64-NEXT:    i8x16.splat
+; WASM64-NEXT:    i32.const 7
+; WASM64-NEXT:    i8x16.shl
+; WASM64-NEXT:    i32.const 7
+; WASM64-NEXT:    i8x16.shr_s
+; WASM64-NEXT:    local.tee 3
+; WASM64-NEXT:    local.get 1
+; WASM64-NEXT:    v128.and
+; WASM64-NEXT:    local.get 2
+; WASM64-NEXT:    local.get 3
+; WASM64-NEXT:    v128.andnot
+; WASM64-NEXT:    v128.or
+; WASM64-NEXT:    # fallthrough-return
+  %result = call <16 x i8> @llvm.ct.select.v16i8(i1 %cond, <16 x i8> %a, <16 x 
i8> %b)
+  ret <16 x i8> %result
+}
+
+; Test 64-bit integer vector (2 x i64 = 128-bit): cond is splatted as i32x4, then i64x2 shl/shr_s by 63 builds the lane mask
+define <2 x i64> @test_ctselect_v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b) {
+; WASM32-LABEL: test_ctselect_v2i64:
+; WASM32:         .functype test_ctselect_v2i64 (i32, v128, v128) -> (v128)
+; WASM32-NEXT:    .local v128
+; WASM32-NEXT:  # %bb.0:
+; WASM32-NEXT:    local.get 0
+; WASM32-NEXT:    i32x4.splat
+; WASM32-NEXT:    i32.const 63
+; WASM32-NEXT:    i64x2.shl
+; WASM32-NEXT:    i32.const 63
+; WASM32-NEXT:    i64x2.shr_s
+; WASM32-NEXT:    local.tee 3
+; WASM32-NEXT:    local.get 1
+; WASM32-NEXT:    v128.and
+; WASM32-NEXT:    local.get 2
+; WASM32-NEXT:    local.get 3
+; WASM32-NEXT:    v128.andnot
+; WASM32-NEXT:    v128.or
+; WASM32-NEXT:    # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v2i64:
+; WASM64:         .functype test_ctselect_v2i64 (i32, v128, v128) -> (v128)
+; WASM64-NEXT:    .local v128
+; WASM64-NEXT:  # %bb.0:
+; WASM64-NEXT:    local.get 0
+; WASM64-NEXT:    i32x4.splat
+; WASM64-NEXT:    i32.const 63
+; WASM64-NEXT:    i64x2.shl
+; WASM64-NEXT:    i32.const 63
+; WASM64-NEXT:    i64x2.shr_s
+; WASM64-NEXT:    local.tee 3
+; WASM64-NEXT:    local.get 1
+; WASM64-NEXT:    v128.and
+; WASM64-NEXT:    local.get 2
+; WASM64-NEXT:    local.get 3
+; WASM64-NEXT:    v128.andnot
+; WASM64-NEXT:    v128.or
+; WASM64-NEXT:    # fallthrough-return
+  %result = call <2 x i64> @llvm.ct.select.v2i64(i1 %cond, <2 x i64> %a, <2 x 
i64> %b)
+  ret <2 x i64> %result
+}
+
+; Test single-precision float vector (4 x float = 128-bit): selected with an integer i32x4 mask and v128 bitwise ops, no FP instructions
+define <4 x float> @test_ctselect_v4f32(i1 %cond, <4 x float> %a, <4 x float> 
%b) {
+; WASM32-LABEL: test_ctselect_v4f32:
+; WASM32:         .functype test_ctselect_v4f32 (i32, v128, v128) -> (v128)
+; WASM32-NEXT:    .local v128
+; WASM32-NEXT:  # %bb.0:
+; WASM32-NEXT:    local.get 0
+; WASM32-NEXT:    i32x4.splat
+; WASM32-NEXT:    i32.const 31
+; WASM32-NEXT:    i32x4.shl
+; WASM32-NEXT:    i32.const 31
+; WASM32-NEXT:    i32x4.shr_s
+; WASM32-NEXT:    local.tee 3
+; WASM32-NEXT:    local.get 1
+; WASM32-NEXT:    v128.and
+; WASM32-NEXT:    local.get 2
+; WASM32-NEXT:    local.get 3
+; WASM32-NEXT:    v128.andnot
+; WASM32-NEXT:    v128.or
+; WASM32-NEXT:    # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v4f32:
+; WASM64:         .functype test_ctselect_v4f32 (i32, v128, v128) -> (v128)
+; WASM64-NEXT:    .local v128
+; WASM64-NEXT:  # %bb.0:
+; WASM64-NEXT:    local.get 0
+; WASM64-NEXT:    i32x4.splat
+; WASM64-NEXT:    i32.const 31
+; WASM64-NEXT:    i32x4.shl
+; WASM64-NEXT:    i32.const 31
+; WASM64-NEXT:    i32x4.shr_s
+; WASM64-NEXT:    local.tee 3
+; WASM64-NEXT:    local.get 1
+; WASM64-NEXT:    v128.and
+; WASM64-NEXT:    local.get 2
+; WASM64-NEXT:    local.get 3
+; WASM64-NEXT:    v128.andnot
+; WASM64-NEXT:    v128.or
+; WASM64-NEXT:    # fallthrough-return
+  %result = call <4 x float> @llvm.ct.select.v4f32(i1 %cond, <4 x float> %a, 
<4 x float> %b)
+  ret <4 x float> %result
+}
+
+; Test double-precision float vector (2 x double = 128-bit): selected with an i64x2 shl/shr_s 63 mask and v128 bitwise ops
+define <2 x double> @test_ctselect_v2f64(i1 %cond, <2 x double> %a, <2 x 
double> %b) {
+; WASM32-LABEL: test_ctselect_v2f64:
+; WASM32:         .functype test_ctselect_v2f64 (i32, v128, v128) -> (v128)
+; WASM32-NEXT:    .local v128
+; WASM32-NEXT:  # %bb.0:
+; WASM32-NEXT:    local.get 0
+; WASM32-NEXT:    i32x4.splat
+; WASM32-NEXT:    i32.const 63
+; WASM32-NEXT:    i64x2.shl
+; WASM32-NEXT:    i32.const 63
+; WASM32-NEXT:    i64x2.shr_s
+; WASM32-NEXT:    local.tee 3
+; WASM32-NEXT:    local.get 1
+; WASM32-NEXT:    v128.and
+; WASM32-NEXT:    local.get 2
+; WASM32-NEXT:    local.get 3
+; WASM32-NEXT:    v128.andnot
+; WASM32-NEXT:    v128.or
+; WASM32-NEXT:    # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v2f64:
+; WASM64:         .functype test_ctselect_v2f64 (i32, v128, v128) -> (v128)
+; WASM64-NEXT:    .local v128
+; WASM64-NEXT:  # %bb.0:
+; WASM64-NEXT:    local.get 0
+; WASM64-NEXT:    i32x4.splat
+; WASM64-NEXT:    i32.const 63
+; WASM64-NEXT:    i64x2.shl
+; WASM64-NEXT:    i32.const 63
+; WASM64-NEXT:    i64x2.shr_s
+; WASM64-NEXT:    local.tee 3
+; WASM64-NEXT:    local.get 1
+; WASM64-NEXT:    v128.and
+; WASM64-NEXT:    local.get 2
+; WASM64-NEXT:    local.get 3
+; WASM64-NEXT:    v128.andnot
+; WASM64-NEXT:    v128.or
+; WASM64-NEXT:    # fallthrough-return
+  %result = call <2 x double> @llvm.ct.select.v2f64(i1 %cond, <2 x double> %a, 
<2 x double> %b)
+  ret <2 x double> %result
+}
+
+; Test with aligned loads (align 16): both v128.load instructions are emitted in straight-line code, with no p2align annotation
+define <4 x i32> @test_ctselect_v4i32_aligned_load(i1 %cond, ptr %p1, ptr %p2) 
{
+; WASM32-LABEL: test_ctselect_v4i32_aligned_load:
+; WASM32:         .functype test_ctselect_v4i32_aligned_load (i32, i32, i32) 
-> (v128)
+; WASM32-NEXT:    .local v128
+; WASM32-NEXT:  # %bb.0:
+; WASM32-NEXT:    local.get 0
+; WASM32-NEXT:    i32x4.splat
+; WASM32-NEXT:    i32.const 31
+; WASM32-NEXT:    i32x4.shl
+; WASM32-NEXT:    i32.const 31
+; WASM32-NEXT:    i32x4.shr_s
+; WASM32-NEXT:    local.tee 3
+; WASM32-NEXT:    local.get 1
+; WASM32-NEXT:    v128.load 0
+; WASM32-NEXT:    v128.and
+; WASM32-NEXT:    local.get 2
+; WASM32-NEXT:    v128.load 0
+; WASM32-NEXT:    local.get 3
+; WASM32-NEXT:    v128.andnot
+; WASM32-NEXT:    v128.or
+; WASM32-NEXT:    # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v4i32_aligned_load:
+; WASM64:         .functype test_ctselect_v4i32_aligned_load (i32, i64, i64) 
-> (v128)
+; WASM64-NEXT:    .local v128
+; WASM64-NEXT:  # %bb.0:
+; WASM64-NEXT:    local.get 0
+; WASM64-NEXT:    i32x4.splat
+; WASM64-NEXT:    i32.const 31
+; WASM64-NEXT:    i32x4.shl
+; WASM64-NEXT:    i32.const 31
+; WASM64-NEXT:    i32x4.shr_s
+; WASM64-NEXT:    local.tee 3
+; WASM64-NEXT:    local.get 1
+; WASM64-NEXT:    v128.load 0
+; WASM64-NEXT:    v128.and
+; WASM64-NEXT:    local.get 2
+; WASM64-NEXT:    v128.load 0
+; WASM64-NEXT:    local.get 3
+; WASM64-NEXT:    v128.andnot
+; WASM64-NEXT:    v128.or
+; WASM64-NEXT:    # fallthrough-return
+  %a = load <4 x i32>, ptr %p1, align 16
+  %b = load <4 x i32>, ptr %p2, align 16
+  %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x 
i32> %b)
+  ret <4 x i32> %result
+}
+
+; Test with under-aligned loads (align 4 on a 16-byte vector): expect v128.load with p2align=2 for both operands
+define <4 x i32> @test_ctselect_v4i32_unaligned_load(i1 %cond, ptr %p1, ptr 
%p2) {
+; WASM32-LABEL: test_ctselect_v4i32_unaligned_load:
+; WASM32:         .functype test_ctselect_v4i32_unaligned_load (i32, i32, i32) 
-> (v128)
+; WASM32-NEXT:    .local v128
+; WASM32-NEXT:  # %bb.0:
+; WASM32-NEXT:    local.get 0
+; WASM32-NEXT:    i32x4.splat
+; WASM32-NEXT:    i32.const 31
+; WASM32-NEXT:    i32x4.shl
+; WASM32-NEXT:    i32.const 31
+; WASM32-NEXT:    i32x4.shr_s
+; WASM32-NEXT:    local.tee 3
+; WASM32-NEXT:    local.get 1
+; WASM32-NEXT:    v128.load 0:p2align=2
+; WASM32-NEXT:    v128.and
+; WASM32-NEXT:    local.get 2
+; WASM32-NEXT:    v128.load 0:p2align=2
+; WASM32-NEXT:    local.get 3
+; WASM32-NEXT:    v128.andnot
+; WASM32-NEXT:    v128.or
+; WASM32-NEXT:    # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v4i32_unaligned_load:
+; WASM64:         .functype test_ctselect_v4i32_unaligned_load (i32, i64, i64) 
-> (v128)
+; WASM64-NEXT:    .local v128
+; WASM64-NEXT:  # %bb.0:
+; WASM64-NEXT:    local.get 0
+; WASM64-NEXT:    i32x4.splat
+; WASM64-NEXT:    i32.const 31
+; WASM64-NEXT:    i32x4.shl
+; WASM64-NEXT:    i32.const 31
+; WASM64-NEXT:    i32x4.shr_s
+; WASM64-NEXT:    local.tee 3
+; WASM64-NEXT:    local.get 1
+; WASM64-NEXT:    v128.load 0:p2align=2
+; WASM64-NEXT:    v128.and
+; WASM64-NEXT:    local.get 2
+; WASM64-NEXT:    v128.load 0:p2align=2
+; WASM64-NEXT:    local.get 3
+; WASM64-NEXT:    v128.andnot
+; WASM64-NEXT:    v128.or
+; WASM64-NEXT:    # fallthrough-return
+  %a = load <4 x i32>, ptr %p1, align 4
+  %b = load <4 x i32>, ptr %p2, align 4
+  %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x 
i32> %b)
+  ret <4 x i32> %result
+}
+
+; Test with stores to verify result handling: the blended vector is written to %out with a single v128.store
+define void @test_ctselect_v4i32_store(i1 %cond, <4 x i32> %a, <4 x i32> %b, 
ptr %out) {
+; WASM32-LABEL: test_ctselect_v4i32_store:
+; WASM32:         .functype test_ctselect_v4i32_store (i32, v128, v128, i32) 
-> ()
+; WASM32-NEXT:    .local v128
+; WASM32-NEXT:  # %bb.0:
+; WASM32-NEXT:    local.get 3
+; WASM32-NEXT:    local.get 0
+; WASM32-NEXT:    i32x4.splat
+; WASM32-NEXT:    i32.const 31
+; WASM32-NEXT:    i32x4.shl
+; WASM32-NEXT:    i32.const 31
+; WASM32-NEXT:    i32x4.shr_s
+; WASM32-NEXT:    local.tee 4
+; WASM32-NEXT:    local.get 1
+; WASM32-NEXT:    v128.and
+; WASM32-NEXT:    local.get 2
+; WASM32-NEXT:    local.get 4
+; WASM32-NEXT:    v128.andnot
+; WASM32-NEXT:    v128.or
+; WASM32-NEXT:    v128.store 0
+; WASM32-NEXT:    # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v4i32_store:
+; WASM64:         .functype test_ctselect_v4i32_store (i32, v128, v128, i64) 
-> ()
+; WASM64-NEXT:    .local v128
+; WASM64-NEXT:  # %bb.0:
+; WASM64-NEXT:    local.get 3
+; WASM64-NEXT:    local.get 0
+; WASM64-NEXT:    i32x4.splat
+; WASM64-NEXT:    i32.const 31
+; WASM64-NEXT:    i32x4.shl
+; WASM64-NEXT:    i32.const 31
+; WASM64-NEXT:    i32x4.shr_s
+; WASM64-NEXT:    local.tee 4
+; WASM64-NEXT:    local.get 1
+; WASM64-NEXT:    v128.and
+; WASM64-NEXT:    local.get 2
+; WASM64-NEXT:    local.get 4
+; WASM64-NEXT:    v128.andnot
+; WASM64-NEXT:    v128.or
+; WASM64-NEXT:    v128.store 0
+; WASM64-NEXT:    # fallthrough-return
+  %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x 
i32> %b)
+  store <4 x i32> %result, ptr %out, align 16
+  ret void
+}
+
+; Test chained selects (multiple conditions): one lane mask per condition; the inner blend (a/b on cond1) feeds the outer blend (with c on cond2)
+define <4 x i32> @test_ctselect_v4i32_chain(i1 %cond1, i1 %cond2, <4 x i32> 
%a, <4 x i32> %b, <4 x i32> %c) {
+; WASM32-LABEL: test_ctselect_v4i32_chain:
+; WASM32:         .functype test_ctselect_v4i32_chain (i32, i32, v128, v128, 
v128) -> (v128)
+; WASM32-NEXT:    .local v128, v128
+; WASM32-NEXT:  # %bb.0:
+; WASM32-NEXT:    local.get 1
+; WASM32-NEXT:    i32x4.splat
+; WASM32-NEXT:    i32.const 31
+; WASM32-NEXT:    i32x4.shl
+; WASM32-NEXT:    i32.const 31
+; WASM32-NEXT:    i32x4.shr_s
+; WASM32-NEXT:    local.tee 5
+; WASM32-NEXT:    local.get 0
+; WASM32-NEXT:    i32x4.splat
+; WASM32-NEXT:    i32.const 31
+; WASM32-NEXT:    i32x4.shl
+; WASM32-NEXT:    i32.const 31
+; WASM32-NEXT:    i32x4.shr_s
+; WASM32-NEXT:    local.tee 6
+; WASM32-NEXT:    local.get 2
+; WASM32-NEXT:    v128.and
+; WASM32-NEXT:    local.get 3
+; WASM32-NEXT:    local.get 6
+; WASM32-NEXT:    v128.andnot
+; WASM32-NEXT:    v128.or
+; WASM32-NEXT:    v128.and
+; WASM32-NEXT:    local.get 4
+; WASM32-NEXT:    local.get 5
+; WASM32-NEXT:    v128.andnot
+; WASM32-NEXT:    v128.or
+; WASM32-NEXT:    # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v4i32_chain:
+; WASM64:         .functype test_ctselect_v4i32_chain (i32, i32, v128, v128, 
v128) -> (v128)
+; WASM64-NEXT:    .local v128, v128
+; WASM64-NEXT:  # %bb.0:
+; WASM64-NEXT:    local.get 1
+; WASM64-NEXT:    i32x4.splat
+; WASM64-NEXT:    i32.const 31
+; WASM64-NEXT:    i32x4.shl
+; WASM64-NEXT:    i32.const 31
+; WASM64-NEXT:    i32x4.shr_s
+; WASM64-NEXT:    local.tee 5
+; WASM64-NEXT:    local.get 0
+; WASM64-NEXT:    i32x4.splat
+; WASM64-NEXT:    i32.const 31
+; WASM64-NEXT:    i32x4.shl
+; WASM64-NEXT:    i32.const 31
+; WASM64-NEXT:    i32x4.shr_s
+; WASM64-NEXT:    local.tee 6
+; WASM64-NEXT:    local.get 2
+; WASM64-NEXT:    v128.and
+; WASM64-NEXT:    local.get 3
+; WASM64-NEXT:    local.get 6
+; WASM64-NEXT:    v128.andnot
+; WASM64-NEXT:    v128.or
+; WASM64-NEXT:    v128.and
+; WASM64-NEXT:    local.get 4
+; WASM64-NEXT:    local.get 5
+; WASM64-NEXT:    v128.andnot
+; WASM64-NEXT:    v128.or
+; WASM64-NEXT:    # fallthrough-return
+  %tmp = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond1, <4 x i32> %a, <4 x 
i32> %b)
+  %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond2, <4 x i32> %tmp, <4 
x i32> %c)
+  ret <4 x i32> %result
+}
+
+; Test with arithmetic operations: f32x4.add and f32x4.sub are both computed, then blended by the integer lane mask
+define <4 x float> @test_ctselect_v4f32_arithmetic(i1 %cond, <4 x float> %x, 
<4 x float> %y) {
+; WASM32-LABEL: test_ctselect_v4f32_arithmetic:
+; WASM32:         .functype test_ctselect_v4f32_arithmetic (i32, v128, v128) 
-> (v128)
+; WASM32-NEXT:    .local v128
+; WASM32-NEXT:  # %bb.0:
+; WASM32-NEXT:    local.get 0
+; WASM32-NEXT:    i32x4.splat
+; WASM32-NEXT:    i32.const 31
+; WASM32-NEXT:    i32x4.shl
+; WASM32-NEXT:    i32.const 31
+; WASM32-NEXT:    i32x4.shr_s
+; WASM32-NEXT:    local.tee 3
+; WASM32-NEXT:    local.get 1
+; WASM32-NEXT:    local.get 2
+; WASM32-NEXT:    f32x4.add
+; WASM32-NEXT:    v128.and
+; WASM32-NEXT:    local.get 1
+; WASM32-NEXT:    local.get 2
+; WASM32-NEXT:    f32x4.sub
+; WASM32-NEXT:    local.get 3
+; WASM32-NEXT:    v128.andnot
+; WASM32-NEXT:    v128.or
+; WASM32-NEXT:    # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v4f32_arithmetic:
+; WASM64:         .functype test_ctselect_v4f32_arithmetic (i32, v128, v128) 
-> (v128)
+; WASM64-NEXT:    .local v128
+; WASM64-NEXT:  # %bb.0:
+; WASM64-NEXT:    local.get 0
+; WASM64-NEXT:    i32x4.splat
+; WASM64-NEXT:    i32.const 31
+; WASM64-NEXT:    i32x4.shl
+; WASM64-NEXT:    i32.const 31
+; WASM64-NEXT:    i32x4.shr_s
+; WASM64-NEXT:    local.tee 3
+; WASM64-NEXT:    local.get 1
+; WASM64-NEXT:    local.get 2
+; WASM64-NEXT:    f32x4.add
+; WASM64-NEXT:    v128.and
+; WASM64-NEXT:    local.get 1
+; WASM64-NEXT:    local.get 2
+; WASM64-NEXT:    f32x4.sub
+; WASM64-NEXT:    local.get 3
+; WASM64-NEXT:    v128.andnot
+; WASM64-NEXT:    v128.or
+; WASM64-NEXT:    # fallthrough-return
+  %sum = fadd <4 x float> %x, %y
+  %diff = fsub <4 x float> %x, %y
+  %result = call <4 x float> @llvm.ct.select.v4f32(i1 %cond, <4 x float> %sum, 
<4 x float> %diff)
+  ret <4 x float> %result
+}
+
+; Test with zero vectors: a zeroinitializer false operand reduces the blend to a single v128.and of the mask with %a
+define <4 x i32> @test_ctselect_v4i32_zeros(i1 %cond, <4 x i32> %a) {
+; WASM32-LABEL: test_ctselect_v4i32_zeros:
+; WASM32:         .functype test_ctselect_v4i32_zeros (i32, v128) -> (v128)
+; WASM32-NEXT:  # %bb.0:
+; WASM32-NEXT:    local.get 0
+; WASM32-NEXT:    i32x4.splat
+; WASM32-NEXT:    i32.const 31
+; WASM32-NEXT:    i32x4.shl
+; WASM32-NEXT:    i32.const 31
+; WASM32-NEXT:    i32x4.shr_s
+; WASM32-NEXT:    local.get 1
+; WASM32-NEXT:    v128.and
+; WASM32-NEXT:    # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v4i32_zeros:
+; WASM64:         .functype test_ctselect_v4i32_zeros (i32, v128) -> (v128)
+; WASM64-NEXT:  # %bb.0:
+; WASM64-NEXT:    local.get 0
+; WASM64-NEXT:    i32x4.splat
+; WASM64-NEXT:    i32.const 31
+; WASM64-NEXT:    i32x4.shl
+; WASM64-NEXT:    i32.const 31
+; WASM64-NEXT:    i32x4.shr_s
+; WASM64-NEXT:    local.get 1
+; WASM64-NEXT:    v128.and
+; WASM64-NEXT:    # fallthrough-return
+  %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond,
+                                                   <4 x i32> %a,
+                                                   <4 x i32> zeroinitializer)
+  ret <4 x i32> %result
+}
+
+; Test with function arguments directly (no loads); same IR as test_ctselect_v4i32 plus nounwind, with the same expected output
+define <4 x i32> @test_ctselect_v4i32_args(i1 %cond, <4 x i32> %a, <4 x i32> 
%b) nounwind {
+; WASM32-LABEL: test_ctselect_v4i32_args:
+; WASM32:         .functype test_ctselect_v4i32_args (i32, v128, v128) -> 
(v128)
+; WASM32-NEXT:    .local v128
+; WASM32-NEXT:  # %bb.0:
+; WASM32-NEXT:    local.get 0
+; WASM32-NEXT:    i32x4.splat
+; WASM32-NEXT:    i32.const 31
+; WASM32-NEXT:    i32x4.shl
+; WASM32-NEXT:    i32.const 31
+; WASM32-NEXT:    i32x4.shr_s
+; WASM32-NEXT:    local.tee 3
+; WASM32-NEXT:    local.get 1
+; WASM32-NEXT:    v128.and
+; WASM32-NEXT:    local.get 2
+; WASM32-NEXT:    local.get 3
+; WASM32-NEXT:    v128.andnot
+; WASM32-NEXT:    v128.or
+; WASM32-NEXT:    # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v4i32_args:
+; WASM64:         .functype test_ctselect_v4i32_args (i32, v128, v128) -> 
(v128)
+; WASM64-NEXT:    .local v128
+; WASM64-NEXT:  # %bb.0:
+; WASM64-NEXT:    local.get 0
+; WASM64-NEXT:    i32x4.splat
+; WASM64-NEXT:    i32.const 31
+; WASM64-NEXT:    i32x4.shl
+; WASM64-NEXT:    i32.const 31
+; WASM64-NEXT:    i32x4.shr_s
+; WASM64-NEXT:    local.tee 3
+; WASM64-NEXT:    local.get 1
+; WASM64-NEXT:    v128.and
+; WASM64-NEXT:    local.get 2
+; WASM64-NEXT:    local.get 3
+; WASM64-NEXT:    v128.andnot
+; WASM64-NEXT:    v128.or
+; WASM64-NEXT:    # fallthrough-return
+  %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x 
i32> %b)
+  ret <4 x i32> %result
+}
+
+; Test with multiple uses of result: the blend is computed once, kept in a local via local.tee, and added to itself
+define <4 x i32> @test_ctselect_v4i32_multi_use(i1 %cond, <4 x i32> %a, <4 x 
i32> %b) {
+; WASM32-LABEL: test_ctselect_v4i32_multi_use:
+; WASM32:         .functype test_ctselect_v4i32_multi_use (i32, v128, v128) -> 
(v128)
+; WASM32-NEXT:    .local v128
+; WASM32-NEXT:  # %bb.0:
+; WASM32-NEXT:    local.get 0
+; WASM32-NEXT:    i32x4.splat
+; WASM32-NEXT:    i32.const 31
+; WASM32-NEXT:    i32x4.shl
+; WASM32-NEXT:    i32.const 31
+; WASM32-NEXT:    i32x4.shr_s
+; WASM32-NEXT:    local.tee 3
+; WASM32-NEXT:    local.get 1
+; WASM32-NEXT:    v128.and
+; WASM32-NEXT:    local.get 2
+; WASM32-NEXT:    local.get 3
+; WASM32-NEXT:    v128.andnot
+; WASM32-NEXT:    v128.or
+; WASM32-NEXT:    local.tee 1
+; WASM32-NEXT:    local.get 1
+; WASM32-NEXT:    i32x4.add
+; WASM32-NEXT:    # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v4i32_multi_use:
+; WASM64:         .functype test_ctselect_v4i32_multi_use (i32, v128, v128) -> 
(v128)
+; WASM64-NEXT:    .local v128
+; WASM64-NEXT:  # %bb.0:
+; WASM64-NEXT:    local.get 0
+; WASM64-NEXT:    i32x4.splat
+; WASM64-NEXT:    i32.const 31
+; WASM64-NEXT:    i32x4.shl
+; WASM64-NEXT:    i32.const 31
+; WASM64-NEXT:    i32x4.shr_s
+; WASM64-NEXT:    local.tee 3
+; WASM64-NEXT:    local.get 1
+; WASM64-NEXT:    v128.and
+; WASM64-NEXT:    local.get 2
+; WASM64-NEXT:    local.get 3
+; WASM64-NEXT:    v128.andnot
+; WASM64-NEXT:    v128.or
+; WASM64-NEXT:    local.tee 1
+; WASM64-NEXT:    local.get 1
+; WASM64-NEXT:    i32x4.add
+; WASM64-NEXT:    # fallthrough-return
+  %sel = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x 
i32> %b)
+  %add = add <4 x i32> %sel, %sel  ; Use result twice
+  ret <4 x i32> %add
+}
+
+; Test byte vector with operations: xor and and of the inputs are both computed, then blended with an i8x16 lane mask
+define <16 x i8> @test_ctselect_v16i8_ops(i1 %cond, <16 x i8> %x, <16 x i8> 
%y) {
+; WASM32-LABEL: test_ctselect_v16i8_ops:
+; WASM32:         .functype test_ctselect_v16i8_ops (i32, v128, v128) -> (v128)
+; WASM32-NEXT:    .local v128
+; WASM32-NEXT:  # %bb.0:
+; WASM32-NEXT:    local.get 0
+; WASM32-NEXT:    i8x16.splat
+; WASM32-NEXT:    i32.const 7
+; WASM32-NEXT:    i8x16.shl
+; WASM32-NEXT:    i32.const 7
+; WASM32-NEXT:    i8x16.shr_s
+; WASM32-NEXT:    local.tee 3
+; WASM32-NEXT:    local.get 1
+; WASM32-NEXT:    local.get 2
+; WASM32-NEXT:    v128.xor
+; WASM32-NEXT:    v128.and
+; WASM32-NEXT:    local.get 1
+; WASM32-NEXT:    local.get 2
+; WASM32-NEXT:    v128.and
+; WASM32-NEXT:    local.get 3
+; WASM32-NEXT:    v128.andnot
+; WASM32-NEXT:    v128.or
+; WASM32-NEXT:    # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v16i8_ops:
+; WASM64:         .functype test_ctselect_v16i8_ops (i32, v128, v128) -> (v128)
+; WASM64-NEXT:    .local v128
+; WASM64-NEXT:  # %bb.0:
+; WASM64-NEXT:    local.get 0
+; WASM64-NEXT:    i8x16.splat
+; WASM64-NEXT:    i32.const 7
+; WASM64-NEXT:    i8x16.shl
+; WASM64-NEXT:    i32.const 7
+; WASM64-NEXT:    i8x16.shr_s
+; WASM64-NEXT:    local.tee 3
+; WASM64-NEXT:    local.get 1
+; WASM64-NEXT:    local.get 2
+; WASM64-NEXT:    v128.xor
+; WASM64-NEXT:    v128.and
+; WASM64-NEXT:    local.get 1
+; WASM64-NEXT:    local.get 2
+; WASM64-NEXT:    v128.and
+; WASM64-NEXT:    local.get 3
+; WASM64-NEXT:    v128.andnot
+; WASM64-NEXT:    v128.or
+; WASM64-NEXT:    # fallthrough-return
+  %xor = xor <16 x i8> %x, %y
+  %and = and <16 x i8> %x, %y
+  %result = call <16 x i8> @llvm.ct.select.v16i8(i1 %cond, <16 x i8> %xor, <16 
x i8> %and)
+  ret <16 x i8> %result
+}
+
+declare <4 x i32> @llvm.ct.select.v4i32(i1, <4 x i32>, <4 x i32>)
+declare <8 x i16> @llvm.ct.select.v8i16(i1, <8 x i16>, <8 x i16>)
+declare <16 x i8> @llvm.ct.select.v16i8(i1, <16 x i8>, <16 x i8>)
+declare <2 x i64> @llvm.ct.select.v2i64(i1, <2 x i64>, <2 x i64>)
+declare <4 x float> @llvm.ct.select.v4f32(i1, <4 x float>, <4 x float>)
+declare <2 x double> @llvm.ct.select.v2f64(i1, <2 x double>, <2 x double>)
diff --git a/llvm/test/CodeGen/WebAssembly/ctselect-fallback.ll 
b/llvm/test/CodeGen/WebAssembly/ctselect-fallback.ll
new file mode 100644
index 0000000000000..bd318960536df
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/ctselect-fallback.ll
@@ -0,0 +1,552 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=wasm32-unknown-unknown -O3 -filetype=asm | FileCheck 
%s --check-prefix=W32
+; RUN: llc < %s -mtriple=wasm64-unknown-unknown -O3 -filetype=asm | FileCheck 
%s --check-prefix=W64
+
+; Test basic ct.select functionality for scalar types (i8, i16, i32, i64, ptr)
+define i8 @test_ctselect_i8(i1 %cond, i8 %a, i8 %b) {
+; W32-LABEL: test_ctselect_i8:
+; W32:         .functype test_ctselect_i8 (i32, i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    local.get 1
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.xor
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.sub
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.xor
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_i8:
+; W64:         .functype test_ctselect_i8 (i32, i32, i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    local.get 1
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i32.xor
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.sub
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i32.xor
+; W64-NEXT:    # fallthrough-return
+  %result = call i8 @llvm.ct.select.i8(i1 %cond, i8 %a, i8 %b)
+  ret i8 %result ; expected lowering: ((a ^ b) & (0 - (cond & 1))) ^ b, in i32 ops
+}
+
+define i16 @test_ctselect_i16(i1 %cond, i16 %a, i16 %b) {
+; W32-LABEL: test_ctselect_i16:
+; W32:         .functype test_ctselect_i16 (i32, i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    local.get 1
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.xor
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.sub
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.xor
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_i16:
+; W64:         .functype test_ctselect_i16 (i32, i32, i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    local.get 1
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i32.xor
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.sub
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i32.xor
+; W64-NEXT:    # fallthrough-return
+  %result = call i16 @llvm.ct.select.i16(i1 %cond, i16 %a, i16 %b)
+  ret i16 %result ; expected lowering: ((a ^ b) & (0 - (cond & 1))) ^ b, in i32 ops
+}
+
+define i32 @test_ctselect_i32(i1 %cond, i32 %a, i32 %b) {
+; W32-LABEL: test_ctselect_i32:
+; W32:         .functype test_ctselect_i32 (i32, i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.tee 0
+; W32-NEXT:    i32.sub
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.add
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_i32:
+; W64:         .functype test_ctselect_i32 (i32, i32, i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.tee 0
+; W64-NEXT:    i32.sub
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.add
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    # fallthrough-return
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+  ret i32 %result ; expected lowering: m = cond & 1; ((0 - m) & a) | ((m - 1) & b)
+}
+
+define i64 @test_ctselect_i64(i1 %cond, i64 %a, i64 %b) {
+; W32-LABEL: test_ctselect_i64:
+; W32:         .functype test_ctselect_i64 (i32, i64, i64) -> (i64)
+; W32-NEXT:    .local i64
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i64.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i64.extend_i32_u
+; W32-NEXT:    i64.const 1
+; W32-NEXT:    i64.and
+; W32-NEXT:    local.tee 3
+; W32-NEXT:    i64.sub
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i64.and
+; W32-NEXT:    local.get 3
+; W32-NEXT:    i64.const -1
+; W32-NEXT:    i64.add
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i64.and
+; W32-NEXT:    i64.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_i64:
+; W64:         .functype test_ctselect_i64 (i32, i64, i64) -> (i64)
+; W64-NEXT:    .local i64
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i64.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i64.extend_i32_u
+; W64-NEXT:    i64.const 1
+; W64-NEXT:    i64.and
+; W64-NEXT:    local.tee 3
+; W64-NEXT:    i64.sub
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i64.and
+; W64-NEXT:    local.get 3
+; W64-NEXT:    i64.const -1
+; W64-NEXT:    i64.add
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i64.and
+; W64-NEXT:    i64.or
+; W64-NEXT:    # fallthrough-return
+  %result = call i64 @llvm.ct.select.i64(i1 %cond, i64 %a, i64 %b)
+  ret i64 %result ; expected lowering: m = zext(cond) & 1; ((0 - m) & a) | ((m - 1) & b), in i64 ops
+}
+
+define ptr @test_ctselect_ptr(i1 %cond, ptr %a, ptr %b) {
+; W32-LABEL: test_ctselect_ptr:
+; W32:         .functype test_ctselect_ptr (i32, i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.tee 0
+; W32-NEXT:    i32.sub
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.add
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_ptr:
+; W64:         .functype test_ctselect_ptr (i32, i64, i64) -> (i64)
+; W64-NEXT:    .local i64
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i64.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i64.extend_i32_u
+; W64-NEXT:    i64.const 1
+; W64-NEXT:    i64.and
+; W64-NEXT:    local.tee 3
+; W64-NEXT:    i64.sub
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i64.and
+; W64-NEXT:    local.get 3
+; W64-NEXT:    i64.const -1
+; W64-NEXT:    i64.add
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i64.and
+; W64-NEXT:    i64.or
+; W64-NEXT:    # fallthrough-return
+  %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b)
+  ret ptr %result ; pointers are selected with i32 ops on wasm32 and i64 ops on wasm64
+}
+
+; Test with constant conditions (a constant-true condition folds to a bare local.get of %a)
+define i32 @test_ctselect_const_true(i32 %a, i32 %b) {
+; W32-LABEL: test_ctselect_const_true:
+; W32:         .functype test_ctselect_const_true (i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    local.get 0
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_const_true:
+; W64:         .functype test_ctselect_const_true (i32, i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    local.get 0
+; W64-NEXT:    # fallthrough-return
+  %result = call i32 @llvm.ct.select.i32(i1 true, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+define i32 @test_ctselect_const_false(i32 %a, i32 %b) {
+; W32-LABEL: test_ctselect_const_false:
+; W32:         .functype test_ctselect_const_false (i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_const_false:
+; W64:         .functype test_ctselect_const_false (i32, i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.or
+; W64-NEXT:    # fallthrough-return
+  %result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b) ; NOTE(review): unlike the `true` case, the expected output keeps a redundant `0 | %b` (i32.const 0 / i32.or)
+  ret i32 %result
+}
+
+; Test with comparison conditions: the icmp result becomes a 0/-1 mask m, then the value is (m & a) | (~m & b)
+define i32 @test_ctselect_icmp_eq(i32 %x, i32 %y, i32 %a, i32 %b) {
+; W32-LABEL: test_ctselect_icmp_eq:
+; W32:         .functype test_ctselect_icmp_eq (i32, i32, i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.eq
+; W32-NEXT:    i32.select
+; W32-NEXT:    local.tee 1
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.xor
+; W32-NEXT:    local.get 3
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_icmp_eq:
+; W64:         .functype test_ctselect_icmp_eq (i32, i32, i32, i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.eq
+; W64-NEXT:    i32.select
+; W64-NEXT:    local.tee 1
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.xor
+; W64-NEXT:    local.get 3
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    # fallthrough-return
+  %cond = icmp eq i32 %x, %y ; NOTE(review): the expected code builds the mask with i32.select; confirm that is intended for ct.select
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+define i32 @test_ctselect_icmp_ne(i32 %x, i32 %y, i32 %a, i32 %b) {
+; W32-LABEL: test_ctselect_icmp_ne:
+; W32:         .functype test_ctselect_icmp_ne (i32, i32, i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.ne
+; W32-NEXT:    i32.select
+; W32-NEXT:    local.tee 1
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.xor
+; W32-NEXT:    local.get 3
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_icmp_ne:
+; W64:         .functype test_ctselect_icmp_ne (i32, i32, i32, i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.ne
+; W64-NEXT:    i32.select
+; W64-NEXT:    local.tee 1
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.xor
+; W64-NEXT:    local.get 3
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    # fallthrough-return
+  %cond = icmp ne i32 %x, %y ; inequality condition: the expected compare opcode is i32.ne, feeding a 0/-1 mask
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+define i32 @test_ctselect_icmp_slt(i32 %x, i32 %y, i32 %a, i32 %b) {
+; W32-LABEL: test_ctselect_icmp_slt:
+; W32:         .functype test_ctselect_icmp_slt (i32, i32, i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.lt_s
+; W32-NEXT:    i32.select
+; W32-NEXT:    local.tee 1
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.xor
+; W32-NEXT:    local.get 3
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_icmp_slt:
+; W64:         .functype test_ctselect_icmp_slt (i32, i32, i32, i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.lt_s
+; W64-NEXT:    i32.select
+; W64-NEXT:    local.tee 1
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.xor
+; W64-NEXT:    local.get 3
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    # fallthrough-return
+  %cond = icmp slt i32 %x, %y ; signed less-than: the expected compare opcode is i32.lt_s
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+define i32 @test_ctselect_icmp_ult(i32 %x, i32 %y, i32 %a, i32 %b) {
+; W32-LABEL: test_ctselect_icmp_ult:
+; W32:         .functype test_ctselect_icmp_ult (i32, i32, i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.lt_u
+; W32-NEXT:    i32.select
+; W32-NEXT:    local.tee 1
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.xor
+; W32-NEXT:    local.get 3
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_icmp_ult:
+; W64:         .functype test_ctselect_icmp_ult (i32, i32, i32, i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.lt_u
+; W64-NEXT:    i32.select
+; W64-NEXT:    local.tee 1
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.xor
+; W64-NEXT:    local.get 3
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    # fallthrough-return
+  %cond = icmp ult i32 %x, %y ; unsigned less-than: the expected compare opcode is i32.lt_u
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+; Test with memory operands: both pointers are loaded unconditionally, then the loaded values are masked
+define i32 @test_ctselect_load(i1 %cond, ptr %p1, ptr %p2) {
+; W32-LABEL: test_ctselect_load:
+; W32:         .functype test_ctselect_load (i32, i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.tee 0
+; W32-NEXT:    i32.sub
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.load 0
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.add
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.load 0
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_load:
+; W64:         .functype test_ctselect_load (i32, i64, i64) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.tee 0
+; W64-NEXT:    i32.sub
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.load 0
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.add
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i32.load 0
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    # fallthrough-return
+  %a = load i32, ptr %p1
+  %b = load i32, ptr %p2
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) ; selects between the two loaded i32 values, so the mask ops stay i32 on wasm64 too
+  ret i32 %result
+}
+
+; Test nested ctselect calls: the inner result (chosen by %cond2) is the true operand of the outer select (%cond1)
+define i32 @test_ctselect_nested(i1 %cond1, i1 %cond2, i32 %a, i32 %b, i32 %c) {
+; W32-LABEL: test_ctselect_nested:
+; W32:         .functype test_ctselect_nested (i32, i32, i32, i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.tee 0
+; W32-NEXT:    i32.sub
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.const 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.tee 1
+; W32-NEXT:    i32.sub
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.add
+; W32-NEXT:    local.get 3
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.add
+; W32-NEXT:    local.get 4
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_ctselect_nested:
+; W64:         .functype test_ctselect_nested (i32, i32, i32, i32, i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.tee 0
+; W64-NEXT:    i32.sub
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.const 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.tee 1
+; W64-NEXT:    i32.sub
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.add
+; W64-NEXT:    local.get 3
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.add
+; W64-NEXT:    local.get 4
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    # fallthrough-return
+  %inner = call i32 @llvm.ct.select.i32(i1 %cond2, i32 %a, i32 %b)
+  %result = call i32 @llvm.ct.select.i32(i1 %cond1, i32 %inner, i32 %c)
+  ret i32 %result
+}
+
+; Overloads of llvm.ct.select declared for this file: i8, i16, i32, i64 and p0 (ptr)
+declare i8 @llvm.ct.select.i8(i1, i8, i8)
+declare i16 @llvm.ct.select.i16(i1, i16, i16)
+declare i32 @llvm.ct.select.i32(i1, i32, i32)
+declare i64 @llvm.ct.select.i64(i1, i64, i64)
+declare ptr @llvm.ct.select.p0(i1, ptr, ptr)
diff --git a/llvm/test/CodeGen/WebAssembly/ctselect-side-effects.ll b/llvm/test/CodeGen/WebAssembly/ctselect-side-effects.ll
new file mode 100644
index 0000000000000..5b20e892c64d2
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/ctselect-side-effects.ll
@@ -0,0 +1,226 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=wasm32-unknown-unknown -O3 -filetype=asm | FileCheck %s --check-prefix=W32
+; RUN: llc < %s -mtriple=wasm64-unknown-unknown -O3 -filetype=asm | FileCheck %s --check-prefix=W64
+
+; Test 1: Basic optimizations should still work: or 0 / and -1 / xor 0 are identities, so only %x is returned
+define i32 @test_basic_opts(i32 %x) {
+; W32-LABEL: test_basic_opts:
+; W32:         .functype test_basic_opts (i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    local.get 0
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_basic_opts:
+; W64:         .functype test_basic_opts (i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    local.get 0
+; W64-NEXT:    # fallthrough-return
+  %a = or i32 %x, 0      ; Should eliminate
+  %b = and i32 %a, -1    ; Should eliminate
+  %c = xor i32 %b, 0     ; Should eliminate
+  ret i32 %c
+}
+
+; Test 2: Constant folding should work: an xor of two constants must become a single i32.const
+define i32 @test_constant_fold() {
+; W32-LABEL: test_constant_fold:
+; W32:         .functype test_constant_fold () -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_constant_fold:
+; W64:         .functype test_constant_fold () -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    # fallthrough-return
+  %a = xor i32 -1, -1    ; Should fold to 0
+  ret i32 %a
+}
+
+; Test 3: ct.select must lower to straight-line mask arithmetic: no block/br_if and no i32.select
+define i32 @test_protected_no_branch(i1 %cond, i32 %a, i32 %b) {
+; W32-LABEL: test_protected_no_branch:
+; W32:         .functype test_protected_no_branch (i32, i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.tee 0
+; W32-NEXT:    i32.sub
+; W32-NEXT:    local.get 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const -1
+; W32-NEXT:    i32.add
+; W32-NEXT:    local.get 2
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.or
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_protected_no_branch:
+; W64:         .functype test_protected_no_branch (i32, i32, i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.tee 0
+; W64-NEXT:    i32.sub
+; W64-NEXT:    local.get 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const -1
+; W64-NEXT:    i32.add
+; W64-NEXT:    local.get 2
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.or
+; W64-NEXT:    # fallthrough-return
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+; Test 4: An explicit IR `br` is not a ct.select, so a real branch (block/br_if) is still expected
+define i32 @test_explicit_branch(i1 %cond, i32 %a, i32 %b) {
+; W32-LABEL: test_explicit_branch:
+; W32:         .functype test_explicit_branch (i32, i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    block
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.eqz
+; W32-NEXT:    br_if 0 # 0: down to label0
+; W32-NEXT:  # %bb.1: # %true
+; W32-NEXT:    local.get 1
+; W32-NEXT:    return
+; W32-NEXT:  .LBB3_2: # %false
+; W32-NEXT:    end_block # label0:
+; W32-NEXT:    local.get 2
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_explicit_branch:
+; W64:         .functype test_explicit_branch (i32, i32, i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    block
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.eqz
+; W64-NEXT:    br_if 0 # 0: down to label0
+; W64-NEXT:  # %bb.1: # %true
+; W64-NEXT:    local.get 1
+; W64-NEXT:    return
+; W64-NEXT:  .LBB3_2: # %false
+; W64-NEXT:    end_block # label0:
+; W64-NEXT:    local.get 2
+; W64-NEXT:    # fallthrough-return
+  br i1 %cond, label %true, label %false
+true:
+  ret i32 %a
+false:
+  ret i32 %b
+}
+
+; Test 5: Plain `select` (not ct.select): any lowering is acceptable; the expected output uses i32.select
+define i32 @test_regular_select(i1 %cond, i32 %a, i32 %b) {
+; W32-LABEL: test_regular_select:
+; W32:         .functype test_regular_select (i32, i32, i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    local.get 1
+; W32-NEXT:    local.get 2
+; W32-NEXT:    local.get 0
+; W32-NEXT:    i32.const 1
+; W32-NEXT:    i32.and
+; W32-NEXT:    i32.select
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_regular_select:
+; W64:         .functype test_regular_select (i32, i32, i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    local.get 1
+; W64-NEXT:    local.get 2
+; W64-NEXT:    local.get 0
+; W64-NEXT:    i32.const 1
+; W64-NEXT:    i32.and
+; W64-NEXT:    i32.select
+; W64-NEXT:    # fallthrough-return
+  %result = select i1 %cond, i32 %a, i32 %b
+  ret i32 %result
+}
+
+; Test that XOR of all-ones with itself still folds to 0 (NOTE(review): same IR as @test_constant_fold)
+define i32 @test_xor_all_ones() {
+; W32-LABEL: test_xor_all_ones:
+; W32:         .functype test_xor_all_ones () -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_xor_all_ones:
+; W64:         .functype test_xor_all_ones () -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    # fallthrough-return
+  %xor1 = xor i32 -1, -1  ; Should optimize to 0
+  ret i32 %xor1
+}
+
+define i32 @test_xor_same_value(i32 %x) {
+; W32-LABEL: test_xor_same_value:
+; W32:         .functype test_xor_same_value (i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_xor_same_value:
+; W64:         .functype test_xor_same_value (i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    # fallthrough-return
+  %xor2 = xor i32 %x, %x  ; x ^ x is 0 for every %x, so only i32.const 0 should remain
+  ret i32 %xor2
+}
+
+define i32 @test_normal_ops(i32 %x) {
+; W32-LABEL: test_normal_ops:
+; W32:         .functype test_normal_ops (i32) -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    local.get 0
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_normal_ops:
+; W64:         .functype test_normal_ops (i32) -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    local.get 0
+; W64-NEXT:    # fallthrough-return
+  %or1 = or i32 %x, 0 ; NOTE(review): same three identity ops as @test_basic_opts; consider keeping only one of the two
+  %and1 = and i32 %or1, -1
+  %xor1 = xor i32 %and1, 0
+  ret i32 %xor1
+}
+
+; XORs whose operands are both constants, OR-ed together, must still fold away completely
+define i32 @test_xor_with_const_operands() {
+; W32-LABEL: test_xor_with_const_operands:
+; W32:         .functype test_xor_with_const_operands () -> (i32)
+; W32-NEXT:  # %bb.0:
+; W32-NEXT:    i32.const 0
+; W32-NEXT:    # fallthrough-return
+;
+; W64-LABEL: test_xor_with_const_operands:
+; W64:         .functype test_xor_with_const_operands () -> (i32)
+; W64-NEXT:  # %bb.0:
+; W64-NEXT:    i32.const 0
+; W64-NEXT:    # fallthrough-return
+  %a = xor i32 -1, -1
+  %b = xor i32 0, 0
+  %c = xor i32 42, 42
+  %result = or i32 %a, %b
+  %final = or i32 %result, %c
+  ret i32 %final  ; Should optimize to 0
+}
+
+declare i32 @llvm.ct.select.i32(i1, i32, i32) ; the only ct.select overload this file calls
+

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to