https://github.com/wizardengineer updated https://github.com/llvm/llvm-project/pull/166705
>From 9dac6cad69ae839442c551be1e0a03617f8579d8 Mon Sep 17 00:00:00 2001 From: wizardengineer <[email protected]> Date: Wed, 5 Nov 2025 11:01:26 -0500 Subject: [PATCH] [LLVM][MIPS] Add comprehensive tests for ct.select --- .../Mips/ctselect-fallback-edge-cases.ll | 244 +++++ .../Mips/ctselect-fallback-patterns.ll | 426 +++++++++ .../CodeGen/Mips/ctselect-fallback-vector.ll | 830 ++++++++++++++++++ llvm/test/CodeGen/Mips/ctselect-fallback.ll | 371 ++++++++ .../CodeGen/Mips/ctselect-side-effects.ll | 183 ++++ 5 files changed, 2054 insertions(+) create mode 100644 llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll create mode 100644 llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll create mode 100644 llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll create mode 100644 llvm/test/CodeGen/Mips/ctselect-fallback.ll create mode 100644 llvm/test/CodeGen/Mips/ctselect-side-effects.ll diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll b/llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll new file mode 100644 index 0000000000000..f1831a625d4a4 --- /dev/null +++ b/llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll @@ -0,0 +1,244 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=mipsel-unknown-linux-gnu -O3 | FileCheck %s --check-prefix=M32 +; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -O3 | FileCheck %s --check-prefix=M64 + +; Portable edge case tests + +; Test with small integer types +define i1 @test_ctselect_i1(i1 %cond, i1 %a, i1 %b) { +; M32-LABEL: test_ctselect_i1: +; M32: # %bb.0: +; M32-NEXT: xori $2, $4, 1 +; M32-NEXT: and $1, $4, $5 +; M32-NEXT: and $2, $2, $6 +; M32-NEXT: jr $ra +; M32-NEXT: or $2, $1, $2 +; +; M64-LABEL: test_ctselect_i1: +; M64: # %bb.0: +; M64-NEXT: sll $2, $4, 0 +; M64-NEXT: sll $1, $6, 0 +; M64-NEXT: xori $2, $2, 1 +; M64-NEXT: and $1, $2, $1 +; M64-NEXT: and $2, $4, $5 +; M64-NEXT: sll $2, $2, 0 +; M64-NEXT: jr $ra +; M64-NEXT: 
or $2, $2, $1 + %result = call i1 @llvm.ct.select.i1(i1 %cond, i1 %a, i1 %b) + ret i1 %result +} + +; Test with extremal values +define i32 @test_ctselect_extremal_values(i1 %cond) { +; M32-LABEL: test_ctselect_extremal_values: +; M32: # %bb.0: +; M32-NEXT: lui $3, 32767 +; M32-NEXT: andi $1, $4, 1 +; M32-NEXT: negu $2, $1 +; M32-NEXT: ori $3, $3, 65535 +; M32-NEXT: addiu $1, $1, -1 +; M32-NEXT: and $2, $2, $3 +; M32-NEXT: lui $3, 32768 +; M32-NEXT: and $1, $1, $3 +; M32-NEXT: jr $ra +; M32-NEXT: or $2, $2, $1 +; +; M64-LABEL: test_ctselect_extremal_values: +; M64: # %bb.0: +; M64-NEXT: sll $1, $4, 0 +; M64-NEXT: lui $3, 32767 +; M64-NEXT: andi $1, $1, 1 +; M64-NEXT: ori $3, $3, 65535 +; M64-NEXT: negu $2, $1 +; M64-NEXT: addiu $1, $1, -1 +; M64-NEXT: and $2, $2, $3 +; M64-NEXT: lui $3, 32768 +; M64-NEXT: and $1, $1, $3 +; M64-NEXT: jr $ra +; M64-NEXT: or $2, $2, $1 + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 2147483647, i32 -2147483648) + ret i32 %result +} + +; Test with null pointers +define ptr @test_ctselect_null_ptr(i1 %cond, ptr %ptr) { +; M32-LABEL: test_ctselect_null_ptr: +; M32: # %bb.0: +; M32-NEXT: andi $1, $4, 1 +; M32-NEXT: negu $1, $1 +; M32-NEXT: jr $ra +; M32-NEXT: and $2, $1, $5 +; +; M64-LABEL: test_ctselect_null_ptr: +; M64: # %bb.0: +; M64-NEXT: andi $1, $4, 1 +; M64-NEXT: dnegu $1, $1 +; M64-NEXT: jr $ra +; M64-NEXT: and $2, $1, $5 + %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %ptr, ptr null) + ret ptr %result +} + +; Test with function pointers +define ptr @test_ctselect_function_ptr(i1 %cond, ptr %func1, ptr %func2) { +; M32-LABEL: test_ctselect_function_ptr: +; M32: # %bb.0: +; M32-NEXT: andi $1, $4, 1 +; M32-NEXT: negu $2, $1 +; M32-NEXT: addiu $1, $1, -1 +; M32-NEXT: and $2, $2, $5 +; M32-NEXT: and $1, $1, $6 +; M32-NEXT: jr $ra +; M32-NEXT: or $2, $2, $1 +; +; M64-LABEL: test_ctselect_function_ptr: +; M64: # %bb.0: +; M64-NEXT: andi $1, $4, 1 +; M64-NEXT: dnegu $2, $1 +; M64-NEXT: daddiu $1, $1, -1 +; M64-NEXT: and $2, 
$2, $5 +; M64-NEXT: and $1, $1, $6 +; M64-NEXT: jr $ra +; M64-NEXT: or $2, $2, $1 + %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %func1, ptr %func2) + ret ptr %result +} + +; Test with condition from icmp on pointers +define ptr @test_ctselect_ptr_cmp(ptr %p1, ptr %p2, ptr %a, ptr %b) { +; M32-LABEL: test_ctselect_ptr_cmp: +; M32: # %bb.0: +; M32-NEXT: xor $1, $4, $5 +; M32-NEXT: sltu $1, $zero, $1 +; M32-NEXT: addiu $1, $1, -1 +; M32-NEXT: and $2, $1, $6 +; M32-NEXT: not $1, $1 +; M32-NEXT: and $1, $1, $7 +; M32-NEXT: jr $ra +; M32-NEXT: or $2, $2, $1 +; +; M64-LABEL: test_ctselect_ptr_cmp: +; M64: # %bb.0: +; M64-NEXT: xor $1, $4, $5 +; M64-NEXT: daddiu $3, $zero, -1 +; M64-NEXT: daddiu $2, $zero, -1 +; M64-NEXT: movn $3, $zero, $1 +; M64-NEXT: xor $2, $3, $2 +; M64-NEXT: and $1, $3, $6 +; M64-NEXT: and $2, $2, $7 +; M64-NEXT: jr $ra +; M64-NEXT: or $2, $1, $2 + %cmp = icmp eq ptr %p1, %p2 + %result = call ptr @llvm.ct.select.p0(i1 %cmp, ptr %a, ptr %b) + ret ptr %result +} + +; Test with struct pointer types +%struct.pair = type { i32, i32 } + +define ptr @test_ctselect_struct_ptr(i1 %cond, ptr %a, ptr %b) { +; M32-LABEL: test_ctselect_struct_ptr: +; M32: # %bb.0: +; M32-NEXT: andi $1, $4, 1 +; M32-NEXT: negu $2, $1 +; M32-NEXT: addiu $1, $1, -1 +; M32-NEXT: and $2, $2, $5 +; M32-NEXT: and $1, $1, $6 +; M32-NEXT: jr $ra +; M32-NEXT: or $2, $2, $1 +; +; M64-LABEL: test_ctselect_struct_ptr: +; M64: # %bb.0: +; M64-NEXT: andi $1, $4, 1 +; M64-NEXT: dnegu $2, $1 +; M64-NEXT: daddiu $1, $1, -1 +; M64-NEXT: and $2, $2, $5 +; M64-NEXT: and $1, $1, $6 +; M64-NEXT: jr $ra +; M64-NEXT: or $2, $2, $1 + %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b) + ret ptr %result +} + +; Test with deeply nested conditions +define i32 @test_ctselect_deeply_nested(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) { +; M32-LABEL: test_ctselect_deeply_nested: +; M32: # %bb.0: +; M32-NEXT: andi $1, $4, 1 +; M32-NEXT: lw $3, 16($sp) +; M32-NEXT: 
lw $9, 32($sp) +; M32-NEXT: lw $8, 28($sp) +; M32-NEXT: negu $2, $1 +; M32-NEXT: addiu $1, $1, -1 +; M32-NEXT: and $2, $2, $3 +; M32-NEXT: lw $3, 20($sp) +; M32-NEXT: and $1, $1, $3 +; M32-NEXT: andi $3, $5, 1 +; M32-NEXT: or $1, $2, $1 +; M32-NEXT: andi $2, $6, 1 +; M32-NEXT: andi $6, $7, 1 +; M32-NEXT: negu $4, $3 +; M32-NEXT: addiu $3, $3, -1 +; M32-NEXT: addiu $7, $6, -1 +; M32-NEXT: and $1, $4, $1 +; M32-NEXT: addiu $5, $2, -1 +; M32-NEXT: negu $2, $2 +; M32-NEXT: negu $6, $6 +; M32-NEXT: and $4, $7, $9 +; M32-NEXT: lw $7, 24($sp) +; M32-NEXT: and $5, $5, $8 +; M32-NEXT: and $3, $3, $7 +; M32-NEXT: or $1, $1, $3 +; M32-NEXT: and $1, $2, $1 +; M32-NEXT: or $1, $1, $5 +; M32-NEXT: and $1, $6, $1 +; M32-NEXT: jr $ra +; M32-NEXT: or $2, $1, $4 +; +; M64-LABEL: test_ctselect_deeply_nested: +; M64: # %bb.0: +; M64-NEXT: sll $1, $4, 0 +; M64-NEXT: sll $3, $8, 0 +; M64-NEXT: sll $4, $5, 0 +; M64-NEXT: lw $8, 0($sp) +; M64-NEXT: andi $1, $1, 1 +; M64-NEXT: andi $4, $4, 1 +; M64-NEXT: negu $2, $1 +; M64-NEXT: addiu $1, $1, -1 +; M64-NEXT: negu $5, $4 +; M64-NEXT: addiu $4, $4, -1 +; M64-NEXT: and $2, $2, $3 +; M64-NEXT: sll $3, $9, 0 +; M64-NEXT: and $1, $1, $3 +; M64-NEXT: sll $3, $11, 0 +; M64-NEXT: or $1, $2, $1 +; M64-NEXT: sll $2, $6, 0 +; M64-NEXT: sll $6, $7, 0 +; M64-NEXT: andi $2, $2, 1 +; M64-NEXT: and $1, $5, $1 +; M64-NEXT: andi $6, $6, 1 +; M64-NEXT: addiu $5, $2, -1 +; M64-NEXT: negu $2, $2 +; M64-NEXT: addiu $7, $6, -1 +; M64-NEXT: negu $6, $6 +; M64-NEXT: and $3, $5, $3 +; M64-NEXT: sll $5, $10, 0 +; M64-NEXT: and $7, $7, $8 +; M64-NEXT: and $4, $4, $5 +; M64-NEXT: or $1, $1, $4 +; M64-NEXT: and $1, $2, $1 +; M64-NEXT: or $1, $1, $3 +; M64-NEXT: and $1, $6, $1 +; M64-NEXT: jr $ra +; M64-NEXT: or $2, $1, $7 + %sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b) + %sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c) + %sel3 = call i32 @llvm.ct.select.i32(i1 %c3, i32 %sel2, i32 %d) + %sel4 = call i32 @llvm.ct.select.i32(i1 %c4, i32 %sel3, 
i32 %e) + ret i32 %sel4 +} + +; Declare the intrinsics +declare i1 @llvm.ct.select.i1(i1, i1, i1) +declare i32 @llvm.ct.select.i32(i1, i32, i32) +declare ptr @llvm.ct.select.p0(i1, ptr, ptr) diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll b/llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll new file mode 100644 index 0000000000000..2e65e586ce5fa --- /dev/null +++ b/llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll @@ -0,0 +1,426 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=mipsel-unknown-linux-gnu -O3 | FileCheck %s --check-prefix=M32 +; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -O3 | FileCheck %s --check-prefix=M64 + +; Test smin(x, 0) pattern +define i32 @test_ctselect_smin_zero(i32 %x) { +; M32-LABEL: test_ctselect_smin_zero: +; M32: # %bb.0: +; M32-NEXT: sra $1, $4, 31 +; M32-NEXT: jr $ra +; M32-NEXT: and $2, $1, $4 +; +; M64-LABEL: test_ctselect_smin_zero: +; M64: # %bb.0: +; M64-NEXT: sll $1, $4, 0 +; M64-NEXT: sra $2, $1, 31 +; M64-NEXT: jr $ra +; M64-NEXT: and $2, $2, $1 + %cmp = icmp slt i32 %x, 0 + %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0) + ret i32 %result +} + +; Test smax(x, 0) pattern +define i32 @test_ctselect_smax_zero(i32 %x) { +; M32-LABEL: test_ctselect_smax_zero: +; M32: # %bb.0: +; M32-NEXT: slti $1, $4, 1 +; M32-NEXT: movn $4, $zero, $1 +; M32-NEXT: jr $ra +; M32-NEXT: move $2, $4 +; +; M64-LABEL: test_ctselect_smax_zero: +; M64: # %bb.0: +; M64-NEXT: sll $2, $4, 0 +; M64-NEXT: slti $1, $2, 1 +; M64-NEXT: jr $ra +; M64-NEXT: movn $2, $zero, $1 + %cmp = icmp sgt i32 %x, 0 + %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0) + ret i32 %result +} + +; Test generic smin pattern +define i32 @test_ctselect_smin_generic(i32 %x, i32 %y) { +; M32-LABEL: test_ctselect_smin_generic: +; M32: # %bb.0: +; M32-NEXT: slt $1, $4, $5 +; M32-NEXT: xori $1, $1, 1 +; M32-NEXT: addiu $1, $1, -1 +; M32-NEXT: and $2, $1, 
$4 +; M32-NEXT: not $1, $1 +; M32-NEXT: and $1, $1, $5 +; M32-NEXT: jr $ra +; M32-NEXT: or $2, $2, $1 +; +; M64-LABEL: test_ctselect_smin_generic: +; M64: # %bb.0: +; M64-NEXT: sll $1, $5, 0 +; M64-NEXT: sll $2, $4, 0 +; M64-NEXT: slt $3, $2, $1 +; M64-NEXT: xori $3, $3, 1 +; M64-NEXT: addiu $3, $3, -1 +; M64-NEXT: and $2, $3, $2 +; M64-NEXT: not $3, $3 +; M64-NEXT: and $1, $3, $1 +; M64-NEXT: jr $ra +; M64-NEXT: or $2, $2, $1 + %cmp = icmp slt i32 %x, %y + %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y) + ret i32 %result +} + +; Test generic smax pattern +define i32 @test_ctselect_smax_generic(i32 %x, i32 %y) { +; M32-LABEL: test_ctselect_smax_generic: +; M32: # %bb.0: +; M32-NEXT: slt $1, $5, $4 +; M32-NEXT: xori $1, $1, 1 +; M32-NEXT: addiu $1, $1, -1 +; M32-NEXT: and $2, $1, $4 +; M32-NEXT: not $1, $1 +; M32-NEXT: and $1, $1, $5 +; M32-NEXT: jr $ra +; M32-NEXT: or $2, $2, $1 +; +; M64-LABEL: test_ctselect_smax_generic: +; M64: # %bb.0: +; M64-NEXT: sll $1, $4, 0 +; M64-NEXT: sll $2, $5, 0 +; M64-NEXT: slt $3, $2, $1 +; M64-NEXT: xori $3, $3, 1 +; M64-NEXT: addiu $3, $3, -1 +; M64-NEXT: and $1, $3, $1 +; M64-NEXT: not $3, $3 +; M64-NEXT: and $2, $3, $2 +; M64-NEXT: jr $ra +; M64-NEXT: or $2, $1, $2 + %cmp = icmp sgt i32 %x, %y + %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y) + ret i32 %result +} + +; Test umin pattern +define i32 @test_ctselect_umin_generic(i32 %x, i32 %y) { +; M32-LABEL: test_ctselect_umin_generic: +; M32: # %bb.0: +; M32-NEXT: sltu $1, $4, $5 +; M32-NEXT: xori $1, $1, 1 +; M32-NEXT: addiu $1, $1, -1 +; M32-NEXT: and $2, $1, $4 +; M32-NEXT: not $1, $1 +; M32-NEXT: and $1, $1, $5 +; M32-NEXT: jr $ra +; M32-NEXT: or $2, $2, $1 +; +; M64-LABEL: test_ctselect_umin_generic: +; M64: # %bb.0: +; M64-NEXT: sll $1, $5, 0 +; M64-NEXT: sll $2, $4, 0 +; M64-NEXT: sltu $3, $2, $1 +; M64-NEXT: xori $3, $3, 1 +; M64-NEXT: addiu $3, $3, -1 +; M64-NEXT: and $2, $3, $2 +; M64-NEXT: not $3, $3 +; M64-NEXT: and $1, $3, $1 +; 
M64-NEXT: jr $ra +; M64-NEXT: or $2, $2, $1 + %cmp = icmp ult i32 %x, %y + %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y) + ret i32 %result +} + +; Test umax pattern +define i32 @test_ctselect_umax_generic(i32 %x, i32 %y) { +; M32-LABEL: test_ctselect_umax_generic: +; M32: # %bb.0: +; M32-NEXT: sltu $1, $5, $4 +; M32-NEXT: xori $1, $1, 1 +; M32-NEXT: addiu $1, $1, -1 +; M32-NEXT: and $2, $1, $4 +; M32-NEXT: not $1, $1 +; M32-NEXT: and $1, $1, $5 +; M32-NEXT: jr $ra +; M32-NEXT: or $2, $2, $1 +; +; M64-LABEL: test_ctselect_umax_generic: +; M64: # %bb.0: +; M64-NEXT: sll $1, $4, 0 +; M64-NEXT: sll $2, $5, 0 +; M64-NEXT: sltu $3, $2, $1 +; M64-NEXT: xori $3, $3, 1 +; M64-NEXT: addiu $3, $3, -1 +; M64-NEXT: and $1, $3, $1 +; M64-NEXT: not $3, $3 +; M64-NEXT: and $2, $3, $2 +; M64-NEXT: jr $ra +; M64-NEXT: or $2, $1, $2 + %cmp = icmp ugt i32 %x, %y + %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y) + ret i32 %result +} + +; Test abs pattern +define i32 @test_ctselect_abs(i32 %x) { +; M32-LABEL: test_ctselect_abs: +; M32: # %bb.0: +; M32-NEXT: negu $1, $4 +; M32-NEXT: sra $2, $4, 31 +; M32-NEXT: and $1, $2, $1 +; M32-NEXT: not $2, $2 +; M32-NEXT: and $2, $2, $4 +; M32-NEXT: jr $ra +; M32-NEXT: or $2, $1, $2 +; +; M64-LABEL: test_ctselect_abs: +; M64: # %bb.0: +; M64-NEXT: sll $1, $4, 0 +; M64-NEXT: negu $2, $1 +; M64-NEXT: sra $3, $1, 31 +; M64-NEXT: and $2, $3, $2 +; M64-NEXT: not $3, $3 +; M64-NEXT: and $1, $3, $1 +; M64-NEXT: jr $ra +; M64-NEXT: or $2, $2, $1 + %neg = sub i32 0, %x + %cmp = icmp slt i32 %x, 0 + %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %neg, i32 %x) + ret i32 %result +} + +; Test nabs pattern (negative abs) +define i32 @test_ctselect_nabs(i32 %x) { +; M32-LABEL: test_ctselect_nabs: +; M32: # %bb.0: +; M32-NEXT: sra $1, $4, 31 +; M32-NEXT: negu $3, $4 +; M32-NEXT: and $2, $1, $4 +; M32-NEXT: not $1, $1 +; M32-NEXT: and $1, $1, $3 +; M32-NEXT: jr $ra +; M32-NEXT: or $2, $2, $1 +; +; M64-LABEL: 
test_ctselect_nabs: +; M64: # %bb.0: +; M64-NEXT: sll $1, $4, 0 +; M64-NEXT: sra $2, $1, 31 +; M64-NEXT: and $3, $2, $1 +; M64-NEXT: negu $1, $1 +; M64-NEXT: not $2, $2 +; M64-NEXT: and $1, $2, $1 +; M64-NEXT: jr $ra +; M64-NEXT: or $2, $3, $1 + %neg = sub i32 0, %x + %cmp = icmp slt i32 %x, 0 + %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %neg) + ret i32 %result +} + +; Test sign extension pattern +define i32 @test_ctselect_sign_extend(i32 %x) { +; M32-LABEL: test_ctselect_sign_extend: +; M32: # %bb.0: +; M32-NEXT: jr $ra +; M32-NEXT: sra $2, $4, 31 +; +; M64-LABEL: test_ctselect_sign_extend: +; M64: # %bb.0: +; M64-NEXT: sll $1, $4, 0 +; M64-NEXT: jr $ra +; M64-NEXT: sra $2, $1, 31 + %cmp = icmp slt i32 %x, 0 + %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 -1, i32 0) + ret i32 %result +} + +; Test zero extension pattern +define i32 @test_ctselect_zero_extend(i32 %x) { +; M32-LABEL: test_ctselect_zero_extend: +; M32: # %bb.0: +; M32-NEXT: jr $ra +; M32-NEXT: sltu $2, $zero, $4 +; +; M64-LABEL: test_ctselect_zero_extend: +; M64: # %bb.0: +; M64-NEXT: sll $1, $4, 0 +; M64-NEXT: jr $ra +; M64-NEXT: sltu $2, $zero, $1 + %cmp = icmp ne i32 %x, 0 + %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 1, i32 0) + ret i32 %result +} + +; Test constant folding with known condition +define i32 @test_ctselect_constant_folding_true(i32 %a, i32 %b) { +; M32-LABEL: test_ctselect_constant_folding_true: +; M32: # %bb.0: +; M32-NEXT: jr $ra +; M32-NEXT: move $2, $4 +; +; M64-LABEL: test_ctselect_constant_folding_true: +; M64: # %bb.0: +; M64-NEXT: jr $ra +; M64-NEXT: sll $2, $4, 0 + %result = call i32 @llvm.ct.select.i32(i1 true, i32 %a, i32 %b) + ret i32 %result +} + +define i32 @test_ctselect_constant_folding_false(i32 %a, i32 %b) { +; M32-LABEL: test_ctselect_constant_folding_false: +; M32: # %bb.0: +; M32-NEXT: jr $ra +; M32-NEXT: or $2, $zero, $5 +; +; M64-LABEL: test_ctselect_constant_folding_false: +; M64: # %bb.0: +; M64-NEXT: sll $1, $5, 0 +; M64-NEXT: 
jr $ra +; M64-NEXT: or $2, $zero, $1 + %result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b) + ret i32 %result +} + +; Test with identical operands +define i32 @test_ctselect_identical_operands(i1 %cond, i32 %x) { +; M32-LABEL: test_ctselect_identical_operands: +; M32: # %bb.0: +; M32-NEXT: andi $1, $4, 1 +; M32-NEXT: negu $2, $1 +; M32-NEXT: addiu $1, $1, -1 +; M32-NEXT: and $2, $2, $5 +; M32-NEXT: and $1, $1, $5 +; M32-NEXT: jr $ra +; M32-NEXT: or $2, $2, $1 +; +; M64-LABEL: test_ctselect_identical_operands: +; M64: # %bb.0: +; M64-NEXT: sll $1, $4, 0 +; M64-NEXT: sll $3, $5, 0 +; M64-NEXT: andi $1, $1, 1 +; M64-NEXT: negu $2, $1 +; M64-NEXT: addiu $1, $1, -1 +; M64-NEXT: and $2, $2, $3 +; M64-NEXT: and $1, $1, $3 +; M64-NEXT: jr $ra +; M64-NEXT: or $2, $2, $1 + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %x, i32 %x) + ret i32 %result +} + +; Test with inverted condition +define i32 @test_ctselect_inverted_condition(i32 %x, i32 %y, i32 %a, i32 %b) { +; M32-LABEL: test_ctselect_inverted_condition: +; M32: # %bb.0: +; M32-NEXT: xor $1, $4, $5 +; M32-NEXT: sltiu $1, $1, 1 +; M32-NEXT: addiu $1, $1, -1 +; M32-NEXT: and $2, $1, $6 +; M32-NEXT: not $1, $1 +; M32-NEXT: and $1, $1, $7 +; M32-NEXT: jr $ra +; M32-NEXT: or $2, $2, $1 +; +; M64-LABEL: test_ctselect_inverted_condition: +; M64: # %bb.0: +; M64-NEXT: sll $1, $5, 0 +; M64-NEXT: sll $2, $4, 0 +; M64-NEXT: sll $3, $7, 0 +; M64-NEXT: xor $1, $2, $1 +; M64-NEXT: sll $2, $6, 0 +; M64-NEXT: sltiu $1, $1, 1 +; M64-NEXT: addiu $1, $1, -1 +; M64-NEXT: and $2, $1, $2 +; M64-NEXT: not $1, $1 +; M64-NEXT: and $1, $1, $3 +; M64-NEXT: jr $ra +; M64-NEXT: or $2, $2, $1 + %cmp = icmp eq i32 %x, %y + %not_cmp = xor i1 %cmp, true + %result = call i32 @llvm.ct.select.i32(i1 %not_cmp, i32 %a, i32 %b) + ret i32 %result +} + +; Test chain of ct.select operations +define i32 @test_ctselect_chain(i1 %c1, i1 %c2, i1 %c3, i32 %a, i32 %b, i32 %c, i32 %d) { +; M32-LABEL: test_ctselect_chain: +; M32: # %bb.0: +; 
M32-NEXT: andi $1, $4, 1 +; M32-NEXT: andi $3, $5, 1 +; M32-NEXT: lw $5, 16($sp) +; M32-NEXT: negu $2, $1 +; M32-NEXT: addiu $1, $1, -1 +; M32-NEXT: negu $4, $3 +; M32-NEXT: addiu $3, $3, -1 +; M32-NEXT: and $1, $1, $5 +; M32-NEXT: and $2, $2, $7 +; M32-NEXT: lw $5, 24($sp) +; M32-NEXT: or $1, $2, $1 +; M32-NEXT: andi $2, $6, 1 +; M32-NEXT: and $1, $4, $1 +; M32-NEXT: addiu $4, $2, -1 +; M32-NEXT: negu $2, $2 +; M32-NEXT: and $4, $4, $5 +; M32-NEXT: lw $5, 20($sp) +; M32-NEXT: and $3, $3, $5 +; M32-NEXT: or $1, $1, $3 +; M32-NEXT: and $1, $2, $1 +; M32-NEXT: jr $ra +; M32-NEXT: or $2, $1, $4 +; +; M64-LABEL: test_ctselect_chain: +; M64: # %bb.0: +; M64-NEXT: sll $1, $4, 0 +; M64-NEXT: sll $3, $7, 0 +; M64-NEXT: sll $4, $5, 0 +; M64-NEXT: andi $1, $1, 1 +; M64-NEXT: andi $4, $4, 1 +; M64-NEXT: negu $2, $1 +; M64-NEXT: addiu $1, $1, -1 +; M64-NEXT: negu $5, $4 +; M64-NEXT: addiu $4, $4, -1 +; M64-NEXT: and $2, $2, $3 +; M64-NEXT: sll $3, $8, 0 +; M64-NEXT: and $1, $1, $3 +; M64-NEXT: sll $3, $6, 0 +; M64-NEXT: sll $6, $10, 0 +; M64-NEXT: or $1, $2, $1 +; M64-NEXT: andi $3, $3, 1 +; M64-NEXT: and $1, $5, $1 +; M64-NEXT: sll $5, $9, 0 +; M64-NEXT: addiu $2, $3, -1 +; M64-NEXT: negu $3, $3 +; M64-NEXT: and $4, $4, $5 +; M64-NEXT: and $2, $2, $6 +; M64-NEXT: or $1, $1, $4 +; M64-NEXT: and $1, $3, $1 +; M64-NEXT: jr $ra +; M64-NEXT: or $2, $1, $2 + %sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b) + %sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c) + %sel3 = call i32 @llvm.ct.select.i32(i1 %c3, i32 %sel2, i32 %d) + ret i32 %sel3 +} + +; Test for 64-bit operations (supported on all 64-bit architectures) +define i64 @test_ctselect_i64_smin_zero(i64 %x) { +; M32-LABEL: test_ctselect_i64_smin_zero: +; M32: # %bb.0: +; M32-NEXT: sra $1, $5, 31 +; M32-NEXT: and $2, $1, $4 +; M32-NEXT: jr $ra +; M32-NEXT: and $3, $1, $5 +; +; M64-LABEL: test_ctselect_i64_smin_zero: +; M64: # %bb.0: +; M64-NEXT: dsra $1, $4, 63 +; M64-NEXT: jr $ra +; M64-NEXT: and $2, $1, 
$4 + %cmp = icmp slt i64 %x, 0 + %result = call i64 @llvm.ct.select.i64(i1 %cmp, i64 %x, i64 0) + ret i64 %result +} + +; Declare the intrinsics +declare i32 @llvm.ct.select.i32(i1, i32, i32) +declare i64 @llvm.ct.select.i64(i1, i64, i64) diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll b/llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll new file mode 100644 index 0000000000000..6222f6052e12f --- /dev/null +++ b/llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll @@ -0,0 +1,830 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=mips64-unknown-linux-gnu -mcpu=mips64r6 -mattr=+msa -O3 | FileCheck %s --check-prefix=MIPS64-MSA +; RUN: llc < %s -mtriple=mips-unknown-linux-gnu -mcpu=mips32r6 -mattr=+msa -O3 | FileCheck %s --check-prefix=MIPS32-MSA + +; Test 32-bit integer vector (128 bits) +define <4 x i32> @test_ctselect_v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) { +; MIPS64-MSA-LABEL: test_ctselect_v4i32: +; MIPS64-MSA: # %bb.0: +; MIPS64-MSA-NEXT: insert.d $w2[0], $7 +; MIPS64-MSA-NEXT: sll $1, $4, 0 +; MIPS64-MSA-NEXT: ldi.b $w0, -1 +; MIPS64-MSA-NEXT: fill.w $w1, $1 +; MIPS64-MSA-NEXT: insert.d $w2[1], $8 +; MIPS64-MSA-NEXT: slli.w $w1, $w1, 31 +; MIPS64-MSA-NEXT: srai.w $w1, $w1, 31 +; MIPS64-MSA-NEXT: shf.w $w2, $w2, 177 +; MIPS64-MSA-NEXT: xor.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: and.v $w0, $w0, $w2 +; MIPS64-MSA-NEXT: insert.d $w2[0], $5 +; MIPS64-MSA-NEXT: insert.d $w2[1], $6 +; MIPS64-MSA-NEXT: shf.w $w2, $w2, 177 +; MIPS64-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS64-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 +; MIPS64-MSA-NEXT: copy_s.d $2, $w0[0] +; MIPS64-MSA-NEXT: jr $ra +; MIPS64-MSA-NEXT: copy_s.d $3, $w0[1] +; +; MIPS32-MSA-LABEL: test_ctselect_v4i32: +; MIPS32-MSA: # %bb.0: +; MIPS32-MSA-NEXT: lw $2, 24($sp) +; MIPS32-MSA-NEXT: lw $1, 28($sp) +; MIPS32-MSA-NEXT: fill.w $w2, $4 +; MIPS32-MSA-NEXT: ldi.b $w1, -1 +; MIPS32-MSA-NEXT: insert.w 
$w0[0], $2 +; MIPS32-MSA-NEXT: slli.w $w2, $w2, 31 +; MIPS32-MSA-NEXT: srai.w $w2, $w2, 31 +; MIPS32-MSA-NEXT: insert.w $w0[1], $1 +; MIPS32-MSA-NEXT: lw $1, 32($sp) +; MIPS32-MSA-NEXT: xor.v $w1, $w2, $w1 +; MIPS32-MSA-NEXT: insert.w $w0[2], $1 +; MIPS32-MSA-NEXT: lw $1, 36($sp) +; MIPS32-MSA-NEXT: insert.w $w0[3], $1 +; MIPS32-MSA-NEXT: lw $1, 16($sp) +; MIPS32-MSA-NEXT: and.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: insert.w $w1[0], $6 +; MIPS32-MSA-NEXT: insert.w $w1[1], $7 +; MIPS32-MSA-NEXT: insert.w $w1[2], $1 +; MIPS32-MSA-NEXT: lw $1, 20($sp) +; MIPS32-MSA-NEXT: insert.w $w1[3], $1 +; MIPS32-MSA-NEXT: and.v $w1, $w2, $w1 +; MIPS32-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: copy_s.w $2, $w0[0] +; MIPS32-MSA-NEXT: copy_s.w $3, $w0[1] +; MIPS32-MSA-NEXT: copy_s.w $4, $w0[2] +; MIPS32-MSA-NEXT: jr $ra +; MIPS32-MSA-NEXT: copy_s.w $5, $w0[3] + %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %result +} + +; Test 16-bit integer vector (8 x i16 = 128-bit) +define <8 x i16> @test_ctselect_v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b) { +; MIPS64-MSA-LABEL: test_ctselect_v8i16: +; MIPS64-MSA: # %bb.0: +; MIPS64-MSA-NEXT: insert.d $w2[0], $7 +; MIPS64-MSA-NEXT: sll $1, $4, 0 +; MIPS64-MSA-NEXT: ldi.b $w0, -1 +; MIPS64-MSA-NEXT: fill.h $w1, $1 +; MIPS64-MSA-NEXT: insert.d $w2[1], $8 +; MIPS64-MSA-NEXT: slli.h $w1, $w1, 15 +; MIPS64-MSA-NEXT: srai.h $w1, $w1, 15 +; MIPS64-MSA-NEXT: shf.h $w2, $w2, 27 +; MIPS64-MSA-NEXT: xor.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: and.v $w0, $w0, $w2 +; MIPS64-MSA-NEXT: insert.d $w2[0], $5 +; MIPS64-MSA-NEXT: insert.d $w2[1], $6 +; MIPS64-MSA-NEXT: shf.h $w2, $w2, 27 +; MIPS64-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS64-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: shf.h $w0, $w0, 27 +; MIPS64-MSA-NEXT: copy_s.d $2, $w0[0] +; MIPS64-MSA-NEXT: jr $ra +; MIPS64-MSA-NEXT: copy_s.d $3, $w0[1] +; +; MIPS32-MSA-LABEL: test_ctselect_v8i16: +; MIPS32-MSA: # %bb.0: +; MIPS32-MSA-NEXT: lw $2, 24($sp) 
+; MIPS32-MSA-NEXT: lw $1, 28($sp) +; MIPS32-MSA-NEXT: fill.h $w1, $4 +; MIPS32-MSA-NEXT: ldi.b $w0, -1 +; MIPS32-MSA-NEXT: insert.w $w2[0], $2 +; MIPS32-MSA-NEXT: slli.h $w1, $w1, 15 +; MIPS32-MSA-NEXT: srai.h $w1, $w1, 15 +; MIPS32-MSA-NEXT: insert.w $w2[1], $1 +; MIPS32-MSA-NEXT: lw $1, 32($sp) +; MIPS32-MSA-NEXT: xor.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: insert.w $w2[2], $1 +; MIPS32-MSA-NEXT: lw $1, 36($sp) +; MIPS32-MSA-NEXT: insert.w $w2[3], $1 +; MIPS32-MSA-NEXT: lw $1, 16($sp) +; MIPS32-MSA-NEXT: shf.h $w2, $w2, 177 +; MIPS32-MSA-NEXT: and.v $w0, $w0, $w2 +; MIPS32-MSA-NEXT: insert.w $w2[0], $6 +; MIPS32-MSA-NEXT: insert.w $w2[1], $7 +; MIPS32-MSA-NEXT: insert.w $w2[2], $1 +; MIPS32-MSA-NEXT: lw $1, 20($sp) +; MIPS32-MSA-NEXT: insert.w $w2[3], $1 +; MIPS32-MSA-NEXT: shf.h $w2, $w2, 177 +; MIPS32-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS32-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: shf.h $w0, $w0, 177 +; MIPS32-MSA-NEXT: copy_s.w $2, $w0[0] +; MIPS32-MSA-NEXT: copy_s.w $3, $w0[1] +; MIPS32-MSA-NEXT: copy_s.w $4, $w0[2] +; MIPS32-MSA-NEXT: jr $ra +; MIPS32-MSA-NEXT: copy_s.w $5, $w0[3] + %result = call <8 x i16> @llvm.ct.select.v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b) + ret <8 x i16> %result +} + +; Test byte vector (16 x i8 = 128-bit) +define <16 x i8> @test_ctselect_v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b) { +; MIPS64-MSA-LABEL: test_ctselect_v16i8: +; MIPS64-MSA: # %bb.0: +; MIPS64-MSA-NEXT: insert.d $w0[0], $5 +; MIPS64-MSA-NEXT: insert.d $w1[0], $7 +; MIPS64-MSA-NEXT: sll $1, $4, 0 +; MIPS64-MSA-NEXT: fill.b $w2, $1 +; MIPS64-MSA-NEXT: insert.d $w0[1], $6 +; MIPS64-MSA-NEXT: insert.d $w1[1], $8 +; MIPS64-MSA-NEXT: slli.b $w2, $w2, 7 +; MIPS64-MSA-NEXT: shf.b $w0, $w0, 27 +; MIPS64-MSA-NEXT: shf.b $w1, $w1, 27 +; MIPS64-MSA-NEXT: srai.b $w2, $w2, 7 +; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 +; MIPS64-MSA-NEXT: shf.w $w1, $w1, 177 +; MIPS64-MSA-NEXT: and.v $w0, $w2, $w0 +; MIPS64-MSA-NEXT: xori.b $w2, $w2, 255 +; MIPS64-MSA-NEXT: and.v $w1, $w2, $w1 +; 
MIPS64-MSA-NEXT: or.v $w0, $w0, $w1 +; MIPS64-MSA-NEXT: shf.b $w0, $w0, 27 +; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 +; MIPS64-MSA-NEXT: copy_s.d $2, $w0[0] +; MIPS64-MSA-NEXT: jr $ra +; MIPS64-MSA-NEXT: copy_s.d $3, $w0[1] +; +; MIPS32-MSA-LABEL: test_ctselect_v16i8: +; MIPS32-MSA: # %bb.0: +; MIPS32-MSA-NEXT: insert.w $w0[0], $6 +; MIPS32-MSA-NEXT: lw $1, 16($sp) +; MIPS32-MSA-NEXT: lw $2, 24($sp) +; MIPS32-MSA-NEXT: fill.b $w2, $4 +; MIPS32-MSA-NEXT: insert.w $w0[1], $7 +; MIPS32-MSA-NEXT: insert.w $w1[0], $2 +; MIPS32-MSA-NEXT: slli.b $w2, $w2, 7 +; MIPS32-MSA-NEXT: srai.b $w2, $w2, 7 +; MIPS32-MSA-NEXT: insert.w $w0[2], $1 +; MIPS32-MSA-NEXT: lw $1, 20($sp) +; MIPS32-MSA-NEXT: insert.w $w0[3], $1 +; MIPS32-MSA-NEXT: lw $1, 28($sp) +; MIPS32-MSA-NEXT: insert.w $w1[1], $1 +; MIPS32-MSA-NEXT: lw $1, 32($sp) +; MIPS32-MSA-NEXT: shf.b $w0, $w0, 27 +; MIPS32-MSA-NEXT: insert.w $w1[2], $1 +; MIPS32-MSA-NEXT: lw $1, 36($sp) +; MIPS32-MSA-NEXT: and.v $w0, $w2, $w0 +; MIPS32-MSA-NEXT: xori.b $w2, $w2, 255 +; MIPS32-MSA-NEXT: insert.w $w1[3], $1 +; MIPS32-MSA-NEXT: shf.b $w1, $w1, 27 +; MIPS32-MSA-NEXT: and.v $w1, $w2, $w1 +; MIPS32-MSA-NEXT: or.v $w0, $w0, $w1 +; MIPS32-MSA-NEXT: shf.b $w0, $w0, 27 +; MIPS32-MSA-NEXT: copy_s.w $2, $w0[0] +; MIPS32-MSA-NEXT: copy_s.w $3, $w0[1] +; MIPS32-MSA-NEXT: copy_s.w $4, $w0[2] +; MIPS32-MSA-NEXT: jr $ra +; MIPS32-MSA-NEXT: copy_s.w $5, $w0[3] + %result = call <16 x i8> @llvm.ct.select.v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %result +} + +; Test 64-bit integer vector (2 x i64 = 128-bit) +define <2 x i64> @test_ctselect_v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b) { +; MIPS64-MSA-LABEL: test_ctselect_v2i64: +; MIPS64-MSA: # %bb.0: +; MIPS64-MSA-NEXT: fill.d $w2, $4 +; MIPS64-MSA-NEXT: insert.d $w0[0], $7 +; MIPS64-MSA-NEXT: ldi.b $w1, -1 +; MIPS64-MSA-NEXT: slli.d $w2, $w2, 63 +; MIPS64-MSA-NEXT: insert.d $w0[1], $8 +; MIPS64-MSA-NEXT: srai.d $w2, $w2, 63 +; MIPS64-MSA-NEXT: xor.v $w1, $w2, $w1 +; MIPS64-MSA-NEXT: 
and.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: insert.d $w1[0], $5 +; MIPS64-MSA-NEXT: insert.d $w1[1], $6 +; MIPS64-MSA-NEXT: and.v $w1, $w2, $w1 +; MIPS64-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: copy_s.d $2, $w0[0] +; MIPS64-MSA-NEXT: jr $ra +; MIPS64-MSA-NEXT: copy_s.d $3, $w0[1] +; +; MIPS32-MSA-LABEL: test_ctselect_v2i64: +; MIPS32-MSA: # %bb.0: +; MIPS32-MSA-NEXT: addiu $sp, $sp, -32 +; MIPS32-MSA-NEXT: .cfi_def_cfa_offset 32 +; MIPS32-MSA-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill +; MIPS32-MSA-NEXT: sw $fp, 24($sp) # 4-byte Folded Spill +; MIPS32-MSA-NEXT: .cfi_offset 31, -4 +; MIPS32-MSA-NEXT: .cfi_offset 30, -8 +; MIPS32-MSA-NEXT: move $fp, $sp +; MIPS32-MSA-NEXT: .cfi_def_cfa_register 30 +; MIPS32-MSA-NEXT: addiu $1, $zero, -16 +; MIPS32-MSA-NEXT: and $sp, $sp, $1 +; MIPS32-MSA-NEXT: lw $2, 56($fp) +; MIPS32-MSA-NEXT: lw $1, 60($fp) +; MIPS32-MSA-NEXT: sw $4, 12($sp) +; MIPS32-MSA-NEXT: sw $4, 4($sp) +; MIPS32-MSA-NEXT: ldi.b $w0, -1 +; MIPS32-MSA-NEXT: ld.d $w1, 0($sp) +; MIPS32-MSA-NEXT: shf.w $w0, $w0, 177 +; MIPS32-MSA-NEXT: insert.w $w2[0], $2 +; MIPS32-MSA-NEXT: slli.d $w1, $w1, 63 +; MIPS32-MSA-NEXT: insert.w $w2[1], $1 +; MIPS32-MSA-NEXT: lw $1, 64($fp) +; MIPS32-MSA-NEXT: srai.d $w1, $w1, 63 +; MIPS32-MSA-NEXT: xor.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: insert.w $w2[2], $1 +; MIPS32-MSA-NEXT: lw $1, 68($fp) +; MIPS32-MSA-NEXT: insert.w $w2[3], $1 +; MIPS32-MSA-NEXT: lw $1, 48($fp) +; MIPS32-MSA-NEXT: shf.w $w2, $w2, 177 +; MIPS32-MSA-NEXT: and.v $w0, $w0, $w2 +; MIPS32-MSA-NEXT: insert.w $w2[0], $6 +; MIPS32-MSA-NEXT: insert.w $w2[1], $7 +; MIPS32-MSA-NEXT: insert.w $w2[2], $1 +; MIPS32-MSA-NEXT: lw $1, 52($fp) +; MIPS32-MSA-NEXT: insert.w $w2[3], $1 +; MIPS32-MSA-NEXT: shf.w $w2, $w2, 177 +; MIPS32-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS32-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: shf.w $w0, $w0, 177 +; MIPS32-MSA-NEXT: copy_s.w $2, $w0[0] +; MIPS32-MSA-NEXT: copy_s.w $3, $w0[1] +; MIPS32-MSA-NEXT: copy_s.w $4, $w0[2] +; MIPS32-MSA-NEXT: 
copy_s.w $5, $w0[3] +; MIPS32-MSA-NEXT: move $sp, $fp +; MIPS32-MSA-NEXT: lw $fp, 24($sp) # 4-byte Folded Reload +; MIPS32-MSA-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload +; MIPS32-MSA-NEXT: jr $ra +; MIPS32-MSA-NEXT: addiu $sp, $sp, 32 + %result = call <2 x i64> @llvm.ct.select.v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %result +} + +; Test single-precision float vector (4 x float = 128-bit) +define <4 x float> @test_ctselect_v4f32(i1 %cond, <4 x float> %a, <4 x float> %b) { +; MIPS64-MSA-LABEL: test_ctselect_v4f32: +; MIPS64-MSA: # %bb.0: +; MIPS64-MSA-NEXT: insert.d $w2[0], $7 +; MIPS64-MSA-NEXT: sll $1, $4, 0 +; MIPS64-MSA-NEXT: ldi.b $w0, -1 +; MIPS64-MSA-NEXT: fill.w $w1, $1 +; MIPS64-MSA-NEXT: insert.d $w2[1], $8 +; MIPS64-MSA-NEXT: slli.w $w1, $w1, 31 +; MIPS64-MSA-NEXT: srai.w $w1, $w1, 31 +; MIPS64-MSA-NEXT: shf.w $w2, $w2, 177 +; MIPS64-MSA-NEXT: xor.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: and.v $w0, $w0, $w2 +; MIPS64-MSA-NEXT: insert.d $w2[0], $5 +; MIPS64-MSA-NEXT: insert.d $w2[1], $6 +; MIPS64-MSA-NEXT: shf.w $w2, $w2, 177 +; MIPS64-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS64-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 +; MIPS64-MSA-NEXT: copy_s.d $2, $w0[0] +; MIPS64-MSA-NEXT: jr $ra +; MIPS64-MSA-NEXT: copy_s.d $3, $w0[1] +; +; MIPS32-MSA-LABEL: test_ctselect_v4f32: +; MIPS32-MSA: # %bb.0: +; MIPS32-MSA-NEXT: lw $2, 24($sp) +; MIPS32-MSA-NEXT: lw $1, 28($sp) +; MIPS32-MSA-NEXT: fill.w $w2, $5 +; MIPS32-MSA-NEXT: ldi.b $w1, -1 +; MIPS32-MSA-NEXT: insert.w $w0[0], $2 +; MIPS32-MSA-NEXT: slli.w $w2, $w2, 31 +; MIPS32-MSA-NEXT: srai.w $w2, $w2, 31 +; MIPS32-MSA-NEXT: insert.w $w0[1], $1 +; MIPS32-MSA-NEXT: lw $1, 32($sp) +; MIPS32-MSA-NEXT: xor.v $w1, $w2, $w1 +; MIPS32-MSA-NEXT: insert.w $w0[2], $1 +; MIPS32-MSA-NEXT: lw $1, 36($sp) +; MIPS32-MSA-NEXT: insert.w $w0[3], $1 +; MIPS32-MSA-NEXT: lw $1, 16($sp) +; MIPS32-MSA-NEXT: and.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: insert.w $w1[0], $6 +; MIPS32-MSA-NEXT: insert.w 
$w1[1], $7 +; MIPS32-MSA-NEXT: insert.w $w1[2], $1 +; MIPS32-MSA-NEXT: lw $1, 20($sp) +; MIPS32-MSA-NEXT: insert.w $w1[3], $1 +; MIPS32-MSA-NEXT: and.v $w1, $w2, $w1 +; MIPS32-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: jr $ra +; MIPS32-MSA-NEXT: st.w $w0, 0($4) + %result = call <4 x float> @llvm.ct.select.v4f32(i1 %cond, <4 x float> %a, <4 x float> %b) + ret <4 x float> %result +} + +; Test double-precision float vector (2 x double = 128-bit) +define <2 x double> @test_ctselect_v2f64(i1 %cond, <2 x double> %a, <2 x double> %b) { +; MIPS64-MSA-LABEL: test_ctselect_v2f64: +; MIPS64-MSA: # %bb.0: +; MIPS64-MSA-NEXT: fill.d $w2, $4 +; MIPS64-MSA-NEXT: insert.d $w0[0], $7 +; MIPS64-MSA-NEXT: ldi.b $w1, -1 +; MIPS64-MSA-NEXT: slli.d $w2, $w2, 63 +; MIPS64-MSA-NEXT: insert.d $w0[1], $8 +; MIPS64-MSA-NEXT: srai.d $w2, $w2, 63 +; MIPS64-MSA-NEXT: xor.v $w1, $w2, $w1 +; MIPS64-MSA-NEXT: and.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: insert.d $w1[0], $5 +; MIPS64-MSA-NEXT: insert.d $w1[1], $6 +; MIPS64-MSA-NEXT: and.v $w1, $w2, $w1 +; MIPS64-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: copy_s.d $2, $w0[0] +; MIPS64-MSA-NEXT: jr $ra +; MIPS64-MSA-NEXT: copy_s.d $3, $w0[1] +; +; MIPS32-MSA-LABEL: test_ctselect_v2f64: +; MIPS32-MSA: # %bb.0: +; MIPS32-MSA-NEXT: addiu $sp, $sp, -32 +; MIPS32-MSA-NEXT: .cfi_def_cfa_offset 32 +; MIPS32-MSA-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill +; MIPS32-MSA-NEXT: sw $fp, 24($sp) # 4-byte Folded Spill +; MIPS32-MSA-NEXT: .cfi_offset 31, -4 +; MIPS32-MSA-NEXT: .cfi_offset 30, -8 +; MIPS32-MSA-NEXT: move $fp, $sp +; MIPS32-MSA-NEXT: .cfi_def_cfa_register 30 +; MIPS32-MSA-NEXT: addiu $1, $zero, -16 +; MIPS32-MSA-NEXT: and $sp, $sp, $1 +; MIPS32-MSA-NEXT: lw $2, 56($fp) +; MIPS32-MSA-NEXT: lw $1, 60($fp) +; MIPS32-MSA-NEXT: sw $5, 12($sp) +; MIPS32-MSA-NEXT: sw $5, 4($sp) +; MIPS32-MSA-NEXT: ldi.b $w0, -1 +; MIPS32-MSA-NEXT: ld.d $w1, 0($sp) +; MIPS32-MSA-NEXT: shf.w $w0, $w0, 177 +; MIPS32-MSA-NEXT: insert.w $w2[0], $2 +; MIPS32-MSA-NEXT: 
slli.d $w1, $w1, 63 +; MIPS32-MSA-NEXT: insert.w $w2[1], $1 +; MIPS32-MSA-NEXT: lw $1, 64($fp) +; MIPS32-MSA-NEXT: srai.d $w1, $w1, 63 +; MIPS32-MSA-NEXT: xor.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: insert.w $w2[2], $1 +; MIPS32-MSA-NEXT: lw $1, 68($fp) +; MIPS32-MSA-NEXT: insert.w $w2[3], $1 +; MIPS32-MSA-NEXT: lw $1, 48($fp) +; MIPS32-MSA-NEXT: shf.w $w2, $w2, 177 +; MIPS32-MSA-NEXT: and.v $w0, $w0, $w2 +; MIPS32-MSA-NEXT: insert.w $w2[0], $6 +; MIPS32-MSA-NEXT: insert.w $w2[1], $7 +; MIPS32-MSA-NEXT: insert.w $w2[2], $1 +; MIPS32-MSA-NEXT: lw $1, 52($fp) +; MIPS32-MSA-NEXT: insert.w $w2[3], $1 +; MIPS32-MSA-NEXT: shf.w $w2, $w2, 177 +; MIPS32-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS32-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: st.d $w0, 0($4) +; MIPS32-MSA-NEXT: move $sp, $fp +; MIPS32-MSA-NEXT: lw $fp, 24($sp) # 4-byte Folded Reload +; MIPS32-MSA-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload +; MIPS32-MSA-NEXT: jr $ra +; MIPS32-MSA-NEXT: addiu $sp, $sp, 32 + %result = call <2 x double> @llvm.ct.select.v2f64(i1 %cond, <2 x double> %a, <2 x double> %b) + ret <2 x double> %result +} + +; Test with aligned loads (common case) +define <4 x i32> @test_ctselect_v4i32_aligned_load(i1 %cond, ptr %p1, ptr %p2) { +; MIPS64-MSA-LABEL: test_ctselect_v4i32_aligned_load: +; MIPS64-MSA: # %bb.0: +; MIPS64-MSA-NEXT: sll $1, $4, 0 +; MIPS64-MSA-NEXT: ld.w $w1, 0($5) +; MIPS64-MSA-NEXT: ldi.b $w2, -1 +; MIPS64-MSA-NEXT: fill.w $w0, $1 +; MIPS64-MSA-NEXT: slli.w $w0, $w0, 31 +; MIPS64-MSA-NEXT: srai.w $w0, $w0, 31 +; MIPS64-MSA-NEXT: and.v $w1, $w0, $w1 +; MIPS64-MSA-NEXT: xor.v $w0, $w0, $w2 +; MIPS64-MSA-NEXT: ld.w $w2, 0($6) +; MIPS64-MSA-NEXT: and.v $w0, $w0, $w2 +; MIPS64-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 +; MIPS64-MSA-NEXT: copy_s.d $2, $w0[0] +; MIPS64-MSA-NEXT: jr $ra +; MIPS64-MSA-NEXT: copy_s.d $3, $w0[1] +; +; MIPS32-MSA-LABEL: test_ctselect_v4i32_aligned_load: +; MIPS32-MSA: # %bb.0: +; MIPS32-MSA-NEXT: fill.w $w0, $4 +; 
MIPS32-MSA-NEXT: ld.w $w1, 0($5) +; MIPS32-MSA-NEXT: ldi.b $w2, -1 +; MIPS32-MSA-NEXT: slli.w $w0, $w0, 31 +; MIPS32-MSA-NEXT: srai.w $w0, $w0, 31 +; MIPS32-MSA-NEXT: and.v $w1, $w0, $w1 +; MIPS32-MSA-NEXT: xor.v $w0, $w0, $w2 +; MIPS32-MSA-NEXT: ld.w $w2, 0($6) +; MIPS32-MSA-NEXT: and.v $w0, $w0, $w2 +; MIPS32-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: copy_s.w $2, $w0[0] +; MIPS32-MSA-NEXT: copy_s.w $3, $w0[1] +; MIPS32-MSA-NEXT: copy_s.w $4, $w0[2] +; MIPS32-MSA-NEXT: jr $ra +; MIPS32-MSA-NEXT: copy_s.w $5, $w0[3] + %a = load <4 x i32>, ptr %p1, align 16 + %b = load <4 x i32>, ptr %p2, align 16 + %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %result +} + +; Test with unaligned loads (stress test) +define <4 x i32> @test_ctselect_v4i32_unaligned_load(i1 %cond, ptr %p1, ptr %p2) { +; MIPS64-MSA-LABEL: test_ctselect_v4i32_unaligned_load: +; MIPS64-MSA: # %bb.0: +; MIPS64-MSA-NEXT: sll $1, $4, 0 +; MIPS64-MSA-NEXT: ld.w $w1, 0($5) +; MIPS64-MSA-NEXT: ldi.b $w2, -1 +; MIPS64-MSA-NEXT: fill.w $w0, $1 +; MIPS64-MSA-NEXT: slli.w $w0, $w0, 31 +; MIPS64-MSA-NEXT: srai.w $w0, $w0, 31 +; MIPS64-MSA-NEXT: and.v $w1, $w0, $w1 +; MIPS64-MSA-NEXT: xor.v $w0, $w0, $w2 +; MIPS64-MSA-NEXT: ld.w $w2, 0($6) +; MIPS64-MSA-NEXT: and.v $w0, $w0, $w2 +; MIPS64-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 +; MIPS64-MSA-NEXT: copy_s.d $2, $w0[0] +; MIPS64-MSA-NEXT: jr $ra +; MIPS64-MSA-NEXT: copy_s.d $3, $w0[1] +; +; MIPS32-MSA-LABEL: test_ctselect_v4i32_unaligned_load: +; MIPS32-MSA: # %bb.0: +; MIPS32-MSA-NEXT: fill.w $w0, $4 +; MIPS32-MSA-NEXT: ld.w $w1, 0($5) +; MIPS32-MSA-NEXT: ldi.b $w2, -1 +; MIPS32-MSA-NEXT: slli.w $w0, $w0, 31 +; MIPS32-MSA-NEXT: srai.w $w0, $w0, 31 +; MIPS32-MSA-NEXT: and.v $w1, $w0, $w1 +; MIPS32-MSA-NEXT: xor.v $w0, $w0, $w2 +; MIPS32-MSA-NEXT: ld.w $w2, 0($6) +; MIPS32-MSA-NEXT: and.v $w0, $w0, $w2 +; MIPS32-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: copy_s.w $2, $w0[0] 
+; MIPS32-MSA-NEXT: copy_s.w $3, $w0[1] +; MIPS32-MSA-NEXT: copy_s.w $4, $w0[2] +; MIPS32-MSA-NEXT: jr $ra +; MIPS32-MSA-NEXT: copy_s.w $5, $w0[3] + %a = load <4 x i32>, ptr %p1, align 4 + %b = load <4 x i32>, ptr %p2, align 4 + %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %result +} + +; Test with stores to verify result handling +define void @test_ctselect_v4i32_store(i1 %cond, <4 x i32> %a, <4 x i32> %b, ptr %out) { +; MIPS64-MSA-LABEL: test_ctselect_v4i32_store: +; MIPS64-MSA: # %bb.0: +; MIPS64-MSA-NEXT: insert.d $w2[0], $7 +; MIPS64-MSA-NEXT: sll $1, $4, 0 +; MIPS64-MSA-NEXT: ldi.b $w0, -1 +; MIPS64-MSA-NEXT: fill.w $w1, $1 +; MIPS64-MSA-NEXT: insert.d $w2[1], $8 +; MIPS64-MSA-NEXT: slli.w $w1, $w1, 31 +; MIPS64-MSA-NEXT: srai.w $w1, $w1, 31 +; MIPS64-MSA-NEXT: shf.w $w2, $w2, 177 +; MIPS64-MSA-NEXT: xor.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: and.v $w0, $w0, $w2 +; MIPS64-MSA-NEXT: insert.d $w2[0], $5 +; MIPS64-MSA-NEXT: insert.d $w2[1], $6 +; MIPS64-MSA-NEXT: shf.w $w2, $w2, 177 +; MIPS64-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS64-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: jr $ra +; MIPS64-MSA-NEXT: st.w $w0, 0($9) +; +; MIPS32-MSA-LABEL: test_ctselect_v4i32_store: +; MIPS32-MSA: # %bb.0: +; MIPS32-MSA-NEXT: lw $2, 24($sp) +; MIPS32-MSA-NEXT: lw $1, 28($sp) +; MIPS32-MSA-NEXT: fill.w $w2, $4 +; MIPS32-MSA-NEXT: ldi.b $w1, -1 +; MIPS32-MSA-NEXT: insert.w $w0[0], $2 +; MIPS32-MSA-NEXT: slli.w $w2, $w2, 31 +; MIPS32-MSA-NEXT: srai.w $w2, $w2, 31 +; MIPS32-MSA-NEXT: insert.w $w0[1], $1 +; MIPS32-MSA-NEXT: lw $1, 32($sp) +; MIPS32-MSA-NEXT: xor.v $w1, $w2, $w1 +; MIPS32-MSA-NEXT: insert.w $w0[2], $1 +; MIPS32-MSA-NEXT: lw $1, 36($sp) +; MIPS32-MSA-NEXT: insert.w $w0[3], $1 +; MIPS32-MSA-NEXT: lw $1, 16($sp) +; MIPS32-MSA-NEXT: and.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: insert.w $w1[0], $6 +; MIPS32-MSA-NEXT: insert.w $w1[1], $7 +; MIPS32-MSA-NEXT: insert.w $w1[2], $1 +; MIPS32-MSA-NEXT: lw $1, 20($sp) +; 
MIPS32-MSA-NEXT: insert.w $w1[3], $1 +; MIPS32-MSA-NEXT: lw $1, 40($sp) +; MIPS32-MSA-NEXT: and.v $w1, $w2, $w1 +; MIPS32-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: jr $ra +; MIPS32-MSA-NEXT: st.w $w0, 0($1) + %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) + store <4 x i32> %result, ptr %out, align 16 + ret void +} + +; Test chained selects (multiple conditions) +define <4 x i32> @test_ctselect_v4i32_chain(i1 %cond1, i1 %cond2, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; MIPS64-MSA-LABEL: test_ctselect_v4i32_chain: +; MIPS64-MSA: # %bb.0: +; MIPS64-MSA-NEXT: insert.d $w0[0], $8 +; MIPS64-MSA-NEXT: sll $1, $4, 0 +; MIPS64-MSA-NEXT: ldi.b $w1, -1 +; MIPS64-MSA-NEXT: fill.w $w2, $1 +; MIPS64-MSA-NEXT: sll $1, $5, 0 +; MIPS64-MSA-NEXT: insert.d $w0[1], $9 +; MIPS64-MSA-NEXT: slli.w $w2, $w2, 31 +; MIPS64-MSA-NEXT: srai.w $w2, $w2, 31 +; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 +; MIPS64-MSA-NEXT: xor.v $w3, $w2, $w1 +; MIPS64-MSA-NEXT: and.v $w0, $w3, $w0 +; MIPS64-MSA-NEXT: insert.d $w3[0], $6 +; MIPS64-MSA-NEXT: insert.d $w3[1], $7 +; MIPS64-MSA-NEXT: shf.w $w3, $w3, 177 +; MIPS64-MSA-NEXT: and.v $w2, $w2, $w3 +; MIPS64-MSA-NEXT: or.v $w0, $w2, $w0 +; MIPS64-MSA-NEXT: fill.w $w2, $1 +; MIPS64-MSA-NEXT: slli.w $w2, $w2, 31 +; MIPS64-MSA-NEXT: srai.w $w2, $w2, 31 +; MIPS64-MSA-NEXT: and.v $w0, $w2, $w0 +; MIPS64-MSA-NEXT: xor.v $w1, $w2, $w1 +; MIPS64-MSA-NEXT: insert.d $w2[0], $10 +; MIPS64-MSA-NEXT: insert.d $w2[1], $11 +; MIPS64-MSA-NEXT: shf.w $w2, $w2, 177 +; MIPS64-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS64-MSA-NEXT: or.v $w0, $w0, $w1 +; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 +; MIPS64-MSA-NEXT: copy_s.d $2, $w0[0] +; MIPS64-MSA-NEXT: jr $ra +; MIPS64-MSA-NEXT: copy_s.d $3, $w0[1] +; +; MIPS32-MSA-LABEL: test_ctselect_v4i32_chain: +; MIPS32-MSA: # %bb.0: +; MIPS32-MSA-NEXT: lw $2, 24($sp) +; MIPS32-MSA-NEXT: lw $1, 28($sp) +; MIPS32-MSA-NEXT: fill.w $w2, $4 +; MIPS32-MSA-NEXT: ldi.b $w1, -1 +; MIPS32-MSA-NEXT: insert.w 
$w0[0], $2 +; MIPS32-MSA-NEXT: slli.w $w2, $w2, 31 +; MIPS32-MSA-NEXT: lw $2, 40($sp) +; MIPS32-MSA-NEXT: srai.w $w2, $w2, 31 +; MIPS32-MSA-NEXT: insert.w $w0[1], $1 +; MIPS32-MSA-NEXT: lw $1, 32($sp) +; MIPS32-MSA-NEXT: xor.v $w3, $w2, $w1 +; MIPS32-MSA-NEXT: insert.w $w0[2], $1 +; MIPS32-MSA-NEXT: lw $1, 36($sp) +; MIPS32-MSA-NEXT: insert.w $w0[3], $1 +; MIPS32-MSA-NEXT: lw $1, 16($sp) +; MIPS32-MSA-NEXT: and.v $w0, $w3, $w0 +; MIPS32-MSA-NEXT: insert.w $w3[0], $6 +; MIPS32-MSA-NEXT: insert.w $w3[1], $7 +; MIPS32-MSA-NEXT: insert.w $w3[2], $1 +; MIPS32-MSA-NEXT: lw $1, 20($sp) +; MIPS32-MSA-NEXT: insert.w $w3[3], $1 +; MIPS32-MSA-NEXT: lw $1, 44($sp) +; MIPS32-MSA-NEXT: and.v $w2, $w2, $w3 +; MIPS32-MSA-NEXT: or.v $w0, $w2, $w0 +; MIPS32-MSA-NEXT: fill.w $w2, $5 +; MIPS32-MSA-NEXT: slli.w $w2, $w2, 31 +; MIPS32-MSA-NEXT: srai.w $w2, $w2, 31 +; MIPS32-MSA-NEXT: and.v $w0, $w2, $w0 +; MIPS32-MSA-NEXT: xor.v $w1, $w2, $w1 +; MIPS32-MSA-NEXT: insert.w $w2[0], $2 +; MIPS32-MSA-NEXT: insert.w $w2[1], $1 +; MIPS32-MSA-NEXT: lw $1, 48($sp) +; MIPS32-MSA-NEXT: insert.w $w2[2], $1 +; MIPS32-MSA-NEXT: lw $1, 52($sp) +; MIPS32-MSA-NEXT: insert.w $w2[3], $1 +; MIPS32-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS32-MSA-NEXT: or.v $w0, $w0, $w1 +; MIPS32-MSA-NEXT: copy_s.w $2, $w0[0] +; MIPS32-MSA-NEXT: copy_s.w $3, $w0[1] +; MIPS32-MSA-NEXT: copy_s.w $4, $w0[2] +; MIPS32-MSA-NEXT: jr $ra +; MIPS32-MSA-NEXT: copy_s.w $5, $w0[3] + %tmp = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond1, <4 x i32> %a, <4 x i32> %b) + %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond2, <4 x i32> %tmp, <4 x i32> %c) + ret <4 x i32> %result +} + +; Test with arithmetic operations (ensure float vectors work with FP ops) +define <4 x float> @test_ctselect_v4f32_arithmetic(i1 %cond, <4 x float> %x, <4 x float> %y) { +; MIPS64-MSA-LABEL: test_ctselect_v4f32_arithmetic: +; MIPS64-MSA: # %bb.0: +; MIPS64-MSA-NEXT: insert.d $w0[0], $7 +; MIPS64-MSA-NEXT: insert.d $w1[0], $5 +; MIPS64-MSA-NEXT: sll $1, $4, 0 +; 
MIPS64-MSA-NEXT: fill.w $w3, $1 +; MIPS64-MSA-NEXT: insert.d $w0[1], $8 +; MIPS64-MSA-NEXT: insert.d $w1[1], $6 +; MIPS64-MSA-NEXT: slli.w $w3, $w3, 31 +; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 +; MIPS64-MSA-NEXT: shf.w $w1, $w1, 177 +; MIPS64-MSA-NEXT: srai.w $w3, $w3, 31 +; MIPS64-MSA-NEXT: fadd.w $w2, $w1, $w0 +; MIPS64-MSA-NEXT: fsub.w $w0, $w1, $w0 +; MIPS64-MSA-NEXT: ldi.b $w1, -1 +; MIPS64-MSA-NEXT: xor.v $w1, $w3, $w1 +; MIPS64-MSA-NEXT: and.v $w2, $w3, $w2 +; MIPS64-MSA-NEXT: and.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: or.v $w0, $w2, $w0 +; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 +; MIPS64-MSA-NEXT: copy_s.d $2, $w0[0] +; MIPS64-MSA-NEXT: jr $ra +; MIPS64-MSA-NEXT: copy_s.d $3, $w0[1] +; +; MIPS32-MSA-LABEL: test_ctselect_v4f32_arithmetic: +; MIPS32-MSA: # %bb.0: +; MIPS32-MSA-NEXT: lw $2, 24($sp) +; MIPS32-MSA-NEXT: lw $1, 28($sp) +; MIPS32-MSA-NEXT: insert.w $w1[0], $6 +; MIPS32-MSA-NEXT: fill.w $w3, $5 +; MIPS32-MSA-NEXT: insert.w $w0[0], $2 +; MIPS32-MSA-NEXT: insert.w $w1[1], $7 +; MIPS32-MSA-NEXT: slli.w $w3, $w3, 31 +; MIPS32-MSA-NEXT: srai.w $w3, $w3, 31 +; MIPS32-MSA-NEXT: insert.w $w0[1], $1 +; MIPS32-MSA-NEXT: lw $1, 32($sp) +; MIPS32-MSA-NEXT: insert.w $w0[2], $1 +; MIPS32-MSA-NEXT: lw $1, 36($sp) +; MIPS32-MSA-NEXT: insert.w $w0[3], $1 +; MIPS32-MSA-NEXT: lw $1, 16($sp) +; MIPS32-MSA-NEXT: insert.w $w1[2], $1 +; MIPS32-MSA-NEXT: lw $1, 20($sp) +; MIPS32-MSA-NEXT: insert.w $w1[3], $1 +; MIPS32-MSA-NEXT: fadd.w $w2, $w1, $w0 +; MIPS32-MSA-NEXT: fsub.w $w0, $w1, $w0 +; MIPS32-MSA-NEXT: ldi.b $w1, -1 +; MIPS32-MSA-NEXT: xor.v $w1, $w3, $w1 +; MIPS32-MSA-NEXT: and.v $w2, $w3, $w2 +; MIPS32-MSA-NEXT: and.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: or.v $w0, $w2, $w0 +; MIPS32-MSA-NEXT: jr $ra +; MIPS32-MSA-NEXT: st.w $w0, 0($4) + %sum = fadd <4 x float> %x, %y + %diff = fsub <4 x float> %x, %y + %result = call <4 x float> @llvm.ct.select.v4f32(i1 %cond, <4 x float> %sum, <4 x float> %diff) + ret <4 x float> %result +} + +; Test with mixed operations (load, compute, 
select, store) +define void @test_ctselect_v4i32_mixed(i1 %cond, ptr %p1, ptr %p2, ptr %out) { +; MIPS64-MSA-LABEL: test_ctselect_v4i32_mixed: +; MIPS64-MSA: # %bb.0: +; MIPS64-MSA-NEXT: sll $1, $4, 0 +; MIPS64-MSA-NEXT: ld.w $w0, 0($5) +; MIPS64-MSA-NEXT: ldi.b $w2, -1 +; MIPS64-MSA-NEXT: fill.w $w1, $1 +; MIPS64-MSA-NEXT: addvi.w $w0, $w0, 1 +; MIPS64-MSA-NEXT: slli.w $w1, $w1, 31 +; MIPS64-MSA-NEXT: srai.w $w1, $w1, 31 +; MIPS64-MSA-NEXT: and.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: xor.v $w1, $w1, $w2 +; MIPS64-MSA-NEXT: ld.w $w2, 0($6) +; MIPS64-MSA-NEXT: addvi.w $w2, $w2, 2 +; MIPS64-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS64-MSA-NEXT: or.v $w0, $w0, $w1 +; MIPS64-MSA-NEXT: jr $ra +; MIPS64-MSA-NEXT: st.w $w0, 0($7) +; +; MIPS32-MSA-LABEL: test_ctselect_v4i32_mixed: +; MIPS32-MSA: # %bb.0: +; MIPS32-MSA-NEXT: ld.w $w0, 0($5) +; MIPS32-MSA-NEXT: fill.w $w1, $4 +; MIPS32-MSA-NEXT: ldi.b $w2, -1 +; MIPS32-MSA-NEXT: slli.w $w1, $w1, 31 +; MIPS32-MSA-NEXT: addvi.w $w0, $w0, 1 +; MIPS32-MSA-NEXT: srai.w $w1, $w1, 31 +; MIPS32-MSA-NEXT: and.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: xor.v $w1, $w1, $w2 +; MIPS32-MSA-NEXT: ld.w $w2, 0($6) +; MIPS32-MSA-NEXT: addvi.w $w2, $w2, 2 +; MIPS32-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS32-MSA-NEXT: or.v $w0, $w0, $w1 +; MIPS32-MSA-NEXT: jr $ra +; MIPS32-MSA-NEXT: st.w $w0, 0($7) + %a = load <4 x i32>, ptr %p1, align 16 + %b = load <4 x i32>, ptr %p2, align 16 + %a_plus_1 = add <4 x i32> %a, <i32 1, i32 1, i32 1, i32 1> + %b_plus_2 = add <4 x i32> %b, <i32 2, i32 2, i32 2, i32 2> + %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a_plus_1, <4 x i32> %b_plus_2) + store <4 x i32> %result, ptr %out, align 16 + ret void +} + +; Test with function arguments directly (no loads) +define <4 x i32> @test_ctselect_v4i32_args(i1 %cond, <4 x i32> %a, <4 x i32> %b) nounwind { +; MIPS64-MSA-LABEL: test_ctselect_v4i32_args: +; MIPS64-MSA: # %bb.0: +; MIPS64-MSA-NEXT: insert.d $w2[0], $7 +; MIPS64-MSA-NEXT: sll $1, $4, 0 +; MIPS64-MSA-NEXT: 
ldi.b $w0, -1 +; MIPS64-MSA-NEXT: fill.w $w1, $1 +; MIPS64-MSA-NEXT: insert.d $w2[1], $8 +; MIPS64-MSA-NEXT: slli.w $w1, $w1, 31 +; MIPS64-MSA-NEXT: srai.w $w1, $w1, 31 +; MIPS64-MSA-NEXT: shf.w $w2, $w2, 177 +; MIPS64-MSA-NEXT: xor.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: and.v $w0, $w0, $w2 +; MIPS64-MSA-NEXT: insert.d $w2[0], $5 +; MIPS64-MSA-NEXT: insert.d $w2[1], $6 +; MIPS64-MSA-NEXT: shf.w $w2, $w2, 177 +; MIPS64-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS64-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 +; MIPS64-MSA-NEXT: copy_s.d $2, $w0[0] +; MIPS64-MSA-NEXT: jr $ra +; MIPS64-MSA-NEXT: copy_s.d $3, $w0[1] +; +; MIPS32-MSA-LABEL: test_ctselect_v4i32_args: +; MIPS32-MSA: # %bb.0: +; MIPS32-MSA-NEXT: lw $2, 24($sp) +; MIPS32-MSA-NEXT: lw $1, 28($sp) +; MIPS32-MSA-NEXT: fill.w $w2, $4 +; MIPS32-MSA-NEXT: ldi.b $w1, -1 +; MIPS32-MSA-NEXT: insert.w $w0[0], $2 +; MIPS32-MSA-NEXT: slli.w $w2, $w2, 31 +; MIPS32-MSA-NEXT: srai.w $w2, $w2, 31 +; MIPS32-MSA-NEXT: insert.w $w0[1], $1 +; MIPS32-MSA-NEXT: lw $1, 32($sp) +; MIPS32-MSA-NEXT: xor.v $w1, $w2, $w1 +; MIPS32-MSA-NEXT: insert.w $w0[2], $1 +; MIPS32-MSA-NEXT: lw $1, 36($sp) +; MIPS32-MSA-NEXT: insert.w $w0[3], $1 +; MIPS32-MSA-NEXT: lw $1, 16($sp) +; MIPS32-MSA-NEXT: and.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: insert.w $w1[0], $6 +; MIPS32-MSA-NEXT: insert.w $w1[1], $7 +; MIPS32-MSA-NEXT: insert.w $w1[2], $1 +; MIPS32-MSA-NEXT: lw $1, 20($sp) +; MIPS32-MSA-NEXT: insert.w $w1[3], $1 +; MIPS32-MSA-NEXT: and.v $w1, $w2, $w1 +; MIPS32-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: copy_s.w $2, $w0[0] +; MIPS32-MSA-NEXT: copy_s.w $3, $w0[1] +; MIPS32-MSA-NEXT: copy_s.w $4, $w0[2] +; MIPS32-MSA-NEXT: jr $ra +; MIPS32-MSA-NEXT: copy_s.w $5, $w0[3] + %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %result +} + +; Test with multiple uses of result +define <4 x i32> @test_ctselect_v4i32_multi_use(i1 %cond, <4 x i32> %a, <4 x i32> %b) { +; MIPS64-MSA-LABEL: 
test_ctselect_v4i32_multi_use: +; MIPS64-MSA: # %bb.0: +; MIPS64-MSA-NEXT: insert.d $w2[0], $7 +; MIPS64-MSA-NEXT: sll $1, $4, 0 +; MIPS64-MSA-NEXT: ldi.b $w0, -1 +; MIPS64-MSA-NEXT: fill.w $w1, $1 +; MIPS64-MSA-NEXT: insert.d $w2[1], $8 +; MIPS64-MSA-NEXT: slli.w $w1, $w1, 31 +; MIPS64-MSA-NEXT: srai.w $w1, $w1, 31 +; MIPS64-MSA-NEXT: shf.w $w2, $w2, 177 +; MIPS64-MSA-NEXT: xor.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: and.v $w0, $w0, $w2 +; MIPS64-MSA-NEXT: insert.d $w2[0], $5 +; MIPS64-MSA-NEXT: insert.d $w2[1], $6 +; MIPS64-MSA-NEXT: shf.w $w2, $w2, 177 +; MIPS64-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS64-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: addv.w $w0, $w0, $w0 +; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 +; MIPS64-MSA-NEXT: copy_s.d $2, $w0[0] +; MIPS64-MSA-NEXT: jr $ra +; MIPS64-MSA-NEXT: copy_s.d $3, $w0[1] +; +; MIPS32-MSA-LABEL: test_ctselect_v4i32_multi_use: +; MIPS32-MSA: # %bb.0: +; MIPS32-MSA-NEXT: lw $2, 24($sp) +; MIPS32-MSA-NEXT: lw $1, 28($sp) +; MIPS32-MSA-NEXT: fill.w $w2, $4 +; MIPS32-MSA-NEXT: ldi.b $w1, -1 +; MIPS32-MSA-NEXT: insert.w $w0[0], $2 +; MIPS32-MSA-NEXT: slli.w $w2, $w2, 31 +; MIPS32-MSA-NEXT: srai.w $w2, $w2, 31 +; MIPS32-MSA-NEXT: insert.w $w0[1], $1 +; MIPS32-MSA-NEXT: lw $1, 32($sp) +; MIPS32-MSA-NEXT: xor.v $w1, $w2, $w1 +; MIPS32-MSA-NEXT: insert.w $w0[2], $1 +; MIPS32-MSA-NEXT: lw $1, 36($sp) +; MIPS32-MSA-NEXT: insert.w $w0[3], $1 +; MIPS32-MSA-NEXT: lw $1, 16($sp) +; MIPS32-MSA-NEXT: and.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: insert.w $w1[0], $6 +; MIPS32-MSA-NEXT: insert.w $w1[1], $7 +; MIPS32-MSA-NEXT: insert.w $w1[2], $1 +; MIPS32-MSA-NEXT: lw $1, 20($sp) +; MIPS32-MSA-NEXT: insert.w $w1[3], $1 +; MIPS32-MSA-NEXT: and.v $w1, $w2, $w1 +; MIPS32-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: addv.w $w0, $w0, $w0 +; MIPS32-MSA-NEXT: copy_s.w $2, $w0[0] +; MIPS32-MSA-NEXT: copy_s.w $3, $w0[1] +; MIPS32-MSA-NEXT: copy_s.w $4, $w0[2] +; MIPS32-MSA-NEXT: jr $ra +; MIPS32-MSA-NEXT: copy_s.w $5, $w0[3] + %sel = call <4 x i32> 
@llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) + %add = add <4 x i32> %sel, %sel ; Use result twice + ret <4 x i32> %add +} + +declare <4 x i32> @llvm.ct.select.v4i32(i1, <4 x i32>, <4 x i32>) +declare <8 x i16> @llvm.ct.select.v8i16(i1, <8 x i16>, <8 x i16>) +declare <16 x i8> @llvm.ct.select.v16i8(i1, <16 x i8>, <16 x i8>) +declare <2 x i64> @llvm.ct.select.v2i64(i1, <2 x i64>, <2 x i64>) +declare <4 x float> @llvm.ct.select.v4f32(i1, <4 x float>, <4 x float>) +declare <2 x double> @llvm.ct.select.v2f64(i1, <2 x double>, <2 x double>) diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback.ll b/llvm/test/CodeGen/Mips/ctselect-fallback.ll new file mode 100644 index 0000000000000..d89d7fc698712 --- /dev/null +++ b/llvm/test/CodeGen/Mips/ctselect-fallback.ll @@ -0,0 +1,371 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=mipsel-unknown-linux-gnu -O3 | FileCheck %s --check-prefix=M32 +; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -O3 | FileCheck %s --check-prefix=M64 + +; Test basic ct.select functionality for scalar types +define i8 @test_ctselect_i8(i1 %cond, i8 %a, i8 %b) { +; M32-LABEL: test_ctselect_i8: +; M32: # %bb.0: +; M32-NEXT: andi $2, $4, 1 +; M32-NEXT: xor $1, $5, $6 +; M32-NEXT: negu $2, $2 +; M32-NEXT: and $1, $1, $2 +; M32-NEXT: jr $ra +; M32-NEXT: xor $2, $1, $6 +; +; M64-LABEL: test_ctselect_i8: +; M64: # %bb.0: +; M64-NEXT: sll $1, $4, 0 +; M64-NEXT: xor $2, $5, $6 +; M64-NEXT: andi $1, $1, 1 +; M64-NEXT: sll $2, $2, 0 +; M64-NEXT: negu $1, $1 +; M64-NEXT: and $1, $2, $1 +; M64-NEXT: sll $2, $6, 0 +; M64-NEXT: jr $ra +; M64-NEXT: xor $2, $1, $2 + %result = call i8 @llvm.ct.select.i8(i1 %cond, i8 %a, i8 %b) + ret i8 %result +} + +define i16 @test_ctselect_i16(i1 %cond, i16 %a, i16 %b) { +; M32-LABEL: test_ctselect_i16: +; M32: # %bb.0: +; M32-NEXT: andi $2, $4, 1 +; M32-NEXT: xor $1, $5, $6 +; M32-NEXT: negu $2, $2 +; M32-NEXT: and $1, $1, $2 +; 
M32-NEXT: jr $ra +; M32-NEXT: xor $2, $1, $6 +; +; M64-LABEL: test_ctselect_i16: +; M64: # %bb.0: +; M64-NEXT: sll $1, $4, 0 +; M64-NEXT: xor $2, $5, $6 +; M64-NEXT: andi $1, $1, 1 +; M64-NEXT: sll $2, $2, 0 +; M64-NEXT: negu $1, $1 +; M64-NEXT: and $1, $2, $1 +; M64-NEXT: sll $2, $6, 0 +; M64-NEXT: jr $ra +; M64-NEXT: xor $2, $1, $2 + %result = call i16 @llvm.ct.select.i16(i1 %cond, i16 %a, i16 %b) + ret i16 %result +} + +define i32 @test_ctselect_i32(i1 %cond, i32 %a, i32 %b) { +; M32-LABEL: test_ctselect_i32: +; M32: # %bb.0: +; M32-NEXT: andi $1, $4, 1 +; M32-NEXT: negu $2, $1 +; M32-NEXT: addiu $1, $1, -1 +; M32-NEXT: and $2, $2, $5 +; M32-NEXT: and $1, $1, $6 +; M32-NEXT: jr $ra +; M32-NEXT: or $2, $2, $1 +; +; M64-LABEL: test_ctselect_i32: +; M64: # %bb.0: +; M64-NEXT: sll $1, $4, 0 +; M64-NEXT: sll $3, $5, 0 +; M64-NEXT: andi $1, $1, 1 +; M64-NEXT: negu $2, $1 +; M64-NEXT: addiu $1, $1, -1 +; M64-NEXT: and $2, $2, $3 +; M64-NEXT: sll $3, $6, 0 +; M64-NEXT: and $1, $1, $3 +; M64-NEXT: jr $ra +; M64-NEXT: or $2, $2, $1 + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) + ret i32 %result +} + +define i64 @test_ctselect_i64(i1 %cond, i64 %a, i64 %b) { +; M32-LABEL: test_ctselect_i64: +; M32: # %bb.0: +; M32-NEXT: lw $1, 16($sp) +; M32-NEXT: andi $3, $4, 1 +; M32-NEXT: negu $3, $3 +; M32-NEXT: xor $2, $6, $1 +; M32-NEXT: and $2, $2, $3 +; M32-NEXT: xor $2, $2, $1 +; M32-NEXT: lw $1, 20($sp) +; M32-NEXT: xor $4, $7, $1 +; M32-NEXT: and $3, $4, $3 +; M32-NEXT: jr $ra +; M32-NEXT: xor $3, $3, $1 +; +; M64-LABEL: test_ctselect_i64: +; M64: # %bb.0: +; M64-NEXT: andi $1, $4, 1 +; M64-NEXT: dnegu $2, $1 +; M64-NEXT: daddiu $1, $1, -1 +; M64-NEXT: and $2, $2, $5 +; M64-NEXT: and $1, $1, $6 +; M64-NEXT: jr $ra +; M64-NEXT: or $2, $2, $1 + %result = call i64 @llvm.ct.select.i64(i1 %cond, i64 %a, i64 %b) + ret i64 %result +} + +define ptr @test_ctselect_ptr(i1 %cond, ptr %a, ptr %b) { +; M32-LABEL: test_ctselect_ptr: +; M32: # %bb.0: +; M32-NEXT: andi $1, 
$4, 1 +; M32-NEXT: negu $2, $1 +; M32-NEXT: addiu $1, $1, -1 +; M32-NEXT: and $2, $2, $5 +; M32-NEXT: and $1, $1, $6 +; M32-NEXT: jr $ra +; M32-NEXT: or $2, $2, $1 +; +; M64-LABEL: test_ctselect_ptr: +; M64: # %bb.0: +; M64-NEXT: andi $1, $4, 1 +; M64-NEXT: dnegu $2, $1 +; M64-NEXT: daddiu $1, $1, -1 +; M64-NEXT: and $2, $2, $5 +; M64-NEXT: and $1, $1, $6 +; M64-NEXT: jr $ra +; M64-NEXT: or $2, $2, $1 + %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b) + ret ptr %result +} + +; Test with constant conditions +define i32 @test_ctselect_const_true(i32 %a, i32 %b) { +; M32-LABEL: test_ctselect_const_true: +; M32: # %bb.0: +; M32-NEXT: jr $ra +; M32-NEXT: move $2, $4 +; +; M64-LABEL: test_ctselect_const_true: +; M64: # %bb.0: +; M64-NEXT: jr $ra +; M64-NEXT: sll $2, $4, 0 + %result = call i32 @llvm.ct.select.i32(i1 true, i32 %a, i32 %b) + ret i32 %result +} + +define i32 @test_ctselect_const_false(i32 %a, i32 %b) { +; M32-LABEL: test_ctselect_const_false: +; M32: # %bb.0: +; M32-NEXT: jr $ra +; M32-NEXT: or $2, $zero, $5 +; +; M64-LABEL: test_ctselect_const_false: +; M64: # %bb.0: +; M64-NEXT: sll $1, $5, 0 +; M64-NEXT: jr $ra +; M64-NEXT: or $2, $zero, $1 + %result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b) + ret i32 %result +} + +; Test with comparison conditions +define i32 @test_ctselect_icmp_eq(i32 %x, i32 %y, i32 %a, i32 %b) { +; M32-LABEL: test_ctselect_icmp_eq: +; M32: # %bb.0: +; M32-NEXT: xor $1, $4, $5 +; M32-NEXT: sltu $1, $zero, $1 +; M32-NEXT: addiu $1, $1, -1 +; M32-NEXT: and $2, $1, $6 +; M32-NEXT: not $1, $1 +; M32-NEXT: and $1, $1, $7 +; M32-NEXT: jr $ra +; M32-NEXT: or $2, $2, $1 +; +; M64-LABEL: test_ctselect_icmp_eq: +; M64: # %bb.0: +; M64-NEXT: sll $1, $5, 0 +; M64-NEXT: sll $2, $4, 0 +; M64-NEXT: sll $3, $7, 0 +; M64-NEXT: xor $1, $2, $1 +; M64-NEXT: sll $2, $6, 0 +; M64-NEXT: sltu $1, $zero, $1 +; M64-NEXT: addiu $1, $1, -1 +; M64-NEXT: and $2, $1, $2 +; M64-NEXT: not $1, $1 +; M64-NEXT: and $1, $1, $3 +; M64-NEXT: jr 
$ra +; M64-NEXT: or $2, $2, $1 + %cond = icmp eq i32 %x, %y + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) + ret i32 %result +} + +define i32 @test_ctselect_icmp_ne(i32 %x, i32 %y, i32 %a, i32 %b) { +; M32-LABEL: test_ctselect_icmp_ne: +; M32: # %bb.0: +; M32-NEXT: xor $1, $4, $5 +; M32-NEXT: sltiu $1, $1, 1 +; M32-NEXT: addiu $1, $1, -1 +; M32-NEXT: and $2, $1, $6 +; M32-NEXT: not $1, $1 +; M32-NEXT: and $1, $1, $7 +; M32-NEXT: jr $ra +; M32-NEXT: or $2, $2, $1 +; +; M64-LABEL: test_ctselect_icmp_ne: +; M64: # %bb.0: +; M64-NEXT: sll $1, $5, 0 +; M64-NEXT: sll $2, $4, 0 +; M64-NEXT: sll $3, $7, 0 +; M64-NEXT: xor $1, $2, $1 +; M64-NEXT: sll $2, $6, 0 +; M64-NEXT: sltiu $1, $1, 1 +; M64-NEXT: addiu $1, $1, -1 +; M64-NEXT: and $2, $1, $2 +; M64-NEXT: not $1, $1 +; M64-NEXT: and $1, $1, $3 +; M64-NEXT: jr $ra +; M64-NEXT: or $2, $2, $1 + %cond = icmp ne i32 %x, %y + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) + ret i32 %result +} + +define i32 @test_ctselect_icmp_slt(i32 %x, i32 %y, i32 %a, i32 %b) { +; M32-LABEL: test_ctselect_icmp_slt: +; M32: # %bb.0: +; M32-NEXT: slt $1, $4, $5 +; M32-NEXT: xori $1, $1, 1 +; M32-NEXT: addiu $1, $1, -1 +; M32-NEXT: and $2, $1, $6 +; M32-NEXT: not $1, $1 +; M32-NEXT: and $1, $1, $7 +; M32-NEXT: jr $ra +; M32-NEXT: or $2, $2, $1 +; +; M64-LABEL: test_ctselect_icmp_slt: +; M64: # %bb.0: +; M64-NEXT: sll $1, $5, 0 +; M64-NEXT: sll $2, $4, 0 +; M64-NEXT: sll $3, $7, 0 +; M64-NEXT: slt $1, $2, $1 +; M64-NEXT: sll $2, $6, 0 +; M64-NEXT: xori $1, $1, 1 +; M64-NEXT: addiu $1, $1, -1 +; M64-NEXT: and $2, $1, $2 +; M64-NEXT: not $1, $1 +; M64-NEXT: and $1, $1, $3 +; M64-NEXT: jr $ra +; M64-NEXT: or $2, $2, $1 + %cond = icmp slt i32 %x, %y + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) + ret i32 %result +} + +define i32 @test_ctselect_icmp_ult(i32 %x, i32 %y, i32 %a, i32 %b) { +; M32-LABEL: test_ctselect_icmp_ult: +; M32: # %bb.0: +; M32-NEXT: sltu $1, $4, $5 +; M32-NEXT: xori $1, 
$1, 1 +; M32-NEXT: addiu $1, $1, -1 +; M32-NEXT: and $2, $1, $6 +; M32-NEXT: not $1, $1 +; M32-NEXT: and $1, $1, $7 +; M32-NEXT: jr $ra +; M32-NEXT: or $2, $2, $1 +; +; M64-LABEL: test_ctselect_icmp_ult: +; M64: # %bb.0: +; M64-NEXT: sll $1, $5, 0 +; M64-NEXT: sll $2, $4, 0 +; M64-NEXT: sll $3, $7, 0 +; M64-NEXT: sltu $1, $2, $1 +; M64-NEXT: sll $2, $6, 0 +; M64-NEXT: xori $1, $1, 1 +; M64-NEXT: addiu $1, $1, -1 +; M64-NEXT: and $2, $1, $2 +; M64-NEXT: not $1, $1 +; M64-NEXT: and $1, $1, $3 +; M64-NEXT: jr $ra +; M64-NEXT: or $2, $2, $1 + %cond = icmp ult i32 %x, %y + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) + ret i32 %result +} + +; Test with memory operands +define i32 @test_ctselect_load(i1 %cond, ptr %p1, ptr %p2) { +; M32-LABEL: test_ctselect_load: +; M32: # %bb.0: +; M32-NEXT: andi $1, $4, 1 +; M32-NEXT: lw $3, 0($5) +; M32-NEXT: negu $2, $1 +; M32-NEXT: addiu $1, $1, -1 +; M32-NEXT: and $2, $2, $3 +; M32-NEXT: lw $3, 0($6) +; M32-NEXT: and $1, $1, $3 +; M32-NEXT: jr $ra +; M32-NEXT: or $2, $2, $1 +; +; M64-LABEL: test_ctselect_load: +; M64: # %bb.0: +; M64-NEXT: sll $1, $4, 0 +; M64-NEXT: lw $3, 0($5) +; M64-NEXT: andi $1, $1, 1 +; M64-NEXT: negu $2, $1 +; M64-NEXT: addiu $1, $1, -1 +; M64-NEXT: and $2, $2, $3 +; M64-NEXT: lw $3, 0($6) +; M64-NEXT: and $1, $1, $3 +; M64-NEXT: jr $ra +; M64-NEXT: or $2, $2, $1 + %a = load i32, ptr %p1 + %b = load i32, ptr %p2 + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) + ret i32 %result +} + +; Test nested ctselect calls +define i32 @test_ctselect_nested(i1 %cond1, i1 %cond2, i32 %a, i32 %b, i32 %c) { +; M32-LABEL: test_ctselect_nested: +; M32: # %bb.0: +; M32-NEXT: andi $1, $5, 1 +; M32-NEXT: andi $3, $4, 1 +; M32-NEXT: negu $2, $1 +; M32-NEXT: addiu $1, $1, -1 +; M32-NEXT: negu $4, $3 +; M32-NEXT: and $2, $2, $6 +; M32-NEXT: and $1, $1, $7 +; M32-NEXT: or $1, $2, $1 +; M32-NEXT: addiu $2, $3, -1 +; M32-NEXT: lw $3, 16($sp) +; M32-NEXT: and $1, $4, $1 +; M32-NEXT: and $2, $2, $3 
+; M32-NEXT: jr $ra +; M32-NEXT: or $2, $1, $2 +; +; M64-LABEL: test_ctselect_nested: +; M64: # %bb.0: +; M64-NEXT: sll $1, $5, 0 +; M64-NEXT: sll $3, $6, 0 +; M64-NEXT: sll $4, $4, 0 +; M64-NEXT: andi $1, $1, 1 +; M64-NEXT: andi $4, $4, 1 +; M64-NEXT: negu $2, $1 +; M64-NEXT: addiu $1, $1, -1 +; M64-NEXT: negu $5, $4 +; M64-NEXT: and $2, $2, $3 +; M64-NEXT: sll $3, $7, 0 +; M64-NEXT: and $1, $1, $3 +; M64-NEXT: addiu $3, $4, -1 +; M64-NEXT: or $1, $2, $1 +; M64-NEXT: sll $2, $8, 0 +; M64-NEXT: and $1, $5, $1 +; M64-NEXT: and $2, $3, $2 +; M64-NEXT: jr $ra +; M64-NEXT: or $2, $1, $2 + %inner = call i32 @llvm.ct.select.i32(i1 %cond2, i32 %a, i32 %b) + %result = call i32 @llvm.ct.select.i32(i1 %cond1, i32 %inner, i32 %c) + ret i32 %result +} + +; Declare the intrinsics +declare i8 @llvm.ct.select.i8(i1, i8, i8) +declare i16 @llvm.ct.select.i16(i1, i16, i16) +declare i32 @llvm.ct.select.i32(i1, i32, i32) +declare i64 @llvm.ct.select.i64(i1, i64, i64) +declare ptr @llvm.ct.select.p0(i1, ptr, ptr) diff --git a/llvm/test/CodeGen/Mips/ctselect-side-effects.ll b/llvm/test/CodeGen/Mips/ctselect-side-effects.ll new file mode 100644 index 0000000000000..6cfa07afdd51e --- /dev/null +++ b/llvm/test/CodeGen/Mips/ctselect-side-effects.ll @@ -0,0 +1,183 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=mipsel-unknown-linux-gnu -O3 | FileCheck %s --check-prefix=M32 +; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -O3 | FileCheck %s --check-prefix=M64 + +; Test 1: Basic optimizations should still work +define i32 @test_basic_opts(i32 %x) { +; M32-LABEL: test_basic_opts: +; M32: # %bb.0: +; M32-NEXT: jr $ra +; M32-NEXT: move $2, $4 +; +; M64-LABEL: test_basic_opts: +; M64: # %bb.0: +; M64-NEXT: jr $ra +; M64-NEXT: sll $2, $4, 0 + %a = or i32 %x, 0 + %b = and i32 %a, -1 + %c = xor i32 %b, 0 + ret i32 %c +} + +; Test 2: Constant folding should work +define i32 @test_constant_fold() { +; M32-LABEL: 
test_constant_fold: +; M32: # %bb.0: +; M32-NEXT: jr $ra +; M32-NEXT: addiu $2, $zero, 0 +; +; M64-LABEL: test_constant_fold: +; M64: # %bb.0: +; M64-NEXT: jr $ra +; M64-NEXT: addiu $2, $zero, 0 + %a = xor i32 -1, -1 ; Should fold to 0 + ret i32 %a +} + +; Test 3: Protected pattern should NOT have branches +define i32 @test_protected_no_branch(i1 %cond, i32 %a, i32 %b) { +; M32-LABEL: test_protected_no_branch: +; M32: # %bb.0: +; M32-NEXT: andi $1, $4, 1 +; M32-NEXT: negu $2, $1 +; M32-NEXT: addiu $1, $1, -1 +; M32-NEXT: and $2, $2, $5 +; M32-NEXT: and $1, $1, $6 +; M32-NEXT: jr $ra +; M32-NEXT: or $2, $2, $1 +; +; M64-LABEL: test_protected_no_branch: +; M64: # %bb.0: +; M64-NEXT: sll $1, $4, 0 +; M64-NEXT: sll $3, $5, 0 +; M64-NEXT: andi $1, $1, 1 +; M64-NEXT: negu $2, $1 +; M64-NEXT: addiu $1, $1, -1 +; M64-NEXT: and $2, $2, $3 +; M64-NEXT: sll $3, $6, 0 +; M64-NEXT: and $1, $1, $3 +; M64-NEXT: jr $ra +; M64-NEXT: or $2, $2, $1 + %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) + ret i32 %result +} + +; Test 4: Explicit branch should still generate branches +define i32 @test_explicit_branch(i1 %cond, i32 %a, i32 %b) { +; M32-LABEL: test_explicit_branch: +; M32: # %bb.0: +; M32-NEXT: andi $1, $4, 1 +; M32-NEXT: beqz $1, $BB3_2 +; M32-NEXT: nop +; M32-NEXT: # %bb.1: # %true +; M32-NEXT: jr $ra +; M32-NEXT: move $2, $5 +; M32-NEXT: $BB3_2: # %false +; M32-NEXT: jr $ra +; M32-NEXT: move $2, $6 +; +; M64-LABEL: test_explicit_branch: +; M64: # %bb.0: +; M64-NEXT: sll $1, $4, 0 +; M64-NEXT: andi $1, $1, 1 +; M64-NEXT: beqz $1, .LBB3_2 +; M64-NEXT: nop +; M64-NEXT: # %bb.1: # %true +; M64-NEXT: jr $ra +; M64-NEXT: sll $2, $5, 0 +; M64-NEXT: .LBB3_2: # %false +; M64-NEXT: jr $ra +; M64-NEXT: sll $2, $6, 0 + br i1 %cond, label %true, label %false +true: + ret i32 %a +false: + ret i32 %b +} + +; Test 5: Regular select (not ct.select) - whatever MIPS wants to do +define i32 @test_regular_select(i1 %cond, i32 %a, i32 %b) { +; M32-LABEL: test_regular_select: +;
M32: # %bb.0: +; M32-NEXT: andi $1, $4, 1 +; M32-NEXT: movn $6, $5, $1 +; M32-NEXT: jr $ra +; M32-NEXT: move $2, $6 +; +; M64-LABEL: test_regular_select: +; M64: # %bb.0: +; M64-NEXT: sll $3, $4, 0 +; M64-NEXT: sll $2, $6, 0 +; M64-NEXT: sll $1, $5, 0 +; M64-NEXT: andi $3, $3, 1 +; M64-NEXT: jr $ra +; M64-NEXT: movn $2, $1, $3 + %result = select i1 %cond, i32 %a, i32 %b + ret i32 %result +} + +; Test if XOR with all-ones still gets optimized +define i32 @test_xor_all_ones() { +; M32-LABEL: test_xor_all_ones: +; M32: # %bb.0: +; M32-NEXT: jr $ra +; M32-NEXT: addiu $2, $zero, 0 +; +; M64-LABEL: test_xor_all_ones: +; M64: # %bb.0: +; M64-NEXT: jr $ra +; M64-NEXT: addiu $2, $zero, 0 + %xor1 = xor i32 -1, -1 ; Should optimize to 0 + ret i32 %xor1 +} + +define i32 @test_xor_same_value(i32 %x) { +; M32-LABEL: test_xor_same_value: +; M32: # %bb.0: +; M32-NEXT: jr $ra +; M32-NEXT: addiu $2, $zero, 0 +; +; M64-LABEL: test_xor_same_value: +; M64: # %bb.0: +; M64-NEXT: jr $ra +; M64-NEXT: addiu $2, $zero, 0 + %xor2 = xor i32 %x, %x ; Should optimize to 0 + ret i32 %xor2 +} + +define i32 @test_normal_ops(i32 %x) { +; M32-LABEL: test_normal_ops: +; M32: # %bb.0: +; M32-NEXT: jr $ra +; M32-NEXT: move $2, $4 +; +; M64-LABEL: test_normal_ops: +; M64: # %bb.0: +; M64-NEXT: jr $ra +; M64-NEXT: sll $2, $4, 0 + %or1 = or i32 %x, 0 ; Should optimize to %x + %and1 = and i32 %or1, -1 ; Should optimize to %x + %xor1 = xor i32 %and1, 0 ; Should optimize to %x + ret i32 %xor1 +} + +; Constant-operand XORs combined with OR should still fold to 0 (the concern raised in review) +define i32 @test_xor_with_const_operands() { +; M32-LABEL: test_xor_with_const_operands: +; M32: # %bb.0: +; M32-NEXT: jr $ra +; M32-NEXT: addiu $2, $zero, 0 +; +; M64-LABEL: test_xor_with_const_operands: +; M64: # %bb.0: +; M64-NEXT: jr $ra +; M64-NEXT: addiu $2, $zero, 0 + %a = xor i32 -1, -1 + %b = xor i32 0, 0 + %c = xor i32 42, 42 + %result = or i32 %a, %b + %final = or i32 %result, %c + ret i32 %final ; Should optimize to 0 +} + +declare i32
@llvm.ct.select.i32(i1, i32, i32) _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
