Hi! The following testcase is miscompiled on s390x-linux with e.g. -march=z13 (at both -O0 and -O2) starting with r15-7053. The problem is in the splitters which emulate TImode/V1TImode GT and GTU comparisons. For GT we want to compute (ior (gt (hi op1) (hi op2)) (and (eq (hi op1) (hi op2)) (gtu (lo op1) (lo op2)))), and for GTU the same except with gtu instead of gt for the high parts. The splitter emulation uses V2DImode comparisons, where on s390x the hi part is in the first element of the vector and the lo part is in the second, and for the gtu comparison it swaps the elements of the vector. So we get the right result in the first element of the result vector. But vrepg was then broadcasting the second element of the result vector rather than the first, and the value of the second element is instead (ior (gt (lo op1) (lo op2)) (and (eq (lo op1) (lo op2)) (gtu (hi op1) (hi op2)))), i.e. something that is not usable for the emulated comparison.
The following patch fixes that. The testcase tries to test behavior of double-word smin/smax/umin/umax with various cases of the halves of both operands (one that is sometimes EQ, sometimes GT, sometimes LT, sometimes GTU, sometimes LTU). Stefan has successfully bootstrapped/regtested this on s390x-linux (thanks for that; I'm still in stage3 of LTO profiledbootstrap), ok for trunk? 2025-01-30 Jakub Jelinek <ja...@redhat.com> Stefan Schulze Frielinghaus <stefa...@gcc.gnu.org> PR target/118696 * config/s390/vector.md (*vec_cmpgt<mode><mode>_nocc_emu, *vec_cmpgtu<mode><mode>_nocc_emu): Duplicate the first rather than second V2DImode element. * gcc.dg/pr118696.c: New test. * gcc.target/s390/vector/pr118696.c: New test. * gcc.target/s390/vector/vec-abs-emu.c: Expect vrepg with 0 as last operand rather than 1. * gcc.target/s390/vector/vec-max-emu.c: Likewise. * gcc.target/s390/vector/vec-min-emu.c: Likewise. --- gcc/config/s390/vector.md.jj 2025-01-24 17:37:48.987458141 +0100 +++ gcc/config/s390/vector.md 2025-01-30 09:10:53.413542300 +0100 @@ -2166,7 +2166,7 @@ (define_insn_and_split "*vec_cmpgt<mode> (vec_duplicate:V2DI (vec_select:DI (match_dup 4) - (parallel [(const_int 1)])))) + (parallel [(const_int 0)])))) (set (match_dup 0) (subreg:<MODE> (match_dup 4) 0))] { @@ -2198,7 +2198,7 @@ (define_insn_and_split "*vec_cmpgtu<mode (vec_duplicate:V2DI (vec_select:DI (match_dup 4) - (parallel [(const_int 1)])))) + (parallel [(const_int 0)])))) (set (match_dup 0) (subreg:<MODE> (match_dup 4) 0))] { --- gcc/testsuite/gcc.dg/pr118696.c.jj 2025-01-30 09:52:52.064679434 +0100 +++ gcc/testsuite/gcc.dg/pr118696.c 2025-01-30 09:52:33.430936447 +0100 @@ -0,0 +1,131 @@ +/* PR target/118696 */ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +#if __CHAR_BIT__ == 8 +#if __SIZEOF_INT128__ == 16 && __SIZEOF_LONG_LONG__ == 8 +#define D __int128 +#define S long long +#define M 0x8000000000000000ULL +#define C 64 +#elif __SIZEOF_LONG_LONG__ == 8 && __SIZEOF_INT__ == 4 +#define D long long 
+#define S int +#define M 0x80000000U +#define C 32 +#endif +#endif + +#ifdef D +static inline D +combine (unsigned S x, unsigned S y) +{ + return (unsigned D) x << C | y; +} + +__attribute__((noipa)) D +smin (D x, D y) +{ + return x < y ? x : y; +} + +__attribute__((noipa)) D +smax (D x, D y) +{ + return x > y ? x : y; +} + +__attribute__((noipa)) unsigned D +umin (unsigned D x, unsigned D y) +{ + return x < y ? x : y; +} + +__attribute__((noipa)) unsigned D +umax (unsigned D x, unsigned D y) +{ + return x > y ? x : y; +} +#endif + +int +main () +{ +#ifdef D + unsigned S vals[] = { + 0, 12, 42, M, M | 12, M | 42 + }; + unsigned char expected[] = { + 4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,0,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,0,0,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,4,3,3,3,3,3,3,3,3,3,3,3, + 3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,4,3,3,3,3,3,3, + 3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,4,3, + 3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0, + 0,0,0,4,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 0,0,0,0,0,0,0,0,4,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,0,0,0,0,0,0,0,0,0,4,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,4,3,3,3,3,3,3,3,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,4,3,3,3,3,3,3,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,4,3,3,3, + 3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0, + 0,4,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,4,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,4,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,2,2,2,2,2,2,2,2,2,2, + 
2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,4,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,4, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,0,0,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,0,0,0,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,0,0,0,0,4,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,4,3,3,3,3,3,3,3,3,3,3,3,3, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,4,3,3,3,3,3,3,3, + 3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,4,3,3, + 3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, + 0,0,4,3,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0, + 0,0,0,0,0,0,0,4,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,0,0,0,0,0,0,0,0,0,0,4,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,4,3,3,3,3,3,3,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,4,3,3,3,3,3,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,4,3,3,3,3, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 4,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,4,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,4,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4 + }; + int m = 0; + for (int i = 0; i < 6; ++i) + for (int j = 0; j < 6; ++j) + for (int k = 0; k < 6; ++k) + for (int l = 0; l < 6; ++l) + { + D a = combine (vals[i], vals[j]); + D b = combine (vals[k], vals[l]); + int r = 0; + r |= (smin (a, b) == a) * 1; + r |= (smin (a, b) == b) * 2; + r |= (smax (a, b) == a) * 4; + r |= (smax (a, b) == b) * 8; + r |= (umin (a, b) == a) * 16; + r |= (umin (a, b) == b) * 32; + r |= (umax (a, b) == a) * 64; + r |= (umax (a, b) == b) * 128; + switch (r) + { + case 102: r = 0; break; + case 105: r = 1; break; + case 150: r = 2; break; + case 153: r = 3; break; + case 255: r = 4; break; 
+ default: __builtin_abort (); + } + if (r != expected[m++]) + __builtin_abort (); + } +#endif +} --- gcc/testsuite/gcc.target/s390/vector/pr118696.c.jj 2025-01-30 09:55:53.134181945 +0100 +++ gcc/testsuite/gcc.target/s390/vector/pr118696.c 2025-01-30 09:55:47.433260572 +0100 @@ -0,0 +1,5 @@ +/* PR target/118696 */ +/* { dg-do run } */ +/* { dg-options "-O2 -mzarch -march=z13" } */ + +#include "../../../gcc.dg/pr118696.c" --- gcc/testsuite/gcc.target/s390/vector/vec-abs-emu.c.jj 2025-01-20 10:23:38.572601427 +0100 +++ gcc/testsuite/gcc.target/s390/vector/vec-abs-emu.c 2025-01-30 16:58:51.853928339 +0100 @@ -18,7 +18,7 @@ typedef __attribute__ ((vector_size (16) ** vchg %v[0-9]+,%v[0-9]+,%v[0-9]+ ** vn %v[0-9]+,%v[0-9]+,%v[0-9]+ ** vo %v[0-9]+,%v[0-9]+,%v[0-9]+ -** vrepg %v[0-9]+,%v[0-9]+,1 +** vrepg %v[0-9]+,%v[0-9]+,0 ** vsq %v[0-9]+,%v[0-9]+,%v[0-9]+ ** vsel %v[0-9]+,%v[0-9]+,%v[0-9]+,%v[0-9]+ ** br %r14 --- gcc/testsuite/gcc.target/s390/vector/vec-max-emu.c.jj 2025-01-20 10:23:38.572601427 +0100 +++ gcc/testsuite/gcc.target/s390/vector/vec-max-emu.c 2025-01-30 16:59:06.279729311 +0100 @@ -17,7 +17,7 @@ typedef __attribute__ ((vector_size (16) ** vchg %v[0-9]+,%v[0-9]+,%v[0-9]+ ** vn %v[0-9]+,%v[0-9]+,%v[0-9]+ ** vo %v[0-9]+,%v[0-9]+,%v[0-9]+ -** vrepg %v[0-9]+,%v[0-9]+,1 +** vrepg %v[0-9]+,%v[0-9]+,0 ** vsel %v[0-9]+,%v[0-9]+,%v[0-9]+,%v[0-9]+ ** br %r14 */ --- gcc/testsuite/gcc.target/s390/vector/vec-min-emu.c.jj 2025-01-20 10:23:38.572601427 +0100 +++ gcc/testsuite/gcc.target/s390/vector/vec-min-emu.c 2025-01-30 16:59:00.054815191 +0100 @@ -17,7 +17,7 @@ typedef __attribute__ ((vector_size (16) ** vchg %v[0-9]+,%v[0-9]+,%v[0-9]+ ** vn %v[0-9]+,%v[0-9]+,%v[0-9]+ ** vo %v[0-9]+,%v[0-9]+,%v[0-9]+ -** vrepg %v[0-9]+,%v[0-9]+,1 +** vrepg %v[0-9]+,%v[0-9]+,0 ** vsel %v[0-9]+,%v[0-9]+,%v[0-9]+,%v[0-9]+ ** br %r14 */ Jakub