Hi Roger! If you don't mind, I could use your help here (but: low priority!):
On 2024-07-27T19:18:35+0100, "Roger Sayle" <ro...@nextmovesoftware.com> wrote: > Previously, for isnormal, GCC -O2 would generate: [...] > and with this patch becomes: > > mov.f64 %r23, %ar0; > setp.neu.f64 %r24, %r23, 0d0000000000000000; > testp.normal.f64 %r25, %r23; > and.pred %r26, %r24, %r25; > selp.u32 %value, 1, 0, %r26; Looking at this, shouldn't we be able to optimize ("combine") this into something like (untested): mov.f64 %r23, %ar0; testp.normal.f64 %r25, %r23; setp.neu.and.f64 %r26, %r23, 0d0000000000000000, %r25; selp.u32 %value, 1, 0, %r26; (I hope I correctly understood PTX 'setp', 'combine [...] with a predicate value by applying a Boolean operator'!) That is, "combine": CmpOp = { eq, ne, lt, le, gt, ge, lo, ls, hi, hs, equ, neu, ltu, leu, gtu, geu, num, nan }; BoolOp = { and, or, xor }; setp.CmpOp.TYPE %3, %2, %1; BoolOp.pred %5, %3, %4 ... into: setp.CmpOp.BoolOp.TYPE %5, %2, %1, %4; I tried adding a corresponding 'define_insn' for just the 'and' case at hand (eventually to be generalized to 'BoolOp'), see the attached "WIP nvptx: 'setp', 'combine [...] with a predicate value by applying a Boolean operator'". This does do the expected transformation for quite a number of instances in the GCC/nvptx target libraries (again: completely untested!) -- but it doesn't for the new 'gcc.target/nvptx/isnormal.c', and I don't know how to read '-fdump-rtl-combine-all' to understand why. Any "RTFM" or other pointers are gladly accepted, as is guidance about how to approach such an issue. (Or tell me it's just 'TARGET_RTX_COSTS'...) 
Grüße Thomas > --- a/gcc/config/nvptx/nvptx.md > +++ b/gcc/config/nvptx/nvptx.md > +(define_insn "setcc_isnormal<mode>" > + [(set (match_operand:BI 0 "nvptx_register_operand" "=R") > + (unspec:BI [(match_operand:SDFM 1 "nvptx_register_operand" "R")] > + UNSPEC_ISNORMAL))] > + "" > + "%.\\ttestp.normal%t1\\t%0, %1;") > + > +(define_expand "isnormal<mode>2" > + [(set (match_operand:SI 0 "nvptx_register_operand" "=R") > + (unspec:SI [(match_operand:SDFM 1 "nvptx_register_operand" "R")] > + UNSPEC_ISNORMAL))] > + "" > +{ > + rtx pred1 = gen_reg_rtx (BImode); > + rtx pred2 = gen_reg_rtx (BImode); > + rtx pred3 = gen_reg_rtx (BImode); > + rtx zero = CONST0_RTX (<MODE>mode); > + rtx cmp = gen_rtx_fmt_ee (NE, BImode, operands[1], zero); > + emit_insn (gen_cmp<mode> (pred1, cmp, operands[1], zero)); > + emit_insn (gen_setcc_isnormal<mode> (pred2, operands[1])); > + emit_insn (gen_andbi3 (pred3, pred1, pred2)); > + emit_insn (gen_setccsi_from_bi (operands[0], pred3)); > + DONE; > +}) > --- /dev/null > +++ b/gcc/testsuite/gcc.target/nvptx/isnormal.c > @@ -0,0 +1,9 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +int isnormal(double x) > +{ > + return __builtin_isnormal(x); > +} > + > +/* { dg-final { scan-assembler-times "testp.normal.f64" 1 } } */
From c4c389a6bd262356023202adab08a48f044e59b2 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge <tschwi...@baylibre.com> Date: Fri, 27 Sep 2024 15:14:19 +0200 Subject: [PATCH] WIP nvptx: 'setp', 'combine [...] with a predicate value by applying a Boolean operator' Re "Implement isfinite and isnormal optabs in nvptx.md" mov.f64 %r23, %ar0; setp.neu.f64 %r24, %r23, 0d0000000000000000; testp.normal.f64 %r25, %r23; and.pred %r26, %r24, %r25; selp.u32 %value, 1, 0, %r26; Can we optimize this into something like (untested): mov.f64 %r23, %ar0; testp.normal.f64 %r25, %r23; setp.neu.and.f64 %r26, %r23, 0d0000000000000000, %r25; selp.u32 %value, 1, 0, %r26; That is, "combine": CmpOp = { eq, ne, lt, le, gt, ge, lo, ls, hi, hs, equ, neu, ltu, leu, gtu, geu, num, nan }; BoolOp = { and, or, xor }; setp.CmpOp.TYPE %3, %2, %1; BoolOp.pred %5, %3, %4 ..., into: setp.CmpOp.BoolOp.TYPE %5, %2, %1, %4; --- gcc/config/nvptx/nvptx.cc | 3 +++ gcc/config/nvptx/nvptx.md | 23 ++++++++++++++++------- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc index 96a1134220e..b4c4f9ff021 100644 --- a/gcc/config/nvptx/nvptx.cc +++ b/gcc/config/nvptx/nvptx.cc @@ -3080,6 +3080,9 @@ nvptx_print_operand (FILE *file, rtx x, int code) default: gcc_unreachable (); } + break; + case /*TODO*/ 'C': + mode = GET_MODE (XEXP (x, 0)); if (FLOAT_MODE_P (mode) || x_code == EQ || x_code == NE || x_code == GEU || x_code == GTU diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index ae711bbd250..ce2603eeccb 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -881,13 +881,22 @@ ;; Comparisons and branches +(define_insn "" + [(set (match_operand:BI 0 "nvptx_register_operand" "=R") + (and:BI (match_operator:BI 1 "nvptx_comparison_operator" + [(match_operand:HSDIM 2 "nvptx_register_operand" "R") + (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]) + (match_operand:BI 4 "nvptx_register_operand" "R")))] + "" + 
"%.\\tsetp%c1.and%C1\\t%0, %2, %3, %4;") + (define_insn "cmp<mode>" [(set (match_operand:BI 0 "nvptx_register_operand" "=R") (match_operator:BI 1 "nvptx_comparison_operator" [(match_operand:HSDIM 2 "nvptx_register_operand" "R") (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))] "" - "%.\\tsetp%c1\\t%0, %2, %3;") + "%.\\tsetp%c1%C1\\t%0, %2, %3;") (define_insn "cmp<mode>" [(set (match_operand:BI 0 "nvptx_register_operand" "=R") @@ -895,7 +904,7 @@ [(match_operand:SDFM 2 "nvptx_register_operand" "R") (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))] "" - "%.\\tsetp%c1\\t%0, %2, %3;") + "%.\\tsetp%c1%C1\\t%0, %2, %3;") (define_insn "*cmphf" [(set (match_operand:BI 0 "nvptx_register_operand" "=R") @@ -903,7 +912,7 @@ [(match_operand:HF 2 "nvptx_register_operand" "R") (match_operand:HF 3 "nvptx_nonmemory_operand" "RF")]))] "TARGET_SM53" - "%.\\tsetp%c1\\t%0, %2, %3;") + "%.\\tsetp%c1%C1\\t%0, %2, %3;") (define_insn "jump" [(set (pc) @@ -1095,7 +1104,7 @@ [(match_operand:HSDIM 2 "nvptx_register_operand" "R") (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")])))] "" - "%.\\tset%t0%c1\\t%0, %2, %3;") + "%.\\tset%t0%c1%C1\\t%0, %2, %3;") (define_insn "*setcc_int<mode>" [(set (match_operand:SI 0 "nvptx_register_operand" "=R") @@ -1104,7 +1113,7 @@ [(match_operand:SDFM 2 "nvptx_register_operand" "R") (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")])))] "" - "%.\\tset%t0%c1\\t%0, %2, %3;") + "%.\\tset%t0%c1%C1\\t%0, %2, %3;") (define_insn "setcc_float<mode>" [(set (match_operand:SF 0 "nvptx_register_operand" "=R") @@ -1112,7 +1121,7 @@ [(match_operand:HSDIM 2 "nvptx_register_operand" "R") (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))] "" - "%.\\tset%t0%c1\\t%0, %2, %3;") + "%.\\tset%t0%c1%C1\\t%0, %2, %3;") (define_insn "setcc_float<mode>" [(set (match_operand:SF 0 "nvptx_register_operand" "=R") @@ -1120,7 +1129,7 @@ [(match_operand:SDFM 2 "nvptx_register_operand" "R") (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))] "" - 
"%.\\tset%t0%c1\\t%0, %2, %3;") + "%.\\tset%t0%c1%C1\\t%0, %2, %3;") (define_expand "cstore<mode>4" [(set (match_operand:SI 0 "nvptx_register_operand") -- 2.34.1