Hi Roger!

If you don't mind, I could use your help here (but: low priority!):

On 2024-07-27T19:18:35+0100, "Roger Sayle" <ro...@nextmovesoftware.com> wrote:
> Previously, for isnormal, GCC -O2 would generate: [...]
> and with this patch becomes:
>
>                 mov.f64 %r23, %ar0;
>                 setp.neu.f64    %r24, %r23, 0d0000000000000000;
>                 testp.normal.f64        %r25, %r23;
>                 and.pred        %r26, %r24, %r25;
>                 selp.u32        %value, 1, 0, %r26;

Looking at this, shouldn't we be able to optimize ("combine") this into
something like (untested):

    mov.f64 %r23, %ar0;
    testp.normal.f64        %r25, %r23;
    setp.neu.and.f64    %r26, %r23, 0d0000000000000000, %r25;
    selp.u32        %value, 1, 0, %r26;

(I hope I correctly understood PTX 'setp', 'combine [...] with a
predicate value by applying a Boolean operator'!)

That is, "combine":

    CmpOp = { eq, ne, lt, le, gt, ge, lo, ls, hi, hs, equ, neu, ltu, leu, gtu, geu, num, nan };

    BoolOp = { and, or, xor };

    setp.CmpOp.TYPE %3, %2, %1;
    BoolOp.pred %5, %3, %4

... into:

    setp.CmpOp.BoolOp.TYPE %5, %2, %1, %4;

I tried adding a corresponding 'define_insn' for just the 'and' case at
hand (eventually to be generalized to 'BoolOp'), see the attached
"WIP nvptx: 'setp', 'combine [...] with a predicate value by applying a
Boolean operator'".
This does do the expected transformation for quite a number of instances
in the GCC/nvptx target libraries (again: completely untested!) -- but it
doesn't for the new 'gcc.target/nvptx/isnormal.c', and I don't know how
to read the '-fdump-rtl-combine-all' output to understand why.  Any "RTFM"
or other pointers gladly accepted, as is guidance about how to approach
such an issue.  (Or tell me it's just 'TARGET_RTX_COSTS'...)


Grüße
 Thomas


> --- a/gcc/config/nvptx/nvptx.md
> +++ b/gcc/config/nvptx/nvptx.md

> +(define_insn "setcc_isnormal<mode>"
> +  [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
> +     (unspec:BI [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
> +                UNSPEC_ISNORMAL))]
> +  ""
> +  "%.\\ttestp.normal%t1\\t%0, %1;")
> +
> +(define_expand "isnormal<mode>2"
> +  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
> +     (unspec:SI [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
> +                UNSPEC_ISNORMAL))]
> +  ""
> +{
> +  rtx pred1 = gen_reg_rtx (BImode);
> +  rtx pred2 = gen_reg_rtx (BImode);
> +  rtx pred3 = gen_reg_rtx (BImode);
> +  rtx zero = CONST0_RTX (<MODE>mode);
> +  rtx cmp = gen_rtx_fmt_ee (NE, BImode, operands[1], zero);
> +  emit_insn (gen_cmp<mode> (pred1, cmp, operands[1], zero));
> +  emit_insn (gen_setcc_isnormal<mode> (pred2, operands[1]));
> +  emit_insn (gen_andbi3 (pred3, pred1, pred2));
> +  emit_insn (gen_setccsi_from_bi (operands[0], pred3));
> +  DONE;
> +})

> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/nvptx/isnormal.c
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +int isnormal(double x)
> +{
> +  return __builtin_isnormal(x);
> +}
> +
> +/* { dg-final { scan-assembler-times "testp.normal.f64" 1 } } */


>From c4c389a6bd262356023202adab08a48f044e59b2 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <tschwi...@baylibre.com>
Date: Fri, 27 Sep 2024 15:14:19 +0200
Subject: [PATCH] WIP nvptx: 'setp', 'combine [...] with a predicate value by
 applying a Boolean operator'

Re "Implement isfinite and isnormal optabs in nvptx.md"

    mov.f64 %r23, %ar0;
    setp.neu.f64    %r24, %r23, 0d0000000000000000;
    testp.normal.f64        %r25, %r23;
    and.pred        %r26, %r24, %r25;
    selp.u32        %value, 1, 0, %r26;

Can we optimize this into something like (untested):

    mov.f64 %r23, %ar0;
    testp.normal.f64        %r25, %r23;
    setp.neu.and.f64    %r26, %r23, 0d0000000000000000, %r25;
    selp.u32        %value, 1, 0, %r26;

That is, "combine":

    CmpOp = { eq, ne, lt, le, gt, ge, lo, ls, hi, hs, equ, neu, ltu, leu, gtu, geu, num, nan };

    BoolOp = { and, or, xor };

    setp.CmpOp.TYPE %3, %2, %1;
    BoolOp.pred %5, %3, %4

..., into:

    setp.CmpOp.BoolOp.TYPE %5, %2, %1, %4;
---
 gcc/config/nvptx/nvptx.cc |  3 +++
 gcc/config/nvptx/nvptx.md | 23 ++++++++++++++++-------
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc
index 96a1134220e..b4c4f9ff021 100644
--- a/gcc/config/nvptx/nvptx.cc
+++ b/gcc/config/nvptx/nvptx.cc
@@ -3080,6 +3080,9 @@ nvptx_print_operand (FILE *file, rtx x, int code)
 	default:
 	  gcc_unreachable ();
 	}
+      break;
+    case /*TODO*/ 'C':
+      mode = GET_MODE (XEXP (x, 0));
       if (FLOAT_MODE_P (mode)
 	  || x_code == EQ || x_code == NE
 	  || x_code == GEU || x_code == GTU
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index ae711bbd250..ce2603eeccb 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -881,13 +881,22 @@
 
 ;; Comparisons and branches
 
+(define_insn ""
+  [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
+	(and:BI (match_operator:BI 1 "nvptx_comparison_operator"
+		   [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
+		    (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")])
+		(match_operand:BI 4 "nvptx_register_operand" "R")))]
+  ""
+  "%.\\tsetp%c1.and%C1\\t%0, %2, %3, %4;")
+
 (define_insn "cmp<mode>"
   [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
 	(match_operator:BI 1 "nvptx_comparison_operator"
 	   [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
 	    (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
   ""
-  "%.\\tsetp%c1\\t%0, %2, %3;")
+  "%.\\tsetp%c1%C1\\t%0, %2, %3;")
 
 (define_insn "cmp<mode>"
   [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
@@ -895,7 +904,7 @@
 	   [(match_operand:SDFM 2 "nvptx_register_operand" "R")
 	    (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
   ""
-  "%.\\tsetp%c1\\t%0, %2, %3;")
+  "%.\\tsetp%c1%C1\\t%0, %2, %3;")
 
 (define_insn "*cmphf"
   [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
@@ -903,7 +912,7 @@
 	   [(match_operand:HF 2 "nvptx_register_operand" "R")
 	    (match_operand:HF 3 "nvptx_nonmemory_operand" "RF")]))]
   "TARGET_SM53"
-  "%.\\tsetp%c1\\t%0, %2, %3;")
+  "%.\\tsetp%c1%C1\\t%0, %2, %3;")
 
 (define_insn "jump"
   [(set (pc)
@@ -1095,7 +1104,7 @@
 	    [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
 	     (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")])))]
   ""
-  "%.\\tset%t0%c1\\t%0, %2, %3;")
+  "%.\\tset%t0%c1%C1\\t%0, %2, %3;")
 
 (define_insn "*setcc_int<mode>"
   [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
@@ -1104,7 +1113,7 @@
 	    [(match_operand:SDFM 2 "nvptx_register_operand" "R")
 	     (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")])))]
   ""
-  "%.\\tset%t0%c1\\t%0, %2, %3;")
+  "%.\\tset%t0%c1%C1\\t%0, %2, %3;")
 
 (define_insn "setcc_float<mode>"
   [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
@@ -1112,7 +1121,7 @@
 	   [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
 	    (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
   ""
-  "%.\\tset%t0%c1\\t%0, %2, %3;")
+  "%.\\tset%t0%c1%C1\\t%0, %2, %3;")
 
 (define_insn "setcc_float<mode>"
   [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
@@ -1120,7 +1129,7 @@
 	   [(match_operand:SDFM 2 "nvptx_register_operand" "R")
 	    (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
   ""
-  "%.\\tset%t0%c1\\t%0, %2, %3;")
+  "%.\\tset%t0%c1%C1\\t%0, %2, %3;")
 
 (define_expand "cstore<mode>4"
   [(set (match_operand:SI 0 "nvptx_register_operand")
-- 
2.34.1

Reply via email to