For legacy AMX, the intrinsics are implemented with inline assembly
because we do not perform tile register allocation. For ACE, we take
a different approach.

We still do not perform tile register allocation for now, because
there is no convenient way to spill tile registers, and spilling is a
prerequisite for register allocation. However, we want to make a
potential future implementation of register allocation easier. Thus,
we introduce a fake tmm register.

We will use internal patterns instead of inline assembly wrappers
to support the ACE ISAs. This lets the compiler know the dependencies
between insns. In this scheme, the compiler relies on users to pass
the tmm register number and to do the register allocation themselves.
Since in ACE tmm is an accumulation unit rather than a calculation
unit, allocation by the user is acceptable.

gcc/ChangeLog:

        * config/i386/i386.cc (regclass_map): Add tmm.
        (debugger64_register_map): Ditto.
        (debugger_register_map): Ditto.  Also add the missing entries
        for APX r16-r31.
        (svr4_debugger_register_map): Ditto.
        (ix86_conditional_register_usage): Enable tmm only when AMX
        or ACE exists.
        * config/i386/i386.h (FIXED_REGISTERS): Add tmm.
        (CALL_USED_REGISTERS): Ditto.
        (REG_ALLOC_ORDER): Ditto.
        (HI_REGISTER_NAMES): Ditto.
        * config/i386/i386.md (TMM_REGNUM): New constant.
        (FIRST_PSEUDO_REG): Adjust for the new register.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/pr124407-1.c: Adjust fragile testcase.

Co-authored-by: Dipesh Sharma <[email protected]>
---
 gcc/config/i386/i386.cc                    | 30 +++++++++++++++++++---
 gcc/config/i386/i386.h                     | 13 +++++++---
 gcc/config/i386/i386.md                    |  3 ++-
 gcc/testsuite/gcc.target/i386/pr124407-1.c |  2 +-
 4 files changed, 39 insertions(+), 9 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index e66958db7ac..9cdd0138104 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -177,6 +177,8 @@ enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
   GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
   GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
   GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+  /* TMM fake register placeholder */
+  NO_REGS,
 };
 
 /* The "default" register map used in 32bit mode.  */
@@ -207,7 +209,14 @@ unsigned int const debugger_register_map[FIRST_PSEUDO_REGISTER] =
   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
   /* Mask registers */
-  93, 94, 95, 96, 97, 98, 99, 100
+  93, 94, 95, 96, 97, 98, 99, 100,
+  /* APX r16-r31 */
+  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
+  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
+  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
+  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
+  /* TMM fake register placeholder */
+  INVALID_REGNUM,
 };
 
 /* The "default" register map used in 64bit mode.  */
@@ -237,7 +246,9 @@ unsigned int const debugger64_register_map[FIRST_PSEUDO_REGISTER] =
   118, 119, 120, 121, 122, 123, 124, 125,
   /* rex2 extend integer registers */
   130, 131, 132, 133, 134, 135, 136, 137,
-  138, 139, 140, 141, 142, 143, 144, 145
+  138, 139, 140, 141, 142, 143, 144, 145,
+  /* tmm fake register placeholder */
+  IGNORED_DWARF_REGNUM,
 };
 
 /* Define the register numbers to be used in Dwarf debugging information.
@@ -320,7 +331,14 @@ unsigned int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER] =
   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
   /* Mask registers */
-  93, 94, 95, 96, 97, 98, 99, 100
+  93, 94, 95, 96, 97, 98, 99, 100,
+  /* APX r16-r31 */
+  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
+  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
+  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
+  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
+  /* TMM fake register placeholder */
+  INVALID_REGNUM,
 };
 
 /* Define parameter passing and return registers.  */
@@ -571,6 +589,12 @@ ix86_conditional_register_usage (void)
       for (i = FIRST_REX2_INT_REG; i <= LAST_REX2_INT_REG; i++)
        CLEAR_HARD_REG_BIT (accessible_reg_set, i);
     }
+
+  /* If AMX-TILE or ACEV1 is disabled, disable tmm registers.  */
+  if (! (TARGET_AMX_TILE || TARGET_ACEV1))
+    {
+      CLEAR_HARD_REG_BIT (accessible_reg_set, TMM_REGNUM);
+    }
 }
 
 /* Canonicalize a comparison from one we don't have to one we do have.  */
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 1b8785d7872..6e0460ccc56 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1027,7 +1027,9 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
 /*  r16,  r17, r18, r19, r20, r21, r22, r23*/                  \
      0,   0,   0,   0,   0,   0,   0,   0,                     \
 /*  r24,  r25, r26, r27, r28, r29, r30, r31*/                  \
-     0,   0,   0,   0,   0,   0,   0,   0}                     \
+     0,   0,   0,   0,   0,   0,   0,   0,                     \
+/*  tmm*/                                                      \
+     1}                                                                \
 
 /* 1 for registers not available across function calls.
    These must include the FIXED_REGISTERS and also any
@@ -1068,7 +1070,9 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
 /*  r16,  r17, r18, r19, r20, r21, r22, r23*/                  \
      1,   1,   1,   1,   1,   1,   1,   1,                     \
 /*  r24,  r25, r26, r27, r28, r29, r30, r31*/                  \
-     1,   1,   1,   1,   1,   1,   1,   1}                     \
+     1,   1,   1,   1,   1,   1,   1,   1,                     \
+/*  tmm*/                                                      \
+     1}                                                                \
 
 /* Order in which to allocate registers.  Each register must be
    listed once, even those in FIXED_REGISTERS.  List frame pointer
@@ -1085,7 +1089,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,      \
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,      \
   64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,      \
-  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91}
+  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92}
 
 /* ADJUST_REG_ALLOC_ORDER is a macro which permits reg_alloc_order
    to be rearranged based on a particular function.  When using sse math,
@@ -2107,7 +2111,8 @@ do {                                                      \
  "xmm28", "xmm29", "xmm30", "xmm31",                                   \
  "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7",                       \
  "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",               \
- "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31" }
+ "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",               \
+ ""}
 
 #define REGISTER_NAMES HI_REGISTER_NAMES
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index f173423b334..65263a3d34b 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -508,7 +508,8 @@
    (R29_REG                    89)
    (R30_REG                    90)
    (R31_REG                    91)
-   (FIRST_PSEUDO_REG           92)
+   (TMM_REGNUM                 92)
+   (FIRST_PSEUDO_REG           93)
   ])
 
 ;; Insn callee abi index.
diff --git a/gcc/testsuite/gcc.target/i386/pr124407-1.c b/gcc/testsuite/gcc.target/i386/pr124407-1.c
index e8fb0dd7872..3065bfae7aa 100644
--- a/gcc/testsuite/gcc.target/i386/pr124407-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr124407-1.c
@@ -14,5 +14,5 @@ foo()
   v /= f;
 }
 
-/* { dg-final { scan-rtl-dump {\(set \(reg:V16QI 125\)} "x86_cse" { target { ! ia32 } } } } */
+/* { dg-final { scan-rtl-dump {\(set \(reg:V16QI [0-9]+\)} "x86_cse" { target { ! ia32 } } } } */
 /* { dg-final { scan-rtl-dump {\(const_int 0 \[0\]\) repeated x16} "x86_cse" { target { ! ia32 } } } } */
-- 
2.31.1

Reply via email to