[PATCH 04/28] rs6000: Add helper functions for parsing

2020-06-17 Thread Bill Schmidt via Gcc-patches
2020-06-17  Bill Schmidt  

* config/rs6000/rs6000-gen-builtins.c (MININT): New defined
constant.
(exit_codes): New enum.
(consume_whitespace): New function.
(advance_line): Likewise.
(safe_inc_pos): Likewise.
(match_identifier): Likewise.
(match_integer): Likewise.
(match_to_right_bracket): Likewise.
---
 gcc/config/rs6000/rs6000-gen-builtins.c | 121 
 1 file changed, 121 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c 
b/gcc/config/rs6000/rs6000-gen-builtins.c
index db7afa31f87..6b38a7f6101 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -141,6 +141,10 @@ along with GCC; see the file COPYING3.  If not see
 #include 
 #include 
 
+/* Used as a sentinel for range constraints on integer fields.  No field can
+   be 32 bits wide, so this is a safe sentinel value.  */
+#define MININT INT32_MIN
+
 /* Input and output file descriptors and pathnames.  */
 static FILE *bif_file;
 static FILE *ovld_file;
@@ -163,6 +167,11 @@ static char linebuf[LINELEN];
 static int line;
 static int pos;
 
+/* Exit codes for the shell.  EC_OK is listed first so that it takes the
+   value 0 and every error code is nonzero; a failing exit (EC_INTERR)
+   must not look like success to the invoking shell or makefile.  */
+enum exit_codes {
+  EC_OK,
+  EC_INTERR
+};
+
 /* Pointer to a diagnostic function.  */
 void (*diag) (const char *, ...) __attribute__ ((format (printf, 1, 2)))
   = NULL;
@@ -188,3 +197,115 @@ ovld_diag (const char * fmt, ...)
   va_end (args);
 }
 
+/* Pass over whitespace (other than a newline, which terminates the
+   scan), leaving pos at the first character that is not skipped.  Only
+   characters for which isspace is true are skipped; the scan never moves
+   pos beyond LINELEN.  */
+static void
+consume_whitespace ()
+{
+  /* The <ctype.h> classifiers are undefined for negative arguments other
+     than EOF, so convert through unsigned char before calling isspace.  */
+  while (pos < LINELEN
+         && linebuf[pos] != '\n'
+         && isspace ((unsigned char) linebuf[pos]))
+    pos++;
+}
+
+/* Read lines from FILE into linebuf until one is found whose first
+   non-whitespace character is neither a newline nor a ';'.  Every line
+   read increments the line counter and resets pos, which is left at
+   that first significant character.  Returns 1 when such a line has
+   been read, or 0 as soon as fgets returns NULL.  */
+static int
+advance_line (FILE *file)
+{
+  for (;;)
+    {
+      char *got = fgets (linebuf, sizeof(linebuf), file);
+      if (got == NULL)
+        return 0;
+
+      ++line;
+      pos = 0;
+      consume_whitespace ();
+
+      /* Blank lines and lines starting with ';' are skipped.  */
+      char first = linebuf[pos];
+      if (first == '\n' || first == ';')
+        continue;
+
+      return 1;
+    }
+}
+
+/* Advance pos by one character.  If that would move pos past the last
+   element of linebuf, report an overrun through diag and exit with
+   EC_INTERR instead of letting callers index out of bounds.  */
+static inline void
+safe_inc_pos ()
+{
+  /* Test the incremented value: with a post-increment, pos could be left
+     equal to LINELEN, and the caller's next linebuf[pos] read would be
+     one past the end of the buffer.  */
+  if (++pos >= LINELEN)
+    {
+      (*diag) ("line length overrun.\n");
+      exit (EC_INTERR);
+    }
+}
+
+/* Match an identifier (a run of alphanumerics and underscores) starting
+   at pos.  Returns NULL without moving pos if the character at pos cannot
+   be part of an identifier; otherwise returns a malloc'd, NUL-terminated
+   copy of the identifier, which the caller owns, and advances pos past
+   it.  Exits with EC_INTERR on a line length overrun or if memory cannot
+   be allocated.  */
+static char *
+match_identifier ()
+{
+  /* lastpos is the index of the last character known to belong to the
+     identifier, so it starts one before pos.  */
+  int lastpos = pos - 1;
+  while (isalnum ((unsigned char) linebuf[lastpos + 1])
+         || linebuf[lastpos + 1] == '_')
+    if (++lastpos >= LINELEN - 1)
+      {
+        (*diag) ("line length overrun.\n");
+        exit (EC_INTERR);
+      }
+
+  if (lastpos < pos)
+    return NULL;
+
+  int len = lastpos - pos + 1;
+  char *buf = (char *) malloc (len + 1);
+  if (!buf)
+    {
+      (*diag) ("out of memory.\n");
+      exit (EC_INTERR);
+    }
+  memcpy (buf, &linebuf[pos], len);
+  buf[len] = '\0';
+
+  pos = lastpos + 1;
+  return buf;
+}
+
+/* Match an optionally negated decimal integer starting at pos and return
+   its value, advancing pos past the last digit.  Returns MININT, with pos
+   left where it was on entry, if there are no digits at that point or if
+   the magnitude exceeds INT32_MAX.  Exits with EC_INTERR on a line length
+   overrun.  */
+static int
+match_integer ()
+{
+  int startpos = pos;
+  if (linebuf[pos] == '-')
+    safe_inc_pos ();
+
+  /* lastpos is the index of the last digit found so far.  */
+  int lastpos = pos - 1;
+  while (isdigit ((unsigned char) linebuf[lastpos + 1]))
+    if (++lastpos >= LINELEN - 1)
+      {
+        (*diag) ("line length overrun in match_integer.\n");
+        exit (EC_INTERR);
+      }
+
+  if (lastpos < pos)
+    {
+      /* No digits.  Undo the step over a lone '-' so that failure leaves
+         pos untouched, as match_identifier does.  */
+      pos = startpos;
+      return MININT;
+    }
+
+  /* Accumulate the value straight from linebuf.  This needs no temporary
+     copy (the old one was never freed) and lets overflow be detected
+     before it happens rather than being left to sscanf.  */
+  int negative = linebuf[startpos] == '-';
+  int magnitude = 0;
+  for (int i = pos; i <= lastpos; i++)
+    {
+      int digit = linebuf[i] - '0';
+      if (magnitude > (INT32_MAX - digit) / 10)
+        {
+          pos = startpos;
+          return MININT;
+        }
+      magnitude = magnitude * 10 + digit;
+    }
+
+  pos = lastpos + 1;
+  return negative ? -magnitude : magnitude;
+}
+
+/* Match the text from pos up to, but not including, the next ']' on the
+   current line.  Returns NULL without moving pos if the ']' is at pos
+   itself; otherwise returns a malloc'd, NUL-terminated copy of the text,
+   which the caller owns, and leaves pos at the ']'.  Exits with EC_INTERR
+   if the line ends before a ']' is found, on a line length overrun, or if
+   memory cannot be allocated.  */
+static char *
+match_to_right_bracket ()
+{
+  int lastpos = pos - 1;
+  while (linebuf[lastpos + 1] != ']')
+    {
+      /* Stop at the end of the line just read.  linebuf is reused for
+         every line, so anything after the terminator is left over from
+         an earlier line and must not be scanned.  */
+      if (linebuf[lastpos + 1] == '\n' || linebuf[lastpos + 1] == '\0')
+        {
+          (*diag) ("no ']' found before end of line.\n");
+          exit (EC_INTERR);
+        }
+      if (++lastpos >= LINELEN - 1)
+        {
+          (*diag) ("line length overrun.\n");
+          exit (EC_INTERR);
+        }
+    }
+
+  if (lastpos < pos)
+    return NULL;
+
+  int len = lastpos - pos + 1;
+  char *buf = (char *) malloc (len + 1);
+  if (!buf)
+    {
+      (*diag) ("out of memory.\n");
+      exit (EC_INTERR);
+    }
+  memcpy (buf, &linebuf[pos], len);
+  buf[len] = '\0';
+
+  pos = lastpos + 1;
+  return buf;
+}
+
-- 
2.17.1



[PATCH 02/28] rs6000: Add initial input files

2020-06-17 Thread Bill Schmidt via Gcc-patches
This patch adds a tiny subset of the built-in and overload descriptions.

2020-06-17  Bill Schmidt  

* config/rs6000/rs6000-builtin-new.def: New.
* config/rs6000/rs6000-overload.def: New.
---
 gcc/config/rs6000/rs6000-builtin-new.def | 178 +++
 gcc/config/rs6000/rs6000-overload.def|  57 
 2 files changed, 235 insertions(+)
 create mode 100644 gcc/config/rs6000/rs6000-builtin-new.def
 create mode 100644 gcc/config/rs6000/rs6000-overload.def

diff --git a/gcc/config/rs6000/rs6000-builtin-new.def 
b/gcc/config/rs6000/rs6000-builtin-new.def
new file mode 100644
index 000..88c0ac07d66
--- /dev/null
+++ b/gcc/config/rs6000/rs6000-builtin-new.def
@@ -0,0 +1,178 @@
+; Built-in functions for PowerPC.
+; Copyright (C) 2020 Free Software Foundation, Inc.
+; Contributed by Bill Schmidt, IBM 
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3.  If not see
+; <http://www.gnu.org/licenses/>.  */
+
+
+; Built-in functions in this file are organized into "stanzas", where
+; all built-ins in a given stanza are enabled together.  Each stanza
+; starts with a line identifying the option mask for which the group
+; of functions is permitted, with the mask in square brackets.  This is
+; the only information allowed on the stanza header line, other than
+; whitespace.
+;
+; Following the stanza header are two lines for each function: the
+; prototype line and the attributes line.  The prototype line has
+; this format, where the square brackets indicate optional
+; information and angle brackets indicate required information:
+;
+;   [kind] <return-type> <bif-name> (<argument-list>);
+;
+; Here [kind] can be one of "const", "pure", or "fpmath";
+; <return-type> is a legal type for a built-in function result;
+; <bif-name> is the name by which the function can be called;
+; and <argument-list> is a comma-separated list of legal types
+; for built-in function arguments.  The argument list may be
+; empty, but the parentheses and semicolon are required.
+;
+; A legal type is of the form:
+;
+;   [const] [[signed|unsigned] <basetype> | <vectype>] [*]
+;
+; where "const" applies only to a <basetype> of "int".  Legal values
+; of <basetype> are (for now):
+;
+;   char
+;   short
+;   int
+;   long long
+;   float
+;   double
+;   __int128
+;   _Float128
+;   _Decimal32
+;   _Decimal64
+;   _Decimal128
+;   __ibm128
+;
+; Legal values of <vectype> are as follows, and are shorthand for
+; the associated meaning:
+;
+;   vsc     vector signed char
+;   vuc     vector unsigned char
+;   vbc     vector bool char
+;   vss     vector signed short
+;   vus     vector unsigned short
+;   vbs     vector bool short
+;   vsi     vector signed int
+;   vui     vector unsigned int
+;   vbi     vector bool int
+;   vsll    vector signed long long
+;   vull    vector unsigned long long
+;   vbll    vector bool long long
+;   vsq     vector signed __int128
+;   vuq     vector unsigned __int128
+;   vbq     vector bool __int128
+;   vp      vector pixel
+;   vf      vector float
+;   vd      vector double
+;   vop     opaque vector (matches all vectors)
+;
+; For simplicity, we don't support "short int" and "long long int".
+; We don't currently support a <basetype> of "bool", "long double",
+; or "_Float16".  "signed" and "unsigned" only apply to integral base
+; types.  The optional * indicates a pointer type, which can be used
+; only with "void" and "const char" in this file.  (More specific
+; pointer types are allowed in overload prototypes.)
+;
+; The attributes line looks like this:
+;
+;   <bif-id> <bif-pattern> {<attribute-list>}
+;
+; Here <bif-id> is a unique internal identifier for the built-in
+; function that will be used as part of an enumeration of all
+; built-in functions; <bif-pattern> is the define_expand or
+; define_insn that will be invoked when the call is expanded;
+; and <attribute-list> is a comma-separated list of special
+; conditions that apply to the built-in function.  The attribute
+; list may be empty, but the braces are required.
+;
+; Attributes are strings, and the allowed ones are listed below.
+;
+;   init Process as a vec_init function
+;   set  Process as a vec_set function
+;   extract  Process as a vec_extract function
+;   nosoft   Not valid with -msoft-float
+;   ldvecNee

[PATCH 07/28] rs6000: Add functions for matching types, part 3 of 3

2020-06-17 Thread Bill Schmidt via Gcc-patches
2020-06-17  Bill Schmidt  

* config/rs6000/rs6000-gen-builtins.c (restriction): New enum.
(typeinfo): Add restriction field.
(match_const_restriction): Implement.
---
 gcc/config/rs6000/rs6000-gen-builtins.c | 136 
 1 file changed, 136 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c 
b/gcc/config/rs6000/rs6000-gen-builtins.c
index e38f3af9c7a..e4b08ee5036 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -189,6 +189,21 @@ enum basetype {
   BT_IBM128
 };
 
+/* Ways in which a const int value can be restricted.  RES_BITS indicates
+   that the integer is restricted to val1 bits, interpreted as an unsigned
+   number.  RES_RANGE indicates that the integer is restricted to values
+   between val1 and val2, inclusive.  RES_VAR_RANGE is like RES_RANGE, but
+   the argument may be variable, so it can only be checked if it is constant.
+   RES_VALUES indicates that the integer must have one of the values val1
+   or val2.  The spelling noted beside each enumerator is the input form
+   that match_const_restriction turns into that restriction.  */
+enum restriction {
+  RES_NONE,  /* Presumably "no restriction"; TODO confirm.  */
+  RES_BITS,  /* Written <val1>.  */
+  RES_RANGE,  /* Written <val1,val2>.  */
+  RES_VAR_RANGE,  /* Written [val1,val2].  */
+  RES_VALUES  /* Written {val1,val2}.  */
+};
+
 /* Type modifiers for an argument or return type.  */
 struct typeinfo {
   char isvoid;
@@ -201,6 +216,7 @@ struct typeinfo {
   char ispointer;
   char isopaque;
   basetype base;
+  restriction restr;
   int val1;
   int val2;
 };
@@ -434,6 +450,126 @@ match_basetype (typeinfo *typedata)
 static int
 match_const_restriction (typeinfo *typedata)
 {
+  /* Parse the restriction that starts at pos and record it in TYPEDATA:
+       <x>    sets restr to RES_BITS and val1 to x
+       <x,y>  sets restr to RES_RANGE, val1 to x and val2 to y
+       {x,y}  sets restr to RES_VALUES, val1 to x and val2 to y
+       [x,y]  sets restr to RES_VAR_RANGE, val1 to x and val2 to y
+     Whitespace is accepted on either side of the comma and before the
+     closing delimiter.  Returns 1 on success, with pos just past the
+     closing delimiter; otherwise reports the problem through diag and
+     returns 0.  The character at pos must be '<', '{' or '['; the last
+     branch asserts that it is '['.  */
+  int oldpos = pos;
+  if (linebuf[pos] == '<')
+    {
+      safe_inc_pos ();
+      oldpos = pos;
+      int x = match_integer ();
+      if (x == MININT)
+        {
+          (*diag) ("malformed integer at column %d.\n", oldpos + 1);
+          return 0;
+        }
+      consume_whitespace ();
+      if (linebuf[pos] == '>')
+        {
+          typedata->restr = RES_BITS;
+          typedata->val1 = x;
+          safe_inc_pos ();
+          return 1;
+        }
+      else if (linebuf[pos] != ',')
+        {
+          (*diag) ("malformed restriction at column %d.\n", pos + 1);
+          return 0;
+        }
+      safe_inc_pos ();
+      /* Skip whitespace after the comma, as the '{' and '[' forms below
+         do, so that "<x, y>" is accepted like "{x, y}" and "[x, y]".  */
+      consume_whitespace ();
+      oldpos = pos;
+      int y = match_integer ();
+      if (y == MININT)
+        {
+          (*diag) ("malformed integer at column %d.\n", oldpos + 1);
+          return 0;
+        }
+      typedata->restr = RES_RANGE;
+      typedata->val1 = x;
+      typedata->val2 = y;
+
+      consume_whitespace ();
+      if (linebuf[pos] != '>')
+        {
+          (*diag) ("malformed restriction at column %d.\n", pos + 1);
+          return 0;
+        }
+      safe_inc_pos ();
+    }
+  else if (linebuf[pos] == '{')
+    {
+      safe_inc_pos ();
+      oldpos = pos;
+      int x = match_integer ();
+      if (x == MININT)
+        {
+          (*diag) ("malformed integer at column %d.\n", oldpos + 1);
+          return 0;
+        }
+      consume_whitespace ();
+      if (linebuf[pos] != ',')
+        {
+          (*diag) ("missing comma at column %d.\n", pos + 1);
+          return 0;
+        }
+      safe_inc_pos ();
+      consume_whitespace ();
+      oldpos = pos;
+      int y = match_integer ();
+      if (y == MININT)
+        {
+          (*diag) ("malformed integer at column %d.\n", oldpos + 1);
+          return 0;
+        }
+      typedata->restr = RES_VALUES;
+      typedata->val1 = x;
+      typedata->val2 = y;
+
+      consume_whitespace ();
+      if (linebuf[pos] != '}')
+        {
+          (*diag) ("malformed restriction at column %d.\n", pos + 1);
+          return 0;
+        }
+      safe_inc_pos ();
+    }
+  else
+    {
+      assert (linebuf[pos] == '[');
+      safe_inc_pos ();
+      oldpos = pos;
+      int x = match_integer ();
+      if (x == MININT)
+        {
+          (*diag) ("malformed integer at column %d.\n", oldpos + 1);
+          return 0;
+        }
+      consume_whitespace ();
+      if (linebuf[pos] != ',')
+        {
+          (*diag) ("missing comma at column %d.\n", pos + 1);
+          return 0;
+        }
+      safe_inc_pos ();
+      consume_whitespace ();
+      oldpos = pos;
+      int y = match_integer ();
+      if (y == MININT)
+        {
+          (*diag) ("malformed integer at column %d.\n", oldpos + 1);
+          return 0;
+        }
+      typedata->restr = RES_VAR_RANGE;
+      typedata->val1 = x;
+      typedata->val2 = y;
+
+      consume_whitespace ();
+      if (linebuf[pos] != ']')
+        {
+          (*diag) ("malformed restriction at column %d.\n", pos + 1);
+          return 0;
+        }
+      safe_inc_pos ();
+    }
+
   return 1;
 }
 
-- 
2.17.1



[PATCH 03/28] rs6000: Add file support and functions for diagnostic support

2020-06-17 Thread Bill Schmidt via Gcc-patches
2020-06-17  Bill Schmidt  

* config/rs6000/rs6000-gen-builtins.c (bif_file): New filescope
variable.
(ovld_file): Likewise.
(header_file): Likewise.
(init_file): Likewise.
(defines_file): Likewise.
(pgm_path): Likewise.
(bif_path): Likewise.
(ovld_path): Likewise.
(header_path): Likewise.
(init_path): Likewise.
(defines_path): Likewise.
(LINELEN): New defined constant.
(linebuf): New filescope variable.
(line): Likewise.
(pos): Likewise.
(diag): Likewise.
(bif_diag): New function.
(ovld_diag): New function.
---
 gcc/config/rs6000/rs6000-gen-builtins.c | 48 +
 1 file changed, 48 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c 
b/gcc/config/rs6000/rs6000-gen-builtins.c
index 1ef12aa0413..db7afa31f87 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -140,3 +140,51 @@ along with GCC; see the file COPYING3.  If not see
 #include 
 #include 
 #include 
+
+/* Input and output file descriptors and pathnames.  */
+static FILE *bif_file;
+static FILE *ovld_file;
+static FILE *header_file;
+static FILE *init_file;
+static FILE *defines_file;
+
+static const char *pgm_path;
+static const char *bif_path;
+static const char *ovld_path;
+static const char *header_path;
+static const char *init_path;
+static const char *defines_path;
+
+/* Position information.  Note that "pos" is zero-indexed, but users
+   expect one-indexed column information, so representations of "pos"
+   as columns in diagnostic messages must be adjusted.  */
+#define LINELEN 1024
+static char linebuf[LINELEN];
+static int line;
+static int pos;
+
+/* Pointer to a diagnostic function.  */
+void (*diag) (const char *, ...) __attribute__ ((format (printf, 1, 2)))
+  = NULL;
+
+/* Custom diagnostics.  bif_diag writes bif_path and the current line
+   number to stderr as a "path:line: " prefix, followed by the
+   printf-style message built from FMT and the remaining arguments.  */
+static void __attribute__ ((format (printf, 1, 2)))
+bif_diag (const char * fmt, ...)
+{
+  va_list ap;
+
+  /* Location prefix first, then the caller's formatted text.  */
+  fprintf (stderr, "%s:%d: ", bif_path, line);
+
+  va_start (ap, fmt);
+  vfprintf (stderr, fmt, ap);
+  va_end (ap);
+}
+
+/* Like bif_diag, but the "path:line: " prefix written to stderr uses
+   ovld_path; the printf-style message built from FMT and the remaining
+   arguments follows it.  */
+static void __attribute__ ((format (printf, 1, 2)))
+ovld_diag (const char * fmt, ...)
+{
+  va_list ap;
+
+  /* Location prefix first, then the caller's formatted text.  */
+  fprintf (stderr, "%s:%d: ", ovld_path, line);
+
+  va_start (ap, fmt);
+  vfprintf (stderr, fmt, ap);
+  va_end (ap);
+}
+
-- 
2.17.1



[PATCH 06/28] rs6000: Add functions for matching types, part 2 of 3

2020-06-17 Thread Bill Schmidt via Gcc-patches
2020-06-17  Bill Schmidt  

* config/rs6000/rs6000-gen-builtins.c (match_basetype):
Implement.
---
 gcc/config/rs6000/rs6000-gen-builtins.c | 49 +
 1 file changed, 49 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c 
b/gcc/config/rs6000/rs6000-gen-builtins.c
index 56938de55f2..e38f3af9c7a 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -365,6 +365,55 @@ handle_pointer (typeinfo *typedata)
 static int
 match_basetype (typeinfo *typedata)
 {
+  /* Skip leading whitespace, read one identifier and translate it into
+     typedata->base; "long" must be followed by a second "long".  On
+     success, handle_pointer is called on TYPEDATA and 1 is returned.
+     Otherwise the problem is reported through diag and 0 is returned.
+     The strings returned by match_identifier are malloc'd and are needed
+     only for the comparisons here, so they are freed before returning.  */
+  consume_whitespace ();
+  int oldpos = pos;
+  char *token = match_identifier ();
+  if (!token)
+    {
+      (*diag) ("missing base type in return type at column %d\n", pos + 1);
+      return 0;
+    }
+
+  int ok = 1;
+  if (!strcmp (token, "char"))
+    typedata->base = BT_CHAR;
+  else if (!strcmp (token, "short"))
+    typedata->base = BT_SHORT;
+  else if (!strcmp (token, "int"))
+    typedata->base = BT_INT;
+  else if (!strcmp (token, "long"))
+    {
+      consume_whitespace ();
+      char *mustbelong = match_identifier ();
+      if (!mustbelong || strcmp (mustbelong, "long"))
+        {
+          (*diag) ("incomplete 'long long' at column %d\n", oldpos + 1);
+          ok = 0;
+        }
+      else
+        typedata->base = BT_LONGLONG;
+      /* free (NULL) is a no-op, so no guard is needed.  */
+      free (mustbelong);
+    }
+  else if (!strcmp (token, "float"))
+    typedata->base = BT_FLOAT;
+  else if (!strcmp (token, "double"))
+    typedata->base = BT_DOUBLE;
+  else if (!strcmp (token, "__int128"))
+    typedata->base = BT_INT128;
+  else if (!strcmp (token, "_Float128"))
+    typedata->base = BT_FLOAT128;
+  else if (!strcmp (token, "_Decimal32"))
+    typedata->base = BT_DECIMAL32;
+  else if (!strcmp (token, "_Decimal64"))
+    typedata->base = BT_DECIMAL64;
+  else if (!strcmp (token, "_Decimal128"))
+    typedata->base = BT_DECIMAL128;
+  else if (!strcmp (token, "__ibm128"))
+    typedata->base = BT_IBM128;
+  else
+    {
+      (*diag) ("unrecognized base type at column %d\n", oldpos + 1);
+      ok = 0;
+    }
+
+  free (token);
+  if (!ok)
+    return 0;
+
+  handle_pointer (typedata);
   return 1;
 }
 
-- 
2.17.1



[PATCH 01/28] rs6000: Initial create of rs6000-gen-builtins.c

2020-06-17 Thread Bill Schmidt via Gcc-patches
2020-06-17  Bill Schmidt  

* config/rs6000/rs6000-gen-builtins.c: New.
---
 gcc/config/rs6000/rs6000-gen-builtins.c | 142 
 1 file changed, 142 insertions(+)
 create mode 100644 gcc/config/rs6000/rs6000-gen-builtins.c

diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c 
b/gcc/config/rs6000/rs6000-gen-builtins.c
new file mode 100644
index 000..1ef12aa0413
--- /dev/null
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -0,0 +1,142 @@
+/* Generate built-in function initialization and recognition for Power.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   Contributed by Bill Schmidt, IBM 
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+/* This program generates built-in function initialization and
+   recognition code for Power targets, based on text files that
+   describe the built-in functions and vector overloads:
+
+     rs6000-builtin-new.def    Table of built-in functions
+     rs6000-overload.def       Table of overload functions
+
+   Both files group similar functions together in "stanzas," as
+   described below.
+
+   Each stanza in the built-in function file starts with a line
+   identifying the option mask(s) for which the group of functions
+   is permitted, with the mask information in square brackets.  For
+   example, this could be
+
+ [MASK_ALTIVEC]
+
+   or it could be
+
+ [(MASK_P9_VECTOR | MASK_POWERPC64)]
+
+   Note that parentheses must be used around expressions that are not
+   simple masks to get correct behavior.  The bracketed mask expression
+   is the only information allowed on the stanza header line, other than
+   whitespace.
+
+   Following the stanza header are two lines for each function: the
+   prototype line and the attributes line.  The prototype line has
+   this format, where the square brackets indicate optional
+   information and angle brackets indicate required information:
+
+     [kind] <return-type> <bif-name> (<argument-list>);
+
+   Here [kind] can be one of "const", "pure", or "fpmath";
+   <return-type> is a legal type for a built-in function result;
+   <bif-name> is the name by which the function can be called;
+   and <argument-list> is a comma-separated list of legal types
+   for built-in function arguments.  The argument list may be
+   empty, but the parentheses and semicolon are required.
+
+   The attributes line looks like this:
+
+     <bif-id> <bif-pattern> {<attribute-list>}
+
+   Here <bif-id> is a unique internal identifier for the built-in
+   function that will be used as part of an enumeration of all
+   built-in functions; <bif-pattern> is the define_expand or
+   define_insn that will be invoked when the call is expanded;
+   and <attribute-list> is a comma-separated list of special
+   conditions that apply to the built-in function.  The attribute
+   list may be empty, but the braces are required.
+
+   Attributes are strings, such as these:
+
+     init     Process as a vec_init function
+     set      Process as a vec_set function
+     extract  Process as a vec_extract function
+     nosoft   Not valid with -msoft-float
+     ldvec    Needs special handling for vec_ld semantics
+     stvec    Needs special handling for vec_st semantics
+     reve     Needs special handling for element reversal
+     pred     Needs special handling for comparison predicates
+     htm      Needs special handling for transactional memory
+     htmspr   HTM function using an SPR
+     htmcr    HTM function using a CR
+     no32bit  Not valid for TARGET_32BIT
+     cpu      This is a "cpu_is" or "cpu_supports" builtin
+     ldstmask Altivec mask for load or store
+
+   An example stanza might look like this:
+
+[TARGET_ALTIVEC]
+  const vsc __builtin_altivec_abs_v16qi (vsc);
+ABS_V16QI absv16qi2 {}
+  const vss __builtin_altivec_abs_v8hi (vss);
+ABS_V8HI absv8hi2 {}
+
+   Here "vsc" and "vss" are shorthand for "vector signed char" and
+   "vector signed short" to shorten line lengths and improve readability.
+   Note the use of indentation, which is recommended but not required.
+
+   The overload file has more complex stanza headers.  Here the stanza
+   represents all functions with the same overloaded function name:
+
+ [, , ]
+
+   Here the square brackets are part of the syntax,  is a
+   unique internal identifier for the overload that will be used as part
+   of an enumeration of all overloaded functions;  is the name
+   that 

[PATCH] [V2] rs6000: Add vec_extracth and vec_extractl

2020-05-13 Thread Bill Schmidt via Gcc-patches
From: Kelvin Nilsen 

Add new insns vextdu[bhw]vlx, vextddvlx, vextdu[bhw]vhx, and
vextddvhx, along with built-in access and overloaded built-in
access to these insns.

Changes from previous patch:
 * Removed the int iterators
 * Created separate expansions and insns
vextractl
vextractl_internal
vextractr
vextractr_internal
 * Adjusted rs6000-builtin.def entries to match the new expansion
   names

I didn't understand the comment about moving the decision making
part to the built-in handling code.  All the built-in handling
does is a table-driven call to the expansions; this logic *is*
the built-in handling code.  I don't see any way to simplify that.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions, using a Power9 configuration.  Is this okay for
master?

Thanks,
Bill


[gcc]

2020-05-12  Kelvin Nilsen  

* config/rs6000/altivec.h (vec_extractl): New #define.
(vec_extracth): Likewise.
* config/rs6000/altivec.md (UNSPEC_EXTRACTL): New constant.
(UNSPEC_EXTRACTR): Likewise.
(vextractl): New expansion.
(vextractl_internal): New insn.
(vextractr): New expansion.
(vextractr_internal): New insn.
* config/rs6000/rs6000-builtin.def (__builtin_altivec_vextdubvlx):
New built-in function.
(__builtin_altivec_vextduhvlx): Likewise.
(__builtin_altivec_vextduwvlx): Likewise.
(__builtin_altivec_vextddvlx): Likewise.
(__builtin_altivec_vextdubvhx): Likewise.
(__builtin_altivec_vextduhvhx): Likewise.
(__builtin_altivec_vextduwvhx): Likewise.
(__builtin_altivec_vextddvhx): Likewise.
(__builtin_vec_extractl): New overloaded built-in function.
(__builtin_vec_extracth): Likewise.
* config/rs6000/rs6000-call.c (altivec_overloaded_builtins):
Define overloaded forms of __builtin_vec_extractl and
__builtin_vec_extracth.
(builtin_function_type): Add cases to mark arguments of new
built-in functions as unsigned.
(rs6000_common_init_builtins): Add
opaque_ftype_opaque_opaque_opaque_opaque.
* config/rs6000/rs6000.md (du_or_d): New mode attribute.
* doc/extend.texi (PowerPC AltiVec Built-in Functions Available
for a Future Architecture): Add description of vec_extractl and
vec_extractr built-in functions.

[gcc/testsuite]

2020-05-10  Kelvin Nilsen  

* gcc.target/powerpc/vec-extracth-0.c: New.
* gcc.target/powerpc/vec-extracth-1.c: New.
* gcc.target/powerpc/vec-extracth-2.c: New.
* gcc.target/powerpc/vec-extracth-3.c: New.
* gcc.target/powerpc/vec-extracth-4.c: New.
* gcc.target/powerpc/vec-extracth-5.c: New.
* gcc.target/powerpc/vec-extracth-6.c: New.
* gcc.target/powerpc/vec-extracth-7.c: New.
* gcc.target/powerpc/vec-extracth-be-0.c: New.
* gcc.target/powerpc/vec-extracth-be-1.c: New.
* gcc.target/powerpc/vec-extracth-be-2.c: New.
* gcc.target/powerpc/vec-extracth-be-3.c: New.
* gcc.target/powerpc/vec-extractl-0.c: New.
* gcc.target/powerpc/vec-extractl-1.c: New.
* gcc.target/powerpc/vec-extractl-2.c: New.
* gcc.target/powerpc/vec-extractl-3.c: New.
* gcc.target/powerpc/vec-extractl-4.c: New.
* gcc.target/powerpc/vec-extractl-5.c: New.
* gcc.target/powerpc/vec-extractl-6.c: New.
* gcc.target/powerpc/vec-extractl-7.c: New.
* gcc.target/powerpc/vec-extractl-be-0.c: New.
* gcc.target/powerpc/vec-extractl-be-1.c: New.
* gcc.target/powerpc/vec-extractl-be-2.c: New.
* gcc.target/powerpc/vec-extractl-be-3.c: New.
---
 gcc/config/rs6000/altivec.h   |  3 +
 gcc/config/rs6000/altivec.md  | 62 +++
 gcc/config/rs6000/rs6000-builtin.def  | 13 
 gcc/config/rs6000/rs6000-call.c   | 39 +++-
 gcc/config/rs6000/rs6000.md   | 10 +++
 gcc/doc/extend.texi   | 56 +
 .../gcc.target/powerpc/vec-extracth-0.c   | 33 ++
 .../gcc.target/powerpc/vec-extracth-1.c   | 32 ++
 .../gcc.target/powerpc/vec-extracth-2.c   | 31 ++
 .../gcc.target/powerpc/vec-extracth-3.c   | 30 +
 .../gcc.target/powerpc/vec-extracth-4.c   | 31 ++
 .../gcc.target/powerpc/vec-extracth-5.c   | 29 +
 .../gcc.target/powerpc/vec-extracth-6.c   | 31 ++
 .../gcc.target/powerpc/vec-extracth-7.c   | 30 +
 .../gcc.target/powerpc/vec-extracth-be-0.c| 32 ++
 .../gcc.target/powerpc/vec-extracth-be-1.c| 30 +
 .../gcc.target/powerpc/vec-extracth-be-2.c| 30 +
 .../gcc.target/powerpc/vec-extracth-be-3.c| 30 +
 .../gcc.target/powerpc/vec-extractl-0.c   | 33 ++
 .../gcc.target/powerpc/vec-extractl-1.c   | 32 ++
 

Re: [PATCH] rs6000: Add vec_extracth and vec_extractl

2020-05-12 Thread Bill Schmidt via Gcc-patches

On 5/12/20 1:21 PM, Segher Boessenkool wrote:

Hi!

On Mon, May 11, 2020 at 09:56:14PM -0500, Bill Schmidt wrote:

On 5/11/20 9:48 AM, David Edelsohn wrote:

On Sun, May 10, 2020 at 9:14 AM Bill Schmidt 
wrote:

 * config/rs6000/altivec.md (UNSPEC_EXTRACTL): New constant.
 (UNSPEC_EXTRACTR): Likewise.
 (VEXTRACT_LR): New int iterator.

Well now the previous VSTRIR/VSTRIL patch is inconsistent.  If we're
going to use an iterator for "LR", that's fine, but it needs to be
used consistently for similar situations.  The approach for the two,
similar instructions and issues need to match.

I see your point.  I don't really like the way this was done very much,
since the attributes are tied to the unspecs for extract-{low,high}.
Simple attribute names like LR, lr, rl shouldn't be scoped so narrowly.

Yeah...  The point was to make the resulting code readable.  xx is
readable, but xx is not.


I don't like any of the alternatives very well, either.  I could either
(1) change the names of the int iterators in this patch to incorporate
part of the word "extract", and create similar iterators for the
vstril/vstrir patterns; or (2) remove the iterators from this patch and
just create two expansions and two insns instead of one of each.  I have
a slight preference for (2) since the longer iterator names will make
things ugly.

Do you or Segher have a preference?

Two patterns is the best idea I think.

And all of this will be less code if you can move the decision making
part to the builtin code?


OK, thanks!  The vector string isolation patch went upstream already 
after David's first review, but I will go back and rewrite that code in 
a subsequent patch.


And I will work on fixing this one and ask for re-review when it's ready.

Thanks to both of you!
Bill




Segher


Re: [PATCH] rs6000: Vector string isolate instructions

2020-05-12 Thread Bill Schmidt via Gcc-patches

On 5/12/20 4:54 AM, Segher Boessenkool wrote:

Hi!

Looks fine to me...  Just the same generic things as before, things we
can improve later, not even limited to this series:

On Sat, May 09, 2020 at 08:16:26AM -0500, Bill Schmidt wrote:

* config/rs6000/altivec.md (UNSPEC_VSTRIR): New constant.
(UNSPEC_VSTRIL): Likewise.

Names for these could perhaps be better.  Or maybe not, they are short
now, there's something to say for that as well :-)


(vstrir_<mode>): New expansion.
(vstrir_code_<mode>): New insn.

Could you make this vstrir and vstrir_internal, like the
rest?


(vstrir_p_<mode>): New expansion.
(vstrir_p_code_<mode>): New insn.

But, not sure what to do with those.  "Something to improve later" then
I guess, for all of it :-)


+(define_expand "vstrir_<mode>"
+  [(set (match_operand:VIshort 0 "altivec_register_operand")
+        (unspec:VIshort [(match_operand:VIshort 1 "altivec_register_operand")]
+                        UNSPEC_VSTRIR))]
+  "TARGET_FUTURE"
+{
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (gen_vstrir_code_<mode> (operands[0], operands[1]));
+  else
+    emit_insn (gen_vstril_code_<mode> (operands[0], operands[1]));
+  DONE;
+})

So the reason this pattern is special at all is that left and right are
swapped for LE.  Maybe that could/should be done in the code for the
builtin, instead?


+;; This expands into same code as vstrir_<mode> followed by condition logic
+;; so that a single vstribr. or vstrihr. or vstribl. or vstrihl. instruction
+;; can, for example, satisfy the needs of a vec_strir () function paired
+;; with a vec_strir_p () function if both take the same incoming arguments.
+(define_expand "vstrir_p_<mode>"
+  [(match_operand:SI 0 "gpc_reg_operand")
+   (match_operand:VIshort 1 "altivec_register_operand")]
+  "TARGET_FUTURE"
+{
+  rtx scratch = gen_reg_rtx (<MODE>mode);
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (gen_vstrir_p_code_<mode> (scratch, operands[1]));
+  else
+    emit_insn (gen_vstril_p_code_<mode> (scratch, operands[1]));
+  emit_insn (gen_cr6_test_for_zero (operands[0]));
+  DONE;
+})

And the code for the builtin can do this then, as well.

Not sure how easy that is to fit in with the current code, or after your
work on it.  Either way, it looks fine to me :-)


Thanks, lots of good cleanups here.  I've committed this patch, but will 
open an internal issue to track the cleanups from this series of patches 
and try to get them cleaned up later in the release. Thanks for the review!


Bill



Segher


Re: [PATCH] rs6000: Add vec_extracth and vec_extractl

2020-05-11 Thread Bill Schmidt via Gcc-patches

On 5/11/20 9:48 AM, David Edelsohn wrote:

On Sun, May 10, 2020 at 9:14 AM Bill Schmidt  wrote:

From: Kelvin Nilsen 

Add new insns vextdu[bhw]vlx, vextddvlx, vextdu[bhw]vhx, and
vextddvhx, along with built-in access and overloaded built-in
access to these insns.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions, using a Power9 configuration.  Is this okay for
master?

Thanks,
Bill

[gcc]

2020-05-10  Kelvin Nilsen  

 * config/rs6000/altivec.h (vec_extractl): New #define.
 (vec_extracth): Likewise.
 * config/rs6000/altivec.md (UNSPEC_EXTRACTL): New constant.
 (UNSPEC_EXTRACTR): Likewise.
 (VEXTRACT_LR): New int iterator.

Well now the previous VSTRIR/VSTRIL patch is inconsistent.  If we're
going to use an iterator for "LR", that's fine, but it needs to be
used consistently for similar situations.  The approach for the two,
similar instructions and issues need to match.



I see your point.  I don't really like the way this was done very much, 
since the attributes are tied to the unspecs for extract-{low,high}.  
Simple attribute names like LR, lr, rl shouldn't be scoped so narrowly.


I don't like any of the alternatives very well, either.  I could either 
(1) change the names of the int iterators in this patch to incorporate 
part of the word "extract", and create similar iterators for the 
vstril/vstrir patterns; or (2) remove the iterators from this patch and 
just create two expansions and two insns instead of one of each.  I have 
a slight preference for (2) since the longer iterator names will make 
things ugly.


Do you or Segher have a preference?

Thanks!
Bill



Thanks, David


Re: [PATCH] rs6000: Built-in cleanups for vec_clzm, vec_ctzm, and vec_gnb.

2020-05-11 Thread Bill Schmidt via Gcc-patches



On 5/11/20 7:16 AM, Segher Boessenkool wrote:

Hi!

On Sat, May 09, 2020 at 08:08:34PM -0500, Bill Schmidt wrote:

I should have noticed this patch before submitting Kelvin's earlier
related patches, sorry.  I think it should still be fine to apply
the patches in order, but if you'd like me to combine this into the
two earlier ones, I'd be happy to do that.

The intermediary step works just fine as well, so it is fine as-is.

One thing:


* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
Change fourth operand for vec_ternarylogic to require
compatibility with unsigned SImode rather than unsigned QImode.

Is it still checked for range 0..255 though?  (If the compiler can
derive that).



Yep, we already have this:

  if (icode == CODE_FOR_xxeval)
    {
  /* Only allow 8-bit unsigned literals.  */
  STRIP_NOPS (arg3);
  if (TREE_CODE (arg3) != INTEGER_CST
  || TREE_INT_CST_LOW (arg3) & ~0xff)
    {
  error ("argument 4 must be an 8-bit unsigned literal");
  return CONST0_RTX (tmode);
    }
    }

Thanks for the review!
Bill



In either case, if that is what the ABI says, that is what the ABI says,
so okay for trunk.

Thanks!


Segher


Re: [PATCH] rs6000: Add xxgenpcvwm and xxgenpcvdm instructions

2020-05-11 Thread Bill Schmidt via Gcc-patches

On 5/11/20 5:21 AM, Segher Boessenkool wrote:

Hi!

On Sat, May 09, 2020 at 12:05:08PM -0500, Bill Schmidt wrote:

From: Carl Love 

Add support for xxgenpcv[dw]m, along with individual and overloaded
built-in functions for access.
(xxgenpcvm_): New insn.
(xxgenpcvm): New expansion.

Eww.  Let's please use or not use underscore in both cases.  Insns that
are not created directly should have a name starting with *.  We have
many examples of an expand with the same name as an insn (other than the
insn having a *), which isn't really confusing because the dexpand
usually is right before the insn.

But, in this case, you *do* call the insn directly (namely, from the
define expand!)  So maybe use a "xxgenpcvm_internal" or similar
name for the define_insn?


Agreed.  I'm fixing that now.  Thanks!

Bill



Okay for trunk with that improved somehow.  Thanks!


Segher


Re: [PATCH] rs6000: Add cntlzdm and cnttzdm

2020-05-11 Thread Bill Schmidt via Gcc-patches

On 5/8/20 6:51 PM, Segher Boessenkool wrote:

On Fri, May 08, 2020 at 08:17:18AM -0500, Bill Schmidt wrote:

From: Kelvin Nilsen 

Add support for new scalar instructions for counting leading or
trailing zeros under control of a bitmask.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Is this okay for master?

Ooh, I found problems!


Thanks for catching these!  Okay with them fixed?

Thanks,
Bill




* config/rs6000/rs6000-builtin.def (__builtin_cntlzdm): New
built-in function definition.
(__builtin_cnttzdm): Likewise.,

Stray comma.


+(define_insn "cntlzdm"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+   (unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r")
+   (match_operand:DI 2 "gpc_reg_operand" "r")]
+UNSPEC_CNTLZDM))]
+   "TARGET_FUTURE && TARGET_64BIT"
+   "cntlzdm %0,%1,%2"
+   [(set_attr "type" "integer")])

TARGET_POWERPC64.


--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/cntlzdm-0.c
@@ -0,0 +1,57 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */

And powerpc64 here as well then.

Not sure if this is a bigger problem than the comma thing though.


Segher


Re: [PATCH] rs6000: Add pdep/pext

2020-05-11 Thread Bill Schmidt via Gcc-patches

On 5/8/20 3:47 PM, Segher Boessenkool wrote:

Hi,

On Thu, May 07, 2020 at 09:29:03PM -0500, Bill Schmidt wrote:

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 5ef4889ba55..33ba57855bc 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -162,6 +162,8 @@ (define_c_enum "unspec"
 UNSPEC_VRLNM
 UNSPEC_VCLZDM
 UNSPEC_VCTZDM
+   UNSPEC_VPDEPD
+   UNSPEC_VPEXTD

Similarly, maybe UNSPEC_PDEP and UNSPEC_PEXT would be nicer.



Thanks -- I'll plan to go back and do a general cleanup on scalar/vector 
UNSPECs later on.  I expect we have a lot of this sort of redundancy.


Bill



Looks okay for trunk either way :-)


Segher


[PATCH] rs6000: Add vec_extracth and vec_extractl

2020-05-10 Thread Bill Schmidt via Gcc-patches
From: Kelvin Nilsen 

Add new insns vextdu[bhw]vlx, vextddvlx, vextdu[bhw]vhx, and
vextddvhx, along with built-in access and overloaded built-in
access to these insns.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions, using a Power9 configuration.  Is this okay for
master?

Thanks,
Bill

[gcc]

2020-05-10  Kelvin Nilsen  

* config/rs6000/altivec.h (vec_extractl): New #define.
(vec_extracth): Likewise.
* config/rs6000/altivec.md (UNSPEC_EXTRACTL): New constant.
(UNSPEC_EXTRACTR): Likewise.
(VEXTRACT_LR): New int iterator.
(LR): New int attribute.
(lr): Likewise.
(rl): Likewise.
(vextract_): New expansion.
(vextract_insn_): New insn.
* config/rs6000/rs6000-builtin.def (__builtin_altivec_vextdubvlx):
New built-in function.
(__builtin_altivec_vextduhvlx): Likewise.
(__builtin_altivec_vextduwvlx): Likewise.
(__builtin_altivec_vextddvlx): Likewise.
(__builtin_altivec_vextdubvhx): Likewise.
(__builtin_altivec_vextduhvhx): Likewise.
(__builtin_altivec_vextduwvhx): Likewise.
(__builtin_altivec_vextddvhx): Likewise.
(__builtin_vec_extractl): New overloaded built-in function.
(__builtin_vec_extracth): Likewise.
* config/rs6000/rs6000-call.c (altivec_overloaded_builtins):
Define overloaded forms of __builtin_vec_extractl and
__builtin_vec_extracth.
(builtin_function_type): Add cases to mark arguments of new
built-in functions as unsigned.
(rs6000_common_init_builtins): Add
opaque_ftype_opaque_opaque_opaque_opaque.
* config/rs6000/rs6000.md (du_or_d): New mode attribute.
* doc/extend.texi (PowerPC AltiVec Built-in Functions Available
for a Future Architecture): Add description of vec_extractl and
vec_extracth built-in functions.

[gcc/testsuite]

2020-05-10  Kelvin Nilsen  

* gcc.target/powerpc/vec-extracth-0.c: New.
* gcc.target/powerpc/vec-extracth-1.c: New.
* gcc.target/powerpc/vec-extracth-2.c: New.
* gcc.target/powerpc/vec-extracth-3.c: New.
* gcc.target/powerpc/vec-extracth-4.c: New.
* gcc.target/powerpc/vec-extracth-5.c: New.
* gcc.target/powerpc/vec-extracth-6.c: New.
* gcc.target/powerpc/vec-extracth-7.c: New.
* gcc.target/powerpc/vec-extracth-be-0.c: New.
* gcc.target/powerpc/vec-extracth-be-1.c: New.
* gcc.target/powerpc/vec-extracth-be-2.c: New.
* gcc.target/powerpc/vec-extracth-be-3.c: New.
* gcc.target/powerpc/vec-extractl-0.c: New.
* gcc.target/powerpc/vec-extractl-1.c: New.
* gcc.target/powerpc/vec-extractl-2.c: New.
* gcc.target/powerpc/vec-extractl-3.c: New.
* gcc.target/powerpc/vec-extractl-4.c: New.
* gcc.target/powerpc/vec-extractl-5.c: New.
* gcc.target/powerpc/vec-extractl-6.c: New.
* gcc.target/powerpc/vec-extractl-7.c: New.
* gcc.target/powerpc/vec-extractl-be-0.c: New.
* gcc.target/powerpc/vec-extractl-be-1.c: New.
* gcc.target/powerpc/vec-extractl-be-2.c: New.
* gcc.target/powerpc/vec-extractl-be-3.c: New.
---
 gcc/config/rs6000/altivec.h   |  3 +
 gcc/config/rs6000/altivec.md  | 47 
 gcc/config/rs6000/rs6000-builtin.def  | 13 +
 gcc/config/rs6000/rs6000-call.c   | 39 -
 gcc/config/rs6000/rs6000.md   | 10 
 gcc/doc/extend.texi   | 56 +++
 .../gcc.target/powerpc/vec-extracth-0.c   | 34 +++
 .../gcc.target/powerpc/vec-extracth-1.c   | 32 +++
 .../gcc.target/powerpc/vec-extracth-2.c   | 32 +++
 .../gcc.target/powerpc/vec-extracth-3.c   | 30 ++
 .../gcc.target/powerpc/vec-extracth-4.c   | 32 +++
 .../gcc.target/powerpc/vec-extracth-5.c   | 30 ++
 .../gcc.target/powerpc/vec-extracth-6.c   | 32 +++
 .../gcc.target/powerpc/vec-extracth-7.c   | 30 ++
 .../gcc.target/powerpc/vec-extracth-be-0.c| 33 +++
 .../gcc.target/powerpc/vec-extracth-be-1.c| 31 ++
 .../gcc.target/powerpc/vec-extracth-be-2.c| 31 ++
 .../gcc.target/powerpc/vec-extracth-be-3.c| 31 ++
 .../gcc.target/powerpc/vec-extractl-0.c   | 34 +++
 .../gcc.target/powerpc/vec-extractl-1.c   | 32 +++
 .../gcc.target/powerpc/vec-extractl-2.c   | 32 +++
 .../gcc.target/powerpc/vec-extractl-3.c   | 30 ++
 .../gcc.target/powerpc/vec-extractl-4.c   | 32 +++
 .../gcc.target/powerpc/vec-extractl-5.c   | 30 ++
 .../gcc.target/powerpc/vec-extractl-6.c   | 32 +++
 .../gcc.target/powerpc/vec-extractl-7.c   | 30 ++
 .../gcc.target/powerpc/vec-extractl-be-0.c| 33 +++
 

[PATCH] rs6000: Built-in cleanups for vec_clzm, vec_ctzm, and vec_gnb.

2020-05-09 Thread Bill Schmidt via Gcc-patches
From: Kelvin Nilsen 

Changes to the built-in specification occurred after early patches
added support for these.  The name of vec_clzm became vec_cntlzm,
and vec_ctzm became vec_cnttzm.  Four of the overloaded forms of
vec_gnb were removed, and the fourth argument of vec_ternarylogic was
redefined as an unsigned int, not an unsigned char.  This patch reflects those
changes in the code and test cases.  Eight of the vec_gnb test
cases are removed as a result.

I should have noticed this patch before submitting Kelvin's earlier
related patches, sorry.  I think it should still be fine to apply
the patches in order, but if you'd like me to combine this into the
two earlier ones, I'd be happy to do that.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions, using a Power9 configuration.  Is this okay for master?

Thanks,
Bill

[gcc]

2020-05-09  Kelvin Nilsen  

* config/rs6000/altivec.h (vec_clzm): Rename to vec_cntlzm.
(vec_ctzm): Rename to vec_cnttzm.
* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
Change fourth operand for vec_ternarylogic to require
compatibility with unsigned SImode rather than unsigned QImode.
* config/rs6000/rs6000-call.c (altivec_overloaded_builtins):
Remove overloaded forms of vec_gnb that are no longer needed.
* doc/extend.texi (PowerPC AltiVec Built-in Functions Available
for a Future Architecture): Replace vec_clzm with vec_cntlzm;
replace vec_ctzm with vec_cnttzm; remove four unwanted forms of
vec_gnb; move vec_ternarylogic documentation into this section
and replace const unsigned char with const unsigned int as its
fourth argument.

[gcc/testsuite]

2020-05-09  Kelvin Nilsen  

* gcc.target/powerpc/vec-clzm-0.c: Rename to...
* gcc.target/powerpc/vec-cntlzm-0.c: ...this.
* gcc.target/powerpc/vec-clzm-1.c: Rename to...
* gcc.target/powerpc/vec-cntlzm-1.c: ...this.
* gcc.target/powerpc/vec-ctzm-0.c: Rename to...
* gcc.target/powerpc/vec-cnttzm-0.c: ...this.
* gcc.target/powerpc/vec-ctzm-1.c: Rename to...
* gcc.target/powerpc/vec-cnttzm-1.c: ...this.
* gcc.target/powerpc/vec-gnb-8.c: Rename to...
* gcc.target/powerpc/vec-gnb-0.c: ...this, deleting the old file.
* gcc.target/powerpc/vec-gnb-9.c: Rename to...
* gcc.target/powerpc/vec-gnb-1.c: ...this, deleting the old file.
* gcc.target/powerpc/vec-gnb-10.c: Rename to...
* gcc.target/powerpc/vec-gnb-2.c: ...this, deleting the old file.
* gcc.target/powerpc/vec-gnb-3.c: Delete.
* gcc.target/powerpc/vec-gnb-4.c: Delete.
* gcc.target/powerpc/vec-gnb-5.c: Delete.
* gcc.target/powerpc/vec-gnb-6.c: Delete.
* gcc.target/powerpc/vec-gnb-7.c: Delete.
---
 gcc/config/rs6000/altivec.h   |  4 +-
 gcc/config/rs6000/rs6000-c.c  |  2 +-
 gcc/config/rs6000/rs6000-call.c   |  8 --
 gcc/doc/extend.texi   | 60 +++
 .../powerpc/{vec-clzm-0.c => vec-cntlzm-0.c}  | 12 +--
 .../powerpc/{vec-clzm-1.c => vec-cntlzm-1.c}  | 12 +--
 .../powerpc/{vec-ctzm-0.c => vec-cnttzm-0.c}  | 12 +--
 .../powerpc/{vec-ctzm-1.c => vec-cnttzm-1.c}  | 12 +--
 gcc/testsuite/gcc.target/powerpc/vec-gnb-0.c  | 22 +++---
 gcc/testsuite/gcc.target/powerpc/vec-gnb-1.c  | 23 +++---
 gcc/testsuite/gcc.target/powerpc/vec-gnb-10.c | 72 --
 gcc/testsuite/gcc.target/powerpc/vec-gnb-2.c  | 31 
 gcc/testsuite/gcc.target/powerpc/vec-gnb-3.c  | 72 --
 gcc/testsuite/gcc.target/powerpc/vec-gnb-4.c  | 71 --
 gcc/testsuite/gcc.target/powerpc/vec-gnb-5.c  | 71 --
 gcc/testsuite/gcc.target/powerpc/vec-gnb-6.c  | 71 --
 gcc/testsuite/gcc.target/powerpc/vec-gnb-7.c  | 71 --
 gcc/testsuite/gcc.target/powerpc/vec-gnb-8.c  | 75 ---
 gcc/testsuite/gcc.target/powerpc/vec-gnb-9.c  | 74 --
 19 files changed, 88 insertions(+), 687 deletions(-)
 rename gcc/testsuite/gcc.target/powerpc/{vec-clzm-0.c => vec-cntlzm-0.c} (79%)
 rename gcc/testsuite/gcc.target/powerpc/{vec-clzm-1.c => vec-cntlzm-1.c} (79%)
 rename gcc/testsuite/gcc.target/powerpc/{vec-ctzm-0.c => vec-cnttzm-0.c} (79%)
 rename gcc/testsuite/gcc.target/powerpc/{vec-ctzm-1.c => vec-cnttzm-1.c} (79%)
 delete mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-10.c
 delete mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-3.c
 delete mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-4.c
 delete mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-5.c
 delete mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-6.c
 delete mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-7.c
 delete mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-8.c
 delete mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-9.c

diff --git a/gcc/config/rs6000/altivec.h 

[PATCH] rs6000: Add xxgenpcvwm and xxgenpcvdm instructions

2020-05-09 Thread Bill Schmidt via Gcc-patches
From: Carl Love 

Add support for xxgenpcv[dw]m, along with individual and overloaded
built-in functions for access.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions, using a POWER9 compiler.  Is this okay for master?

Thanks,
Bill

[gcc]

2020-05-09  Carl Love  

* config/rs6000/altivec.h (vec_genpcvm): New #define.
* config/rs6000/rs6000-builtin.def (XXGENPCVM_V16QI): New built-in
instantiation.
(XXGENPCVM_V8HI): Likewise.
(XXGENPCVM_V4SI): Likewise.
(XXGENPCVM_V2DI): Likewise.
(XXGENPCVM): New overloaded built-in instantiation.
* config/rs6000/rs6000-call.c (altivec_overloaded_builtins): Add
entries for FUTURE_BUILTIN_VEC_XXGENPCVM.
(altivec_expand_builtin): Add special handling for
FUTURE_BUILTIN_VEC_XXGENPCVM.
(builtin_function_type): Add handling for
FUTURE_BUILTIN_XXGENPCVM_{V16QI,V8HI,V4SI,V2DI}.
* config/rs6000/vsx.md (VSX_EXTRACT_I4): New mode iterator.
(UNSPEC_XXGENPCV): New constant.
(xxgenpcvm_<mode>): New insn.
(xxgenpcvm<mode>): New expansion.
* doc/extend.texi: Add documentation for vec_genpcvm built-ins.

[gcc/testsuite]

2020-05-09  Carl Love  

* gcc.target/powerpc/xxgenpc-runnable.c: New.
---
 gcc/config/rs6000/altivec.h   |   1 +
 gcc/config/rs6000/rs6000-builtin.def  |   5 +
 gcc/config/rs6000/rs6000-call.c   |  31 +++
 gcc/config/rs6000/vsx.md  |  31 +++
 gcc/doc/extend.texi   |  12 +
 .../gcc.target/powerpc/xxgenpc-runnable.c | 231 ++
 6 files changed, 311 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/xxgenpc-runnable.c

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index b29413deb6d..3729ceaf336 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -694,6 +694,7 @@ __altivec_scalar_pred(vec_any_nle,
 #define vec_pdep(a, b) __builtin_altivec_vpdepd (a, b)
 #define vec_pext(a, b) __builtin_altivec_vpextd (a, b)
 #define vec_cfuge(a, b)__builtin_altivec_vcfuged (a, b)
+#define vec_genpcvm(a, b)  __builtin_vec_xxgenpcvm (a, b)
 
 /* Overloaded built-in functions for future architecture.  */
 #define vec_gnb(a, b)  __builtin_vec_gnb (a, b)
diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index 1f86293d0e2..b5b08be512a 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2611,6 +2611,10 @@ BU_FUTURE_V_2 (VPDEPD, "vpdepd", CONST, vpdepd)
 BU_FUTURE_V_2 (VPEXTD, "vpextd", CONST, vpextd)
 BU_FUTURE_V_2 (VGNB, "vgnb", CONST, vgnb)
 BU_FUTURE_V_4 (XXEVAL, "xxeval", CONST, xxeval)
+BU_FUTURE_V_2 (XXGENPCVM_V16QI, "xxgenpcvm_v16qi", CONST, xxgenpcvmv16qi)
+BU_FUTURE_V_2 (XXGENPCVM_V8HI, "xxgenpcvm_v8hi", CONST, xxgenpcvmv8hi)
+BU_FUTURE_V_2 (XXGENPCVM_V4SI, "xxgenpcvm_v4si", CONST, xxgenpcvmv4si)
+BU_FUTURE_V_2 (XXGENPCVM_V2DI, "xxgenpcvm_v2di", CONST, xxgenpcvmv2di)
 
 BU_FUTURE_V_1 (VSTRIBR, "vstribr", CONST, vstrir_v16qi)
 BU_FUTURE_V_1 (VSTRIHR, "vstrihr", CONST, vstrir_v8hi)
@@ -2627,6 +2631,7 @@ BU_FUTURE_OVERLOAD_2 (CLRL, "clrl")
 BU_FUTURE_OVERLOAD_2 (CLRR, "clrr")
 BU_FUTURE_OVERLOAD_2 (GNB, "gnb")
 BU_FUTURE_OVERLOAD_4 (XXEVAL, "xxeval")
+BU_FUTURE_OVERLOAD_2 (XXGENPCVM, "xxgenpcvm")
 
 BU_FUTURE_OVERLOAD_1 (VSTRIR, "strir")
 BU_FUTURE_OVERLOAD_1 (VSTRIL, "stril")
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 64a9ba2818d..0b9ed7ef018 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -5532,6 +5532,15 @@ const struct altivec_builtin_types 
altivec_overloaded_builtins[] = {
 RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTQI, 0 },
   { FUTURE_BUILTIN_VEC_GNB, FUTURE_BUILTIN_VGNB, RS6000_BTI_unsigned_long_long,
 RS6000_BTI_unsigned_V1TI, RS6000_BTI_UINTQI, 0 },
+  { FUTURE_BUILTIN_VEC_XXGENPCVM, FUTURE_BUILTIN_XXGENPCVM_V2DI,
+RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, 0 },
+  { FUTURE_BUILTIN_VEC_XXGENPCVM, FUTURE_BUILTIN_XXGENPCVM_V4SI,
+RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, 0 },
+  { FUTURE_BUILTIN_VEC_XXGENPCVM, FUTURE_BUILTIN_XXGENPCVM_V8HI,
+RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, 0 },
+  { FUTURE_BUILTIN_VEC_XXGENPCVM, FUTURE_BUILTIN_XXGENPCVM_V16QI,
+RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+RS6000_BTI_INTSI, 0 },
 
   /* The overloaded XXEVAL definitions are handled specially because the
  fourth unsigned char operand is not encoded in this table.  */
@@ -10384,6 +10393,24 @@ altivec_expand_builtin (tree exp, rtx target, bool 
*expandedp)
}
   break;
 
+case FUTURE_BUILTIN_VEC_XXGENPCVM:
+  arg1 = CALL_EXPR_ARG (exp, 1);
+  STRIP_NOPS (arg1);
+
+  /* Generate a normal call if it is invalid.  */
+  if (arg1 == 

[PATCH] rs6000: Vector string isolate instructions

2020-05-09 Thread Bill Schmidt via Gcc-patches
From: Kelvin Nilsen 

Adds new instructions vstribr, vstrihr, vstribl, and vstrihl, with
overloaded built-in support.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions, using a compiler configured for Power9.  Is this okay
for master?

Thanks,
Bill

[gcc]

2020-05-08  Kelvin Nilsen  

* config/rs6000/altivec.h (vec_strir): New #define.
(vec_stril): Likewise.
(vec_strir_p): Likewise.
(vec_stril_p): Likewise.
* config/rs6000/altivec.md (UNSPEC_VSTRIR): New constant.
(UNSPEC_VSTRIL): Likewise.
(vstrir_): New expansion.
(vstrir_code_): New insn.
(vstrir_p_): New expansion.
(vstrir_p_code_): New insn.
(vstril_): New expansion.
(vstril_code_): New insn.
(vstril_p_): New expansion.
(vstril_p_code_): New insn.
* config/rs6000/rs6000-builtin.def (__builtin_altivec_vstribr):
New built-in function.
(__builtin_altivec_vstrihr): Likewise.
(__builtin_altivec_vstribl): Likewise.
(__builtin_altivec_vstrihl): Likewise.
(__builtin_altivec_vstribr_p): Likewise.
(__builtin_altivec_vstrihr_p): Likewise.
(__builtin_altivec_vstribl_p): Likewise.
(__builtin_altivec_vstrihl_p): Likewise.
(__builtin_vec_strir): New overloaded built-in function.
(__builtin_vec_stril): Likewise.
(__builtin_vec_strir_p): Likewise.
(__builtin_vec_stril_p): Likewise.
* config/rs6000/rs6000-call.c (altivec_overloaded_builtins):
Define overloaded forms of __builtin_vec_strir,
__builtin_vec_stril, __builtin_vec_strir_p, and
__builtin_vec_stril_p.
* doc/extend.texi (PowerPC AltiVec Built-in Functions Available
for a Future Architecture): Add description of vec_stril,
vec_stril_p, vec_strir, and vec_strir_p built-in functions.

[gcc/testsuite]

2020-05-08  Kelvin Nilsen  

* gcc.target/powerpc/vec-stril-0.c: New.
* gcc.target/powerpc/vec-stril-1.c: New.
* gcc.target/powerpc/vec-stril-10.c: New.
* gcc.target/powerpc/vec-stril-11.c: New.
* gcc.target/powerpc/vec-stril-12.c: New.
* gcc.target/powerpc/vec-stril-13.c: New.
* gcc.target/powerpc/vec-stril-14.c: New.
* gcc.target/powerpc/vec-stril-15.c: New.
* gcc.target/powerpc/vec-stril-16.c: New.
* gcc.target/powerpc/vec-stril-17.c: New.
* gcc.target/powerpc/vec-stril-18.c: New.
* gcc.target/powerpc/vec-stril-19.c: New.
* gcc.target/powerpc/vec-stril-2.c: New.
* gcc.target/powerpc/vec-stril-20.c: New.
* gcc.target/powerpc/vec-stril-21.c: New.
* gcc.target/powerpc/vec-stril-22.c: New.
* gcc.target/powerpc/vec-stril-23.c: New.
* gcc.target/powerpc/vec-stril-3.c: New.
* gcc.target/powerpc/vec-stril-4.c: New.
* gcc.target/powerpc/vec-stril-5.c: New.
* gcc.target/powerpc/vec-stril-6.c: New.
* gcc.target/powerpc/vec-stril-7.c: New.
* gcc.target/powerpc/vec-stril-8.c: New.
* gcc.target/powerpc/vec-stril-9.c: New.
* gcc.target/powerpc/vec-stril_p-0.c: New.
* gcc.target/powerpc/vec-stril_p-1.c: New.
* gcc.target/powerpc/vec-stril_p-10.c: New.
* gcc.target/powerpc/vec-stril_p-11.c: New.
* gcc.target/powerpc/vec-stril_p-2.c: New.
* gcc.target/powerpc/vec-stril_p-3.c: New.
* gcc.target/powerpc/vec-stril_p-4.c: New.
* gcc.target/powerpc/vec-stril_p-5.c: New.
* gcc.target/powerpc/vec-stril_p-6.c: New.
* gcc.target/powerpc/vec-stril_p-7.c: New.
* gcc.target/powerpc/vec-stril_p-8.c: New.
* gcc.target/powerpc/vec-stril_p-9.c: New.
* gcc.target/powerpc/vec-strir-0.c: New.
* gcc.target/powerpc/vec-strir-1.c: New.
* gcc.target/powerpc/vec-strir-10.c: New.
* gcc.target/powerpc/vec-strir-11.c: New.
* gcc.target/powerpc/vec-strir-12.c: New.
* gcc.target/powerpc/vec-strir-13.c: New.
* gcc.target/powerpc/vec-strir-14.c: New.
* gcc.target/powerpc/vec-strir-15.c: New.
* gcc.target/powerpc/vec-strir-16.c: New.
* gcc.target/powerpc/vec-strir-17.c: New.
* gcc.target/powerpc/vec-strir-18.c: New.
* gcc.target/powerpc/vec-strir-19.c: New.
* gcc.target/powerpc/vec-strir-2.c: New.
* gcc.target/powerpc/vec-strir-20.c: New.
* gcc.target/powerpc/vec-strir-21.c: New.
* gcc.target/powerpc/vec-strir-22.c: New.
* gcc.target/powerpc/vec-strir-23.c: New.
* gcc.target/powerpc/vec-strir-3.c: New.
* gcc.target/powerpc/vec-strir-4.c: New.
* gcc.target/powerpc/vec-strir-5.c: New.
* gcc.target/powerpc/vec-strir-6.c: New.
* gcc.target/powerpc/vec-strir-7.c: New.
* gcc.target/powerpc/vec-strir-8.c: New.
* gcc.target/powerpc/vec-strir-9.c: New.
* gcc.target/powerpc/vec-strir_p-0.c: New.
* 

[PATCH] rs6000: Add xxeval and vec_ternarylogic

2020-05-08 Thread Bill Schmidt via Gcc-patches
From: Kelvin Nilsen 

Add the xxeval insn and access it via the vec_ternarylogic built-in
function.  As part of this, add support to the built-in function
infrastructure for functions that take four arguments.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions, using a native POWER9 compiler.  Is this okay for
master?

Thanks,
Bill

[gcc]

2020-05-08  Kelvin Nilsen  

* config/rs6000/altivec.h (vec_ternarylogic): New #define.
* config/rs6000/altivec.md (UNSPEC_XXEVAL): New constant.
(xxeval): New insn.
* config/rs6000/predicates.md (u8bit_cint_operand): New predicate.
* config/rs6000/rs6000-builtin.def: Add handling of new macro
RS6000_BUILTIN_4.
(BU_FUTURE_V_4): New macro.
(BU_FUTURE_OVERLOAD_4): Likewise.
* config/rs6000/rs6000-c.c (altivec_build_resolved_builtin): Add
handling for quaternary built-in functions.
(altivec_resolve_overloaded_builtin): Add special-case handling
for __builtin_vec_xxeval.
* config/rs6000/rs6000-call.c: Add handling of new macro
RS6000_BUILTIN_4 in initialization of rs6000_builtin_info,
bdesc_0arg, bdesc_1arg, bdesc_2arg, bdesc_3arg,
bdesc_altivec_preds, bdesc_abs, and bdesc_htm arrays.
(altivec_overloaded_builtins): Add definitions for
FUTURE_BUILTIN_VEC_XXEVAL.
(bdesc_4arg): New array.
(htm_expand_builtin): Add handling for quaternary built-in
functions.
(rs6000_expand_quaternop_builtin): New function.
(rs6000_expand_builtin): Add handling for quaternary built-in
functions.
(rs6000_init_builtins): Initialize builtin_mode_to_type entries
for unsigned QImode and unsigned HImode.
(builtin_quaternary_function_type): New function.
(rs6000_common_init_builtins): Add handling of quaternary
operations.
* config/rs6000/rs6000.h (RS6000_BTC_QUATERNARY): New defined
constant.
(RS6000_BTC_PREDICATE): Change value of constant.
(RS6000_BTC_ABS): Likewise.
(rs6000_builtins): Add support for new macro RS6000_BUILTIN_4.
* doc/extend.texi (PowerPC AltiVec Built-in Functions Available
for a Future Architecture): Add description of vec_ternarylogic
built-in function.

[gcc/testsuite]

2020-05-08  Kelvin Nilsen  

* gcc.target/powerpc/vec-ternarylogic-0.c: New.
* gcc.target/powerpc/vec-ternarylogic-1.c: New.
* gcc.target/powerpc/vec-ternarylogic-10.c: New.
* gcc.target/powerpc/vec-ternarylogic-2.c: New.
* gcc.target/powerpc/vec-ternarylogic-3.c: New.
* gcc.target/powerpc/vec-ternarylogic-4.c: New.
* gcc.target/powerpc/vec-ternarylogic-5.c: New.
* gcc.target/powerpc/vec-ternarylogic-6.c: New.
* gcc.target/powerpc/vec-ternarylogic-7.c: New.
* gcc.target/powerpc/vec-ternarylogic-8.c: New.
* gcc.target/powerpc/vec-ternarylogic-9.c: New.
---
 gcc/config/rs6000/altivec.h   |   1 +
 gcc/config/rs6000/altivec.md  |  11 +
 gcc/config/rs6000/predicates.md   |   5 +
 gcc/config/rs6000/rs6000-builtin.def  |  23 ++
 gcc/config/rs6000/rs6000-c.c  |  47 +++-
 gcc/config/rs6000/rs6000-call.c   | 251 ++
 gcc/config/rs6000/rs6000.h|  12 +-
 gcc/doc/extend.texi   |  21 ++
 .../gcc.target/powerpc/vec-ternarylogic-0.c   | 120 +
 .../gcc.target/powerpc/vec-ternarylogic-1.c   | 119 +
 .../gcc.target/powerpc/vec-ternarylogic-10.c  | 129 +
 .../gcc.target/powerpc/vec-ternarylogic-2.c   | 105 
 .../gcc.target/powerpc/vec-ternarylogic-3.c   | 106 
 .../gcc.target/powerpc/vec-ternarylogic-4.c   | 104 
 .../gcc.target/powerpc/vec-ternarylogic-5.c   | 103 +++
 .../gcc.target/powerpc/vec-ternarylogic-6.c   | 104 
 .../gcc.target/powerpc/vec-ternarylogic-7.c   | 103 +++
 .../gcc.target/powerpc/vec-ternarylogic-8.c   | 128 +
 .../gcc.target/powerpc/vec-ternarylogic-9.c   | 129 +
 19 files changed, 1616 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-ternarylogic-0.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-ternarylogic-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-ternarylogic-10.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-ternarylogic-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-ternarylogic-3.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-ternarylogic-4.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-ternarylogic-5.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-ternarylogic-6.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-ternarylogic-7.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-ternarylogic-8.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-ternarylogic-9.c


[PATCH] rs6000: Add pdepd and pextd

2020-05-08 Thread Bill Schmidt via Gcc-patches
From: Kelvin Nilsen 

Add scalar instructions for parallel bit deposit and extract, with
built-in function support.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Is this okay for master?

Thanks,
Bill

[gcc]

2020-05-08  Kelvin Nilsen  

* config/rs6000/rs6000-builtin.def (__builtin_pdepd): New built-in
function.
(__builtin_pextd): Likewise.
* config/rs6000/rs6000.md (UNSPEC_PDEPD): New constant.
(UNSPEC_PEXTD): Likewise.
(pdepd): New insn.
(pextd): Likewise.
* doc/extend.texi (Basic PowerPC Built-in Functions Available for
a Future Architecture): Add descriptions of __builtin_pdepd and
__builtin_pextd functions.

[gcc/testsuite]

2020-05-08  Kelvin Nilsen  

* gcc.target/powerpc/pdep-0.c: New.
* gcc.target/powerpc/pdep-1.c: New.
* gcc.target/powerpc/pextd-0.c: New.
* gcc.target/powerpc/pextd-1.c: New.
---
 gcc/config/rs6000/rs6000-builtin.def   |  2 +
 gcc/config/rs6000/rs6000.md| 20 +
 gcc/doc/extend.texi| 16 +++
 gcc/testsuite/gcc.target/powerpc/pdep-0.c  | 48 +
 gcc/testsuite/gcc.target/powerpc/pdep-1.c  | 48 +
 gcc/testsuite/gcc.target/powerpc/pextd-0.c | 50 ++
 gcc/testsuite/gcc.target/powerpc/pextd-1.c | 49 +
 7 files changed, 233 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pdep-0.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pdep-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pextd-0.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pextd-1.c

diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index 39e7da5fa50..4b06323a07f 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2577,6 +2577,8 @@ BU_P9_OVERLOAD_2 (CMPEQB, "byte_in_set")
 BU_FUTURE_MISC_2 (CFUGED, "cfuged", CONST, cfuged)
 BU_FUTURE_MISC_2 (CNTLZDM, "cntlzdm", CONST, cntlzdm)
 BU_FUTURE_MISC_2 (CNTTZDM, "cnttzdm", CONST, cnttzdm)
+BU_FUTURE_MISC_2 (PDEPD, "pdepd", CONST, pdepd)
+BU_FUTURE_MISC_2 (PEXTD, "pextd", CONST, pextd)
 
 /* Future architecture vector built-ins.  */
 BU_FUTURE_V_2 (VCLRLB, "vclrlb", CONST, vclrlb)
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index def48204f86..25af555a0fb 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -151,6 +151,8 @@ (define_c_enum "unspec"
UNSPEC_CFUGED
UNSPEC_CNTLZDM
UNSPEC_CNTTZDM
+   UNSPEC_PDEPD
+   UNSPEC_PEXTD
   ])
 
 ;;
@@ -2483,6 +2485,24 @@ (define_insn "cnttzdm"
"cnttzdm %0,%1,%2"
[(set_attr "type" "integer")])
 
+(define_insn "pdepd"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r")
+   (match_operand:DI 2 "gpc_reg_operand" "r")]
+  UNSPEC_PDEPD))]
+   "TARGET_FUTURE && TARGET_POWERPC64"
+   "pdepd %0,%1,%2"
+   [(set_attr "type" "integer")])
+
+(define_insn "pextd"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r")
+   (match_operand:DI 2 "gpc_reg_operand" "r")]
+  UNSPEC_PEXTD))]
+   "TARGET_FUTURE && TARGET_POWERPC64"
+   "pextd %0,%1,%2"
+   [(set_attr "type" "integer")])
+
 (define_insn "cmpb3"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 104397823b0..9602a310cbb 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -17572,6 +17572,22 @@ Perform a 64-bit count trailing zeros operation under 
mask, as if
 implemented by the future @code{cnttzdm} instruction.
 @findex __builtin_cnttzdm
 
+@smallexample
+@exdent unsigned long long int
+@exdent __builtin_pdepd (unsigned long long int, unsigned long long int)
+@end smallexample
+Perform a 64-bit parallel bits deposit operation, as if implemented by the
+Future @code{pdepd} instruction.
+@findex __builtin_pdepd
+
+@smallexample
+@exdent unsigned long long int
+@exdent __builtin_pextd (unsigned long long int, unsigned long long int)
+@end smallexample
+Perform a 64-bit parallel bits extract operation, as if implemented by the
+Future @code{pextd} instruction.
+@findex __builtin_pextd
+
 @node PowerPC AltiVec/VSX Built-in Functions
 @subsection PowerPC AltiVec/VSX Built-in Functions
 
diff --git a/gcc/testsuite/gcc.target/powerpc/pdep-0.c 
b/gcc/testsuite/gcc.target/powerpc/pdep-0.c
new file mode 100644
index 000..5c6afb33169
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pdep-0.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc64 } */
+/* { dg-options "-mcpu=future" } */
+
+extern void abort (void);
+
+unsigned long long int
+do_pdepd (unsigned long long int source, 

Re: [PATCH] rs6000: Add pdepd and pextd

2020-05-08 Thread Bill Schmidt via Gcc-patches

Please ignore, I sent the wrong ChangeLog.  Will try again momentarily.

Sorry,
Bill

On 5/8/20 3:05 PM, Bill Schmidt via Gcc-patches wrote:

From: Kelvin Nilsen 

Add scalar instructions for parallel bit deposit and extract, with
built-in function support.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Is this okay for master?

Thanks,
Bill

[gcc]

2020-05-08  Kelvin Nilsen  

* config/rs6000/altivec.h (vec_clrl): New #define.
(vec_clrr): Likewise.
* config/rs6000/altivec.md (UNSPEC_VCLRLB): New constant.
(UNSPEC_VCLRRB): Likewise.
(vclrlb): New insn.
(vclrrb): Likewise.
* config/rs6000/rs6000-builtin.def (__builtin_altivec_vclrlb): New
built-in function.
(__builtin_altivec_vclrrb): Likewise.
(__builtin_vec_clrl): New overloaded built-in function.
(__builtin_vec_clrr): Likewise.
* config/rs6000/rs6000-call.c (altivec_overloaded_builtins):
Define overloaded forms of __builtin_vec_clrl and
__builtin_vec_clrr.
* doc/extend.texi (PowerPC AltiVec Built-in Functions Available
for a Future Architecture): Add descriptions of vec_clrl and
vec_clrr.

[gcc/testsuite]

2020-05-08  Kelvin Nilsen  

* gcc.target/powerpc/vec-clrl-0.c: New.
* gcc.target/powerpc/vec-clrl-1.c: New.
* gcc.target/powerpc/vec-clrr-0.c: New.
* gcc.target/powerpc/vec-clrr-1.c: New.
---
  gcc/config/rs6000/rs6000-builtin.def   |  2 +
  gcc/config/rs6000/rs6000.md| 20 +
  gcc/doc/extend.texi| 16 +++
  gcc/testsuite/gcc.target/powerpc/pdep-0.c  | 48 +
  gcc/testsuite/gcc.target/powerpc/pdep-1.c  | 48 +
  gcc/testsuite/gcc.target/powerpc/pextd-0.c | 50 ++
  gcc/testsuite/gcc.target/powerpc/pextd-1.c | 49 +
  7 files changed, 233 insertions(+)
  create mode 100644 gcc/testsuite/gcc.target/powerpc/pdep-0.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/pdep-1.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/pextd-0.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/pextd-1.c

diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index 39e7da5fa50..4b06323a07f 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2577,6 +2577,8 @@ BU_P9_OVERLOAD_2 (CMPEQB, "byte_in_set")
  BU_FUTURE_MISC_2 (CFUGED, "cfuged", CONST, cfuged)
  BU_FUTURE_MISC_2 (CNTLZDM, "cntlzdm", CONST, cntlzdm)
  BU_FUTURE_MISC_2 (CNTTZDM, "cnttzdm", CONST, cnttzdm)
+BU_FUTURE_MISC_2 (PDEPD, "pdepd", CONST, pdepd)
+BU_FUTURE_MISC_2 (PEXTD, "pextd", CONST, pextd)

  /* Future architecture vector built-ins.  */
  BU_FUTURE_V_2 (VCLRLB, "vclrlb", CONST, vclrlb)
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index def48204f86..25af555a0fb 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -151,6 +151,8 @@ (define_c_enum "unspec"
 UNSPEC_CFUGED
 UNSPEC_CNTLZDM
 UNSPEC_CNTTZDM
+   UNSPEC_PDEPD
+   UNSPEC_PEXTD
])

  ;;
@@ -2483,6 +2485,24 @@ (define_insn "cnttzdm"
 "cnttzdm %0,%1,%2"
 [(set_attr "type" "integer")])

+(define_insn "pdepd"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r")
+   (match_operand:DI 2 "gpc_reg_operand" "r")]
+  UNSPEC_PDEPD))]
+   "TARGET_FUTURE && TARGET_POWERPC64"
+   "pdepd %0,%1,%2"
+   [(set_attr "type" "integer")])
+
+(define_insn "pextd"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r")
+   (match_operand:DI 2 "gpc_reg_operand" "r")]
+  UNSPEC_PEXTD))]
+   "TARGET_FUTURE && TARGET_POWERPC64"
+   "pextd %0,%1,%2"
+   [(set_attr "type" "integer")])
+
  (define_insn "cmpb3"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 104397823b0..9602a310cbb 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -17572,6 +17572,22 @@ Perform a 64-bit count trailing zeros operation under 
mask, as if
  implemented by the future @code{cnttzdm} instruction.
  @findex __builtin_cnttzdm

+@smallexample
+@exdent unsigned long long int
+@exdent __builtin_pdepd (unsigned long long int, unsigned long long int)
+@end smallexample
+Perform a 64-b

[PATCH] rs6000: Add pdepd and pextd

2020-05-08 Thread Bill Schmidt via Gcc-patches
From: Kelvin Nilsen 

Add scalar instructions for parallel bit deposit and extract, with
built-in function support.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Is this okay for master?

Thanks,
Bill

[gcc]

2020-05-08  Kelvin Nilsen  

* config/rs6000/rs6000-builtin.def (__builtin_pdepd): New built-in
function definition.
(__builtin_pextd): Likewise.
* config/rs6000/rs6000.md (UNSPEC_PDEPD): New constant.
(UNSPEC_PEXTD): Likewise.
(pdepd): New insn.
(pextd): Likewise.
* doc/extend.texi (Basic PowerPC Built-in Functions Available for
a Future Architecture): Add descriptions of __builtin_pdepd and
__builtin_pextd functions.

[gcc/testsuite]

2020-05-08  Kelvin Nilsen  

* gcc.target/powerpc/pdep-0.c: New.
* gcc.target/powerpc/pdep-1.c: New.
* gcc.target/powerpc/pextd-0.c: New.
* gcc.target/powerpc/pextd-1.c: New.
---
 gcc/config/rs6000/rs6000-builtin.def   |  2 +
 gcc/config/rs6000/rs6000.md| 20 +
 gcc/doc/extend.texi| 16 +++
 gcc/testsuite/gcc.target/powerpc/pdep-0.c  | 48 +
 gcc/testsuite/gcc.target/powerpc/pdep-1.c  | 48 +
 gcc/testsuite/gcc.target/powerpc/pextd-0.c | 50 ++
 gcc/testsuite/gcc.target/powerpc/pextd-1.c | 49 +
 7 files changed, 233 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pdep-0.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pdep-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pextd-0.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pextd-1.c

diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index 39e7da5fa50..4b06323a07f 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2577,6 +2577,8 @@ BU_P9_OVERLOAD_2 (CMPEQB, "byte_in_set")
 BU_FUTURE_MISC_2 (CFUGED, "cfuged", CONST, cfuged)
 BU_FUTURE_MISC_2 (CNTLZDM, "cntlzdm", CONST, cntlzdm)
 BU_FUTURE_MISC_2 (CNTTZDM, "cnttzdm", CONST, cnttzdm)
+BU_FUTURE_MISC_2 (PDEPD, "pdepd", CONST, pdepd)
+BU_FUTURE_MISC_2 (PEXTD, "pextd", CONST, pextd)
 
 /* Future architecture vector built-ins.  */
 BU_FUTURE_V_2 (VCLRLB, "vclrlb", CONST, vclrlb)
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index def48204f86..25af555a0fb 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -151,6 +151,8 @@ (define_c_enum "unspec"
UNSPEC_CFUGED
UNSPEC_CNTLZDM
UNSPEC_CNTTZDM
+   UNSPEC_PDEPD
+   UNSPEC_PEXTD
   ])
 
 ;;
@@ -2483,6 +2485,24 @@ (define_insn "cnttzdm"
"cnttzdm %0,%1,%2"
[(set_attr "type" "integer")])
 
+(define_insn "pdepd"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r")
+   (match_operand:DI 2 "gpc_reg_operand" "r")]
+  UNSPEC_PDEPD))]
+   "TARGET_FUTURE && TARGET_POWERPC64"
+   "pdepd %0,%1,%2"
+   [(set_attr "type" "integer")])
+
+(define_insn "pextd"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r")
+   (match_operand:DI 2 "gpc_reg_operand" "r")]
+  UNSPEC_PEXTD))]
+   "TARGET_FUTURE && TARGET_POWERPC64"
+   "pextd %0,%1,%2"
+   [(set_attr "type" "integer")])
+
 (define_insn "cmpb3"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 104397823b0..9602a310cbb 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -17572,6 +17572,22 @@ Perform a 64-bit count trailing zeros operation under 
mask, as if
 implemented by the future @code{cnttzdm} instruction.
 @findex __builtin_cnttzdm
 
+@smallexample
+@exdent unsigned long long int
+@exdent __builtin_pdepd (unsigned long long int, unsigned long long int)
+@end smallexample
+Perform a 64-bit parallel bits deposit operation, as if implemented by the
+Future @code{pdepd} instruction.
+@findex __builtin_pdepd
+
+@smallexample
+@exdent unsigned long long int
+@exdent __builtin_pextd (unsigned long long int, unsigned long long int)
+@end smallexample
+Perform a 64-bit parallel bits extract operation, as if implemented by the
+Future @code{pextd} instruction.
+@findex __builtin_pextd
+
 @node PowerPC AltiVec/VSX Built-in Functions
 @subsection PowerPC AltiVec/VSX Built-in Functions
 
diff --git a/gcc/testsuite/gcc.target/powerpc/pdep-0.c 

Re: [PATCH] rs6000: Add vector count under mask

2020-05-08 Thread Bill Schmidt via Gcc-patches

On 5/8/20 2:00 PM, Segher Boessenkool wrote:

On Thu, May 07, 2020 at 09:11:32PM -0500, Bill Schmidt wrote:

From: Kelvin Nilsen 

Add support for new vclzdm and vctzdm vector instructions that
count leading and trailing zeros under control of a mask.
Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Is this okay for master?

(On what CPU / with what -mcpu= settings?)



Sorry for lack of clarity.  All of these patches are tested on a P9.  
The test cases have appropriate -mcpu= settings.  Those run-time tests 
requiring an architecture that supports these instructions show up as 
UNSUPPORTED in that configuration, of course.  My understanding is that 
Kelvin ran these tests on a simulator, but I do not know that for 
certain and haven't repeated those tests.  Any problems that may have 
crept in since then will get caught at such time that hardware is available.





diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 6b1d987913c..5ef4889ba55 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -160,6 +160,8 @@ (define_c_enum "unspec"
 UNSPEC_BCD_OVERFLOW
 UNSPEC_VRLMI
 UNSPEC_VRLNM
+   UNSPEC_VCLZDM
+   UNSPEC_VCTZDM

Hrm, this can actually be the same unspecs as used for the GPR version,
the mode will make the difference already?  Doesn't really matter of
course.

True!


(This needs an unspec because it isn't viable to describe in RTL what
this op does -- it is not an AND  with the mask and then a count, the
masked-out bits are actually skipped for the count).

Looks fine to me, thanks,


Thanks,
Bill




Segher


[PATCH] rs6000: Add vclrlb and vclrrb

2020-05-08 Thread Bill Schmidt via Gcc-patches
From: Kelvin Nilsen 

Add new vector instructions to clear leftmost and rightmost bytes.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Is this okay for master?

Thanks,
Bill

[gcc]

2020-05-08  Kelvin Nilsen  

* config/rs6000/altivec.h (vec_clrl): New #define.
(vec_clrr): Likewise.
* config/rs6000/altivec.md (UNSPEC_VCLRLB): New constant.
(UNSPEC_VCLRRB): Likewise.
(vclrlb): New insn.
(vclrrb): Likewise.
* config/rs6000/rs6000-builtin.def (__builtin_altivec_vclrlb): New
built-in function.
(__builtin_altivec_vclrrb): Likewise.
(__builtin_vec_clrl): New overloaded built-in function.
(__builtin_vec_clrr): Likewise.
* config/rs6000/rs6000-call.c (altivec_overloaded_builtins):
Define overloaded forms of __builtin_vec_clrl and
__builtin_vec_clrr.
* doc/extend.texi (PowerPC AltiVec Built-in Functions Available
for a Future Architecture): Add descriptions of vec_clrl and
vec_clrr.

[gcc/testsuite]

2020-05-08  Kelvin Nilsen  

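* gcc.target/powerpc/vec-clrl-0.c: New.
* gcc.target/powerpc/vec-clrl-1.c: New.
* gcc.target/powerpc/vec-clrl-2.c: New.
* gcc.target/powerpc/vec-clrl-3.c: New.
* gcc.target/powerpc/vec-clrr-0.c: New.
* gcc.target/powerpc/vec-clrr-1.c: New.
* gcc.target/powerpc/vec-clrr-2.c: New.
* gcc.target/powerpc/vec-clrr-3.c: New.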
---
 gcc/config/rs6000/altivec.h   |  2 +
 gcc/config/rs6000/altivec.md  | 29 +++
 gcc/config/rs6000/rs6000-builtin.def  |  4 ++
 gcc/config/rs6000/rs6000-call.c   | 11 ++
 gcc/config/rs6000/rs6000.md   |  2 +-
 gcc/doc/extend.texi   | 24 
 gcc/testsuite/gcc.target/powerpc/vec-clrl-0.c | 16 
 gcc/testsuite/gcc.target/powerpc/vec-clrl-1.c | 37 +++
 gcc/testsuite/gcc.target/powerpc/vec-clrl-2.c | 16 
 gcc/testsuite/gcc.target/powerpc/vec-clrl-3.c | 37 +++
 gcc/testsuite/gcc.target/powerpc/vec-clrr-0.c | 16 
 gcc/testsuite/gcc.target/powerpc/vec-clrr-1.c | 37 +++
 gcc/testsuite/gcc.target/powerpc/vec-clrr-2.c | 16 
 gcc/testsuite/gcc.target/powerpc/vec-clrr-3.c | 37 +++
 14 files changed, 283 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-clrl-0.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-clrl-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-clrl-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-clrl-3.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-clrr-0.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-clrr-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-clrr-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-clrr-3.c

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 0ecd961485b..74319f13fa6 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -697,6 +697,8 @@ __altivec_scalar_pred(vec_any_nle,
 
 /* Overloaded built-in functions for future architecture.  */
 #define vec_gnb(a, b)  __builtin_vec_gnb (a, b)
+#define vec_clrl(a, b) __builtin_vec_clrl (a, b)
+#define vec_clrr(a, b) __builtin_vec_clrr (a, b)
 #endif
 
 #endif /* _ALTIVEC_H */
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 1400724fb58..11d2dfe9426 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -166,6 +166,8 @@ (define_c_enum "unspec"
UNSPEC_VGNB
UNSPEC_VPDEPD
UNSPEC_VPEXTD
+   UNSPEC_VCLRLB
+   UNSPEC_VCLRRB
 ])
 
 (define_c_enum "unspecv"
@@ -4156,6 +4158,33 @@ (define_insn "vgnb"
"vgnb %0,%1,%2"
[(set_attr "type" "vecsimple")])
 
+(define_insn "vclrlb"
+  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
+   (unspec:V16QI [(match_operand:V16QI 1 "altivec_register_operand" "v")
+  (match_operand:SI 2 "gpc_reg_operand" "r")]
+UNSPEC_VCLRLB))]
+   "TARGET_FUTURE"
+{
+  if (BYTES_BIG_ENDIAN)
+return "vclrlb %0,%1,%2";
+  else
+return "vclrrb %0,%1,%2";
+}
+   [(set_attr "type" "vecsimple")])
+
+(define_insn "vclrrb"
+  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
+   (unspec:V16QI [(match_operand:V16QI 1 "altivec_register_operand" "v")
+  (match_operand:SI 2 "gpc_reg_operand" "r")]
+UNSPEC_VCLRRB))]
+   "TARGET_FUTURE"
+{
+  if (BYTES_BIG_ENDIAN)
+return "vclrrb %0,%1,%2";
+  else
+return "vclrlb %0,%1,%2";
+}
+   [(set_attr "type" "vecsimple")])
 
 (define_expand "bcd_"
   [(parallel [(set (reg:CCFP CR6_REGNO)
diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index c05d9f53d54..39e7da5fa50 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2579,6 +2579,8 @@ BU_FUTURE_MISC_2 (CNTLZDM, "cntlzdm", CONST, cntlzdm)
 BU_FUTURE_MISC_2 (CNTTZDM, "cnttzdm", CONST, cnttzdm)
 
 /* Future architecture vector built-ins.  */

[PATCH] rs6000: Add cntlzdm and cnttzdm

2020-05-08 Thread Bill Schmidt via Gcc-patches
From: Kelvin Nilsen 

Add support for new scalar instructions for counting leading or
trailing zeros under control of a bitmask.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Is this okay for master?

Thanks,
Bill

[gcc]

2020-05-08  Kelvin Nilsen  

* config/rs6000/rs6000-builtin.def (__builtin_cntlzdm): New
built-in function definition.
(__builtin_cnttzdm): Likewise.
* config/rs6000/rs6000.md (UNSPEC_CNTLZDM): New constant.
(UNSPEC_CNTTZDM): Likewise.
(cntlzdm): New insn.
(cnttzdm): Likewise.
* doc/extend.texi (Basic PowerPC Built-in Functions Available for
a Future Architecture): Add descriptions of __builtin_cntlzdm and
__builtin_cnttzdm functions.

[gcc/testsuite]

2020-05-08  Kelvin Nilsen  

* gcc.target/powerpc/cntlzdm-0.c: New test.
* gcc.target/powerpc/cntlzdm-1.c: New test.
* gcc.target/powerpc/cnttzdm-0.c: New test.
* gcc.target/powerpc/cnttzdm-1.c: New test.
---
 gcc/config/rs6000/rs6000-builtin.def |  2 +
 gcc/config/rs6000/rs6000.md  | 20 +++
 gcc/doc/extend.texi  | 16 ++
 gcc/testsuite/gcc.target/powerpc/cntlzdm-0.c | 57 
 gcc/testsuite/gcc.target/powerpc/cntlzdm-1.c | 56 +++
 gcc/testsuite/gcc.target/powerpc/cnttzdm-0.c | 53 ++
 gcc/testsuite/gcc.target/powerpc/cnttzdm-1.c | 53 ++
 7 files changed, 257 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/cntlzdm-0.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/cntlzdm-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/cnttzdm-0.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/cnttzdm-1.c

diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index 9d80d03fe50..c05d9f53d54 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2575,6 +2575,8 @@ BU_P9_OVERLOAD_2 (CMPEQB, "byte_in_set")
 
 /* Future architecture scalar built-ins.  */
 BU_FUTURE_MISC_2 (CFUGED, "cfuged", CONST, cfuged)
+BU_FUTURE_MISC_2 (CNTLZDM, "cntlzdm", CONST, cntlzdm)
+BU_FUTURE_MISC_2 (CNTTZDM, "cnttzdm", CONST, cnttzdm)
 
 /* Future architecture vector built-ins.  */
 BU_FUTURE_V_2 (VCFUGED, "vcfuged", CONST, vcfuged)
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 001ec27f403..19b3d7263d3 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -149,6 +149,8 @@ (define_c_enum "unspec"
UNSPEC_PLTSEQ
UNSPEC_PLT16_HA
UNSPEC_CFUGED
+   UNSPEC_CNTLZDM
+   UNSPEC_CNTTZDM
   ])
 
 ;;
@@ -2463,6 +2465,24 @@ (define_insn "cfuged"
"cfuged %0,%1,%2"
[(set_attr "type" "integer")])
 
+(define_insn "cntlzdm"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+   (unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r")
+   (match_operand:DI 2 "gpc_reg_operand" "r")]
+UNSPEC_CNTLZDM))]
+   "TARGET_FUTURE && TARGET_64BIT"
+   "cntlzdm %0,%1,%2"
+   [(set_attr "type" "integer")])
+
+(define_insn "cnttzdm"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+   (unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r")
+   (match_operand:DI 2 "gpc_reg_operand" "r")]
+UNSPEC_CNTTZDM))]
+   "TARGET_FUTURE && TARGET_64BIT"
+   "cnttzdm %0,%1,%2"
+   [(set_attr "type" "integer")])
+
 (define_insn "cmpb3"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index f7b30c7ead7..8c833d39fec 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -17556,6 +17556,22 @@ Perform a 64-bit centrifuge operation, as if 
implemented by the Future
 @code{cfuged} instruction.
 @findex __builtin_cfuged
 
+@smallexample
+@exdent unsigned long long int
+@exdent __builtin_cntlzdm (unsigned long long int, unsigned long long int)
+@end smallexample
+Perform a 64-bit count leading zeros operation under mask, as if
+implemented by the future @code{cntlzdm} instruction.
+@findex __builtin_cntlzdm
+
+@smallexample
+@exdent unsigned long long int
+@exdent __builtin_cnttzdm (unsigned long long int, unsigned long long int)
+@end smallexample
+Perform a 64-bit count trailing zeros operation under mask, as if
+implemented by the future @code{cnttzdm} instruction.
+@findex __builtin_cnttzdm
+
 @node PowerPC AltiVec/VSX Built-in Functions
 @subsection PowerPC AltiVec/VSX Built-in Functions
 
diff --git a/gcc/testsuite/gcc.target/powerpc/cntlzdm-0.c 
b/gcc/testsuite/gcc.target/powerpc/cntlzdm-0.c
new file mode 100644
index 000..e3ce63c3447
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/cntlzdm-0.c
@@ -0,0 +1,57 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-mdejagnu-cpu=future" } */
+
+extern void abort (void);
+
+unsigned long long int

[PATCH] rs6000: Add vcfuged instruction

2020-05-08 Thread Bill Schmidt via Gcc-patches
From: Kelvin Nilsen 

Add the new vector centrifuge-doubleword instruction and built-in
function access.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Is this okay for master?

Thanks,
Bill

[gcc]

2020-05-08  Kelvin Nilsen  

* config/rs6000/altivec.h (vec_cfuge): New #define.
* config/rs6000/altivec.md (UNSPEC_VCFUGED): New constant.
(vcfuged): New insn.
* config/rs6000/rs6000-builtin.def (__builtin_altivec_vcfuged):
New built-in function.
* config/rs6000/rs6000-call.c (builtin_function_type): Add
handling for FUTURE_BUILTIN_VCFUGED case.
* doc/extend.texi (PowerPC AltiVec Built-in Functions Available
for a Future Architecture): Add description of vec_cfuge built-in
function.

[gcc/testsuite]

2020-05-08  Kelvin Nilsen  

* gcc.target/powerpc/vec-cfuged-0.c: New test.
* gcc.target/powerpc/vec-cfuged-1.c: New test.
---
 gcc/config/rs6000/altivec.h   |  1 +
 gcc/config/rs6000/altivec.md  | 10 +++
 gcc/config/rs6000/rs6000-builtin.def  |  1 +
 gcc/config/rs6000/rs6000-call.c   |  1 +
 gcc/doc/extend.texi   |  9 +++
 .../gcc.target/powerpc/vec-cfuged-0.c | 61 +++
 .../gcc.target/powerpc/vec-cfuged-1.c | 60 ++
 7 files changed, 143 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-cfuged-0.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-cfuged-1.c

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index b6ecad6911d..0ecd961485b 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -693,6 +693,7 @@ __altivec_scalar_pred(vec_any_nle,
 #define vec_ctzm(a, b) __builtin_altivec_vctzdm (a, b)
 #define vec_pdep(a, b) __builtin_altivec_vpdepd (a, b)
 #define vec_pext(a, b) __builtin_altivec_vpextd (a, b)
+#define vec_cfuge(a, b)__builtin_altivec_vcfuged (a, b)
 
 /* Overloaded built-in functions for future architecture.  */
 #define vec_gnb(a, b)  __builtin_vec_gnb (a, b)
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 7cebb58331e..1400724fb58 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -160,6 +160,7 @@ (define_c_enum "unspec"
UNSPEC_BCD_OVERFLOW
UNSPEC_VRLMI
UNSPEC_VRLNM
+   UNSPEC_VCFUGED
UNSPEC_VCLZDM
UNSPEC_VCTZDM
UNSPEC_VGNB
@@ -4101,6 +4102,15 @@ (define_insn "*bcd_test2"
   "bcd. %0,%1,%2,%3"
   [(set_attr "type" "vecsimple")])
 
+(define_insn "vcfuged"
+  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
+   (unspec:V2DI [(match_operand:V2DI 1 "altivec_register_operand" "v")
+ (match_operand:V2DI 2 "altivec_register_operand" "v")]
+UNSPEC_VCFUGED))]
+   "TARGET_FUTURE"
+   "vcfuged %0,%1,%2"
+   [(set_attr "type" "vecsimple")])
+
 (define_insn "vclzdm"
   [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
(unspec:V2DI [(match_operand:V2DI 1 "altivec_register_operand" "v")
diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index afc8487515f..9d80d03fe50 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2577,6 +2577,7 @@ BU_P9_OVERLOAD_2 (CMPEQB, "byte_in_set")
 BU_FUTURE_MISC_2 (CFUGED, "cfuged", CONST, cfuged)
 
 /* Future architecture vector built-ins.  */
+BU_FUTURE_V_2 (VCFUGED, "vcfuged", CONST, vcfuged)
 BU_FUTURE_V_2 (VCLZDM, "vclzdm", CONST, vclzdm)
 BU_FUTURE_V_2 (VCTZDM, "vctzdm", CONST, vctzdm)
 BU_FUTURE_V_2 (VPDEPD, "vpdepd", CONST, vpdepd)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 952f17c6854..c3ba6b93c46 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -12951,6 +12951,7 @@ builtin_function_type (machine_mode mode_ret, 
machine_mode mode_arg0,
 case P8V_BUILTIN_ORC_V4SI_UNS:
 case P8V_BUILTIN_ORC_V2DI_UNS:
 case P8V_BUILTIN_ORC_V1TI_UNS:
+case FUTURE_BUILTIN_VCFUGED:
 case FUTURE_BUILTIN_VCLZDM:
 case FUTURE_BUILTIN_VCTZDM:
 case FUTURE_BUILTIN_VGNB:
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 8b5a51a6973..f7b30c7ead7 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -20719,6 +20719,15 @@ PowerPC family of processors, starting with a 
hypothetical CPU
 which may or may not be available in the future
 (@option{-mcpu=future}) or later:
 
+
+@smallexample
+@exdent vector unsigned long long int
+@exdent vec_cfuge (vector unsigned long long int, vector unsigned long long 
int)
+@end smallexample
+Perform a vector centrifuge operation, as if implemented by the Future
+@code{vcfuged} instruction.
+@findex vec_cfuge
+
 @smallexample
 @exdent vector unsigned long long int
 @exdent vec_clzm (vector unsigned long long int, vector unsigned long long int)
diff --git 

[PATCH] rs6000: Add scalar cfuged instruction

2020-05-08 Thread Bill Schmidt via Gcc-patches
From: Kelvin Nilsen 

Add the centrifuge-doubleword instruction and built-in access.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Is this okay for master?

Thanks,
Bill

[gcc]

2020-05-08  Kelvin Nilsen  

* config/rs6000/rs6000-builtin.def (BU_FUTURE_MISC_0): New
#define.
(BU_FUTURE_MISC_1): Likewise.
(BU_FUTURE_MISC_2): Likewise.
(BU_FUTURE_MISC_3): Likewise.
(__builtin_cfuged): New built-in function definition.
* config/rs6000/rs6000.md (UNSPEC_CFUGED): New constant.
(cfuged): New insn.
* doc/extend.texi (Basic PowerPC Built-in Functions Available for
a Future Architecture): New subsubsection.

[gcc/testsuite]

2020-05-08  Kelvin Nilsen  

* gcc.target/powerpc/cfuged-0.c: New test.
* gcc.target/powerpc/cfuged-1.c: New test.
---
 gcc/config/rs6000/rs6000-builtin.def| 37 +++
 gcc/config/rs6000/rs6000.md | 10 
 gcc/doc/extend.texi | 22 +
 gcc/testsuite/gcc.target/powerpc/cfuged-0.c | 51 +
 gcc/testsuite/gcc.target/powerpc/cfuged-1.c | 50 
 5 files changed, 170 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/cfuged-0.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/cfuged-1.c

diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index e25249b5418..afc8487515f 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -993,6 +993,40 @@
 | RS6000_BTC_TERNARY), \
CODE_FOR_nothing)   /* ICODE */
 
+/* Miscellaneous (non-vector) builtins for instructions which may be
+   added at some point in the future.  */
+
+#define BU_FUTURE_MISC_0(ENUM, NAME, ATTR, ICODE)  \
+  RS6000_BUILTIN_0 (FUTURE_BUILTIN_ ## ENUM,   /* ENUM */  \
+   "__builtin_" NAME,  /* NAME */  \
+   RS6000_BTM_FUTURE,  /* MASK */  \
+   (RS6000_BTC_ ## ATTR/* ATTR */  \
+| RS6000_BTC_SPECIAL), \
+   CODE_FOR_ ## ICODE) /* ICODE */
+
+#define BU_FUTURE_MISC_1(ENUM, NAME, ATTR, ICODE)  \
+  RS6000_BUILTIN_1 (FUTURE_BUILTIN_ ## ENUM,   /* ENUM */  \
+   "__builtin_" NAME,  /* NAME */  \
+   RS6000_BTM_FUTURE,  /* MASK */  \
+   (RS6000_BTC_ ## ATTR/* ATTR */  \
+| RS6000_BTC_UNARY),   \
+   CODE_FOR_ ## ICODE) /* ICODE */
+
+#define BU_FUTURE_MISC_2(ENUM, NAME, ATTR, ICODE)  \
+  RS6000_BUILTIN_2 (FUTURE_BUILTIN_ ## ENUM,   /* ENUM */  \
+   "__builtin_" NAME,  /* NAME */  \
+   RS6000_BTM_FUTURE,  /* MASK */  \
+   (RS6000_BTC_ ## ATTR/* ATTR */  \
+| RS6000_BTC_BINARY),  \
+   CODE_FOR_ ## ICODE) /* ICODE */
+
+#define BU_FUTURE_MISC_3(ENUM, NAME, ATTR, ICODE)  \
+  RS6000_BUILTIN_3 (FUTURE_BUILTIN_ ## ENUM,   /* ENUM */  \
+   "__builtin_" NAME,  /* NAME */  \
+   RS6000_BTM_FUTURE,  /* MASK */  \
+   (RS6000_BTC_ ## ATTR/* ATTR */  \
+| RS6000_BTC_TERNARY), \
+   CODE_FOR_ ## ICODE) /* ICODE */
 #endif
 
 
@@ -2539,6 +2573,9 @@ BU_P9_OVERLOAD_2 (CMPRB,  "byte_in_range")
 BU_P9_OVERLOAD_2 (CMPRB2,  "byte_in_either_range")
 BU_P9_OVERLOAD_2 (CMPEQB,  "byte_in_set")
 
+/* Future architecture scalar built-ins.  */
+BU_FUTURE_MISC_2 (CFUGED, "cfuged", CONST, cfuged)
+
 /* Future architecture vector built-ins.  */
 BU_FUTURE_V_2 (VCLZDM, "vclzdm", CONST, vclzdm)
 BU_FUTURE_V_2 (VCTZDM, "vctzdm", CONST, vctzdm)
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index c02c2e1de72..001ec27f403 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -148,6 +148,7 @@ (define_c_enum "unspec"
UNSPEC_SI_FROM_SF
UNSPEC_PLTSEQ
UNSPEC_PLT16_HA
+   UNSPEC_CFUGED
   ])
 
 ;;
@@ -2453,6 +2454,15 @@ (define_insn "parity2_cmpb"
   "prty %0,%1"
   [(set_attr "type" "popcnt")])
 
+(define_insn "cfuged"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r")
+   (match_operand:DI 2 "gpc_reg_operand" "r")]
+UNSPEC_CFUGED))]
+   "TARGET_FUTURE 

[PATCH] rs6000: Add vgnb

2020-05-07 Thread Bill Schmidt via Gcc-patches
From: Kelvin Nilsen 

Add support for the vgnb instruction, which gathers every Nth bit
per vector element.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Is this okay for master?

Thanks,
Bill

[gcc]

2020-05-07  Kelvin Nilsen  
Bill Schmidt  

* config/rs6000/altivec.h (vec_gnb): New #define.
* config/rs6000/altivec.md (UNSPEC_VGNB): New constant.
(vgnb): New insn.
* config/rs6000/rs6000-builtin.def (BU_FUTURE_OVERLOAD_1): New
#define.
(BU_FUTURE_OVERLOAD_2): Likewise.
(BU_FUTURE_OVERLOAD_3): Likewise.
(__builtin_altivec_gnb): New built-in function.
(__builtin_vec_gnb): New overloaded built-in function.
* config/rs6000/rs6000-call.c (altivec_overloaded_builtins):
Define overloaded forms of __builtin_vec_gnb.
(rs6000_expand_binop_builtin): Add error checking for 2nd argument
of __builtin_vec_gnb.
(builtin_function_type): Mark return value and arguments unsigned
for FUTURE_BUILTIN_VGNB.
* doc/extend.texi (PowerPC AltiVec Built-in Functions Available
for a Future Architecture): Add description of vec_gnb built-in
function.

[gcc/testsuite]

2020-05-07  Kelvin Nilsen  
Bill Schmidt  

* gcc.target/powerpc/vec-gnb-0.c: New test.
* gcc.target/powerpc/vec-gnb-1.c: New test.
* gcc.target/powerpc/vec-gnb-10.c: New test.
* gcc.target/powerpc/vec-gnb-2.c: New test.
* gcc.target/powerpc/vec-gnb-3.c: New test.
* gcc.target/powerpc/vec-gnb-4.c: New test.
* gcc.target/powerpc/vec-gnb-5.c: New test.
* gcc.target/powerpc/vec-gnb-6.c: New test.
* gcc.target/powerpc/vec-gnb-7.c: New test.
* gcc.target/powerpc/vec-gnb-8.c: New test.
* gcc.target/powerpc/vec-gnb-9.c: New test.
---
 gcc/config/rs6000/altivec.h   |  2 +
 gcc/config/rs6000/altivec.md  | 10 +++
 gcc/config/rs6000/rs6000-builtin.def  | 29 +++
 gcc/config/rs6000/rs6000-call.c   | 26 +++
 gcc/doc/extend.texi   | 18 -
 gcc/testsuite/gcc.target/powerpc/vec-gnb-0.c  | 75 +++
 gcc/testsuite/gcc.target/powerpc/vec-gnb-1.c  | 75 +++
 gcc/testsuite/gcc.target/powerpc/vec-gnb-10.c | 72 ++
 gcc/testsuite/gcc.target/powerpc/vec-gnb-2.c  | 73 ++
 gcc/testsuite/gcc.target/powerpc/vec-gnb-3.c  | 72 ++
 gcc/testsuite/gcc.target/powerpc/vec-gnb-4.c  | 71 ++
 gcc/testsuite/gcc.target/powerpc/vec-gnb-5.c  | 71 ++
 gcc/testsuite/gcc.target/powerpc/vec-gnb-6.c  | 71 ++
 gcc/testsuite/gcc.target/powerpc/vec-gnb-7.c  | 71 ++
 gcc/testsuite/gcc.target/powerpc/vec-gnb-8.c  | 75 +++
 gcc/testsuite/gcc.target/powerpc/vec-gnb-9.c  | 74 ++
 16 files changed, 884 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-0.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-10.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-3.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-4.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-5.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-6.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-7.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-8.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-9.c

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 12dfcd8d2bf..b6ecad6911d 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -694,6 +694,8 @@ __altivec_scalar_pred(vec_any_nle,
 #define vec_pdep(a, b) __builtin_altivec_vpdepd (a, b)
 #define vec_pext(a, b) __builtin_altivec_vpextd (a, b)
 
+/* Overloaded built-in functions for future architecture.  */
+#define vec_gnb(a, b)  __builtin_vec_gnb (a, b)
 #endif
 
 #endif /* _ALTIVEC_H */
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 33ba57855bc..7cebb58331e 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -162,6 +162,7 @@ (define_c_enum "unspec"
UNSPEC_VRLNM
UNSPEC_VCLZDM
UNSPEC_VCTZDM
+   UNSPEC_VGNB
UNSPEC_VPDEPD
UNSPEC_VPEXTD
 ])
@@ -4136,6 +4137,15 @@ (define_insn "vpextd"
"vpextd %0,%1,%2"
[(set_attr "type" "vecsimple")])
 
+(define_insn "vgnb"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+(unspec:DI [(match_operand:V2DI 1 "altivec_register_operand" "v")
+   (match_operand:QI 2 "u3bit_cint_operand" "n")]
+  

[PATCH] rs6000: Add pdep/pext

2020-05-07 Thread Bill Schmidt via Gcc-patches
From: Kelvin Nilsen 

Add support for the vpdepd and vpextd instructions which perform
vector parallel bit deposit and vector parallel bit extract.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Is this okay for master?

Thanks,
Bill

2020-05-07  Kelvin Nilsen  
Bill Schmidt  

* config/rs6000/altivec.h (vec_pdep): New macro implementing new
built-in function.
(vec_pext): Likewise.
* config/rs6000/altivec.md (UNSPEC_VPDEPD): New constant.
(UNSPEC_VPEXTD): Likewise.
(vpdepd): New insn.
(vpextd): Likewise.
* config/rs6000/rs6000-builtin.def (__builtin_altivec_vpdepd): New
built-in function.
(__builtin_altivec_vpextd): Likewise.
* config/rs6000/rs6000-call.c (builtin_function_type): Add
handling for FUTURE_BUILTIN_VPDEPD and FUTURE_BUILTIN_VPEXTD
cases.
* doc/extend.texi (PowerPC AltiVec Built-in Functions Available
for a Future Architecture): Add description of vec_pdep and
vec_pext built-in functions.

2020-05-07  Kelvin Nilsen  

* gcc.target/powerpc/vec-pdep-0.c: New.
* gcc.target/powerpc/vec-pdep-1.c: New.
* gcc.target/powerpc/vec-pext-0.c: New.
* gcc.target/powerpc/vec-pext-1.c: New.
---
 gcc/config/rs6000/altivec.h   |  3 +
 gcc/config/rs6000/altivec.md  | 20 ++
 gcc/config/rs6000/rs6000-builtin.def  |  2 +
 gcc/config/rs6000/rs6000-call.c   |  2 +
 gcc/doc/extend.texi   | 15 +
 gcc/testsuite/gcc.target/powerpc/vec-pdep-0.c | 61 +++
 gcc/testsuite/gcc.target/powerpc/vec-pdep-1.c | 53 
 gcc/testsuite/gcc.target/powerpc/vec-pext-0.c | 53 
 gcc/testsuite/gcc.target/powerpc/vec-pext-1.c | 52 
 9 files changed, 261 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-pdep-0.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-pdep-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-pext-0.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-pext-1.c

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index e1e75ad0f1e..12dfcd8d2bf 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -691,6 +691,9 @@ __altivec_scalar_pred(vec_any_nle,
with support for different vector argument and result types.  */
 #define vec_clzm(a, b) __builtin_altivec_vclzdm (a, b)
 #define vec_ctzm(a, b) __builtin_altivec_vctzdm (a, b)
+#define vec_pdep(a, b) __builtin_altivec_vpdepd (a, b)
+#define vec_pext(a, b) __builtin_altivec_vpextd (a, b)
+
 #endif
 
 #endif /* _ALTIVEC_H */
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 5ef4889ba55..33ba57855bc 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -162,6 +162,8 @@ (define_c_enum "unspec"
UNSPEC_VRLNM
UNSPEC_VCLZDM
UNSPEC_VCTZDM
+   UNSPEC_VPDEPD
+   UNSPEC_VPEXTD
 ])
 
 (define_c_enum "unspecv"
@@ -4116,6 +4118,24 @@ (define_insn "vctzdm"
"vctzdm %0,%1,%2"
[(set_attr "type" "vecsimple")])
 
+(define_insn "vpdepd"
+  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
+   (unspec:V2DI [(match_operand:V2DI 1 "altivec_register_operand" "v")
+ (match_operand:V2DI 2 "altivec_register_operand" "v")]
+UNSPEC_VPDEPD))]
+   "TARGET_FUTURE"
+   "vpdepd %0,%1,%2"
+   [(set_attr "type" "vecsimple")])
+
+(define_insn "vpextd"
+  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
+   (unspec:V2DI [(match_operand:V2DI 1 "altivec_register_operand" "v")
+ (match_operand:V2DI 2 "altivec_register_operand" "v")]
+UNSPEC_VPEXTD))]
+   "TARGET_FUTURE"
+   "vpextd %0,%1,%2"
+   [(set_attr "type" "vecsimple")])
+
 
 (define_expand "bcd_"
   [(parallel [(set (reg:CCFP CR6_REGNO)
diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index 9293e7cf4fb..776fc542ebf 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2518,6 +2518,8 @@ BU_P9_OVERLOAD_2 (CMPEQB, "byte_in_set")
 /* Future architecture vector built-ins.  */
 BU_FUTURE_V_2 (VCLZDM, "vclzdm", CONST, vclzdm)
 BU_FUTURE_V_2 (VCTZDM, "vctzdm", CONST, vctzdm)
+BU_FUTURE_V_2 (VPDEPD, "vpdepd", CONST, vpdepd)
+BU_FUTURE_V_2 (VPEXTD, "vpextd", CONST, vpextd)
 
 /* 1 argument crypto functions.  */
 BU_CRYPTO_1 (VSBOX,"vsbox",  CONST, crypto_vsbox_v2di)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gc

[PATCH] rs6000: Add vector count under mask

2020-05-07 Thread Bill Schmidt via Gcc-patches
From: Kelvin Nilsen 

Add support for new vclzdm and vctzdm vector instructions that
count leading and trailing zeros under control of a mask.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Is this okay for master?

Thanks,
Bill

[gcc]

2020-05-07  Kelvin Nilsen  
Bill Schmidt  

* config/rs6000/altivec.h (vec_clzm): New macro.
(vec_ctzm): Likewise.
* config/rs6000/altivec.md (UNSPEC_VCLZDM): New constant.
(UNSPEC_VCTZDM): Likewise.
(vclzdm): New insn.
(vctzdm): Likewise.
* config/rs6000/rs6000-builtin.def (BU_FUTURE_V_0): New macro.
(BU_FUTURE_V_1): Likewise.
(BU_FUTURE_V_2): Likewise.
(BU_FUTURE_V_3): Likewise.
(__builtin_altivec_vclzdm): New builtin definition.
(__builtin_altivec_vctzdm): Likewise.
* config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Cause
_ARCH_PWR_FUTURE macro to be defined if OPTION_MASK_FUTURE flag is
set.
* config/rs6000/rs6000-call.c (builtin_function_type): Set return
value and parameter types to be unsigned for VCLZDM and VCTZDM.
* config/rs6000/rs6000.c (rs6000_builtin_mask_calculate): Add
support for TARGET_FUTURE flag.
* config/rs6000/rs6000.h (RS6000_BTM_FUTURE): New macro constant.
* doc/extend.texi (PowerPC Altivec Built-in Functions Available
for a Future Architecture): New subsubsection.

[gcc/testsuite]

2020-05-07  Kelvin Nilsen  

* gcc.target/powerpc/vec-clzm-0.c: New test.
* gcc.target/powerpc/vec-clzm-1.c: New test.
* gcc.target/powerpc/vec-ctzm-0.c: New test.
* gcc.target/powerpc/vec-ctzm-1.c: New test.
---
 gcc/config/rs6000/altivec.h   |  7 +++
 gcc/config/rs6000/altivec.md  | 21 
 gcc/config/rs6000/rs6000-builtin.def  | 40 ++
 gcc/config/rs6000/rs6000-c.c  |  2 +
 gcc/config/rs6000/rs6000-call.c   |  2 +
 gcc/config/rs6000/rs6000.c|  3 +-
 gcc/config/rs6000/rs6000.h|  2 +
 gcc/doc/extend.texi   | 27 ++
 gcc/testsuite/gcc.target/powerpc/vec-clzm-0.c | 54 +++
 gcc/testsuite/gcc.target/powerpc/vec-clzm-1.c | 54 +++
 gcc/testsuite/gcc.target/powerpc/vec-ctzm-0.c | 54 +++
 gcc/testsuite/gcc.target/powerpc/vec-ctzm-1.c | 53 ++
 12 files changed, 318 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-clzm-0.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-clzm-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-ctzm-0.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-ctzm-1.c

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 5f1f5924488..e1e75ad0f1e 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -686,4 +686,11 @@ __altivec_scalar_pred(vec_any_nle,
to #define vec_step to __builtin_vec_step.  */
 #define vec_step(x) __builtin_vec_step (* (__typeof__ (x) *) 0)
 
+#ifdef _ARCH_PWR_FUTURE
+/* May modify these macro definitions if future capabilities overload
+   with support for different vector argument and result types.  */
+#define vec_clzm(a, b) __builtin_altivec_vclzdm (a, b)
+#define vec_ctzm(a, b) __builtin_altivec_vctzdm (a, b)
+#endif
+
 #endif /* _ALTIVEC_H */
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 6b1d987913c..5ef4889ba55 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -160,6 +160,8 @@ (define_c_enum "unspec"
UNSPEC_BCD_OVERFLOW
UNSPEC_VRLMI
UNSPEC_VRLNM
+   UNSPEC_VCLZDM
+   UNSPEC_VCTZDM
 ])
 
 (define_c_enum "unspecv"
@@ -4096,6 +4098,25 @@ (define_insn "*bcd_test2"
   "bcd. %0,%1,%2,%3"
   [(set_attr "type" "vecsimple")])
 
+(define_insn "vclzdm"
+  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
+   (unspec:V2DI [(match_operand:V2DI 1 "altivec_register_operand" "v")
+ (match_operand:V2DI 2 "altivec_register_operand" "v")]
+UNSPEC_VCLZDM))]
+   "TARGET_FUTURE"
+   "vclzdm %0,%1,%2"
+   [(set_attr "type" "vecsimple")])
+
+(define_insn "vctzdm"
+  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
+   (unspec:V2DI [(match_operand:V2DI 1 "altivec_register_operand" "v")
+ (match_operand:V2DI 2 "altivec_register_operand" "v")]
+UNSPEC_VCTZDM))]
+   "TARGET_FUTURE"
+   "vctzdm %0,%1,%2"
+   [(set_attr "type" "vecsimple")])
+
+
 (define_expand "bcd_"
   [(parallel [(set (reg:CCFP CR6_REGNO)
  

[PATCH] rs6000: powerpc_future_ok and powerpc_future_hw

2020-05-07 Thread Bill Schmidt via Gcc-patches
From: Kelvin Nilsen 

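Update the DejaGnu effective-target checks powerpc_future_ok and
powerpc_future_hw to use -mcpu=future, and add tests that exercise them.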

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Is this okay for master?

Patch shepherded by Bill Schmidt on behalf of Kelvin Nilsen.

Thanks!
Bill

2020-03-04  Kelvin Nilsen  

* gcc.target/powerpc/dg-future-0.c: New.
* gcc.target/powerpc/dg-future-1.c: New.
* lib/target-supports.exp (check_powerpc_future_hw_available):
Replace -mfuture with -mcpu=future.
(check_effective_target_powerpc_future_ok): Likewise.
(is-effective-target): Add powerpc_future_hw.
---
 .../gcc.target/powerpc/dg-future-0.c  | 14 ++
 .../gcc.target/powerpc/dg-future-1.c  | 19 +++
 gcc/testsuite/lib/target-supports.exp |  5 +++--
 3 files changed, 36 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/dg-future-0.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/dg-future-1.c

diff --git a/gcc/testsuite/gcc.target/powerpc/dg-future-0.c 
b/gcc/testsuite/gcc.target/powerpc/dg-future-0.c
new file mode 100644
index 000..abfda82a684
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dg-future-0.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-options "-mdejagnu-cpu=future" } */
+
+/* This tests that powerpc_future_ok works.  */
+
+extern void abort (void);
+
+int futurity (void) {
+  long int e = -1;
+  asm ("pli %0,%1": "+r" (e) : "n" (0x12345));
+  return (e == 0x12345);
+}
+
+/* { dg-final { scan-assembler {\mpli\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/dg-future-1.c 
b/gcc/testsuite/gcc.target/powerpc/dg-future-1.c
new file mode 100644
index 000..6087fe7c894
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dg-future-1.c
@@ -0,0 +1,19 @@
+/* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target powerpc_future_hw } */
+/* { dg-options "-mdejagnu-cpu=future" } */
+
+/* This tests that powerpc_future_hw works.  */
+
+extern void abort (void);
+
+int futurity (void) {
+  long int e = -1;
+  asm ("pli %0,%1": "+r" (e) : "n" (0x12345));
+  return (e == 0x12345);
+}
+
+int main (int argc, char *argv [])
+{
+  if (!futurity ())
+abort ();
+}
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 13761491e63..40978e580a3 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2204,7 +2204,7 @@ proc check_powerpc_future_hw_available { } {
asm ("pli %0,%1" : "+r" (e) : "n" (0x12345));
return (e == 0x12345);
}
-   } "-mfuture"
+   } "-mcpu=future"
 }]
 }
 
@@ -5816,7 +5816,7 @@ proc check_effective_target_powerpc_future_ok { } {
asm ("pli %0,%1" : "=r" (e) : "n" (0x12345));
return e;
}
-   } "-mfuture"]
+   } "-mcpu=future"]
 } else {
return 0
 }
@@ -7763,6 +7763,7 @@ proc is-effective-target { arg } {
  "named_sections" { set selected [check_named_sections_available] }
  "gc_sections"{ set selected [check_gc_sections_available] }
  "cxa_atexit" { set selected [check_cxa_atexit_available] }
+ "powerpc_future_hw" { set selected 
[check_powerpc_future_hw_available] }
  default  { error "unknown effective target keyword `$arg'" }
}
 }
-- 
2.17.1



Re: [PATCH 0/4] rs6000: setnbc and friends [pu]

2020-05-06 Thread Bill Schmidt via Gcc-patches

On 5/6/20 6:48 PM, Segher Boessenkool wrote:

On Wed, May 06, 2020 at 03:41:35PM -0500, Bill Schmidt wrote:

For all of these, I forgot to mention that they have been bootstrapped
and tested on powerpc64le-unknown-linux-gnu with no regressions.  Are
these okay for trunk, after GCC 10 is fully released?

These all look fine to me.  But maybe David can find something?  :-)

Thank you for handling this!


Segher



Bill Schmidt (4):
   Add insns for setbc and setbcr
   Add tests for setbc and setbcr
   Add insns for setnbc and setnbcr
   Add tests for setnbc and setnbcr

(I assume you will get the author info correct -- git log --format=fuller
to check, git commit --amend --author= (or similar) to fix things).

Yes, I forgot to set the author on these; I will fix them when applying to
master.


Re: [PATCH 0/4] rs6000: setnbc and friends [pu]

2020-05-06 Thread Bill Schmidt via Gcc-patches
For all of these, I forgot to mention that they have been bootstrapped 
and tested on powerpc64le-unknown-linux-gnu with no regressions.  Are 
these okay for trunk, after GCC 10 is fully released?


Thanks,
Bill

On 5/6/20 3:31 PM, Bill Schmidt via Gcc-patches wrote:

*** BLURB HERE ***

Bill Schmidt (4):
   Add insns for setbc and setbcr
   Add tests for setbc and setbcr
   Add insns for setnbc and setnbcr
   Add tests for setnbc and setnbcr

  gcc/config/rs6000/rs6000.md | 100 +---
  gcc/testsuite/gcc.target/powerpc/setbc.h|  27 ++
  gcc/testsuite/gcc.target/powerpc/setbceq.c  |   9 ++
  gcc/testsuite/gcc.target/powerpc/setbcge.c  |  12 +++
  gcc/testsuite/gcc.target/powerpc/setbcgt.c  |  10 ++
  gcc/testsuite/gcc.target/powerpc/setbcle.c  |  10 ++
  gcc/testsuite/gcc.target/powerpc/setbclt.c  |  12 +++
  gcc/testsuite/gcc.target/powerpc/setbcne.c  |   9 ++
  gcc/testsuite/gcc.target/powerpc/setnbc.h   |  27 ++
  gcc/testsuite/gcc.target/powerpc/setnbceq.c |   9 ++
  gcc/testsuite/gcc.target/powerpc/setnbcge.c |  12 +++
  gcc/testsuite/gcc.target/powerpc/setnbcgt.c |  10 ++
  gcc/testsuite/gcc.target/powerpc/setnbcle.c |  10 ++
  gcc/testsuite/gcc.target/powerpc/setnbclt.c |  12 +++
  gcc/testsuite/gcc.target/powerpc/setnbcne.c |   9 ++
  15 files changed, 263 insertions(+), 15 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/powerpc/setbc.h
  create mode 100644 gcc/testsuite/gcc.target/powerpc/setbceq.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/setbcge.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/setbcgt.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/setbcle.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/setbclt.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/setbcne.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/setnbc.h
  create mode 100644 gcc/testsuite/gcc.target/powerpc/setnbceq.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/setnbcge.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/setnbcgt.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/setnbcle.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/setnbclt.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/setnbcne.c



[PATCH 0/4] rs6000: setnbc and friends [pu]

2020-05-06 Thread Bill Schmidt via Gcc-patches
*** BLURB HERE ***

Bill Schmidt (4):
  Add insns for setbc and setbcr
  Add tests for setbc and setbcr
  Add insns for setnbc and setnbcr
  Add tests for setnbc and setnbcr

 gcc/config/rs6000/rs6000.md | 100 +---
 gcc/testsuite/gcc.target/powerpc/setbc.h|  27 ++
 gcc/testsuite/gcc.target/powerpc/setbceq.c  |   9 ++
 gcc/testsuite/gcc.target/powerpc/setbcge.c  |  12 +++
 gcc/testsuite/gcc.target/powerpc/setbcgt.c  |  10 ++
 gcc/testsuite/gcc.target/powerpc/setbcle.c  |  10 ++
 gcc/testsuite/gcc.target/powerpc/setbclt.c  |  12 +++
 gcc/testsuite/gcc.target/powerpc/setbcne.c  |   9 ++
 gcc/testsuite/gcc.target/powerpc/setnbc.h   |  27 ++
 gcc/testsuite/gcc.target/powerpc/setnbceq.c |   9 ++
 gcc/testsuite/gcc.target/powerpc/setnbcge.c |  12 +++
 gcc/testsuite/gcc.target/powerpc/setnbcgt.c |  10 ++
 gcc/testsuite/gcc.target/powerpc/setnbcle.c |  10 ++
 gcc/testsuite/gcc.target/powerpc/setnbclt.c |  12 +++
 gcc/testsuite/gcc.target/powerpc/setnbcne.c |   9 ++
 15 files changed, 263 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setbc.h
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setbceq.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setbcge.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setbcgt.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setbcle.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setbclt.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setbcne.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setnbc.h
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setnbceq.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setnbcge.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setnbcgt.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setnbcle.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setnbclt.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setnbcne.c

-- 
2.17.1



[PATCH 4/4] rs6000: Tests for setnbc

2020-05-06 Thread Bill Schmidt via Gcc-patches
2020-05-06  Segher Boessenkool  

* gcc.target/powerpc/setnbc.h: New.
* gcc.target/powerpc/setnbceq.c: New.
* gcc.target/powerpc/setnbcge.c: New.
* gcc.target/powerpc/setnbcgt.c: New.
* gcc.target/powerpc/setnbcle.c: New.
* gcc.target/powerpc/setnbclt.c: New.
* gcc.target/powerpc/setnbcne.c: New.
---
 gcc/testsuite/gcc.target/powerpc/setnbc.h   | 27 +
 gcc/testsuite/gcc.target/powerpc/setnbceq.c |  9 +++
 gcc/testsuite/gcc.target/powerpc/setnbcge.c | 12 +
 gcc/testsuite/gcc.target/powerpc/setnbcgt.c | 10 
 gcc/testsuite/gcc.target/powerpc/setnbcle.c | 10 
 gcc/testsuite/gcc.target/powerpc/setnbclt.c | 12 +
 gcc/testsuite/gcc.target/powerpc/setnbcne.c |  9 +++
 7 files changed, 89 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setnbc.h
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setnbceq.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setnbcge.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setnbcgt.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setnbcle.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setnbclt.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setnbcne.c

diff --git a/gcc/testsuite/gcc.target/powerpc/setnbc.h 
b/gcc/testsuite/gcc.target/powerpc/setnbc.h
new file mode 100644
index 000..d278d4a687f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/setnbc.h
@@ -0,0 +1,27 @@
+#define XSTR(a,b) a ## b
+#define T(a,b) XSTR(a,b)
+
+int  T(NAME,ii)(int a, int b)   { return -(a CODE b); }
+int  T(NAME,il)(long a, long b) { return -(a CODE b); }
+long T(NAME,li)(int a, int b)   { return -(a CODE b); }
+long T(NAME,ll)(long a, long b) { return -(a CODE b); }
+
+int  T(NAME,iin0)(int a)  { return -(a CODE 0); }
+int  T(NAME,iln0)(long a) { return -(a CODE 0); }
+long T(NAME,lin0)(int a)  { return -(a CODE 0); }
+long T(NAME,lln0)(long a) { return -(a CODE 0); }
+
+int  T(NAME,iin1)(int a)  { return -(a CODE 1); }
+int  T(NAME,iln1)(long a) { return -(a CODE 1); }
+long T(NAME,lin1)(int a)  { return -(a CODE 1); }
+long T(NAME,lln1)(long a) { return -(a CODE 1); }
+
+int  T(NAME,iinm1)(int a)  { return -(a CODE -1); }
+int  T(NAME,ilnm1)(long a) { return -(a CODE -1); }
+long T(NAME,linm1)(int a)  { return -(a CODE -1); }
+long T(NAME,llnm1)(long a) { return -(a CODE -1); }
+
+int  T(NAME,iin42)(int a)  { return -(a CODE 42); }
+int  T(NAME,iln42)(long a) { return -(a CODE 42); }
+long T(NAME,lin42)(int a)  { return -(a CODE 42); }
+long T(NAME,lln42)(long a) { return -(a CODE 42); }
diff --git a/gcc/testsuite/gcc.target/powerpc/setnbceq.c 
b/gcc/testsuite/gcc.target/powerpc/setnbceq.c
new file mode 100644
index 000..ff4af8f71fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/setnbceq.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=future" } */
+
+#define NAME eq
+#define CODE ==
+
+#include "setnbc.h"
+
+/* { dg-final { scan-assembler-times {\msetnbc\M} 20 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/setnbcge.c 
b/gcc/testsuite/gcc.target/powerpc/setnbcge.c
new file mode 100644
index 000..68ee6bda0d0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/setnbcge.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=future" } */
+
+#define NAME ge
+#define CODE >=
+
+#include "setnbc.h"
+
+/* "x >= 0" is done without setnbc.
+   The generic code sometimes transforms "x >= A" to "x > A-1"; we allow
+   either here.  */
+/* { dg-final { scan-assembler-times {\msetnbcr?\M} 16 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/setnbcgt.c 
b/gcc/testsuite/gcc.target/powerpc/setnbcgt.c
new file mode 100644
index 000..e0f51d49bd1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/setnbcgt.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=future" } */
+
+#define NAME gt
+#define CODE >
+
+#include "setnbc.h"
+
+/* "x > -1" is done without setnbc.  */
+/* { dg-final { scan-assembler-times {\msetnbc\M} 16 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/setnbcle.c 
b/gcc/testsuite/gcc.target/powerpc/setnbcle.c
new file mode 100644
index 000..33a5da96964
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/setnbcle.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=future" } */
+
+#define NAME le
+#define CODE <=
+
+#include "setnbc.h"
+
+/* "x <= -1" is done without setnbc.  */
+/* { dg-final { scan-assembler-times {\msetnbcr\M} 16 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/setnbclt.c 
b/gcc/testsuite/gcc.target/powerpc/setnbclt.c
new file mode 100644
index 000..127d7bfece4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/setnbclt.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=future" } */
+
+#define NAME lt
+#define CODE <
+
+#include "setnbc.h"
+
+/* "x < 0" is done without setnbc.
+   The generic code sometimes transforms "x < A" to "x <= A-1"; 

[PATCH 2/4] rs6000: Tests for setbc

2020-05-06 Thread Bill Schmidt via Gcc-patches
2020-05-06  Segher Boessenkool  

* gcc.target/powerpc/setbc.h: New.
* gcc.target/powerpc/setbceq.c: New.
* gcc.target/powerpc/setbcge.c: New.
* gcc.target/powerpc/setbcgt.c: New.
* gcc.target/powerpc/setbcle.c: New.
* gcc.target/powerpc/setbclt.c: New.
* gcc.target/powerpc/setbcne.c: New.
---
 gcc/testsuite/gcc.target/powerpc/setbc.h   | 27 ++
 gcc/testsuite/gcc.target/powerpc/setbceq.c |  9 
 gcc/testsuite/gcc.target/powerpc/setbcge.c | 12 ++
 gcc/testsuite/gcc.target/powerpc/setbcgt.c | 10 
 gcc/testsuite/gcc.target/powerpc/setbcle.c | 10 
 gcc/testsuite/gcc.target/powerpc/setbclt.c | 12 ++
 gcc/testsuite/gcc.target/powerpc/setbcne.c |  9 
 7 files changed, 89 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setbc.h
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setbceq.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setbcge.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setbcgt.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setbcle.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setbclt.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/setbcne.c

diff --git a/gcc/testsuite/gcc.target/powerpc/setbc.h 
b/gcc/testsuite/gcc.target/powerpc/setbc.h
new file mode 100644
index 000..51334246eca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/setbc.h
@@ -0,0 +1,27 @@
+#define XSTR(a,b) a ## b
+#define T(a,b) XSTR(a,b)
+
+int  T(NAME,ii)(int a, int b)   { return a CODE b; }
+int  T(NAME,il)(long a, long b) { return a CODE b; }
+long T(NAME,li)(int a, int b)   { return a CODE b; }
+long T(NAME,ll)(long a, long b) { return a CODE b; }
+
+int  T(NAME,iin0)(int a)  { return a CODE 0; }
+int  T(NAME,iln0)(long a) { return a CODE 0; }
+long T(NAME,lin0)(int a)  { return a CODE 0; }
+long T(NAME,lln0)(long a) { return a CODE 0; }
+
+int  T(NAME,iin1)(int a)  { return a CODE 1; }
+int  T(NAME,iln1)(long a) { return a CODE 1; }
+long T(NAME,lin1)(int a)  { return a CODE 1; }
+long T(NAME,lln1)(long a) { return a CODE 1; }
+
+int  T(NAME,iinm1)(int a)  { return a CODE -1; }
+int  T(NAME,ilnm1)(long a) { return a CODE -1; }
+long T(NAME,linm1)(int a)  { return a CODE -1; }
+long T(NAME,llnm1)(long a) { return a CODE -1; }
+
+int  T(NAME,iin42)(int a)  { return a CODE 42; }
+int  T(NAME,iln42)(long a) { return a CODE 42; }
+long T(NAME,lin42)(int a)  { return a CODE 42; }
+long T(NAME,lln42)(long a) { return a CODE 42; }
diff --git a/gcc/testsuite/gcc.target/powerpc/setbceq.c 
b/gcc/testsuite/gcc.target/powerpc/setbceq.c
new file mode 100644
index 000..ee3cbffa6f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/setbceq.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=future" } */
+
+#define NAME eq
+#define CODE ==
+
+#include "setbc.h"
+
+/* { dg-final { scan-assembler-times {\msetbc\M} 20 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/setbcge.c 
b/gcc/testsuite/gcc.target/powerpc/setbcge.c
new file mode 100644
index 000..06d58159768
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/setbcge.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=future" } */
+
+#define NAME ge
+#define CODE >=
+
+#include "setbc.h"
+
+/* "x >= 0" is done without setbc.
+   The generic code sometimes transforms "x >= A" to "x > A-1"; we allow
+   either here.  */
+/* { dg-final { scan-assembler-times {\msetbcr?\M} 16 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/setbcgt.c 
b/gcc/testsuite/gcc.target/powerpc/setbcgt.c
new file mode 100644
index 000..864ae3a7e44
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/setbcgt.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=future" } */
+
+#define NAME gt
+#define CODE >
+
+#include "setbc.h"
+
+/* "x > -1" is done without setbc.  */
+/* { dg-final { scan-assembler-times {\msetbc\M} 16 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/setbcle.c 
b/gcc/testsuite/gcc.target/powerpc/setbcle.c
new file mode 100644
index 000..05df4075b1c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/setbcle.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=future" } */
+
+#define NAME le
+#define CODE <=
+
+#include "setbc.h"
+
+/* "x <= -1" is done without setbc.  */
+/* { dg-final { scan-assembler-times {\msetbcr\M} 16 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/setbclt.c 
b/gcc/testsuite/gcc.target/powerpc/setbclt.c
new file mode 100644
index 000..52ffb1fd7e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/setbclt.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=future" } */
+
+#define NAME lt
+#define CODE <
+
+#include "setbc.h"
+
+/* "x < 0" is done without setbc.
+   The generic code sometimes transforms "x < A" to "x <= A-1"; we allow
+   either here.  */
+/* { dg-final { scan-assembler-times {\msetbcr?\M} 16 } } */
diff --git 

[PATCH 1/4] rs6000: New insns setbc and setbcr

2020-05-06 Thread Bill Schmidt via Gcc-patches
New instructions setbc and setbcr.  setbc sets a GPR to 1 if some
condition register bit is set, and 0 otherwise; setbcr does it the
other way around.

2020-05-06  Segher Boessenkool  

* config/rs6000/rs6000.md (setbc_signed_): New
define_insn.
(*setbcr_signed_): Likewise.
(cstore4): Use setbc[r] if available.
(2_isel): Avoid for TARGET_FUTURE.
(eq3): Use setbc for TARGET_FUTURE.
(*eq3): Avoid for TARGET_FUTURE.
(ne3): Replace :P with :GPR; use setbc for TARGET_FUTURE;
else for non-Pmode, use gen_eq and gen_xor.
(*ne3): Avoid for TARGET_FUTURE.
(*eqsi3_ext): Avoid for TARGET_FUTURE; fix missing && 1.
---
 gcc/config/rs6000/rs6000.md | 73 +++--
 1 file changed, 62 insertions(+), 11 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 6173994797c..e8dc576779a 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -5138,6 +5138,25 @@ (define_insn "*isel_reversed_signed_"
 }
   [(set_attr "type" "isel")])
 
+; Set Boolean Condition (Reverse)
+(define_insn "setbc_signed_"
+  [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+   (match_operator:GPR 1 "scc_comparison_operator"
+   [(match_operand:CCEITHER 2 "cc_reg_operand" "y")
+(const_int 0)]))]
+  "TARGET_FUTURE"
+  "setbc %0,%j1"
+  [(set_attr "type" "isel")])
+
+(define_insn "*setbcr_signed_"
+  [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+   (match_operator:GPR 1 "scc_rev_comparison_operator"
+   [(match_operand:CCEITHER 2 "cc_reg_operand" "y")
+(const_int 0)]))]
+  "TARGET_FUTURE"
+  "setbcr %0,%j1"
+  [(set_attr "type" "isel")])
+
 ;; Floating point conditional move
 (define_expand "movcc"
[(set (match_operand:SFDF 0 "gpc_reg_operand")
@@ -11425,6 +11444,10 @@ (define_expand "cstore4"
(clobber (match_operand:GPR 0 "gpc_reg_operand"))]
   ""
 {
+  /* Everything is best done with setbc[r] if available.  */
+  if (TARGET_FUTURE)
+rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
+
   /* Expanding EQ and NE directly to some machine instructions does not help
  but does hurt combine.  So don't.  */
   if (GET_CODE (operands[1]) == EQ)
@@ -11837,7 +11860,7 @@ (define_insn_and_split 
"2_isel"
(clobber (match_scratch:GPR 3 "=r"))
(clobber (match_scratch:GPR 4 "=r"))
(clobber (match_scratch: 5 "=y"))]
-  "TARGET_ISEL
+  "!TARGET_FUTURE && TARGET_ISEL
&& !( == EQ && operands[2] == const0_rtx)
&& !( == NE && operands[2] == const0_rtx
&& mode == Pmode && mode == Pmode)"
@@ -11917,6 +11940,16 @@ (define_expand "eq3"
  (clobber (match_scratch:GPR 4 "=r"))])]
   ""
 {
+  if (TARGET_FUTURE)
+{
+  rtx cc = gen_reg_rtx (CCmode);
+  rtx compare = gen_rtx_COMPARE (CCmode, operands[1], operands[2]);
+  emit_insn (gen_rtx_SET (cc, compare));
+  rtx eq = gen_rtx_fmt_ee (EQ, mode, cc, const0_rtx);
+  emit_insn (gen_setbc_signed_ (operands[0], eq, cc));
+  DONE;
+}
+
   if (TARGET_ISEL && operands[2] != const0_rtx)
 {
   emit_insn (gen_eq2_isel (operands[0], operands[1],
@@ -11931,7 +11964,7 @@ (define_insn_and_split "*eq3"
(match_operand:GPR 2 "scc_eq_operand" "")))
(clobber (match_scratch:GPR 3 "=r"))
(clobber (match_scratch:GPR 4 "=r"))]
-  "!(TARGET_ISEL && operands[2] != const0_rtx)"
+  "!TARGET_FUTURE && !(TARGET_ISEL && operands[2] != const0_rtx)"
   "#"
   "&& 1"
   [(set (match_dup 4)
@@ -11955,14 +11988,32 @@ (define_insn_and_split "*eq3"
 
 (define_expand "ne3"
   [(parallel [
- (set (match_operand:P 0 "gpc_reg_operand" "=r")
- (ne:P (match_operand:P 1 "gpc_reg_operand" "r")
-   (match_operand:P 2 "scc_eq_operand" "")))
- (clobber (match_scratch:P 3 "=r"))
- (clobber (match_scratch:P 4 "=r"))
- (clobber (reg:P CA_REGNO))])]
+ (set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+ (ne:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
+   (match_operand:GPR 2 "scc_eq_operand" "")))
+ (clobber (match_scratch:GPR 3 "=r"))
+ (clobber (match_scratch:GPR 4 "=r"))
+ (clobber (reg:GPR CA_REGNO))])]
   ""
 {
+  if (TARGET_FUTURE)
+{
+  rtx cc = gen_reg_rtx (CCmode);
+  rtx compare = gen_rtx_COMPARE (CCmode, operands[1], operands[2]);
+  emit_insn (gen_rtx_SET (cc, compare));
+  rtx ne = gen_rtx_fmt_ee (NE, mode, cc, const0_rtx);
+  emit_insn (gen_setbc_signed_ (operands[0], ne, cc));
+  DONE;
+}
+
+  if (mode != Pmode)
+{
+  rtx x = gen_reg_rtx (mode);
+  emit_insn (gen_eq3 (x, operands[1], operands[2]));
+  emit_insn (gen_xor3 (operands[0], x, const1_rtx));
+  DONE;
+}
+
   if (TARGET_ISEL && operands[2] != const0_rtx)
 {
   emit_insn (gen_ne2_isel (operands[0], operands[1],
@@ -11978,7 +12029,7 @@ (define_insn_and_split "*ne3"

[PATCH 3/4] rs6000: New insns setnbc and setnbcr

2020-05-06 Thread Bill Schmidt via Gcc-patches
setnbc[r] is like setbc[r], but it writes -1 instead of 1 to the GPR.

2020-05-06  Segher Boessenkool  

* config/rs6000/rs6000.md (*setnbc_signed_): New
define_insn.
(*setnbcr_signed_): New define_insn.
(*neg_eq_): Avoid for TARGET_FUTURE; add missing && 1.
(*neg_ne_): Likewise.
---
 gcc/config/rs6000/rs6000.md | 27 +++
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index e8dc576779a..c02c2e1de72 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -5157,6 +5157,25 @@ (define_insn "*setbcr_signed_"
   "setbcr %0,%j1"
   [(set_attr "type" "isel")])
 
+; Set Negative Boolean Condition (Reverse)
+(define_insn "*setnbc_signed_"
+  [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+   (neg:GPR (match_operator:GPR 1 "scc_comparison_operator"
+   [(match_operand:CCEITHER 2 "cc_reg_operand" "y")
+(const_int 0)])))]
+  "TARGET_FUTURE"
+  "setnbc %0,%j1"
+  [(set_attr "type" "isel")])
+
+(define_insn "*setnbcr_signed_"
+  [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+   (neg:GPR (match_operator:GPR 1 "scc_rev_comparison_operator"
+   [(match_operand:CCEITHER 2 "cc_reg_operand" "y")
+(const_int 0)])))]
+  "TARGET_FUTURE"
+  "setnbcr %0,%j1"
+  [(set_attr "type" "isel")])
+
 ;; Floating point conditional move
 (define_expand "movcc"
[(set (match_operand:SFDF 0 "gpc_reg_operand")
@@ -12062,9 +12081,9 @@ (define_insn_and_split "*neg_eq_"
(clobber (match_scratch:P 3 "=r"))
(clobber (match_scratch:P 4 "=r"))
(clobber (reg:P CA_REGNO))]
-  ""
+  "!TARGET_FUTURE"
   "#"
-  ""
+  "&& 1"
   [(parallel [(set (match_dup 4)
   (plus:P (match_dup 3)
   (const_int -1)))
@@ -12094,9 +12113,9 @@ (define_insn_and_split "*neg_ne_"
(clobber (match_scratch:P 3 "=r"))
(clobber (match_scratch:P 4 "=r"))
(clobber (reg:P CA_REGNO))]
-  ""
+  "!TARGET_FUTURE"
   "#"
-  ""
+  "&& 1"
   [(parallel [(set (match_dup 4)
   (neg:P (match_dup 3)))
  (set (reg:P CA_REGNO)
-- 
2.17.1



Re: [PATCH] rs6000, Fix header comment for intrinsic function

2020-04-29 Thread Bill Schmidt via Gcc-patches

On 4/22/20 1:20 PM, Carl Love wrote:

GCC maintainers:

The following is a trivial patch to fix a comment describing the
intrinsic function _mm_movemask_epi8.  The comment was expanded to
clarify the layout of the returned result.

The patch does not make any functional changes.

Please let me know if the patch is OK for mainline and backporting as
appropriate.

Thanks.

  Carl Love
---
rs6000, Fix header comment for intrinsic function _mm_movemask_epi8

gcc/ChangeLog

2020-04-22  Carl Love  

* config/rs6000/emmintrin.h (_mm_movemask_epi8): Fix comment for the
function.


Drop "for the function" as Will suggested.



Signed-off-by: Carl Love 
---
  gcc/config/rs6000/emmintrin.h | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/emmintrin.h b/gcc/config/rs6000/emmintrin.h
index 2462cf5bdac..0872a75c0de 100644
--- a/gcc/config/rs6000/emmintrin.h
+++ b/gcc/config/rs6000/emmintrin.h
@@ -2033,7 +2033,9 @@ _mm_min_epu8 (__m128i __A, __m128i __B)
  #ifdef _ARCH_PWR8
  /* Intrinsic functions that require PowerISA 2.07 minimum.  */

-/* Creates a 4-bit mask from the most significant bits of the SPFP values.  */
+/* Creates a 16-bit mask from the most significant bits of the sixteen 8-bit
+   values.  The 16-bit result is placed in bits[48:63], bits [0:47] and
+   bits [64:127] are  set to zero.  */


The description of the emulated function is

"Create mask from the most significant bit of each 8-bit element in a, and store the 
result in dst." [*]

Therefore I suggest you change the comment to

"Return a mask created from the most significant bit of each 8-bit element in 
A."

OK to commit as obvious with these changes.

Thanks,
Bill

[*] 
https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_ep&expand=3864


  extern __inline int __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
  _mm_movemask_epi8 (__m128i __A)
  {


Re: [PATCH v2] c++, middle-end, rs6000: Fix C++17 ABI incompatibilities during class layout and [[no_unique_address]] handling [PR94707]

2020-04-28 Thread Bill Schmidt via Gcc-patches



On 4/28/20 10:42 AM, Jakub Jelinek wrote:

On Tue, Apr 28, 2020 at 10:16:24AM -0500, Bill Schmidt via Gcc-patches wrote:

I think this looks good.  My only comment would be to please add some
comments in the test cases about the purpose, or at least to explain
the regexes in the scan-assembler-* directives, to save us all some
mental cycles in the future.

Ok, below in the updated patch:


Thanks!  Perfect.

Bill



2020-04-28  Jakub Jelinek  

PR target/94707
* tree-core.h (tree_decl_common): Note decl_flag_0 used for
DECL_FIELD_ABI_IGNORED.
* tree.h (DECL_FIELD_ABI_IGNORED): Define.
* calls.h (cxx17_empty_base_field_p): Change into a temporary
macro, check DECL_FIELD_ABI_IGNORED flag with no "no_unique_address"
attribute.
* calls.c (cxx17_empty_base_field_p): Remove.
* tree-streamer-out.c (pack_ts_decl_common_value_fields): Handle
DECL_FIELD_ABI_IGNORED.
* tree-streamer-in.c (unpack_ts_decl_common_value_fields): Likewise.
* lto-streamer-out.c (hash_tree): Likewise.
* config/rs6000/rs6000-call.c (rs6000_aggregate_candidate): Rename
cxx17_empty_base_seen to empty_base_seen, change type to int *,
adjust recursive calls, use DECL_FIELD_ABI_IGNORED instead of
cxx17_empty_base_field_p, if "no_unique_address" attribute is
present, propagate that to the caller too.
(rs6000_discover_homogeneous_aggregate): Adjust
rs6000_aggregate_candidate caller, emit different diagnostics
when c++17 empty base fields are present and when empty
[[no_unique_address]] fields are present.
* config/rs6000/rs6000.c (rs6000_special_round_type_align,
darwin_rs6000_special_round_type_align): Skip DECL_FIELD_ABI_IGNORED
fields.

* class.c (build_base_field): Set DECL_FIELD_ABI_IGNORED on C++17 empty
base artificial FIELD_DECLs.
(layout_class_type): Set DECL_FIELD_ABI_IGNORED on empty class
field_poverlapping_p FIELD_DECLs.

* g++.target/powerpc/pr94707-1.C: New test.
* g++.target/powerpc/pr94707-2.C: New test.
* g++.target/powerpc/pr94707-3.C: New test.
* g++.target/powerpc/pr94707-4.C: New test.
* g++.target/powerpc/pr94707-5.C: New test.
* g++.target/powerpc/pr94707-4.C: New test.

--- gcc/tree-core.h.jj  2020-04-08 18:15:36.936946772 +0200
+++ gcc/tree-core.h 2020-04-28 15:14:06.598814022 +0200
@@ -1709,7 +1709,8 @@ struct GTY(()) tree_decl_common {
unsigned lang_flag_8 : 1;
  
/* In VAR_DECL and PARM_DECL, this is DECL_REGISTER

- IN TRANSLATION_UNIT_DECL, this is TRANSLATION_UNIT_WARN_EMPTY_P.  */
+ In TRANSLATION_UNIT_DECL, this is TRANSLATION_UNIT_WARN_EMPTY_P.
+ In FIELD_DECL, this is DECL_FIELD_ABI_IGNORED.  */
unsigned decl_flag_0 : 1;
/* In FIELD_DECL, this is DECL_BIT_FIELD
   In VAR_DECL and FUNCTION_DECL, this is DECL_EXTERNAL.
--- gcc/tree.h.jj   2020-04-08 18:15:36.939946727 +0200
+++ gcc/tree.h  2020-04-28 15:13:07.579695258 +0200
@@ -2750,6 +2750,13 @@ extern void decl_value_expr_insert (tree
  /* In a FIELD_DECL, indicates this field should be bit-packed.  */
  #define DECL_PACKED(NODE) (FIELD_DECL_CHECK (NODE)->base.u.bits.packed_flag)
  
+/* In a FIELD_DECL, indicates this field should be ignored for ABI decisions

+   like passing/returning containing struct by value.
+   Set for C++17 empty base artificial FIELD_DECLs as well as
+   empty [[no_unique_address]] non-static data members.  */
+#define DECL_FIELD_ABI_IGNORED(NODE) \
+  (FIELD_DECL_CHECK (NODE)->decl_common.decl_flag_0)
+
  /* Nonzero in a FIELD_DECL means it is a bit field, and must be accessed
 specially.  */
  #define DECL_BIT_FIELD(NODE) (FIELD_DECL_CHECK 
(NODE)->decl_common.decl_flag_1)
--- gcc/calls.h.jj  2020-04-27 14:31:09.123020831 +0200
+++ gcc/calls.h 2020-04-28 15:26:29.221724466 +0200
@@ -135,6 +135,9 @@ extern tree get_attr_nonstring_decl (tre
  extern void maybe_warn_nonstring_arg (tree, tree);
  extern bool get_size_range (tree, tree[2], bool = false);
  extern rtx rtx_for_static_chain (const_tree, bool);
-extern bool cxx17_empty_base_field_p (const_tree);
+/* FIXME: Remove after all backends are converted.  */
+#define cxx17_empty_base_field_p(t) \
+  (DECL_FIELD_ABI_IGNORED (t)  \
+   && !lookup_attribute ("no_unique_address", DECL_ATTRIBUTES (t)))
  
  #endif // GCC_CALLS_H

--- gcc/calls.c.jj  2020-04-27 14:31:09.117020922 +0200
+++ gcc/calls.c 2020-04-28 15:26:42.276529517 +0200
@@ -6261,23 +6261,5 @@ must_pass_va_arg_in_stack (tree type)
return targetm.calls.must_pass_in_stack (arg);
  }
  
-/* Return true if FIELD is the C++17 empty base field that should

-   be ignored for ABI calling convention decisions in order to
-   maintain ABI compatibility between C++14 and earlier, which doesn't
-   add this FIELD to classes 

Re: [PATCH] middle-end, rs6000: Handle empty [[no_unique_address]] fields like empty bases on powerpc64le-linux [PR94707]

2020-04-28 Thread Bill Schmidt via Gcc-patches

On 4/28/20 6:38 AM, Jakub Jelinek via Gcc-patches wrote:

Hi!

Ok, I've tried:
struct X { };
struct Y { int : 0; };
struct Z { int : 0; Y y; };
struct U : public X { X q; };
struct A { float a, b, c, d; };
struct B : public X { float a, b, c, d; };
struct C : public Y { float a, b, c, d; };
struct D : public Z { float a, b, c, d; };
struct E : public U { float a, b, c, d; };
struct F { [[no_unique_address]] X x; float a, b, c, d; };
struct G { [[no_unique_address]] Y y; float a, b, c, d; };
struct H { [[no_unique_address]] Z z; float a, b, c, d; };
struct I { [[no_unique_address]] U u; float a, b, c, d; };
struct J { float a, b; [[no_unique_address]] X x; float c, d; };
struct K { float a, b; [[no_unique_address]] Y y; float c, d; };
struct L { float a, b; [[no_unique_address]] Z z; float c, d; };
struct M { float a, b; [[no_unique_address]] U u; float c, d; };
#define T(S, s) extern S s; extern void foo##s (S); int bar##s () { foo##s (s); return 0; }
T (A, a)
T (B, b)
T (C, c)
T (D, d)
T (E, e)
T (F, f)
T (G, g)
T (H, h)
T (I, i)
T (J, j)
T (K, k)
T (L, l)
T (M, m)
testcase on powerpc64-linux.  Results:
G++ 9 -std=c++14                A, B, C passed in fprs, the rest in gprs
G++ 9 -std=c++17                A passed in fprs, the rest in gprs
current trunk -std=c++14 & 17   A, B, C passed in fprs, the rest in gprs
patched trunk -std=c++14 & 17   A, B, C, F, G, J, K passed in fprs, the 
rest in gprs
clang++ [*] -std=c++14 & 17 A, B, C, F, G, J, K passed in fprs, the rest in gprs
[*] clang version 11.0.0 (git@github.com:llvm/llvm-project.git 
5c352e69e76a26e4eda075e20aa6a9bb7686042c)

Is that what we want?  I think it matches the stated intent of P0840R2 or
what Jason/Jonathan said, and doing something different like e.g. not
treating C, G and K as homogeneous because of the int : 0 in empty bases
or in zero sized [[no_unique_address]] fields would be quite hard to
implement (because for C++14 the FIELD_DECL just isn't there).


Without commenting on the patch itself, I agree that this is what we want.
Thank you for the thorough testing!

Same comment as the other patch about test case comments.

Bill



I've included the above testcase as g++.target/powerpc/ testcases.

2020-04-28  Jakub Jelinek  

PR target/94707
* calls.h (no_unique_address_empty_field_p): Declare.
* calls.c (no_unique_address_empty_field_p): New function.
* rs6000-call.c (rs6000_aggregate_candidate): Rename
cxx17_empty_base_seen to empty_base_seen, change type to int *,
adjust recursive calls, ignore also no_unique_address_empty_field_p
fields and propagate that fact to caller.
(rs6000_discover_homogeneous_aggregate): Adjust
rs6000_aggregate_candidate caller, emit different diagnostics
when c++17 empty base fields are present and when empty
[[no_unique_address]] fields are present.

* g++.target/powerpc/pr94707-1.C: New test.
* g++.target/powerpc/pr94707-2.C: New test.
* g++.target/powerpc/pr94707-3.C: New test.
* g++.target/powerpc/pr94707-4.C: New test.

--- gcc/calls.h.jj  2020-04-27 14:31:09.123020831 +0200
+++ gcc/calls.h 2020-04-28 12:38:35.292851412 +0200
@@ -136,5 +136,6 @@ extern void maybe_warn_nonstring_arg (tr
  extern bool get_size_range (tree, tree[2], bool = false);
  extern rtx rtx_for_static_chain (const_tree, bool);
  extern bool cxx17_empty_base_field_p (const_tree);
+extern bool no_unique_address_empty_field_p (const_tree);
  
  #endif // GCC_CALLS_H

--- gcc/calls.c.jj  2020-04-27 14:31:09.117020922 +0200
+++ gcc/calls.c 2020-04-28 12:39:11.936308866 +0200
@@ -6279,5 +6279,24 @@ cxx17_empty_base_field_p (const_tree fie
  && !integer_zerop (TYPE_SIZE (TREE_TYPE (field))));
  }
  
+/* Return true if FIELD is a non-static data member with empty

+   type and [[no_unique_address]] attribute that should be
+   ignored for ABI calling convention decisions, in order to make
+   struct S {};
+   struct T : S { float x; };
+   and
+   struct T2 : { [[no_unique_address]] S s; float x; };
+   ABI compatible.  */
+
+bool
+no_unique_address_empty_field_p (const_tree field)
+{
+  return (TREE_CODE (field) == FIELD_DECL
+ && RECORD_OR_UNION_TYPE_P (TREE_TYPE (field))
+ && DECL_SIZE (field)
+ && integer_zerop (DECL_SIZE (field))
+ && lookup_attribute ("no_unique_address", DECL_ATTRIBUTES (field)));
+}
+
  /* Tell the garbage collector about GTY markers in this source file.  */
  #include "gt-calls.h"
--- gcc/config/rs6000/rs6000-call.c.jj  2020-04-23 14:42:26.323839084 +0200
+++ gcc/config/rs6000/rs6000-call.c 2020-04-28 12:43:28.277513460 +0200
@@ -5529,7 +5529,7 @@ const struct altivec_builtin_types altiv
  
  static int

  rs6000_aggregate_candidate (const_tree type, machine_mode *modep,
-   bool *cxx17_empty_base_seen)
+   int *empty_base_seen)
  {
machine_mode mode;
HOST_WIDE_INT size;
@@ 

Re: [PATCH v2] c++, middle-end, rs6000: Fix C++17 ABI incompatibilities during class layout and [[no_unique_address]] handling [PR94707]

2020-04-28 Thread Bill Schmidt via Gcc-patches

Jakub, thanks for continuing to track down and fix all these cases.

I think this looks good.  My only comment would be to please add some
comments in the test cases about the purpose, or at least to explain
the regexes in the scan-assembler-* directives, to save us all some
mental cycles in the future.

Need Segher/David to approve the rs6000 bits, of course.

Thanks!
Bill


On 4/28/20 9:04 AM, Jakub Jelinek via Gcc-patches wrote:

Hi!

On Tue, Apr 28, 2020 at 08:53:31AM -0400, Jason Merrill wrote:

That sounds good.

So like this?  Or better name for the new macro?

The calls.h macro is there only after all the backends are converted
to use ABI_IGNORED_FIELD_P.

Not sure if I shouldn't
if (lookup_attribute ("no_unique_address", DECL_ATTRIBUTES (field))
ABI_IGNORED_FIELD_P (field) = 1;
in end_of_class, as there seems to be some ObjC++ partially overlapping
case too and probably we don't want to change ABI for that.

This patch is a merge of the powerpc64le-linux patch for
[[no_unique_address]] and the powerpc*-{darwin,aix}* patch with the
langhook etc. removed for ABI_IGNORED_FIELD_P.

Untested so far.

2020-04-28  Jakub Jelinek  

PR target/94707
* tree-core.h (tree_decl_common): Note decl_flag_0 used for
ABI_IGNORED_FIELD_P.
* tree.h (ABI_IGNORED_FIELD_P): Define.
* calls.h (cxx17_empty_base_field_p): Change into a temporary
macro, check ABI_IGNORED_FIELD_P flag with no "no_unique_address"
attribute.
* calls.c (cxx17_empty_base_field_p): Remove.
* tree-streamer-out.c (pack_ts_decl_common_value_fields): Handle
ABI_IGNORED_FIELD_P.
* tree-streamer-in.c (unpack_ts_decl_common_value_fields): Likewise.
* lto-streamer-out.c (hash_tree): Likewise.
* config/rs6000/rs6000-call.c (rs6000_aggregate_candidate): Rename
cxx17_empty_base_seen to empty_base_seen, change type to int *,
adjust recursive calls, use ABI_IGNORED_FIELD_P instead of
cxx17_empty_base_field_p, if "no_unique_address" attribute is
present, propagate that to the caller too.
(rs6000_discover_homogeneous_aggregate): Adjust
rs6000_aggregate_candidate caller, emit different diagnostics
when c++17 empty base fields are present and when empty
[[no_unique_address]] fields are present.
* config/rs6000/rs6000.c (rs6000_special_round_type_align,
darwin_rs6000_special_round_type_align): Skip ABI_IGNORED_FIELD_P
fields.

* class.c (build_base_field): Set ABI_IGNORED_FIELD_P on C++17 empty
base artificial FIELD_DECLs.
(end_of_class): Set ABI_IGNORED_FIELD_P on empty class
field_poverlapping_p FIELD_DECLs.

* g++.target/powerpc/pr94707-1.C: New test.
* g++.target/powerpc/pr94707-2.C: New test.
* g++.target/powerpc/pr94707-3.C: New test.
* g++.target/powerpc/pr94707-4.C: New test.
* g++.target/powerpc/pr94707-5.C: New test.
* g++.target/powerpc/pr94707-4.C: New test.

--- gcc/tree-core.h.jj  2020-04-08 18:15:36.936946772 +0200
+++ gcc/tree-core.h 2020-04-28 15:14:06.598814022 +0200
@@ -1709,7 +1709,8 @@ struct GTY(()) tree_decl_common {
unsigned lang_flag_8 : 1;
  
/* In VAR_DECL and PARM_DECL, this is DECL_REGISTER

- IN TRANSLATION_UNIT_DECL, this is TRANSLATION_UNIT_WARN_EMPTY_P.  */
+ In TRANSLATION_UNIT_DECL, this is TRANSLATION_UNIT_WARN_EMPTY_P.
+ In FIELD_DECL, this is ABI_IGNORED_FIELD_P.  */
unsigned decl_flag_0 : 1;
/* In FIELD_DECL, this is DECL_BIT_FIELD
   In VAR_DECL and FUNCTION_DECL, this is DECL_EXTERNAL.
--- gcc/tree.h.jj   2020-04-08 18:15:36.939946727 +0200
+++ gcc/tree.h  2020-04-28 15:13:07.579695258 +0200
@@ -2750,6 +2750,13 @@ extern void decl_value_expr_insert (tree
  /* In a FIELD_DECL, indicates this field should be bit-packed.  */
  #define DECL_PACKED(NODE) (FIELD_DECL_CHECK (NODE)->base.u.bits.packed_flag)
  
+/* In a FIELD_DECL, indicates this field should be ignored for ABI decisions

+   like passing/returning containing struct by value.
+   Set for C++17 empty base artificial FIELD_DECLs as well as
+   empty [[no_unique_address]] non-static data members.  */
+#define ABI_IGNORED_FIELD_P(NODE) \
+  (FIELD_DECL_CHECK (NODE)->decl_common.decl_flag_0)
+
  /* Nonzero in a FIELD_DECL means it is a bit field, and must be accessed
 specially.  */
  #define DECL_BIT_FIELD(NODE) (FIELD_DECL_CHECK 
(NODE)->decl_common.decl_flag_1)
--- gcc/calls.h.jj  2020-04-27 14:31:09.123020831 +0200
+++ gcc/calls.h 2020-04-28 15:26:29.221724466 +0200
@@ -135,6 +135,9 @@ extern tree get_attr_nonstring_decl (tre
  extern void maybe_warn_nonstring_arg (tree, tree);
  extern bool get_size_range (tree, tree[2], bool = false);
  extern rtx rtx_for_static_chain (const_tree, bool);
-extern bool cxx17_empty_base_field_p (const_tree);
+/* FIXME: Remove after all backends are converted.  */
+#define cxx17_empty_base_field_p(t) \
+  

[PATCH] rs6000: Replace outdated link to ELFv2 ABI

2020-04-23 Thread Bill Schmidt via Gcc-patches
A user reported that we are still referring to a public review
draft of the ELFv2 ABI specification.  Replace that by a permalink.

Tested with "make pdf" and verified the link is hot.  Is this okay
for master?

Thanks,
Bill

2020-04-24  Bill Schmidt  

* gcc/doc/extend.texi (PowerPC AltiVec/VSX Built-in Functions):
Replace outdated link to ELFv2 ABI.
---
 gcc/doc/extend.texi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index c17b1040bde..936c22e2fe7 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -17633,7 +17633,7 @@ subject to change without notice.
 
 GCC complies with the OpenPOWER 64-Bit ELF V2 ABI Specification,
 which may be found at
-@uref{http://openpowerfoundation.org/wp-content/uploads/resources/leabi-prd/content/index.html}.
+@uref{https://openpowerfoundation.org/?resource_lib=64-bit-elf-v2-abi-specification-power-architecture}.
 Appendix A of this document lists the vector API interfaces that must be
 provided by compliant compilers.  Programmers should preferentially use
 the interfaces described therein.  However, historically GCC has provided
-- 
2.17.1



Re: [PATCH] rs6000: Fix C++14 vs. C++17 ABI bug on powerpc64le [PR94707]

2020-04-22 Thread Bill Schmidt via Gcc-patches

On 4/22/20 11:49 AM, Jakub Jelinek wrote:

On Wed, Apr 22, 2020 at 11:24:09AM -0500, Bill Schmidt wrote:

Hm, but this patch violates the ELFv2 ABI as written.  The ABI includes:

"Floating-point and vector aggregates that contain padding words and
integer fields with a width of 0 should not be treated as homogeneous
aggregates."

struct S {};
struct T : public S { float x, y, z; };
doesn't really contain any padding words nor integer fields with a width of
0, it is laid out as x at offset 0, y at offset sizeof (float) and
z at offset sizeof (float) * 2, the struct has sizeof (T) == sizeof (float) * 3.
The ELFv2 ABI certainly can't talk about how GCC implements them and what
TYPE_FIELDS chain elts should be honored and what should be ignored,
it must talk about what is in the source.
struct S {};
struct U { struct S s; float x, y, z; };
in C is considered homogeneous and is laid out exactly the same, and so is
T in C++14, just in C++17 as an implementation detail the C++ FE adds a base
field.


OK, on reflection I'll accept that.  Thanks for the explanation.

Bill


Short description + the patch that introduced it is
https://gcc.gnu.org/legacy-ml/gcc-patches/2016-10/msg01213.html

Jakub



Re: [PATCH] rs6000: Fix C++14 vs. C++17 ABI bug on powerpc64le [PR94707]

2020-04-22 Thread Bill Schmidt via Gcc-patches

On 4/22/20 8:11 AM, Jakub Jelinek via Gcc-patches wrote:

Hi!

As mentioned in the PR and on IRC, the recently added struct-layout-1.exp
new tests FAIL on powerpc64le-linux (among other targets).
FAIL: tmpdir-g++.dg-struct-layout-1/t032 cp_compat_x_tst.o-cp_compat_y_tst.o 
execute
FAIL: tmpdir-g++.dg-struct-layout-1/t058 cp_compat_x_tst.o-cp_compat_y_tst.o 
execute
FAIL: tmpdir-g++.dg-struct-layout-1/t059 cp_compat_x_tst.o-cp_compat_y_tst.o 
execute
in particular.  The problem is that the presence or absence of the C++17
artificial empty base fields, which have non-zero TYPE_SIZE, but zero
DECL_SIZE, change the ABI decisions, if it is present (-std=c++17), the type
might not be considered homogeneous, while if it is absent (-std=c++14), it
can be.

The following patch fixes that and emits a -Wpsabi inform; perhaps more
often than it could, because the fact that rs6000_discover_homogeneous_aggregate
returns true when it didn't in GCC 7/8/9 with -std=c++17 doesn't still
mean it will make a different ABI decision, but the warning triggered only
on the test I've changed (the struct-layout-1.exp tests use -w -Wno-psabi
already).


Hm, but this patch violates the ELFv2 ABI as written.  The ABI includes:

"Floating-point and vector aggregates that contain padding words and
integer fields with a width of 0 should not be treated as homogeneous
aggregates."

So if this patch is accepted, it requires an exception in the ABI
document specifically for C++17 empty base fields.  Are these base fields
required by the C++17 specification?  We can't change the ABI just
based on a single implementation if it is not required.  If it is
required, I don't immediately foresee a problem with updating the ABI.

Thanks,
Bill



Bootstrapped/regtested on powerpc64le-linux, bootstrapped on powerpc64-linux
where regtest is still pending, but homogeneous aggregates are an ELFv2
thing, so I don't expect it to change anything (and so far no such messages
appear in the testsuite log files).

Ok for trunk?

2020-04-22  Jakub Jelinek  

PR target/94707
* config/rs6000/rs6000-call.c (rs6000_aggregate_candidate): Add
CXX17_EMPTY_BASE_SEEN argument.  Pass it to recursive calls.
Ignore cxx17_empty_base_field_p fields after setting
*CXX17_EMPTY_BASE_SEEN to true.
(rs6000_discover_homogeneous_aggregate): Adjust
rs6000_aggregate_candidate caller.  With -Wpsabi, diagnose homogeneous
aggregates with C++17 empty base fields.

* g++.dg/tree-ssa/pr27830.C: Use -Wpsabi -w for -std=c++17 and higher.

--- gcc/config/rs6000/rs6000-call.c.jj  2020-03-30 22:53:40.746640328 +0200
+++ gcc/config/rs6000/rs6000-call.c 2020-04-22 13:05:07.947809888 +0200
@@ -5528,7 +5528,8 @@ const struct altivec_builtin_types altiv
 sub-tree.  */
  
  static int

-rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
+rs6000_aggregate_candidate (const_tree type, machine_mode *modep,
+   bool *cxx17_empty_base_seen)
  {
machine_mode mode;
HOST_WIDE_INT size;
@@ -5598,7 +5599,8 @@ rs6000_aggregate_candidate (const_tree t
|| TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
  return -1;
  
-	count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);

+   count = rs6000_aggregate_candidate (TREE_TYPE (type), modep,
+   cxx17_empty_base_seen);
if (count == -1
|| !index
|| !TYPE_MAX_VALUE (index)
@@ -5636,7 +5638,14 @@ rs6000_aggregate_candidate (const_tree t
if (TREE_CODE (field) != FIELD_DECL)
  continue;
  
-	sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);

+   if (cxx17_empty_base_field_p (field))
+ {
+   *cxx17_empty_base_seen = true;
+   continue;
+ }
+
+   sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep,
+   cxx17_empty_base_seen);
if (sub_count < 0)
  return -1;
count += sub_count;
@@ -5669,7 +5678,8 @@ rs6000_aggregate_candidate (const_tree t
if (TREE_CODE (field) != FIELD_DECL)
  continue;
  
-	sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);

+   sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep,
+   cxx17_empty_base_seen);
if (sub_count < 0)
  return -1;
count = count > sub_count ? count : sub_count;
@@ -5710,7 +5720,9 @@ rs6000_discover_homogeneous_aggregate (m
&& AGGREGATE_TYPE_P (type))
  {
machine_mode field_mode = VOIDmode;
-  int field_count = rs6000_aggregate_candidate (type, &field_mode);
+  bool cxx17_empty_base_seen = false;
+  int field_count = rs6000_aggregate_candidate (type, &field_mode,
+   &cxx17_empty_base_seen);
  
   

Re: [RFC] split pseudos during loop unrolling in RTL unroller

2020-04-17 Thread Bill Schmidt via Gcc-patches

On 4/17/20 1:53 AM, Richard Biener wrote:

Yeah well, but RTL is not in SSA form and there's no RTL IL verification
in place to track degradation.  And we even work in the opposite way
when expanding to RTL from SSA, coalescing as much as we can ...



Which is itself problematic, introducing unnecessary antidependences at 
the start of RTL.  We've seen performance issues with this on several 
occasions.


Bill




Re: [PATCH,rs6000] Add command line and builtin compatibility

2020-03-11 Thread Bill Schmidt via Gcc-patches



On 3/11/20 2:00 PM, Carl Love wrote:

GCC maintainers:

The following patch adds a check to make sure the user did not specify
-mno-fprnd with the builtins __builtin_vsx_xsrdpim and
__builtin_vsx_xsrdpip.  These builtins are incompatible with the
-mno-fprnd command-line option.  The check prevents GCC crashing under
these conditions.

Manually tested the patch on

   powerpc64le-unknown-linux-gnu (Power 8 LE)
   powerpc64le-unknown-linux-gnu (Power 9 LE)

as follows:

gcc -mno-fprnd -g -c vsx-builtin-3.c
vsx-builtin-3.c: In function ‘do_math’:
vsx-builtin-3.c:145:3: error: __builtin_vsx_xsrdpim is incompatible
with mno-fprnd option
  145 |   z[i][0] = __builtin_vsx_xsrdpim (z[i][1]); i++;
  |   ^
vsx-builtin-3.c:146:3: error: __builtin_vsx_xsrdpip is incompatible
with mno-fprnd option
  146 |   z[i][0] = __builtin_vsx_xsrdpip (z[i][1]); i++;
  |   ^

I read thru the source code looking for other builtins with the same
issue.  I also created a script to compile all of the tests in
gcc/testsuite/gcc.target/powerpc with the -mno-fprnd option to check
for additional builtins that are incompatible with the -mno-fprnd
option.  These were the only two builtins that were identified as being
incompatible with the -mno-fprnd option.

Please let me know if the patch looks OK for mainline.  Thanks.

  Carl Love

---
rs6000: Add command line and builtin compatibility check

PR/target 87583

gcc/ChangeLog

2020-03-10  Carl Love  

* gcc/config/rs6000/rs6000-c.c
(altivec_resolve_overloaded_builtin):
Add check for TARGET_FPRND and VSX_BUILTIN_XSRDPIM,
VSX_BUILTIN_XSRDPIP
compatibility.
---
  gcc/config/rs6000/rs6000-c.c | 8 
  1 file changed, 8 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-
c.c
index 8c1fbbf..6820782 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -915,6 +915,14 @@ altivec_resolve_overloaded_builtin (location_t
loc, tree fndecl,
const struct altivec_builtin_types *desc;
unsigned int n;

+  /* Check builtin for command line argument conflicts.  */
+  if (!TARGET_FPRND &&
+  (fcode == VSX_BUILTIN_XSRDPIM || fcode == VSX_BUILTIN_XSRDPIP))
{
+  error ("%s is incompatible with mno-fprnd option",



I believe you need %qs here.  Also replace mno-fprnd with %qs and put 
"-mno-fprnd" as the associated parameter.


Example from nearby code:   error ("%qs requires %qs", "-mdirect-move", 
"-mvsx");


Thanks,
Bill


+IDENTIFIER_POINTER (DECL_NAME (fndecl)));
+  return error_mark_node;
+  }
+
if (!rs6000_overloaded_builtin_p (fcode))
  return NULL_TREE;



[PATCH] rs6000: Fix -mpower9-vector -mno-altivec ICE (PR87560)

2020-03-02 Thread Bill Schmidt
PR87560 reports an ICE when a test case is compiled with -mpower9-vector
and -mno-altivec.  This patch terminates compilation with an error when
this combination (and other unreasonable ones) are requested.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Reported error is now:

f951: Error: '-mno-altivec' turns off '-mpower9-vector'

Is this okay for master, and for backport to releases/gcc-9 after the
9.3 release?  There's no urgency in getting this in 9.3.

Thanks,
Bill

2020-03-02  Bill Schmidt  

* rs6000-cpus.def (OTHER_ALTIVEC_MASKS): New #define.
* rs6000.c (rs6000_disable_incompatible_switches): Add table entry
for OPTION_MASK_ALTIVEC.
---
 gcc/config/rs6000/rs6000-cpus.def | 4 
 gcc/config/rs6000/rs6000.c| 1 +
 2 files changed, 5 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-cpus.def 
b/gcc/config/rs6000/rs6000-cpus.def
index 193d77eb954..ff1db6019de 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -101,6 +101,10 @@
 | OPTION_MASK_FLOAT128_KEYWORD \
 | OPTION_MASK_P8_VECTOR)
 
+/* Flags that need to be turned off if -mno-altivec.  */
+#define OTHER_ALTIVEC_MASKS    (OTHER_VSX_VECTOR_MASKS \
+| OPTION_MASK_VSX)
+
 #define POWERPC_7400_MASK  (OPTION_MASK_PPC_GFXOPT | OPTION_MASK_ALTIVEC)
 
 /* Deal with ports that do not have -mstrict-align.  */
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 9910b27ed24..ecbf7ae0c59 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -23632,6 +23632,7 @@ rs6000_disable_incompatible_switches (void)
 { OPTION_MASK_P9_VECTOR,   OTHER_P9_VECTOR_MASKS,  "power9-vector" },
 { OPTION_MASK_P8_VECTOR,   OTHER_P8_VECTOR_MASKS,  "power8-vector" },
 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx"   },
+{ OPTION_MASK_ALTIVEC, OTHER_ALTIVEC_MASKS,"altivec"   },
   };
 
   for (i = 0; i < ARRAY_SIZE (flags); i++)
-- 
2.17.1



Re: GLIBC libmvec status

2020-03-02 Thread Bill Schmidt

On 3/2/20 11:10 AM, Tulio Magno Quites Machado Filho wrote:

Bill Schmidt  writes:


One tiny nit on the document:  For the "b" <isa> value, let's just say "VSX"
rather than "VSX as defined in PowerISA v2.07)."  We will plan to only change
<isa> values in case a different vector length is defined in future.

That change would have more implications: all libmvec functions would have to
work on Power ISA v2.06 HW too.  But half of the functions do use v2.07
instructions now.


Ah, I see.  Well, then language such as "VSX defined at least at the level of
PowerISA v2.07" would be appropriate.  We want to define a minimum subset 
without
further implied constraint.  (Higher levels can be handled with ifunc without
needing to reference this in the ABI, as previously discussed.)

Thanks,
Bill



Re: GLIBC libmvec status

2020-03-02 Thread Bill Schmidt

On 2/28/20 10:31 AM, Jakub Jelinek wrote:

On Fri, Feb 28, 2020 at 04:23:03PM +0000, GT wrote:

Do we want to change the name and title of the document since Segher doesn't 
believe it
is an ABI. My initial suggestion: "POWER Architecture Specification of Scalar 
Function
to Vector Function Mapping".

It is an ABI, similarly like e.g. the C++ Itanium ABI is an ABI, it specifies
mangling of certain functions and how the function argument types and return
types are transformed.


Agreed, let's leave that as is.

One tiny nit on the document:  For the "b" <isa> value, let's just say "VSX"
rather than "VSX as defined in PowerISA v2.07)."  We will plan to only change
<isa> values in case a different vector length is defined in future.

Looks good otherwise!

Thanks,
Bill



Jakub



Re: GLIBC libmvec status

2020-02-27 Thread Bill Schmidt

On 2/27/20 2:21 PM, Bill Schmidt wrote:



On 2/27/20 12:48 PM, GT wrote:


Done.

The updated document is at:
https://sourceware.org/glibc/wiki/HomePage?action=AttachFile&do=view&target=powerarchvectfuncabi.html


Looks good.  Can you please also remove the 'c' ABI from the mangling, as 
earlier agreed?

Thanks!
Bill



Re: GLIBC libmvec status

2020-02-27 Thread Bill Schmidt



On 2/27/20 12:48 PM, GT wrote:

‐‐‐ Original Message ‐‐‐
On Thursday, February 27, 2020 9:26 AM, Bill Schmidt  
wrote:


Upon reflection, I agree.  Bert, we need to make changes to the document to
reflect this:

(1) "Calling convention" should refer to ELFv1 for powerpc64 and ELFv2 for
powerpc64le.

Done. Have provided names and links to respective ABI documents but no longer
explicitly refer to ELF version.


(2) "Vector Length" should remove bullet 3, strike the word
"nonhomogeneous" in bullet 4, and strike the parenthetical clause in
bullet 4.
(3) "Ordering of Vector Arguments" should remove the example involving
homogeneous aggregates.


Done.


It also occurs to me that for bullets 4 and 5 in "Vector Length", the
CDT should be long long, not int, since we pass aggregates in pieces in
64-bit registers and/or chunks of memory.


That determination of Vector Length is common for all architectures and is
implemented in function simd_clone_compute_base_data_type. If we do really
need PPC64 to be different, we'll have to allow the function to be replaced
by architecture-specific versions. Before we do that, do you have
an example of code which ends up with incorrect vectorization with the
existing CDT of int?


No, and I'll withdraw the suggestion.  It seems rather arbitrary in any event.

Thanks for the updates!

Bill




Other small bugs:
  - Bullet 4 says "the CDT determine by a) or b) above", but the referents
should be "(1) or (2)" instead.
  - First line of "Compiler generated variants of vector functions" has
a typo ("umasked").


Done.

The updated document is at:
https://sourceware.org/glibc/wiki/HomePage?action=AttachFile&do=view&target=powerarchvectfuncabi.html


Re: GLIBC libmvec status

2020-02-27 Thread Bill Schmidt



On 2/27/20 9:30 AM, Jakub Jelinek wrote:

On Thu, Feb 27, 2020 at 09:19:25AM -0600, Bill Schmidt wrote:

On 2/27/20 8:52 AM, Jakub Jelinek wrote:

On Thu, Feb 27, 2020 at 08:47:19AM -0600, Bill Schmidt wrote:

But is this actually a good idea?  It seems to me this will generate lousy
code in the absence of hardware support.  Won't we be better off warning and
ignoring the directive, leaving the code in scalar form?

Depends on the exact code, I think sometimes it will be just fine and will
allow vectorizing something that really couldn't be otherwise.
Isn't it better to leave it for the user to decide?
They can always ask for it not to be generated (add notinbranch) if it isn't
worthwhile.

You need a high ratio of unguarded code to guarded code in order to pay for all
those vector extract and reconstruct operations.  Sure, some code will be fine,
but a lot of code will be lousy.  This will be particularly true on older
hardware with a less exhaustive set of vector operations.

Why?  E.g. for integral code other than division or memory loads/stores where
nothing will really trap, you can just perform it unguarded.
Just use whatever the vectorizer does right now for conditional code, and if
that isn't as efficient as it could be given a particular HW/ISA, try to improve
it?


If that's how the vectorizer is working today, then my concerns are certainly
lessened.  It's been a while since I've seen how the vectorizer and 
if-conversion
interact, so my perspective is probably outdated.  We'll take a look at it.

Thanks for the discussion!

Bill



I really don't see how is it different say from SSE2 on x86 or even AVX.

Jakub



Re: GLIBC libmvec status

2020-02-27 Thread Bill Schmidt

On 2/27/20 8:52 AM, Jakub Jelinek wrote:

On Thu, Feb 27, 2020 at 08:47:19AM -0600, Bill Schmidt wrote:

But is this actually a good idea?  It seems to me this will generate lousy
code in the absence of hardware support.  Won't we be better off warning and
ignoring the directive, leaving the code in scalar form?

Depends on the exact code, I think sometimes it will be just fine and will
allow vectorizing something that really couldn't be otherwise.
Isn't it better to leave it for the user to decide?
They can always ask for it not to be generated (add notinbranch) if it isn't
worthwhile.


You need a high ratio of unguarded code to guarded code in order to pay for all
those vector extract and reconstruct operations.  Sure, some code will be fine,
but a lot of code will be lousy.  This will be particularly true on older
hardware with a less exhaustive set of vector operations.

In the lousy-code case, my concern is that the user won't be savvy enough to
understand they should add notinbranch.  They'll just notice that their code
runs badly on Power and either complain (good, then we can explain it) or
abandon porting existing code to Power (very bad, and we may never know).
I don't like the downside, and the upside is quite unpredictable.

Bill



Jakub



Re: GLIBC libmvec status

2020-02-27 Thread Bill Schmidt

On 2/26/20 8:31 AM, Jakub Jelinek wrote:

On Wed, Feb 26, 2020 at 07:55:53AM -0600, Bill Schmidt wrote:

The hope is that we can create a vectorized version that returns values
in registers rather than the by-ref parameters, and add code to GCC to
copy things around correctly following the call.  Ideally the signature of
the vectorized version would be sth like

   struct retval {vector double, vector double};
   retval vecsincos (vector double);

In the typical case where calls to sincos are of the form

   sincos (val[i], &sinval[i], &cosval[i]);

this would allow us to only store the values in the caller upon return,
rather than store them in the callee and potentially reload them
immediately in the caller.  On some Power CPUs, the latter behavior can
result in somewhat costly stalls if the consecutive accesses hit a timing
window.

But can't you do
#pragma omp declare simd linear(sinp, cosp)
void sincos (double x, double *sinp, double *cosp);
?
That is something the vectorizer code could handle and for
   for (int i = 0; i < 1024; i++)
 sincos (val[i], &sinval[i], &cosval[i]);
just vectorize it as
   for (int i = 0; i < 1024; i += vf)
 _ZGVbN8vl8l8_sincos (*(vector double *)&val[i], &sinval[i], &cosval[i]);
Anything else will need specialized code to handle sincos specially in the
vectorizer.


After reading all the discussion on this thread, yes, I agree for now.
It will be good for everybody if we can get the vectorized cexpi sorted
out at some point, which will give us a superior interface.


If you feel it isn't possible to do this, then we can abandon it.  Right
now my understanding is that GCC doesn't vectorize calls to sincos yet
for any targets, so it would be moot except that we really should define
what happens for the future.

This calling convention would also be useful in the future for vectorizing
functions that return complex values either by value or by reference.

Only by value, you really don't know what the code does if something is
passed by reference, whether it is read, written into, or both etc.
And for _Complex {float,double}, e.g. the Intel ABI already specifies how to
pass them, just GCC isn't able to do that right now.


Per the fork of the thread with Segher, I've cried uncle on the specifics
of the calling convention. :)




Well, as a matter of practicality, we don't have any of that implemented
in the rs6000 back end, and we don't have any free resources to do that
in GCC 11.  Is there any documentation about what needs to be done to
support this?  I've always been under the impression that vectorizing for
masking when there isn't any hardware support is a losing proposition, so
we've not investigated it.

You don't need to do pretty much anything, except set
clonei->mask_mode = VOIDmode, I think the generic code should handle
everything beyond that, in particular add the mask argument and use it
both on the caller side and on the expansion of the to be vectorized clone.


But is this actually a good idea?  It seems to me this will generate lousy
code in the absence of hardware support.  Won't we be better off warning and
ignoring the directive, leaving the code in scalar form?

If and when we have hardware support for vector masking, I'll be happy to
remove this restriction, but I need more convincing to do it now.

Thanks,
Bill



Jakub



Re: GLIBC libmvec status

2020-02-27 Thread Bill Schmidt



On 2/27/20 4:52 AM, Segher Boessenkool wrote:

On Tue, Feb 25, 2020 at 07:43:09PM -0600, Bill Schmidt wrote:

The reason that homogeneous aggregates matter (at least somewhat) is that
the ABI ^H^H^H^HAPI requires establishing a calling convention and a name-
mangling formula that includes the length of parameters and return values.
Since ELFv2 and ELFv1 do not have the same calling convention, and ELFv2
has a superior one, we chose to use ELFv2's calling convention and make use
of homogeneous aggregates for return values in registers for the case of
vectorized sincos.

Please look at the document to see the constraints we're under to fit into
the different OpenMP clauses and attributes.  It seems to me that we can
only define this for both powerpc64 and powerpc64le by establishing two
different calling conventions, which provides two different vector length
calculations for the sincos return value, and therefore requires two
different function implementations with different mangled names.  (Either
that, or we cripple vectorized sincos by requiring it to return values
through memory.)

I still don't see it.  For all ABIs the length of the arguments and
return value is the same, and homogeneous aggregates doesn't factor
in at all; that is just a detail whether something is passed in
registers or memory (as we have with many other ABIs as well, fwiw).

So why make this part of the mangling rules?

It is perfectly fine to design this with ELFv2 in mind, of course, but
making a dependency on the (current!) (very complex!) ELFv2 rules for
absolutely no reason at all is a mistake, in my opinion.


Upon reflection, I agree.  Bert, we need to make changes to the document to
reflect this:

(1) "Calling convention" should refer to ELFv1 for powerpc64 and ELFv2 for
powerpc64le.
(2) "Vector Length" should remove bullet 3, strike the word
"nonhomogeneous" in bullet 4, and strike the parenthetical clause in
bullet 4.
(3) "Ordering of Vector Arguments" should remove the example involving
homogeneous aggregates.

It also occurs to me that for bullets 4 and 5 in "Vector Length", the
CDT should be long long, not int, since we pass aggregates in pieces in
64-bit registers and/or chunks of memory.

Other small bugs:
 - Bullet 4 says "the CDT determine by a) or b) above", but the referents
should be "(1) or (2)" instead.
 - First line of "Compiler generated variants of vector functions" has
a typo ("umasked").

Segher, thanks for smacking my recalcitrant head until it understands...

Thanks,
Bill




Segher


Re: GLIBC libmvec status

2020-02-26 Thread Bill Schmidt

On 2/26/20 2:18 AM, Jakub Jelinek wrote:

On Tue, Feb 25, 2020 at 07:43:09PM -0600, Bill Schmidt wrote:

The reason that homogeneous aggregates matter (at least somewhat) is that
the ABI ^H^H^H^HAPI requires establishing a calling convention and a name-
mangling formula that includes the length of parameters and return values.
Since ELFv2 and ELFv1 do not have the same calling convention, and ELFv2
has a superior one, we chose to use ELFv2's calling convention and make use
of homogeneous aggregates for return values in registers for the case of
vectorized sincos.

Can you please explain how do you want to pass the
void sincos (double, double *, double *);
arguments?  I must say it isn't entirely clear from the document.
You talk there about double[2], but sincos certainly doesn't have such an
argument.


The hope is that we can create a vectorized version that returns values
in registers rather than the by-ref parameters, and add code to GCC to
copy things around correctly following the call.  Ideally the signature of
the vectorized version would be sth like

  struct retval {vector double, vector double};
  retval vecsincos (vector double);

In the typical case where calls to sincos are of the form

  sincos (val[i], &sinval[i], &cosval[i]);

this would allow us to only store the values in the caller upon return,
rather than store them in the callee and potentially reload them
immediately in the caller.  On some Power CPUs, the latter behavior can
result in somewhat costly stalls if the consecutive accesses hit a timing
window.

If you feel it isn't possible to do this, then we can abandon it.  Right
now my understanding is that GCC doesn't vectorize calls to sincos yet
for any targets, so it would be moot except that we really should define
what happens for the future.

This calling convention would also be useful in the future for vectorizing
functions that return complex values either by value or by reference.



Also, I'd say ignoring the masked variants is a mistake, are you going to
warn any time the user uses inbranch or even doesn't specify notinbranch?
The masking can be implemented even without highly specialized instructions,
e.g. on x86 only AVX512F has full masking support, for older ISAs all that
is there is conditional store or e.g. for integral operations that can't
trap/raise exceptions just doing blend-like operations (or even and/or) is
all that is needed; just let the vectorizer do its job.


Well, as a matter of practicality, we don't have any of that implemented
in the rs6000 back end, and we don't have any free resources to do that
in GCC 11.  Is there any documentation about what needs to be done to
support this?  I've always been under the impression that vectorizing for
masking when there isn't any hardware support is a losing proposition, so
we've not investigated it.

Thanks,
Bill



Even if you don't want it for libmvec, just use
__attribute__((simd ("notinbranch")))
for those, but allow the user to use it where it makes sense.

Jakub



Re: GLIBC libmvec status

2020-02-25 Thread Bill Schmidt



On 2/25/20 12:45 PM, Segher Boessenkool wrote:

Hi!

On Tue, Feb 25, 2020 at 04:53:17PM +0000, GT wrote:

‐‐‐ Original Message ‐‐‐
On Sunday, February 23, 2020 11:45 AM, Bill Schmidt  
wrote:

As I just wrote on gcc-patches, we should disable libmvec for powerpc64.
The vector ABI as written isn't compatible with ELFv1.  We would need
a modified ABI that doesn't allow homogeneous aggregates of vectors to
be returned in registers in order to support ELFv1.  I do not believe
that is worth pursuing until and unless there is demand for it (which
I do not expect).

Are we all agreed that the POWER Vector Function ABI will be implemented only
for powerpc64le?

I do not agree.

I don't agree we should have a new ABI, and an API (which this *is* as
far as I can tell) works fine on *any* ABI.  Homogeneous aggregates has
nothing to do with anything either.

It is fine to only *support* powerpc64le-linux, sure.  But don't fragment
the implementation, it only hurts, never helps -- we will end up having
to support ten or twenty different compilers, instead of one compiler
with a few (mostly) orthogonal variations.  And yes, we should also test
everything everywhere, whenever reasonable.


Thanks, Segher.  Let me ask for some clarification here on how you'd like
us to proceed.

The reason that homogeneous aggregates matter (at least somewhat) is that
the ABI ^H^H^H^HAPI requires establishing a calling convention and a name-
mangling formula that includes the length of parameters and return values.
Since ELFv2 and ELFv1 do not have the same calling convention, and ELFv2
has a superior one, we chose to use ELFv2's calling convention and make use
of homogeneous aggregates for return values in registers for the case of
vectorized sincos.

Please look at the document to see the constraints we're under to fit into
the different OpenMP clauses and attributes.  It seems to me that we can
only define this for both powerpc64 and powerpc64le by establishing two
different calling conventions, which provides two different vector length
calculations for the sincos return value, and therefore requires two
different function implementations with different mangled names.  (Either
that, or we cripple vectorized sincos by requiring it to return values
through memory.)

Now, we can either write a document that handles both cases now (describes
both calling conventions), and force glibc to have two different functions at
least for the sincos case; or we can restrict this particular document to
ELFv2 and leave open the possibility of writing a very similar but slightly
different document for ELFv1 at such time as someone wants to use ELFv1 for
libmvec.  I'd personally rather push that extra work out until we know
there's a market for it.  That is, I don't want to preclude its use for
ELFv1, but this *particular* API is specific to ELFv2, so we need to
acknowledge that in the code.

Ultimately it's your call, but if we need to rewrite the ABI/API we're
going to need concrete proposals for how to do that.

Thanks,
Bill



For the glibc side I have no opinion.


Segher


Re: [PATCH] rs6000: Use -mno-pcrel for tests requiring TOC

2020-02-25 Thread Bill Schmidt

On 2/25/20 4:00 PM, Segher Boessenkool wrote:

On Mon, Feb 24, 2020 at 03:20:30PM -0600, Bill Schmidt wrote:

These two test cases have assembly code that requires a valid TOC.
Make sure that we don't use the PC-relative ABI extensions that
violate this requirement.

Tested and verified on powerpc64le-unknown-linux-gnu.  Is this
okay for trunk?

Okay, thanks!

Should there be a testsuite/gcc.target/powerpc/ppc64-abi-pcrel.c as well?


I don't think we need anything to replace these particular tests.  They
are testing correct parameter passing, and we have exhaustive testing
for those aspects in the ABI compatibility testsuite (and they don't
change for PC-relative).  We also have a number of tests that were
added specifically for the TOC-removal aspects of PC-relative addressing
when those pieces went in.  It's of course always true that more tests
are better, but I don't think we're losing coverage with this patch.

Thanks,
Bill




Segher


[PATCH] rs6000: Use -mno-pcrel for tests requiring TOC

2020-02-24 Thread Bill Schmidt
These two test cases have assembly code that requires a valid TOC.
Make sure that we don't use the PC-relative ABI extensions that
violate this requirement.

Tested and verified on powerpc64le-unknown-linux-gnu.  Is this
okay for trunk?

Thanks,
Bill

2020-02-24  Bill Schmidt  

* gcc.target/powerpc/ppc64-abi-1.c: Compile with -mno-pcrel.
* gcc.target/powerpc/ppc64-abi-2.c: Likewise.
---
 gcc/testsuite/gcc.target/powerpc/ppc64-abi-1.c | 2 +-
 gcc/testsuite/gcc.target/powerpc/ppc64-abi-2.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/ppc64-abi-1.c 
b/gcc/testsuite/gcc.target/powerpc/ppc64-abi-1.c
index 921c46862e2..731c0d880aa 100644
--- a/gcc/testsuite/gcc.target/powerpc/ppc64-abi-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/ppc64-abi-1.c
@@ -1,6 +1,6 @@
 /* { dg-do run { target { powerpc*-*-* && lp64 } } } */
 /* { dg-skip-if "" { *-*-darwin* } } */
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -mno-pcrel" } */
 
 typedef __builtin_va_list va_list;
 #define va_start(ap, arg) __builtin_va_start (ap, arg)
diff --git a/gcc/testsuite/gcc.target/powerpc/ppc64-abi-2.c 
b/gcc/testsuite/gcc.target/powerpc/ppc64-abi-2.c
index eb1df7a2dc9..b490fc3c2fd 100644
--- a/gcc/testsuite/gcc.target/powerpc/ppc64-abi-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/ppc64-abi-2.c
@@ -1,5 +1,5 @@
 /* { dg-do run { target { { powerpc*-*-linux* && lp64 } && powerpc_altivec_ok 
} } } */
-/* { dg-options "-O2 -fprofile -mprofile-kernel -maltivec -mabi=altivec" } */
+/* { dg-options "-O2 -fprofile -mprofile-kernel -maltivec -mabi=altivec 
-mno-pcrel" } */
 #include 
 #include 
 #include 
-- 
2.17.1



Re: [RFC PATCH v0] PPC64: Implement POWER Architecure Vector Function ABI.

2020-02-24 Thread Bill Schmidt



On 2/24/20 11:08 AM, Jakub Jelinek wrote:

On Mon, Feb 24, 2020 at 11:04:55AM -0600, Bill Schmidt wrote:

+  if (clonei->simdlen
+  && (clonei->simdlen < 2
+      || clonei->simdlen > 1024

Assuming that clonei->simdlen matches "vector length" in the ABI, 1024 is
too large a number.  We can have at most 8 vector registers containing
a homogeneous aggregate, each having up to 16 elements, so the correct
limit would be 128.

Well, further arguments can be passed on the stack...


Well, ELFv2 doesn't define such a thing as a qualified homogeneous aggregate.
See rs6000_discover_homogeneous_aggregate and "Parameter Passing in
Registers" in ELFv2.  So the entire aggregate would be passed in memory,
not just the excess after 128 bytes.  I don't think this is necessarily
something we want to encourage in an interface intended to improve
performance.  Is there any reason we need to permit a larger value?  Do we
need to add this constraint to rs6000_simd_clone_usable?

Thanks,
Bill



Jakub



Re: [RFC PATCH v0] PPC64: Implement POWER Architecure Vector Function ABI.

2020-02-24 Thread Bill Schmidt

On 2/14/20 2:24 PM, GT wrote:

Function rs6000_simd_clone_adjust, even though its body is empty,
cannot simply be removed. I tried it. It resulted in ICE. In my
view, leaving it empty is preferable to modifying other files
unrelated to rs6000.c in order to avoid having a function whose
body is empty.

Bert.
From 1e8feec5e90ff1a879849714c8d2ea143e77e154 Mon Sep 17 00:00:00 2001
From: Bert Tenjy 
Date: Fri, 14 Feb 2020 13:31:53 -0600
Subject: [RFC PATCH v0] PPC64: Implement POWER Architecure Vector Function
 ABI.

The Vector Function ABI document is tentatively located at:
<https://github.com/power8-abi-doc/vector-function-abi>

Bill Schmidt of IBM Linux Tech. Center has committed to eventually
integrating this ABI into the official POWER Architecture specifications.
He is a GCC and Toolchain Architect so this should ease concerns over
how much to trust an ABI which is not an official release.

The implementation is very similar to those of x86_64 SSE and Aarch64.

The major test of this patch autovectorizes math functions and so requires
libmvec. PPC64 libmvec functionality is only available on GLIBC branch
tuliom/libmvec. Until that branch is merged to master, testing this ABI
will mean checking out GLIBC branch tuliom/libmvec, building and 
installing
to a non-system directory. Likewise, GCC will have to be built then 
installed

so that it doesn't interfere with the system's GCC.

Compiling with newly-built GCC against newly-built GLIBC requires these
options to GCC:
-L "${glibc_install_dir}/lib"
-I "${glibc_install_dir}/include"
-Wl,--rpath="${glibc_install_dir}/lib"
-Wl,--dynamic-linker="${glibc_install_dir}/lib/ld64.so.2"
---
 gcc/config/rs6000/rs6000.c | 152 +
 1 file changed, 152 insertions(+)

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index fc36bb6714b..3329c96e6cc 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1266,6 +1266,147 @@ static const struct attribute_spec 
rs6000_attribute_table[] =

 #endif
   { NULL,    0, 0, false, false, false, false, NULL, NULL }
 };
+
+/* Implement TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN.  */
+
+static int
+rs6000_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
+    struct cgraph_simd_clone *clonei,
+    tree base_type, int num)
+{
+  int ret = 1;
+
+  if (clonei->simdlen
+  && (clonei->simdlen < 2
+      || clonei->simdlen > 1024


Assuming that clonei->simdlen matches "vector length" in the ABI, 1024 is
too large a number.  We can have at most 8 vector registers containing
a homogeneous aggregate, each having up to 16 elements, so the correct
limit would be 128.


+      || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
+    {
+  warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+          "unsupported simdlen %d", clonei->simdlen);
+  return 0;
+    }
+
+  tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
+  if (TREE_CODE (ret_type) != VOID_TYPE)
+    switch (TYPE_MODE (ret_type))
+  {
+  case E_QImode:
+  case E_HImode:
+  case E_SImode:
+  case E_DImode:
+  case E_SFmode:
+  case E_DFmode:
+  /* case E_SCmode: */
+  /* case E_DCmode: */


Remove the two preceding lines.


+    if (!AGGREGATE_TYPE_P (ret_type))
+      break;
+    /* FALLTHRU */
+  default:
+    warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+        "unsupported return type %qT for simd", ret_type);
+    return 0;
+  }
+
+  tree t;
+  int i;
+  tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
+  bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
+
+  for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : 
type_arg_types), i = 0;

+   t && t != void_list_node; t = TREE_CHAIN (t), i++)
+    {
+  tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
+  switch (TYPE_MODE (arg_type))
+    {
+    case E_QImode:
+    case E_HImode:
+    case E_SImode:
+    case E_DImode:
+    case E_SFmode:
+    case E_DFmode:
+    /* case E_SCmode: */
+    /* case E_DCmode: */


Again, remove the two preceding lines.


+      if (!AGGREGATE_TYPE_P (arg_type))
+    break;
+      /* FALLTHRU */
+    default:
+      if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
+    break;
+      warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+          "unsupported argument type %qT for simd", arg_type);
+      return 0;
+    }
+    }
+
+  if (TARGET_VSX)
+    {
+  clonei->vecsize_mangle = 'b';
+  ret = 1;
+    }
+  clonei->mask_mode = VOIDmode;
+  switch (clonei->vecsize_mangle)
+    {
+    case 'b':
+  clonei->vecsize_int = 128;
+  clonei->vecsize_float = 128;
+  break;
+    default:
+  gcc_unreacha

Re: [RFC PATCH v0] PPC64: Implement POWER Architecure Vector Function ABI.

2020-02-24 Thread Bill Schmidt



On 2/23/20 1:12 PM, Segher Boessenkool wrote:

On Sun, Feb 23, 2020 at 10:55:53AM -0600, Bill Schmidt wrote:

Though I'm usually uncomfortable with kicking the can down the road on these
sorts of things, I can probably be convinced in this case.  Tulio and I were
wondering why the libmvec interface doesn't make use of ifunc capability for
this sort of thing.  Something to look into when more advanced
implementations
are added later, I guess, and a valid reason to not lock ourselves into the
'c' ABI today.

I have some questions, too.  Why is this called an ABI at all?  Why will
it not work with *any* underlying ABI?  What *is* this, what is it *for*,
where is the documentation, where is the design documentation?  Etc.


So, I can answer a small amount of this, but I will say that overall, design
or implementation documentation seems to be between lacking and nonexistent.

This has to do with "#pragma omp simd" and providing the rules for vectorizing
functions into calls to libmvec.  I tend to agree that it is a bit more like
API than ABI, but we are just fitting into existing frameworks and using the
same nomenclature.

The ABI that Bert has produced for us is available at
https://sourceware.org/glibc/wiki/HomePage?action=AttachFile&do=view&target=powerarchvectfuncabi.html.

It is based on a similar document for x86.  I believe this is the most up-to-
date version:
https://software.intel.com/sites/default/files/managed/b4/c8/Intel-Vector-Function-ABI.pdf

There's been ongoing work in the same area for AArch64, with the most recent
version available here, I believe:
https://developer.arm.com/docs/101129/latest

I am not certain of the completion status of the AArch64 implementation.
The libmvec implementation for Intel has been around for a long time.

Hope that at least starts to help,
Bill



Segher


Re: [RFC PATCH v0] PPC64: Implement POWER Architecure Vector Function ABI.

2020-02-24 Thread Bill Schmidt



On 2/23/20 11:33 AM, Jakub Jelinek wrote:

On Sun, Feb 23, 2020 at 10:42:17AM -0600, Bill Schmidt wrote:

Have I missed something crucial?

I haven't seen anything in the patch that would only enable it for ELFv2,
and while powerpc64le-linux probably assumes TARGET_VSX unconditionally
(haven't verified), powerpc64-linux or powerpc-linux certainly doesn't.
And it is just fine to have the ABI for those pass/return vectors in VSX
registers too, after all, it won't be used if the vectorized caller isn't
TARGET_VSX, the definitions of the declare simd functions could be compiled
with different ISA options.  And, if the ABI says that the 'b' stuff assumes
certain ISA extensions, if the declare simd function definition is compiled
with e.g. -mno-vsx -mno-altivec, it would either not be able to get the
arguments/return values at all, or wouldn't benefit from the ISA guarantees
the ABI gives to it.

It's a problem with the patch that it doesn't limit the ABI to ELFv2.  That is
necessary, because there are aspects of the vector ABI that are incompatible
with ELFv1.  In particular, ELFv1 doesn't support returning homogeneous
aggregates of vectors in vector registers, which is called for in the proposed
sincos interface, and would also be needed for vectorized complex functions.

Is it really a problem?  I mean, it is perfectly fine if the declare simd
variants have a different ABI from the normal ABI, just for #pragma omp
declare variant it will be desirable if there is some attribute for such
different (or just slightly amended) ABI.  And vector complex is not an
issue right now, we punt on those on all architectures.


Well, it's a problem in the sense that the ABI as written does not work on
ELFv1, so until we have an ABI that does, we shouldn't allow it for anything
but ELFv2.  (Sections that don't apply to ELFv1 are Calling Convention, Vector
Length, Ordering of Vector Arguments.)

I know vector complex isn't currently an issue; just thinking ahead for
future potential improvements.

Bill



Jakub



Re: [RFC PATCH v0] PPC64: Implement POWER Architecure Vector Function ABI.

2020-02-23 Thread Bill Schmidt

On 2/20/20 1:14 PM, GT wrote:

‐‐‐ Original Message ‐‐‐
On Wednesday, February 19, 2020 12:33 PM, Bill Schmidt  
wrote:


The reason 'c' was added to the ABI is this mailing list discussion:
https://sourceware.org/ml/libc-alpha/2019-11/msg00765.html
As long as 'b' specifies that the VSX functionality is that specified in ISA 
v2.07,
I suggest that we delete the reference to 'c' in the ABI. Bill, Tulio?

No, I don't think that's the right call.  We want to leverage ISA 3.0
instructions in vector implementations when they are available, so we
need the 'c' ABI for that purpose.  In future we are likely to add a
'd' ABI for a future processor if it adds more vector capability.  So
emitting both and letting the vectorized callers choose, as Jakub
suggests, seems like the right way to go.  This is true even if the
current implementations are identical (i.e., don't exploit any ISA
3.0 instructions).


Because of the issue at 
https://gcc.gnu.org/ml/gcc-patches/2020-02/msg01171.html, I
am coming back to whether or not to include VSX extensions for ISA 3.0 in the 
Vector
Function ABI Specification.

If we retain 'c' in the ABI Spec., then GCC will expect libmvec functions such 
as
_ZGVcN2v_sin. The changes made to GLIBC for POWER libmvec don't have these 
functions
with <isa> == 'c'. Only those with <isa> == 'b' have been implemented. So we 
have to
do either of:

1. Create all those 'c' variants in GLIBC libmvec, even though they will be 
identical
to the existing 'b' versions.
2. Remove all references to 'c' in the ABI Specification, and leave GCC 
expecting to
find only 'b' variants in libmvec.

If/when it becomes necessary to have 'c' variants of functions, then a new 
version of
the Vector Function ABI document will be created. And GLIBC and GCC 
modifications to
comply with that new ABI will be made then.


Though I'm usually uncomfortable with kicking the can down the road on these
sorts of things, I can probably be convinced in this case.  Tulio and I were
wondering why the libmvec interface doesn't make use of ifunc capability for
this sort of thing.  Something to look into when more advanced implementations
are added later, I guess, and a valid reason to not lock ourselves into the
'c' ABI today.

Tulio, any concerns?

Bill



Bert.


Re: [RFC PATCH v0] PPC64: Implement POWER Architecure Vector Function ABI.

2020-02-23 Thread Bill Schmidt



On 2/14/20 4:09 PM, Jakub Jelinek wrote:

On Fri, Feb 14, 2020 at 10:02:39PM +0000, GT wrote:

Function rs6000_simd_clone_adjust, even though its body is empty,
cannot simply be removed. I tried it. It resulted in ICE. In my
view, leaving it empty is preferable to modifying other files
unrelated to rs6000.c in order to avoid having a function whose
body is empty.

So shouldn't the callback set target attribute (on definitions) to "vsx"?


I did consider doing something similar to aarch64_simd_clone_adjust. But the 
reason
Aarch64 has a new attribute aarch64_vector_pcs is that they implemented a 
modified
function calling sequence for vector functions. PPC64 vector functions use the 
existing
function calling sequence spelled out in the 64-bit ELFv2 ABI. So with no new 
attribute
here, the function body ends up empty.

Have I missed something crucial?

I haven't seen anything in the patch that would only enable it for ELFv2,
and while powerpc64le-linux probably assumes TARGET_VSX unconditionally
(haven't verified), powerpc64-linux or powerpc-linux certainly doesn't.
And it is just fine to have the ABI for those pass/return vectors in VSX
registers too, after all, it won't be used if the vectorized caller isn't
TARGET_VSX, the definitions of the declare simd functions could be compiled
with different ISA options.  And, if the ABI says that the 'b' stuff assumes
certain ISA extensions, if the declare simd function definition is compiled
with e.g. -mno-vsx -mno-altivec, it would either not be able to get the
arguments/return values at all, or wouldn't benefit from the ISA guarantees
the ABI gives to it.


It's a problem with the patch that it doesn't limit the ABI to ELFv2.  That is
necessary, because there are aspects of the vector ABI that are incompatible
with ELFv1.  In particular, ELFv1 doesn't support returning homogeneous
aggregates of vectors in vector registers, which is called for in the proposed
sincos interface, and would also be needed for vectorized complex functions.

Bill



BTW, in the ABI document there isn't just 'b', but also 'c' ABI, it is
unclear if one needs to always emit both (e.g. like on x86 we emit 'b', 'c',
'd' and 'e') and then let the vectorized callers choose based on what ISA
options it is compiled with.

Jakub



Re: [RFC PATCH v0] PPC64: Implement POWER Architecure Vector Function ABI.

2020-02-19 Thread Bill Schmidt



On 2/19/20 1:10 PM, GT wrote:

‐‐‐ Original Message ‐‐‐
On Wednesday, February 19, 2020 12:33 PM, Bill Schmidt  
wrote:


The reason 'c' was added to the ABI is this mailing list discussion:
https://sourceware.org/ml/libc-alpha/2019-11/msg00765.html
As long as 'b' specifies that the VSX functionality is that specified in ISA 
v2.07,
I suggest that we delete the reference to 'c' in the ABI. Bill, Tulio?

No, I don't think that's the right call.  We want to leverage ISA 3.0
instructions in vector implementations when they are available, so we
need the 'c' ABI for that purpose.  In future we are likely to add a
'd' ABI for a future processor if it adds more vector capability.  So
emitting both and letting the vectorized callers choose, as Jakub
suggests, seems like the right way to go.  This is true even if the
current implementations are identical (i.e., don't exploit any ISA
3.0 instructions).


Here are proposed modifications:

1. In the Vector Function ABI document, under section "Vector Function Name 
Mangling",
state that all <isa> vector variants will be created by the compiler. And that 
it will
be up to the caller of vectorized functions to select the preferred version 
('b' or 'c'
are the only choices presently).

2. Change rs6000_simd_clone_usable so that it more closely resembles 
ix86_simd_clone_usable.
The switch statement in that function will add a clause for 'c' to the existing 
one for 'b'.
I'm not sure what to test for in the 'c' clause. In x86_64, they have 
TARGET_SSE2, TARGET_AVX,
TARGET_AVX2 and TARGET_AVX512. PPC64 has only TARGET_VSX as best I can 
determine. There are
macros PPC_FEATURE2_ARCH_2_07 and PPC_FEATURE2_ARCH_3_00 in ppc-auxv.h. Can we 
use these 2
macros where x86_64 uses the macros with prefix TARGET_ ?



You can use TARGET_P9_VECTOR for this.

Thanks,
Bill



Bert.


Re: [RFC PATCH v0] PPC64: Implement POWER Architecure Vector Function ABI.

2020-02-19 Thread Bill Schmidt
Sorry I missed this discussion until now, I have been out of the office 
much of the last week.


On 2/16/20 2:10 PM, GT wrote:

‐‐‐ Original Message ‐‐‐
On Friday, February 14, 2020 5:09 PM, Jakub Jelinek ja...@redhat.com wrote:


On Fri, Feb 14, 2020 at 10:02:39PM +0000, GT wrote:


Function rs6000_simd_clone_adjust, even though its body is empty,
cannot simply be removed. I tried it. It resulted in ICE. In my
view, leaving it empty is preferable to modifying other files
unrelated to rs6000.c in order to avoid having a function whose
body is empty.

So shouldn't the callback set target attribute (on definitions) to "vsx"?

I did consider doing something similar to aarch64_simd_clone_adjust. But the 
reason
Aarch64 has a new attribute aarch64_vector_pcs is that they implemented a 
modified
function calling sequence for vector functions. PPC64 vector functions use the 
existing
function calling sequence spelled out in the 64-bit ELFv2 ABI. So with no new 
attribute
here, the function body ends up empty.
Have I missed something crucial?

I haven't seen anything in the patch that would only enable it for ELFv2,


The idea is that the vector functionality defined in the ABI is guaranteed only
on systems that implement the ELFv2 ABI. It's possible that the functionality 
also
works on ELFv1 Big-Endian PPC64. I'll check if that's the case. If so, then the 
ABI
will need modification.


and while powerpc64le-linux probably assumes TARGET_VSX unconditionally
(haven't verified), powerpc64-linux or powerpc-linux certainly doesn't.


The last function in the patch, rs6000_simd_clone_usable, returns a value that 
will
disable use of vector variants if TARGET_VSX is undefined.


And it is just fine to have the ABI for those pass/return vectors in VSX
registers too, after all, it won't be used if the vectorized caller isn't
TARGET_VSX,

Don't quite understand the comment here. Are you stating the possibility of
a system that has VSX hardware but does not define macro TARGET_VSX?


the definitions of the declare simd functions could be compiled
with different ISA options.

Do you mean the 'b' vs 'c' in the ABI's vector function name mangling?


And, if the ABI says that the 'b' stuff assumes
certain ISA extensions, if the declare simd function definition is compiled
with e.g. -mno-vsx -mno-altivec, it would either not be able to get the
arguments/return values at all, or wouldn't benefit from the ISA guarantees
the ABI gives to it.


Not sure if you expect a response here.


BTW, in the ABI document there isn't just 'b', but also 'c' ABI, it is
unclear if one needs to always emit both (e.g. like on x86 we emit 'b', 'c',
'd' and 'e') and then let the vectorized callers choose based on what ISA
options it is compiled with.


The reason 'c' was added to the ABI is this mailing list discussion:

https://sourceware.org/ml/libc-alpha/2019-11/msg00765.html

As long as 'b' specifies that the VSX functionality is that specified in ISA 
v2.07,
I suggest that we delete the reference to 'c' in the ABI. Bill, Tulio?



No, I don't think that's the right call.  We want to leverage ISA 3.0
instructions in vector implementations when they are available, so we
need the 'c' ABI for that purpose.  In future we are likely to add a
'd' ABI for a future processor if it adds more vector capability.  So
emitting both and letting the vectorized callers choose, as Jakub
suggests, seems like the right way to go.  This is true even if the
current implementations are identical (i.e., don't exploit any ISA
3.0 instructions).

Again, sorry for the tardy response!

Bill



Bert.


Re: [PATCH], Rename and document PowerPC -mprefixed-addr to -mprefixed

2020-02-10 Thread Bill Schmidt



On 2/10/20 9:24 PM, Segher Boessenkool wrote:

Hi!

On Mon, Feb 10, 2020 at 01:45:42PM -0500, Michael Meissner wrote:

This patch renames the PowerPC internal switch -mprefixed-addr to be
-mprefixed.
If you use -mpcrel, you must be using the 64-bit ELF v2 ABI, and the code model
must be medium.

Currently, anyway.


If you use -mpcrel, the compiler will generate PC-relative
loads and stores to access items, rather than the current TOC based loads and
stores.

Where that is the best thing to do.  Is that always now?  :-)



Yes.  :-)

Bill




If you use -mpcrel, it implies -mprefixed.  If you use -mno-prefixed, you
cannot use -mpcrel.

-mno-prefixed should imply -mno-pcrel; does it?


* doc/invoke.texi (RS/6000 and PowerPC Options): Docment the

(typo)


--- /tmp/1ySv8k_invoke.texi 2020-02-07 17:56:52.700489015 -0500
+++ gcc/doc/invoke.texi 2020-02-07 17:34:02.925611138 -0500
@@ -22327,7 +22328,6 @@ faster on processors with 32-bit busses
  aligns structures containing the above types differently than
  most published application binary interface specifications for the m68k.
  
-@item -mpcrel

  @opindex mpcrel
  Use the pc-relative addressing mode of the 68000 directly, instead of
  using a global offset table.  At present, this option implies @option{-fpic},

This isn't a correct change.

Okay for trunk modulo the m68k change.  Thanks!


Segher


rs6000: Correct documentation for __builtin_mtfsf

2020-02-06 Thread Bill Schmidt

Hi,

PR93570 reports that the documentation shows __builtin_mtfsf to return a double,
but that is incorrect.  The return signature should be void.  Corrected herein.

Built on powerpc64le-unknown-linux-gnu and verified correct PDF output.  
Committed
as obvious.

Thanks!
Bill


2020-02-06  Bill Schmidt  

PR target/93570
* doc/extend.texi (Basic PowerPC Built-in Functions): Correct
prototype for __builtin_mtfsf.

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index ec99c38a607..5739063b330 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -17166,7 +17166,7 @@ unsigned long __builtin_ppc_mftb ();
 double __builtin_unpack_ibm128 (__ibm128, int);
 __ibm128 __builtin_pack_ibm128 (double, double);
 double __builtin_mffs (void);
-double __builtin_mtfsf (const int, double);
+void __builtin_mtfsf (const int, double);
 void __builtin_mtfsb0 (const int);
 void __builtin_mtfsb1 (const int);
 void __builtin_set_fpscr_rn (int);



Re: [PATCH 00/14] rs6000: Begin replacing built-in support

2020-02-05 Thread Bill Schmidt



On 2/5/20 6:30 AM, Segher Boessenkool wrote:

Hi!

On Wed, Feb 05, 2020 at 08:57:16AM +0100, Richard Biener wrote:

On Tue, Feb 4, 2020 at 6:40 PM Segher Boessenkool
 wrote:

On Mon, Feb 03, 2020 at 08:26:01PM -0600, Bill Schmidt wrote:

My intent is to make adding new built-in functions as simple as adding
a few lines to a couple of files, and automatically generating as much
of the initialization, overload resolution, and expansion logic as
possible.  This patch series establishes the format of the input files
and creates a new program (rs6000-genbif) to:

Let's call it rs6000-gen-builtins or similar.  Not as cryptic.

I believe we talked about this a few years ago.  Any reason this is powerpc
specific?  If sufficiently generic most targets would benefit and maybe even
frontends and the middle-end could make use of this.  The generator
program, that is.  (disclaimer: I didn't look into the patches at all)



One thing that's powerpc-unique (I believe) is our peculiar overloading 
infrastructure for the original AltiVec interface (extended to cover 
quite a bit more territory since).  But that's largely an extra level of 
abstraction that could eventually be optional.


There's also some specificity to our vector types (things like vector 
bool and vector pixel) that would need to be abstracted away.


Finally, there's a set of flags for special handling that are definitely 
Power-specific and would have to be abstracted away also.


Nothing that couldn't be dealt with given enough attention, so far as I 
can see.  But honestly I have not looked a great deal into other 
targets' built-in handling to see what other landmines might be present.



Absolutely, but we first want to solve the urgent problem for Power
(because that is what it is); it's a huge job with that reduction of
scope, already.  After *that* is done, it will be clearer how to do
things for what is wanted generically, will be clearer what is wanted
in the first place :-)



Yes, this is a necessary first step to even be able to see what's going 
on...





I always wondered if we can make our C frontend spit out things from
C declarations (with maybe extra #pragmas for some of the more obscure
details) and how to fit that into the bootstrap.

I think there will be too many problem cases, a direct description of
the builtins will work better (but is more verbose of course).

In any case, Bill's patches keep the exact same approach in rs6000 as
we had before, just with some more pre-processing and macros etc.;
which results in a much shorter description, many cases folded into one,
which as a bonus also fixes bugs (directly, when two things you fold
should be the same but are not, at least one of them is wrong; and maybe
more importantly indirectly: a reader of the tables will spot errors
much more easily if they fit on one screen, if you have similar entries
on the screen at the same time so you *can* compare; and there will be
more readers as well even, people are actually scared of having to look
at it currently).

So, yes, this same approach might be a good fit generically, but we'll
do it for rs6000 only, in the interest of ever getting it done ;-)
The generator programs etc. can move to generic code later, if that
helps and there is interest in it, there isn't much (if anything) in
here that is specific to our arch.



I'll keep this possibility in mind as we move forward.  It's probably a 
matter of months to get everything converted over just for Power.  But 
this set of patches is the most generic; the remaining patches will all 
be quite Power-specific.


Thanks,
Bill



Segher


Re: [PATCH 01/14] Initial create of rs6000-genbif.c.

2020-02-04 Thread Bill Schmidt

On 2/4/20 4:36 PM, Segher Boessenkool wrote:

On Tue, Feb 04, 2020 at 03:10:32PM -0600, Bill Schmidt wrote:

I really don't think using the new acronym "bif" helps; built-in
functions already are often called "builtins" (or "intrinsics", which is
problematic itself).

Until we manage to replace the old methods, we already have
rs6000-builtin.def, so I am a bit constrained in my choices. Given that
restriction, what name would you prefer?  I can use rs6000-builtins.def
(the plural) if you like.

As we discussed (offline), maybe rs6000-builtin-new.def is best (and at
the end of this conversion, just move it).

+1



+ ldv Needs special handling for vec_ld semantics
+ stv Needs special handling for vec_st semantics

Call those "vec_ld" and "vec_st", then?  Or should I get used to it, the
names aren't obvious, but cut-and-paste always is ;-)

Hm.  Well, vec_ld is a specific built-in, but this applies to a few more
than just that one.  But sure, if you want.

"ldv" certainly is shorter and nicer in principle, but it is a bit
cryptic.  As I said, it's probably not too hard to get used to it; and
maybe a better name will present itself?
Maybe ldvec and stvec would serve without introducing specific builtin 
confusion.



+[TARGET_ALTIVEC]

Can this be a C expression?  Most gen* programs just copy similar things
to the generated C code, which can be interesting to debug, but works
perfectly well otherwise.

I rather prefer the way it is.  I do generate C code from this in the
subsequent patches.  But I like table-driven code to use things that
look like tables for input. :-)

That's not what I meant...  Can you say
   [TARGET_ALTIVEC && TARGET_64BIT]
here?  Or even just
   [!TARGET_ALTIVEC]
or
   [1]
for always, or
   [0]
for never ("commented out").
Ah!  Sorry for misunderstanding.  Right now just an identifier is 
allowed, but we could certainly grab the whole string between the [] and 
drop it in with no concerns.  Hopefully we both remember when we get to 
the patch that reads the stanzas...



+  Blank lines may be used as desired in these files.

Between stanzas and stuff only?  There are places where newlines are
significant and not just whitespace, right?

I don't believe so, although there may be places where I forgot to allow
a line to be advanced -- that would be a bug, though, so let me know if
you see any.  Blank lines don't have any inherent meaning in the input
files.

Not blank lines, I'm asking about newlines :-)  But those are not allowed
to be inserted just anywhere, a line has to be one line, iiuc?


Yes.  Additional newlines can follow a newline, but the individual lines 
must contain everything that's expected in them.


Bill




Segher


Re: [PATCH 01/14] Initial create of rs6000-genbif.c.

2020-02-04 Thread Bill Schmidt

On 2/4/20 12:27 PM, Segher Boessenkool wrote:

Hi!

On Mon, Feb 03, 2020 at 08:26:02PM -0600, Bill Schmidt wrote:

Includes header documentation and initial set of include directives.

Please use full sentences in commit messages.



OK.




+/* This program generates built-in function initialization and
+   recognition code for Power targets, based on text files that
+   describe the built-in functions and vector overloads:
+
+ rs6000-bif.def   Table of built-in functions
+ rs6000-overload.def  Table of overload functions

I really don't think using the new acronym "bif" helps; built-in
functions already are often called "builtins" (or "intrinsics", which is
problematic itself).



Until we manage to replace the old methods, we already have 
rs6000-builtin.def, so I am a bit constrained in my choices. Given that 
restriction, what name would you prefer?  I can use rs6000-builtins.def 
(the plural) if you like.


I didn't think I was inventing "bif" as shorthand, but maybe that was an 
LLVM thing...





+ ext Process as a vec_extract function

Please spell out "extract"?  There are too many other words starting with
"ext", some of which you could expect here ("extend", "extension", maybe
even "extra");



OK.




+ ldv Needs special handling for vec_ld semantics
+ stv Needs special handling for vec_st semantics

Call those "vec_ld" and "vec_st", then?  Or should I get used to it, the
names aren't obvious, but cut-and-paste always is ;-)



Hm.  Well, vec_ld is a specific built-in, but this applies to a few more 
than just that one.  But sure, if you want.





+[TARGET_ALTIVEC]

Can this be a C expression?  Most gen* programs just copy similar things
to the generated C code, which can be interesting to debug, but works
perfectly well otherwise.



I rather prefer the way it is.  I do generate C code from this in the 
subsequent patches.  But I like table-driven code to use things that 
look like tables for input. :-)





+  const vector signed char __builtin_altivec_abs_v16qi (vector signed char);
+ABS_V16QI absv16qi2 {abs}
+  const vector signed short __builtin_altivec_abs_v8hi (vector signed short);
+ABS_V8HI absv8hi2 {abs}
+
+   Note the use of indentation, which is recommended but not required.

It does require a single newline at the end of each such line, right?
Does that work out almost always, or do you get very long lines?



Yes, for now I am requiring the newline at the end of each line. I found 
that it does indeed get very long (unreadably long) lines for vector 
signatures.  I forgot to update this documentation when I changed my 
format.  I am now using abbreviations for vector types that match those 
we use often in our test cases ("vuc" for "vector unsigned char", "vsll" 
for "vector signed long long", etc.).  This makes for very nicely 
readable inputs (see patch #2).


The above now becomes

  const vsc __builtin_altivec_abs_v16qi (vsc);
    ABS_V16QI absv16qi2 {abs}
  const vss __builtin_altivec_abs_v8hi (vss);
    ABS_V8HI absv8hi2 {abs}

I will fix the documentation!




+ [, , ]

Hrm, "internal" suggests "name within the GCC code", but that is not what
it means.  Maybe something like abi-name and builtin-name?



OK, that's reasonable.




+  Blank lines may be used as desired in these files.

Between stanzas and stuff only?  There are places where newlines are
significant and not just whitespace, right?



I don't believe so, although there may be places where I forgot to allow 
a line to be advanced -- that would be a bug, though, so let me know if 
you see any.  Blank lines don't have any inherent meaning in the input 
files.




Great docs, thanks!



Thanks for the review!
Bill




Segher


[PATCH 14/14] Incorporate new code into the build machinery.

2020-02-03 Thread Bill Schmidt
2020-02-03  Bill Schmidt  

* config.gcc (powerpc-*-*-*): Add rs6000-bif.o to extra_objs.
* config/rs6000/t-rs6000 (rs6000-genbif.o): New target.
(rbtree.o): Likewise.
(rs6000-genbif): Likewise.
(rs6000-bif.c): Likewise.
(rs6000-bif.o): Likewise.
---
 gcc/config.gcc |  3 ++-
 gcc/config/rs6000/t-rs6000 | 22 ++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index ae5a845fcce..72448e43017 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -504,7 +504,8 @@ or1k*-*-*)
;;
 powerpc*-*-*)
cpu_type=rs6000
-   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o 
rs6000-call.o"
+   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
+   extra_objs="${extra_objs} rs6000-call.o rs6000-bif.o"
extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
extra_headers="${extra_headers} bmi2intrin.h bmiintrin.h"
extra_headers="${extra_headers} xmmintrin.h mm_malloc.h emmintrin.h"
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 170a69591dd..a3a214b2bfb 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -47,6 +47,28 @@ rs6000-call.o: $(srcdir)/config/rs6000/rs6000-call.c
$(COMPILE) $<
$(POSTCOMPILE)
 
+rs6000-genbif.o: $(srcdir)/config/rs6000/rs6000-genbif.c
+   $(COMPILE) $<
+   $(POSTCOMPILE)
+
+rbtree.o: $(srcdir)/config/rs6000/rbtree.c
+   $(COMPILE) $<
+   $(POSTCOMPILE)
+
+rs6000-genbif: rs6000-genbif.o rbtree.o
+   +$(LINKER_FOR_BUILD) $(BUILD_LINKERFLAGS) $(BUILD_LDFLAGS) -o $@ \
+   $(filter-out $(BUILD_LIBDEPS), $^) $(BUILD_LIBS)
+
+rs6000-bif.c: rs6000-genbif $(srcdir)/config/rs6000/rs6000-bif.def \
+   $(srcdir)/config/rs6000/rs6000-overload.def
+   ./rs6000-genbif $(srcdir)/config/rs6000/rs6000-bif.def \
+   $(srcdir)/config/rs6000/rs6000-overload.def rs6000-bif.h \
+   rs6000-bif.c rs6000-vecdefines.h
+
+rs6000-bif.o: rs6000-bif.c
+   $(COMPILE) $<
+   $(POSTCOMPILE)
+
 $(srcdir)/config/rs6000/rs6000-tables.opt: $(srcdir)/config/rs6000/genopt.sh \
   $(srcdir)/config/rs6000/rs6000-cpus.def
$(SHELL) $(srcdir)/config/rs6000/genopt.sh $(srcdir)/config/rs6000 > \
-- 
2.17.1



[PATCH 11/14] Write #defines to rs6000-vecdefines.h.

2020-02-03 Thread Bill Schmidt
2020-02-03  Bill Schmidt  

* config/rs6000/rs6000-genbif.c (write_defines_file): Implement.
---
 gcc/config/rs6000/rs6000-genbif.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-genbif.c 
b/gcc/config/rs6000/rs6000-genbif.c
index 7bb7d2b24a4..0bcd035060d 100644
--- a/gcc/config/rs6000/rs6000-genbif.c
+++ b/gcc/config/rs6000/rs6000-genbif.c
@@ -1635,6 +1635,10 @@ write_init_file ()
 static int
 write_defines_file ()
 {
+  for (int i = 0; i < num_ovld_stanzas; i++)
+fprintf (defines_file, "#define %s %s\n",
+ovld_stanzas[i].extern_name,
+ovld_stanzas[i].intern_name);
   return 1;
 }
 
-- 
2.17.1



[PATCH 10/14] Build function type identifiers and store them.

2020-02-03 Thread Bill Schmidt
2020-02-03  Bill Schmidt  

* config/rs6000/rs6000-genbif.c (complete_vector_type): New
function.
(complete_base_type): New function.
(construct_fntype_id): New function.
(parse_bif_entry): Call construct_fntype_id.
(parse_ovld_entry): Likewise.
---
 gcc/config/rs6000/rs6000-genbif.c | 180 ++
 1 file changed, 180 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-genbif.c 
b/gcc/config/rs6000/rs6000-genbif.c
index 22b5b1df3b9..7bb7d2b24a4 100644
--- a/gcc/config/rs6000/rs6000-genbif.c
+++ b/gcc/config/rs6000/rs6000-genbif.c
@@ -999,6 +999,178 @@ htm = %d.\n",
   return 1;
 }
 
+/* Convert a vector type into a mode string.  */
+static void
+complete_vector_type (typeinfo *typeptr, char *buf, int *bufi)
+{
+  buf[(*bufi)++] = 'v';
+  if (typeptr->ispixel)
+{
+  memcpy (&buf[*bufi], "p8hi", 4);
+  *bufi += 4;
+}
+  else
+{
+  if (typeptr->isbool)
+   buf[(*bufi)++] = 'b';
+  switch (typeptr->base)
+   {
+   case BT_CHAR:
+ memcpy (&buf[*bufi], "16qi", 4);
+ *bufi += 4;
+ break;
+   case BT_SHORT:
+ memcpy (&buf[*bufi], "8hi", 3);
+ *bufi += 3;
+ break;
+   case BT_INT:
+ memcpy (&buf[*bufi], "4si", 3);
+ *bufi += 3;
+ break;
+   case BT_LONGLONG:
+ memcpy (&buf[*bufi], "2di", 3);
+ *bufi += 3;
+ break;
+   case BT_FLOAT:
+ memcpy (&buf[*bufi], "4sf", 3);
+ *bufi += 3;
+ break;
+   case BT_DOUBLE:
+ memcpy (&buf[*bufi], "2df", 3);
+ *bufi += 3;
+ break;
+   case BT_INT128:
+ memcpy (&buf[*bufi], "1ti", 3);
+ *bufi += 3;
+ break;
+   case BT_FLOAT128:
+ memcpy (&buf[*bufi], "1tf", 3);
+ *bufi += 3;
+ break;
+   default:
+ (*diag) ("unhandled basetype %d.\n", typeptr->base);
+ exit (EC_INTERR);
+   }
+}
+}
+
+/* Convert a base type into a mode string.  */
+static void
+complete_base_type (typeinfo *typeptr, char *buf, int *bufi)
+{
+  switch (typeptr->base)
+{
+case BT_CHAR:
+  memcpy (&buf[*bufi], "qi", 2);
+  break;
+case BT_SHORT:
+  memcpy (&buf[*bufi], "hi", 2);
+  break;
+case BT_INT:
+  memcpy (&buf[*bufi], "si", 2);
+  break;
+case BT_LONGLONG:
+  memcpy (&buf[*bufi], "di", 2);
+  break;
+case BT_FLOAT:
+  memcpy (&buf[*bufi], "sf", 2);
+  break;
+case BT_DOUBLE:
+  memcpy (&buf[*bufi], "df", 2);
+  break;
+case BT_INT128:
+  memcpy (&buf[*bufi], "ti", 2);
+  break;
+case BT_FLOAT128:
+  memcpy (&buf[*bufi], "tf", 2);
+  break;
+default:
+  (*diag) ("unhandled basetype %d.\n", typeptr->base);
+  exit (EC_INTERR);
+}
+
+  *bufi += 2;
+}
+
+/* Build a function type descriptor identifier from the return type
+   and argument types, and store it if it does not already exist.
+   Return the identifier.  */
+static char *
+construct_fntype_id (prototype *protoptr)
+{
+  /* Determine the maximum space for a function type descriptor id.
+ Each type requires at most 8 characters (6 for the mode*, 1 for
+ the optional 'u' preceding the mode, and 1 for an underscore
+ following the mode).  We also need 5 characters for the string
+ "ftype" that separates the return mode from the argument modes.
+ The last argument doesn't need a trailing underscore, but we
+ count that as the one trailing "ftype" instead.  For the special
+ case of zero arguments, we need 8 for the return type and 7
+ for "ftype_v".  Finally, we need one character for the
+ terminating null.  Thus for a function with N arguments, we
+ need at most 8N+14 characters for N>0, otherwise 16.
+ 
+   *Worst case is vb16qi for "vector bool char".  */
+  int len = protoptr->nargs ? (protoptr->nargs + 1) * 8 + 6 : 16;
+  char *buf = (char *) malloc (len);
+  int bufi = 0;
+
+  if (protoptr->rettype.ispointer)
+{
+  assert (protoptr->rettype.isvoid);
+  buf[bufi++] = 'p';
+}
+  if (protoptr->rettype.isvoid)
+buf[bufi++] = 'v';
+  else
+{
+  if (protoptr->rettype.isunsigned)
+   buf[bufi++] = 'u';
+  if (protoptr->rettype.isvector)
+   complete_vector_type (&protoptr->rettype, buf, &bufi);
+  else
+   complete_base_type (&protoptr->rettype, buf, &bufi);
+}
+
+  memcpy (&buf[bufi], "_ftype", 6);
+  bufi += 6;
+
+  if (!protoptr->nargs)
+{
+  memcpy (&buf[bufi], "_v", 2);
+  bufi += 2;
+}
+  else
+{
+  typelist *argptr = protoptr->args;
+  for (int i = 0; i < protoptr->nargs; i++)
+   {
+ assert (argptr);
+ buf[bufi++]

[PATCH 13/14] Write code to rs6000-bif.c.

2020-02-03 Thread Bill Schmidt
2020-02-03  Bill Schmidt  

* config/rs6000/rs6000-genbif.c (typemap): New struct.
(TYPE_MAP_SIZE): New defined constant.
(type_map): New filescope variable.
(write_fntype): New callback function.
(map_token_to_type_node): New function.
(write_type_node): New function.
(write_fntype_init): New function.
(write_init_bif_table): New function.
(write_init_ovld_table): New function.
(write_init_file): Implement.
---
 gcc/config/rs6000/rs6000-genbif.c | 367 ++
 1 file changed, 367 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-genbif.c 
b/gcc/config/rs6000/rs6000-genbif.c
index c84df1aa30f..ac640e14def 100644
--- a/gcc/config/rs6000/rs6000-genbif.c
+++ b/gcc/config/rs6000/rs6000-genbif.c
@@ -311,6 +311,52 @@ static rbt_strings bif_rbt;
 static rbt_strings ovld_rbt;
 static rbt_strings fntype_rbt;
 
+/* Mapping from type tokens to type node names.  */
+struct typemap
+{
+  const char *key;
+  const char *value;
+};
+
+/* This table must be kept in alphabetical order, as we use binary
+   search for table lookups in map_token_to_type_node.  */
+#define TYPE_MAP_SIZE 32
+static typemap type_map[TYPE_MAP_SIZE] =
+  {
+{ "df","double" },
+{ "di","intDI" },
+{ "hi","intHI" },
+{ "pv","ptr" },
+{ "qi","intQI" },
+{ "sf","float" },
+{ "si","intSI" },
+{ "tf","long_double" },
+{ "ti","intTI" },
+{ "udi",   "unsigned_intDI" },
+{ "uhi",   "unsigned_intHI" },
+{ "uqi",   "unsigned_intQI" },
+{ "usi",   "unsigned_intSI" },
+{ "uti",   "unsigned_intTI" },
+{ "uv16qi","unsigned_V16QI" },
+{ "uv1ti", "unsigned_V1TI" },
+{ "uv2di", "unsigned_V2DI" },
+{ "uv4si", "unsigned_V4SI" },
+{ "uv8hi", "unsigned_V8HI" },
+{ "v", "void" },
+{ "v16qi", "V16QI" },
+{ "v1ti",  "V1TI" },
+{ "v2df",  "V2DF" },
+{ "v2di",  "V2DI" },
+{ "v4sf",  "V4SF" },
+{ "v4si",  "V4SI" },
+{ "v8hi",  "V8HI" },
+{ "vb16qi","bool_V16QI" },
+{ "vb2di", "bool_V2DI" },
+{ "vb4si", "bool_V4SI" },
+{ "vb8hi", "bool_V8HI" },
+{ "vp8hi", "pixel_V8HI" },
+  };
+
 /* Pointer to a diagnostic function.  */
 void (*diag) (const char *, ...) __attribute__ ((format (printf, 1, 2)))
   = NULL;
@@ -1761,6 +1807,80 @@ write_extern_fntype (char *str)
   fprintf (header_file, "extern tree %s;\n", str);
 }
 
+void
+write_fntype (char *str)
+{
+  fprintf (init_file, "tree %s;\n", str);
+}
+
+/* Look up TOK in the type map and return the corresponding string used
+   to build the type node.  */
+static const char *
+map_token_to_type_node (char *tok)
+{
+  int low = 0;
+  int high = TYPE_MAP_SIZE - 1;
+  int mid = (low + high) >> 1;
+  int cmp;
+
+  while ((cmp = strcmp (type_map[mid].key, tok)) && low < high)
+{
+  if (cmp < 0)
+   low = (low == mid ? mid + 1 : mid);
+  else
+   high = (high == mid ? mid - 1: mid);
+  mid = (low + high) >> 1;
+}
+
+  if (low > high)
+{
+  (*diag) ("token '%s' doesn't appear in the type map!\n", tok);
+  exit (EC_INTERR);
+}
+
+  return type_map[mid].value;
+}
+
+/* Write the type node corresponding to TOK.  */
+static void
+write_type_node (char *tok)
+{
+  const char *str = map_token_to_type_node (tok);
+  fprintf (init_file, "%s_type_node", str);
+}
+
+/* Write an initializer for a function type identified by STR.  */
+void
+write_fntype_init (char *str)
+{
+  char *tok;
+
+  /* Avoid side effects of strtok on the original string by using a copy.  */
+  char *buf = (char *) malloc (strlen (str) + 1);
+  strcpy (buf, str);
+
+  fprintf (init_file, "  %s\n= build_function_type_list (", buf);
+  tok = strtok (buf, "_");
+  write_type_node (tok);
+  tok = strtok (0, "_");
+  assert (tok);
+  assert (!strcmp (tok, "ftype"));
+
+  tok = strtok (0, "_");
+  if (tok)
+fprintf (init_file, ",\n\t\t\t\t");
+
+  /* Note:  A function with no arguments ends with '_ftype_v'.  */
+  while (tok && strcmp (tok, "v"))
+{
+  write_type_node (tok);
+  tok = strtok (0, "_");
+  fprintf (init_file, ",\n\t\t\

[PATCH 12/14] Write code to rs6000-bif.h.

2020-02-03 Thread Bill Schmidt
2020-02-03  Bill Schmidt  

* config/rs6000/rs6000-genbif.c (write_autogenerated_header): New
function.
(write_bif_enum): New callback function.
(write_ovld_enum): New callback function.
(write_decls): New function.
(write_extern_fntype): New callback function.
(write_header_file): Implement.
---
 gcc/config/rs6000/rs6000-genbif.c | 160 ++
 1 file changed, 160 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-genbif.c 
b/gcc/config/rs6000/rs6000-genbif.c
index 0bcd035060d..c84df1aa30f 100644
--- a/gcc/config/rs6000/rs6000-genbif.c
+++ b/gcc/config/rs6000/rs6000-genbif.c
@@ -1617,10 +1617,170 @@ parse_ovld ()
   return result;
 }
 
+/* Write a comment at the top of FILE about how the code was generated.  */
+static void
+write_autogenerated_header (FILE *file)
+{
+  fprintf (file, "/* Automatically generated by the program '%s'\n",
+  pgm_path);
+  fprintf (file, "   from the files '%s' and '%s'.  */\n\n",
+  bif_path, ovld_path);
+}
+
+/* Callback functions used in creating enumerations.  */
+void write_bif_enum (char *str)
+{
+  fprintf (header_file, "  RS6000_BIF_%s,\n", str);
+}
+
+void write_ovld_enum (char *str)
+{
+  fprintf (header_file, "  RS6000_OVLD_%s,\n", str);
+}
+
+/* Write declarations into the header file.  */
+static void
+write_decls ()
+{
+  fprintf (header_file, "enum rs6000_gen_builtins\n{\n  RS6000_BIF_NONE,\n");
+  rbt_inorder_callback (&bif_rbt, bif_rbt.rbt_root, write_bif_enum);
+  fprintf (header_file, "  RS6000_BIF_MAX\n};\n\n");
+
+  fprintf (header_file, "enum restriction {\n");
+  fprintf (header_file, "  RES_NONE,\n");
+  fprintf (header_file, "  RES_BITS,\n");
+  fprintf (header_file, "  RES_RANGE,\n");
+  fprintf (header_file, "  RES_VALUES\n");
+  fprintf (header_file, "};\n\n");
+
+  fprintf (header_file, "struct bifdata\n");
+  fprintf (header_file, "{\n");
+  fprintf (header_file, "  const char *bifname;\n");
+  fprintf (header_file, "  tree fntype;\n");
+  fprintf (header_file, "  insn_code icode;\n");
+  fprintf (header_file, "  int  bifattrs;\n");
+  fprintf (header_file, "  int  restr_opnd;\n");
+  fprintf (header_file, "  restriction restr;\n");
+  fprintf (header_file, "  int  restr_val1;\n");
+  fprintf (header_file, "  int  restr_val2;\n");
+  fprintf (header_file, "};\n\n");
+
+  fprintf (header_file, "#define bif_const_bit\t(0x001)\n");
+  fprintf (header_file, "#define bif_pure_bit\t(0x002)\n");
+  fprintf (header_file, "#define bif_round_bit\t(0x004)\n");
+  fprintf (header_file, "#define bif_init_bit\t(0x008)\n");
+  fprintf (header_file, "#define bif_set_bit\t(0x010)\n");
+  fprintf (header_file, "#define bif_ext_bit\t(0x020)\n");
+  fprintf (header_file, "#define bif_nosoft_bit\t(0x040)\n");
+  fprintf (header_file, "#define bif_ldv_bit\t(0x080)\n");
+  fprintf (header_file, "#define bif_stv_bit\t(0x100)\n");
+  fprintf (header_file, "#define bif_reve_bit\t(0x200)\n");
+  fprintf (header_file, "#define bif_abs_bit\t(0x400)\n");
+  fprintf (header_file, "#define bif_pred_bit\t(0x800)\n");
+  fprintf (header_file, "#define bif_htm_bit\t(0x0001000)\n");
+  fprintf (header_file, "\n");
+  fprintf (header_file,
+  "#define bif_is_const(x)\t\t((x).bifattrs & bif_const_bit)\n");
+  fprintf (header_file,
+  "#define bif_is_pure(x)\t\t((x).bifattrs & bif_pure_bit)\n");
+  fprintf (header_file,
+  "#define bif_has_rounding(x)\t((x).bifattrs & bif_round_bit)\n");
+  fprintf (header_file,
+  "#define bif_is_init(x)\t\t((x).bifattrs & bif_init_bit)\n");
+  fprintf (header_file,
+  "#define bif_is_extract(x)\t((x).bifattrs & bif_ext_bit)\n");
+  fprintf (header_file,
+  "#define bif_is_nosoft(x)\t((x).bifattrs & bif_nosoft_bit)\n");
+  fprintf (header_file,
+  "#define bif_is_ldv(x)\t\t((x).bifattrs & bif_ldv_bit)\n");
+  fprintf (header_file,
+  "#define bif_is_stv(x)\t\t((x).bifattrs & bif_stv_bit)\n");
+  fprintf (header_file,
+  "#define bif_is_reve(x)\t\t((x).bifattrs & bif_reve_bit)\n");
+  fprintf (header_file,
+  "#define bif_is_abs(x)\t\t((x).bifattrs & bif_abs_bit)\n");
+  fprintf (header_file,
+  "#define bif_is_predicate(x)\t((x).bifattrs & bif_pred_bit)\n");
+  fprintf (header_file,
+  "#define bif_is_htm(x)\t\t((x).bifattrs & bif_htm_bit)\n");
+  fprintf (header_file, "\n")

[PATCH 09/14] Add parsing support for rs6000-overload.def.

2020-02-03 Thread Bill Schmidt
2020-02-03  Bill Schmidt  

* config/rs6000/rs6000-genbif.c (ovld_stanza): New struct.
(MAXOVLDSTANZAS): New defined constant.
(ovld_stanzas): New filescope variable.
(curr_ovld_stanza): Likewise.
(MAXOVLDS): New defined constant.
(ovlddata): New struct.
(ovlds): New filescope variable.
(curr_ovld): Likewise.
(parse_ovld_entry): New function.
(parse_ovld_stanza): New function.
(parse_ovld): Implement.
---
 gcc/config/rs6000/rs6000-genbif.c | 207 +-
 1 file changed, 206 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/rs6000-genbif.c 
b/gcc/config/rs6000/rs6000-genbif.c
index e7ce777afbb..22b5b1df3b9 100644
--- a/gcc/config/rs6000/rs6000-genbif.c
+++ b/gcc/config/rs6000/rs6000-genbif.c
@@ -263,8 +263,30 @@ static bifdata bifs[MAXBIFS];
 static int num_bifs;
 static int curr_bif;
 
+/* Stanzas are groupings of built-in functions and overloads by some
+   common feature/attribute.  These definitions are for overload stanzas.  */
+struct ovld_stanza {
+  char *stanza_id;
+  char *extern_name;
+  char *intern_name;
+};
+
+#define MAXOVLDSTANZAS 256
+static ovld_stanza ovld_stanzas[MAXOVLDSTANZAS];
 static int num_ovld_stanzas;
+static int curr_ovld_stanza;
+
+#define MAXOVLDS 16384
+struct ovlddata {
+  int stanza;
+  prototype proto;
+  char *idname;
+  char *fndecl;
+};
+
+static ovlddata ovlds[MAXOVLDS];
 static int num_ovlds;
+static int curr_ovld;
 
 /* Exit codes for the shell.  */
 enum exit_codes {
@@ -1225,11 +1247,194 @@ parse_bif ()
   return result;
 }
 
+/* Parse one two-line entry in the overload file.  On entry linebuf holds
+   the first line (the prototype); the second line, which must contain only
+   the builtin id, is read from ovld_file here.  On success the entry is
+   recorded in ovlds[curr_ovld] and its id is added to ovld_rbt.  A line
+   beginning with '[' is the next stanza header and ends the current stanza.
+   Return 0 for EOF, 1 for success, 2 for end-of-stanza, and 6 for a
+   parsing failure.  */
+static int
+parse_ovld_entry ()
+{
+  /* Check for end of stanza.  */
+  pos = 0;
+  consume_whitespace ();
+  if (linebuf[pos] == '[')
+    return 2;
+
+  /* Allocate an entry in the overload table.
+     NOTE(review): the "- 1" leaves the final slot of ovlds unused; confirm
+     whether that is intentional.  */
+  if (num_ovlds >= MAXOVLDS - 1)
+    {
+      (*diag) ("too many overloads.\n");
+      return 6;
+    }
+
+  curr_ovld = num_ovlds++;
+  ovlds[curr_ovld].stanza = curr_ovld_stanza;
+
+  /* Line 1 is the prototype; parse it straight into the table entry.  */
+  if (!parse_prototype (&ovlds[curr_ovld].proto))
+    return 6;
+
+  /* Now process line 2, which just contains the builtin id.  */
+  if (!advance_line (ovld_file))
+    {
+      (*diag) ("unexpected EOF.\n");
+      return 0;
+    }
+
+  pos = 0;
+  consume_whitespace ();
+  int oldpos = pos;
+  char *id = match_identifier ();
+  ovlds[curr_ovld].idname = id;
+  if (!id)
+    {
+      (*diag) ("missing overload id at column %d.\n", pos + 1);
+      return 6;
+    }
+
+#ifdef DEBUG
+  (*diag) ("ID name is '%s'.\n", id);
+#endif
+
+  /* The builtin id has to match one from the bif file.  */
+  if (!rbt_find (&bif_rbt, id))
+    {
+      (*diag) ("builtin ID '%s' not found in bif file.\n", id);
+      return 6;
+    }
+
+  /* Save the ID in a lookup structure.  rbt_insert returns 0 when the
+     string is already present, which here means a duplicate id.  */
+  if (!rbt_insert (&ovld_rbt, id))
+    {
+      (*diag) ("duplicate function ID '%s' at column %d.\n", id, oldpos + 1);
+      return 6;
+    }
+
+  /* Nothing but whitespace may follow the id.  */
+  consume_whitespace ();
+  if (linebuf[pos] != '\n')
+    {
+      (*diag) ("garbage at end of line at column %d.\n", pos + 1);
+      return 6;
+    }
+  return 1;
+}
+
+/* Parse one stanza of the input overload file.  linebuf already contains the
+   first line to parse.  Return 1 for success, 0 for EOF, 6 for failure.  */
+static int
+parse_ovld_stanza ()
+{
+  /* Parse the stanza header.  */
+  pos = 0;
+  consume_whitespace ();
+
+  if (linebuf[pos] != '[')
+{
+  (*diag) ("ill-formed stanza header at column %d.\n", pos + 1);
+  return 6;
+}
+  safe_inc_pos ();
+
+  char *stanza_name = match_identifier ();
+  if (!stanza_name)
+{
+  (*diag) ("no identifier found in stanza header.\n");
+  return 6;
+}
+
+  /* Add the identifier to a table and set the number to be recorded
+ with subsequent overload entries.  */
+  if (num_ovld_stanzas >= MAXOVLDSTANZAS)
+{
+  (*diag) ("too many stanza headers.\n");
+  return 6;
+}
+
+  curr_ovld_stanza = num_ovld_stanzas++;
+  ovld_stanza *stanza = _stanzas[curr_ovld_stanza];
+  stanza->stanza_id = stanza_name;
+
+  consume_whitespace ();
+  if (linebuf[pos] != ',')
+{
+  (*diag) ("missing comma at column %d.\n", pos + 1);
+  return 6;
+}
+  safe_inc_pos ();
+
+  consume_whitespace ();
+  stanza->extern_name = match_identifier ();
+  if (!stanza->extern_name)
+{
+  (*diag) ("missing external name at column %d.\n", pos + 1);
+  return 6;
+}
+
+  consume_whitespace ();
+  if (linebuf[pos] != ',')
+{
+  (*diag) ("missing comma at column %d.\n", pos + 1);
+  return 6;
+}
+  safe_inc_pos ();
+
+  consume_whitespace ();
+  stanza->intern_name = match_identifier ();
+  if (!stanza->intern_name)
+{
+  (*diag) ("missin

[PATCH 06/14] Red-black tree implementation for balanced tree search.

2020-02-03 Thread Bill Schmidt
2020-02-03  Bill Schmidt  

* config/rs6000/rbtree.c: New file.
* config/rs6000/rbtree.h: New file.
---
 gcc/config/rs6000/rbtree.c | 233 +
 gcc/config/rs6000/rbtree.h |  51 
 2 files changed, 284 insertions(+)
 create mode 100644 gcc/config/rs6000/rbtree.c
 create mode 100644 gcc/config/rs6000/rbtree.h

diff --git a/gcc/config/rs6000/rbtree.c b/gcc/config/rs6000/rbtree.c
new file mode 100644
index 000..f6a8cdefaae
--- /dev/null
+++ b/gcc/config/rs6000/rbtree.c
@@ -0,0 +1,233 @@
+/* Partial red-black tree implementation for rs6000-genbif.c.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   Contributed by Bill Schmidt, IBM 
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include 
+#include 
+#include 
+#include 
+#include "rbtree.h"
+
+/* Create a new node to be inserted into the red-black tree T.  The node
+   stores the pointer STR itself (the string is not copied), has T's nil
+   sentinel as both children, and has a NULL parent link that the caller
+   is expected to set.  An inserted node starts out red.
+   NOTE(review): the malloc result is used without a null check.  */
+static struct rbt_string_node *
+rbt_create_node (struct rbt_strings *t, char *str)
+{
+  struct rbt_string_node *nodeptr
+= (struct rbt_string_node *) malloc (sizeof (struct rbt_string_node));
+  nodeptr->str = str;
+  nodeptr->left = t->rbt_nil;
+  nodeptr->right = t->rbt_nil;
+  nodeptr->par = NULL;
+  nodeptr->color = RBT_RED;
+  return nodeptr;
+}
+
+/* Perform a left-rotate operation on NODE in the red-black tree T.
+   NODE's right child takes NODE's place under NODE's former parent (or
+   becomes T's root when that parent is the nil sentinel), NODE becomes
+   that child's left child, and the child's former left subtree becomes
+   NODE's right subtree.  Node colors are not touched.
+   NOTE(review): the assert only rejects a NULL right child; a nil-sentinel
+   child would pass it -- confirm callers never rotate such a node.  */
+static void
+rbt_left_rotate (struct rbt_strings *t, struct rbt_string_node *node)
+{
+  struct rbt_string_node *right = node->right;
+  assert (right);
+
+  /* Turn RIGHT's left subtree into NODE's right subtree.  The sentinel's
+     parent link is deliberately left alone.  */
+  node->right = right->left;
+  if (right->left != t->rbt_nil)
+right->left->par = node;
+
+  /* Link NODE's parent to RIGHT.  */
+  right->par = node->par;
+
+  if (node->par == t->rbt_nil)
+t->rbt_root = right;
+  else if (node == node->par->left)
+node->par->left = right;
+  else
+node->par->right = right;
+
+  /* Put NODE on RIGHT's left.  */
+  right->left = node;
+  node->par = right;
+}
+
+/* Perform a right-rotate operation on NODE in the red-black tree T.  This
+   is the mirror image of rbt_left_rotate: NODE's left child takes NODE's
+   place under NODE's former parent (or becomes T's root when that parent
+   is the nil sentinel), NODE becomes that child's right child, and the
+   child's former right subtree becomes NODE's left subtree.  Node colors
+   are not touched.
+   NOTE(review): the assert only rejects a NULL left child; a nil-sentinel
+   child would pass it -- confirm callers never rotate such a node.  */
+static void
+rbt_right_rotate (struct rbt_strings *t, struct rbt_string_node *node)
+{
+  struct rbt_string_node *left = node->left;
+  assert (left);
+
+  /* Turn LEFT's right subtree into NODE's left subtree.  The sentinel's
+     parent link is deliberately left alone.  */
+  node->left = left->right;
+  if (left->right != t->rbt_nil)
+left->right->par = node;
+
+  /* Link NODE's parent to LEFT.  */
+  left->par = node->par;
+
+  if (node->par == t->rbt_nil)
+t->rbt_root = left;
+  else if (node == node->par->right)
+node->par->right = left;
+  else
+node->par->left = left;
+
+  /* Put NODE on LEFT's right.  */
+  left->right = node;
+  node->par = left;
+}
+
+/* Insert STR into the tree, returning 1 for success and 0 if STR already
+   appears in the tree.  */
+int
+rbt_insert (struct rbt_strings *t, char *str)
+{
+  struct rbt_string_node *curr = t->rbt_root;
+  struct rbt_string_node *trail = t->rbt_nil;
+
+  while (curr != t->rbt_nil)
+{
+  trail = curr;
+  int cmp = strcmp (str, curr->str);
+  if (cmp < 0)
+   curr = curr->left;
+  else if (cmp > 0)
+   curr = curr->right;
+  else
+   return 0;
+}
+
+  struct rbt_string_node *fresh = rbt_create_node (t, str);
+  fresh->par = trail;
+
+  if (trail == t->rbt_nil)
+t->rbt_root = fresh;
+  else if (strcmp (fresh->str, trail->str) < 0)
+trail->left = fresh;
+  else
+trail->right = fresh;
+
+  fresh->left = t->rbt_nil;
+  fresh->right = t->rbt_nil;
+
+  /* FRESH has now been inserted as a red leaf.  If we have invalidated
+ one of the following preconditions, we must fix things up:
+  (a) If a node is red, both of its children are black.
+  (b) The root must be black.
+ Note that only (a) or (b) applies at any given time during the
+ process.  This algorithm works up the tree from FRESH looking
+ for a red child with a red parent, and cleaning that up.  If the
+ root ends up red, it gets turned black at the end.  */
+  curr = fresh;
+  while (curr->par->color == RBT_RED)
+if (curr->par == curr->par->par->left)
+  {
+   struct rbt_string_node *unc

[PATCH 08/14] Add support for parsing rs6000-bif.def.

2020-02-03 Thread Bill Schmidt
2020-02-03  Bill Schmidt  

* config/rs6000/rs6000-genbif.c (MAXBIFSTANZAS): New defined
constant.
(bif_stanzas): New filescope variable.
(curr_bif_stanza): Likewise.
(fnkinds): New enum.
(typelist): New struct.
(attrinfo): New struct.
(prototype): New struct.
(MAXBIFS): New defined constant.
(bifdata): New struct.
(bifs): New filescope variable.
(curr_bif): Likewise.
(parse_bif_args): New function.
(parse_bif_attrs): New function.
(parse_prototype): New function.
(parse_bif_entry): New function.
(parse_bif_stanza): New function.
(parse_bif): Implement.
---
 gcc/config/rs6000/rs6000-genbif.c | 473 +-
 1 file changed, 472 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/rs6000-genbif.c 
b/gcc/config/rs6000/rs6000-genbif.c
index 38401224dce..e7ce777afbb 100644
--- a/gcc/config/rs6000/rs6000-genbif.c
+++ b/gcc/config/rs6000/rs6000-genbif.c
@@ -156,7 +156,23 @@ enum void_status {
   VOID_OK
 };
 
+/* Stanzas are groupings of built-in functions and overloads by some
+   common feature/attribute.  These definitions are for built-in function
+   stanzas.  */
+#define MAXBIFSTANZAS 256
+static char *bif_stanzas[MAXBIFSTANZAS];
 static int num_bif_stanzas;
+static int curr_bif_stanza;
+
+/* Function modifiers provide special handling for const, pure, and math
+   functions.  These are mutually exclusive, and therefore kept separate
+   from other bif attributes.  */
+enum fnkinds {
+  FNK_NONE,
+  FNK_CONST,
+  FNK_PURE,
+  FNK_MATH
+};
 
 /* Legal base types for an argument or return type.  */
 enum basetype {
@@ -199,7 +215,54 @@ struct typeinfo {
   int val2;
 };
 
+/* A singly linked list of argument types.  parse_bif_args allocates one
+   zero-filled node per argument it matches and links the nodes in
+   prototype order.  */
+struct typelist {
+  typeinfo info;       /* Type of this argument, filled in by match_type.  */
+  typelist *next;      /* Node for the following argument.  */
+};
+
+/* Attributes of a builtin function, one char-sized field per attribute.
+   NOTE(review): the code that sets these fields is not part of this hunk;
+   presumably each is nonzero when the corresponding attribute appears on
+   the entry's attribute line -- confirm against parse_bif_attrs.  */
+struct attrinfo {
+  char isinit;
+  char isset;
+  char isext;
+  char isnosoft;
+  char isldv;
+  char isstv;
+  char isreve;
+  char isabs;
+  char ispred;
+  char ishtm;
+};
+
+/* Fields associated with a function prototype (bif or overload).  The
+   argument-related fields are filled in by parse_bif_args.
+   NOTE(review): parse_bif_args stores the count of arguments seen so far
+   in restr_opnd (zero for the first argument) and also tests restr_opnd
+   for nonzero to detect a second restricted operand, so a restriction on
+   the first argument is indistinguishable from "none" -- confirm.  */
+struct prototype {
+  typeinfo rettype;
+  char *bifname;
+  int nargs;           /* Number of arguments matched.  */
+  typelist *args;      /* Head of the argument type list.  */
+  int restr_opnd;      /* Value of nargs when the restricted arg was seen.  */
+  restriction restr;   /* Copied from that argument's typeinfo.restr.  */
+  int restr_val1;      /* Copied from that argument's typeinfo.val1.  */
+  int restr_val2;      /* Copied from that argument's typeinfo.val2.  */
+};
+
+/* Data associated with a builtin function, and a table of such data.
+   NOTE(review): the code that fills in these fields (parse_bif_entry) is
+   not part of this hunk.  Judging by ovlddata, stanza is presumably the
+   index of the enclosing stanza and proto the parsed prototype line, with
+   idname, patname and attrs coming from the attributes line -- confirm.  */
+#define MAXBIFS 16384
+struct bifdata {
+  int stanza;
+  fnkinds kind;
+  prototype proto;
+  char *idname;
+  char *patname;
+  attrinfo attrs;
+  char *fndecl;
+};
+
+static bifdata bifs[MAXBIFS];
 static int num_bifs;
+static int curr_bif;
+
 static int num_ovld_stanzas;
 static int num_ovlds;
 
@@ -747,11 +810,419 @@ match_type (typeinfo *typedata, int voidok)
   return match_basetype (typedata);
 }
 
+/* Parse the argument list, returning 1 if success or 0 if any
+   malformation is found.  */
+static int
+parse_bif_args (prototype *protoptr)
+{
+  typelist **argptr = >args;
+  int *nargs = >nargs;
+  int *restr_opnd = >restr_opnd;
+  restriction *restr = >restr;
+  int *val1 = >restr_val1;
+  int *val2 = >restr_val2;
+
+  int success;
+  *nargs = 0;
+
+  /* Start the argument list.  */
+  consume_whitespace ();
+  if (linebuf[pos] != '(')
+{
+  (*diag) ("missing '(' at column %d.\n", pos + 1);
+  return 0;
+}
+  safe_inc_pos ();
+
+  do {
+consume_whitespace ();
+int oldpos = pos;
+typelist *argentry = (typelist *) malloc (sizeof (typelist));
+memset (argentry, 0, sizeof (*argentry));
+typeinfo *argtype = >info;
+success = match_type (argtype, VOID_NOTOK);
+if (success)
+  {
+   if (argtype->restr)
+ {
+   if (*restr_opnd)
+ {
+   (*diag) ("More than one restricted operand\n");
+   return 0;
+ }
+   *restr_opnd = *nargs;
+   *restr = argtype->restr;
+   *val1 = argtype->val1;
+   *val2 = argtype->val2;
+ }
+   (*nargs)++;
+   *argptr = argentry;
+   argptr = >next;
+   consume_whitespace ();
+   if (linebuf[pos] == ',')
+ safe_inc_pos ();
+   else if (linebuf[pos] != ')')
+ {
+   (*diag) ("arg not followed by ',' or ')' at column %d.\n",
+pos + 1);
+   return 0;
+ }
+
+#ifdef DEBUG
+   (*diag) ("argument type: isvoid = %d, isconst = %d, isvector = %d, \
+issigned = %d, isunsigned = %d, isbool = %d, ispixel = %d, ispointer = %d, \
+base = %d, restr = %d, val1 = %d, val2 = %d, pos = %d.\n",
+argtype->isvoid, argtype->isconst, argtype->isvector,
+argtype->issigned, argtype->isunsigned, argtype->isbool,
+argtype->ispixel, argtype->ispointer, argtype->base,
+argtype->restr, argtype->val1, argtype->val2, pos

[PATCH 05/14] Add support functions for matching types.

2020-02-03 Thread Bill Schmidt
2020-02-03  Bill Schmidt  

* config/rs6000/rs6000-genbif.c (void_status): New enum.
(basetype): Likewise.
(restriction): Likewise.
(typeinfo): New struct.
(match_basetype): New function.
(match_const_restriction): New function.
(match_type): New function.
---
 gcc/config/rs6000/rs6000-genbif.c | 453 ++
 1 file changed, 453 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-genbif.c 
b/gcc/config/rs6000/rs6000-genbif.c
index 197059cc2d2..7c1082fbe8f 100644
--- a/gcc/config/rs6000/rs6000-genbif.c
+++ b/gcc/config/rs6000/rs6000-genbif.c
@@ -149,6 +149,53 @@ static char linebuf[LINELEN];
 static int line;
 static int pos;
 
+/* Used to determine whether a type can be void (only return types).  */
+enum void_status {
+  VOID_NOTOK,
+  VOID_OK
+};
+
+/* Legal base types for an argument or return type.  The token(s) that
+   match_basetype accepts for each value are shown alongside.  */
+enum basetype {
+  BT_CHAR,             /* "char"  */
+  BT_SHORT,            /* "short"  */
+  BT_INT,              /* "int"  */
+  BT_LONGLONG,         /* "long" followed by a second "long"  */
+  BT_FLOAT,            /* "float"  */
+  BT_DOUBLE,           /* "double"  */
+  BT_INT128,           /* "__int128"  */
+  BT_FLOAT128          /* "_Float128"  */
+};
+
+/* Ways in which a const int value can be restricted.  RES_BITS indicates
+   that the integer is restricted to val1 bits, interpreted as signed or
+   unsigned depending on whether the type is signed or unsigned.  RES_RANGE
+   indicates that the integer is restricted to values between val1 and val2,
+   inclusive.  RES_VALUES indicates that the integer must have one of the
+   values val1 or val2.  */
+enum restriction {
+  RES_NONE,
+  RES_BITS,
+  RES_RANGE,
+  RES_VALUES
+};
+
+/* Type modifiers for an argument or return type, together with the base
+   type and any restriction on a const int argument.  The restriction
+   fields are interpreted as described for enum restriction.  */
+struct typeinfo {
+  char isvoid;
+  char isconst;
+  char isvector;
+  char issigned;
+  char isunsigned;
+  char isbool;
+  char ispixel;
+  char ispointer;
+  basetype base;       /* Set by match_basetype.  */
+  restriction restr;   /* Set by match_const_restriction.  */
+  int val1;            /* First restriction operand (x).  */
+  int val2;            /* Second restriction operand (y), when there is one.  */
+};
+
 /* Exit codes for the shell.  */
 enum exit_codes {
   EC_INTERR
@@ -268,3 +315,409 @@ match_integer ()
   sscanf (buf, "%d", );
   return x;
 }
+
+/* Match one of the allowable base types and record it in TYPEDATA->base.
+   Consumes one token unless the token is "long", which must be paired
+   with a second "long".  The token strings returned by match_identifier
+   are heap-allocated and only needed for the comparisons here, so they
+   are released before returning.  Return 1 for success, 0 for failure
+   (after issuing a diagnostic).  */
+static int
+match_basetype (typeinfo *typedata)
+{
+  consume_whitespace ();
+  int oldpos = pos;
+  char *token = match_identifier ();
+  if (!token)
+    {
+      (*diag) ("missing base type in return type at column %d\n", pos + 1);
+      return 0;
+    }
+
+  int result = 1;
+
+  if (!strcmp (token, "char"))
+    typedata->base = BT_CHAR;
+  else if (!strcmp (token, "short"))
+    typedata->base = BT_SHORT;
+  else if (!strcmp (token, "int"))
+    typedata->base = BT_INT;
+  else if (!strcmp (token, "long"))
+    {
+      /* A lone "long" is not a legal base type; require "long long".  */
+      consume_whitespace ();
+      char *mustbelong = match_identifier ();
+      if (mustbelong && !strcmp (mustbelong, "long"))
+        typedata->base = BT_LONGLONG;
+      else
+        {
+          (*diag) ("incomplete 'long long' at column %d\n", oldpos + 1);
+          result = 0;
+        }
+      free (mustbelong);
+    }
+  else if (!strcmp (token, "float"))
+    typedata->base = BT_FLOAT;
+  else if (!strcmp (token, "double"))
+    typedata->base = BT_DOUBLE;
+  else if (!strcmp (token, "__int128"))
+    typedata->base = BT_INT128;
+  else if (!strcmp (token, "_Float128"))
+    typedata->base = BT_FLOAT128;
+  else
+    {
+      (*diag) ("unrecognized base type at column %d\n", oldpos + 1);
+      result = 0;
+    }
+
+  free (token);
+  return result;
+}
+
+/* A const int argument may be restricted to certain values.  This is
+   indicated by one of the following occurring after the "int" token:
+
+     <x>   restricts the constant to x bits, interpreted as signed or
+           unsigned according to the argument type
+     <x,y> restricts the constant to the inclusive range [x,y]
+     {x,y} restricts the constant to one of two values, x or y.
+
+   Here x and y are integer tokens.  Return 1 for success, else 0.  */
+static int
+match_const_restriction (typeinfo *typedata)
+{
+  int oldpos = pos;
+  if (linebuf[pos] == '<')
+{
+  safe_inc_pos ();
+  oldpos = pos;
+  int x = match_integer ();
+  if (x == MININT)
+   {
+ (*diag) ("malformed integer at column %d.\n", oldpos + 1);
+ return 0;
+   }
+  consume_whitespace ();
+  if (linebuf[pos] == '>')
+   {
+ typedata->restr = RES_BITS;
+ typedata->val1 = x;
+ safe_inc_pos ();
+ return 1;
+   }
+  else if (linebuf[pos] != ',')
+   {
+ (*diag) ("malformed restriction at column %d.\n", pos + 1);
+ return 0;
+   }
+  safe_inc_pos ();
+  oldpos = pos;
+  int y = match_integer ();
+  if (y == MININT)
+   {
+ (*diag) ("malformed integer at column %d.\n", oldpos + 1);
+ return 0;
+   }
+  typedata->restr = RES_RANGE;
+  typedata->val1 = x;
+  typed

[PATCH 07/14] Add main function with stub functions for parsing and output.

2020-02-03 Thread Bill Schmidt
2020-02-03  Bill Schmidt  

* config/rs6000/rs6000-genbif.c (rbtree.h): New include.
(num_bif_stanzas): New filescope variable.
(num_bifs): Likewise.
(num_ovld_stanzas): Likewise.
(num_ovlds): Likewise.
(exit_codes): Add more enum values.
(bif_rbt): New filescope variable.
(ovld_rbt): Likewise.
(fntype_rbt): Likewise.
(parse_bif): New function stub.
(parse_ovld): Likewise.
(write_header_file): Likewise.
(write_init_file): Likewise.
(write_defines_file): Likewise.
(main): New function.
---
 gcc/config/rs6000/rs6000-genbif.c | 185 ++
 1 file changed, 185 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-genbif.c 
b/gcc/config/rs6000/rs6000-genbif.c
index 7c1082fbe8f..38401224dce 100644
--- a/gcc/config/rs6000/rs6000-genbif.c
+++ b/gcc/config/rs6000/rs6000-genbif.c
@@ -122,6 +122,7 @@ along with GCC; see the file COPYING3.  If not see
 #include 
 #include 
 #include 
+#include "rbtree.h"
 
 /* Used as a sentinel for range constraints on integer fields.  No field can
be 32 bits wide, so this is a safe sentinel value.  */
@@ -155,6 +156,8 @@ enum void_status {
   VOID_OK
 };
 
+static int num_bif_stanzas;
+
 /* Legal base types for an argument or return type.  */
 enum basetype {
   BT_CHAR,
@@ -196,11 +199,33 @@ struct typeinfo {
   int val2;
 };
 
+static int num_bifs;
+static int num_ovld_stanzas;
+static int num_ovlds;
+
 /* Exit codes for the shell.  */
 enum exit_codes {
+  EC_OK,
+  EC_BADARGS,          /* Wrong number of command-line arguments.  */
+  EC_NOBIF,            /* Built-in input file could not be opened.  */
+  EC_NOOVLD,           /* Overload input file could not be opened.  */
+  EC_NOHDR,            /* Header output file could not be opened.  */
+  EC_NOINIT,           /* Init output file could not be opened.  */
+  EC_NODEFINES,        /* Defines output file could not be opened.  */
+  EC_PARSEBIF,         /* Presumably parse_bif failed -- TODO confirm.  */
+  EC_PARSEOVLD,        /* Presumably parse_ovld failed -- TODO confirm.  */
+  EC_WRITEHDR,         /* Presumably write_header_file failed -- confirm.  */
+  EC_WRITEINIT,        /* Presumably write_init_file failed -- confirm.  */
+  EC_WRITEDEFINES,     /* Presumably write_defines_file failed -- confirm.  */
   EC_INTERR
 };
 
+/* The red-black trees for built-in function identifiers, built-in
+   overload identifiers, and function type descriptors.  */
+static rbt_strings bif_rbt;
+static rbt_strings ovld_rbt;
+static rbt_strings fntype_rbt;
+
 /* Pointer to a diagnostic function.  */
 void (*diag) (const char *, ...) __attribute__ ((format (printf, 1, 2)))
   = NULL;
@@ -721,3 +746,163 @@ match_type (typeinfo *typedata, int voidok)
   consume_whitespace ();
   return match_basetype (typedata);
 }
+
+/* Parse the built-in file.  Return 1 for success, 5 for a parsing failure.
+   Stub for now: does no work and always returns 1.  */
+static int
+parse_bif ()
+{
+  return 1;
+}
+
+/* Parse the overload file.  Return 1 for success, 6 for a parsing error.
+   Stub for now: does no work and always returns 1.  */
+static int
+parse_ovld ()
+{
+  return 1;
+}
+
+/* Write everything to the header file (rs6000-bif.h).
+   Stub for now: writes nothing and always returns 1.  */
+static int
+write_header_file ()
+{
+  return 1;
+}
+
+/* Write everything to the initialization file (rs6000-bif.c).
+   Stub for now: writes nothing and always returns 1.  */
+static int
+write_init_file ()
+{
+  return 1;
+}
+
+/* Write everything to the include file (rs6000-vecdefines.h).
+   Stub for now: writes nothing and always returns 1.  */
+static int
+write_defines_file ()
+{
+  return 1;
+}
+
+/* Main program to convert flat files into built-in initialization code.  */
+int
+main (int argc, const char **argv)
+{
+  if (argc != 6)
+{
+  fprintf (stderr,
+  "Five arguments required: two input file and three output"
+  "files.\n");
+  exit (EC_BADARGS);
+}
+
+  pgm_path = argv[0];
+  bif_path = argv[1];
+  ovld_path = argv[2];
+  header_path = argv[3];
+  init_path = argv[4];
+  defines_path = argv[5];
+
+  bif_file = fopen (bif_path, "r");
+  if (!bif_file)
+{
+  fprintf (stderr, "Cannot find input built-in file '%s'.\n", bif_path);
+  exit (EC_NOBIF);
+}
+  ovld_file = fopen (ovld_path, "r");
+  if (!ovld_file)
+{
+  fprintf (stderr, "Cannot find input overload file '%s'.\n", ovld_path);
+  exit (EC_NOOVLD);
+}
+  header_file = fopen (header_path, "w");
+  if (!header_file)
+{
+  fprintf (stderr, "Cannot open header file '%s' for output.\n",
+  header_path);
+  exit (EC_NOHDR);
+}
+  init_file = fopen (init_path, "w");
+  if (!init_file)
+{
+  fprintf (stderr, "Cannot open init file '%s' for output.\n", init_path);
+  exit (EC_NOINIT);
+}
+  defines_file = fopen (defines_path, "w");
+  if (!defines_file)
+{
+  fprintf (stderr, "Cannot open defines file '%s' for output.\n",
+  defines_path);
+  exit (EC_NODEFINES);
+}
+
+  /* Initialize the balanced trees containing built-in function ids,
+ overload function ids, and function type declaration ids.  */
+  bif_rbt.rbt_nil = (rbt_string_node *) malloc (sizeof (rbt_string_node));
+  bif_rbt.rbt_nil->color = RBT_BLACK;
+  bif_rbt.rbt_root = bif_rbt.rbt_nil;
+
+  ovld_rbt.rbt_nil = (rbt_string_node *) malloc (sizeof (rbt_string_node));
+  ovld_rbt.rbt_nil->color = RBT_BLACK;
+  ovld_rbt.rbt_root = ovld_rbt.rbt_nil;
+
+  fntype_rbt.rbt_nil = (rbt_string_node *) malloc (sizeof (rbt_string_node));
+  fntype_rbt.rbt_nil->color = RBT_BLACK;
+  fntype_rbt.rbt_root = fntyp

[PATCH 04/14] Support functions to parse whitespace, lines, identifiers, integers.

2020-02-03 Thread Bill Schmidt
2020-02-03  Bill Schmidt  

* config/rs6000/rs6000-genbif.c (MININT): New defined constant.
(exit_codes): New enum.
(consume_whitespace): New function.
(advance_line): New function.
(safe_inc_pos): New function.
(match_identifier): New function.
(match_integer): New function.
---
 gcc/config/rs6000/rs6000-genbif.c | 99 +++
 1 file changed, 99 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-genbif.c 
b/gcc/config/rs6000/rs6000-genbif.c
index 3fb13cb11d6..197059cc2d2 100644
--- a/gcc/config/rs6000/rs6000-genbif.c
+++ b/gcc/config/rs6000/rs6000-genbif.c
@@ -123,6 +123,10 @@ along with GCC; see the file COPYING3.  If not see
 #include 
 #include 
 
+/* Used as a sentinel for range constraints on integer fields.  No field can
+   be 32 bits wide, so this is a safe sentinel value.  */
+#define MININT INT32_MIN
+
 /* Input and output file descriptors and pathnames.  */
 static FILE *bif_file;
 static FILE *ovld_file;
@@ -145,6 +149,11 @@ static char linebuf[LINELEN];
 static int line;
 static int pos;
 
+/* Exit codes for the shell.  */
+enum exit_codes {
+  EC_INTERR
+};
+
 /* Pointer to a diagnostic function.  */
 void (*diag) (const char *, ...) __attribute__ ((format (printf, 1, 2)))
   = NULL;
@@ -169,3 +178,93 @@ ovld_diag (const char * fmt, ...)
   vfprintf (stderr, fmt, args);
   va_end (args);
 }
+
+/* Advance pos past whitespace in linebuf.  The scan stops at a newline,
+   at the first character for which isspace is false, or when pos reaches
+   LINELEN.  */
+static void
+consume_whitespace ()
+{
+  /* The <ctype.h> classifiers require a value representable as unsigned
+     char (or EOF), so convert before the call in case char is signed.  */
+  while (pos < LINELEN
+         && linebuf[pos] != '\n'
+         && isspace ((unsigned char) linebuf[pos]))
+    pos++;
+}
+
+/* Get the next nonblank line, returning 0 on EOF, 1 otherwise.  Lines are
+   read from FILE into linebuf; the global line counter is incremented for
+   every line read, blank or not.  On a return of 1, pos indexes the first
+   non-whitespace character of the line now in linebuf.  */
+static int
+advance_line (FILE *file)
+{
+  while (1)
+{
+  /* Read ahead one line and check for EOF.  */
+  if (!fgets (linebuf, sizeof(linebuf), file))
+   return 0;
+  line++;
+  pos = 0;
+  consume_whitespace ();
+  /* Only a newline after optional whitespace means a blank line; skip it.  */
+  if (linebuf[pos] != '\n')
+   return 1;
+}
+}
+
+/* Advance pos by one character.  linebuf has LINELEN elements, so pos must
+   stay below LINELEN to remain a valid index; the incremented value is the
+   one that is checked, and an internal error is reported (and the program
+   exits) if it would leave the buffer.  */
+static inline void
+safe_inc_pos ()
+{
+  if (++pos >= LINELEN)
+    {
+      (*diag) ("line length overrun.\n");
+      exit (EC_INTERR);
+    }
+}
+
+/* Match an identifier (a run of letters, digits, and underscores) starting
+   at pos.  Return NULL, leaving pos unchanged, if there is none; otherwise
+   advance pos past the identifier and return a malloc'd, NUL-terminated
+   copy of it, which the caller owns.
+   NOTE(review): the malloc result is used without a null check.  */
+static char *
+match_identifier ()
+{
+  /* lastpos tracks the index of the last identifier character seen.  The
+     <ctype.h> classifiers need an unsigned char value, hence the cast.  */
+  int lastpos = pos - 1;
+  while (isalnum ((unsigned char) linebuf[lastpos + 1])
+         || linebuf[lastpos + 1] == '_')
+    if (++lastpos >= LINELEN - 1)
+      {
+        (*diag) ("line length overrun.\n");
+        exit (EC_INTERR);
+      }
+
+  if (lastpos < pos)
+    return 0;
+
+  /* Copy the lastpos - pos + 1 identifier characters plus a terminator.  */
+  char *buf = (char *) malloc (lastpos - pos + 2);
+  memcpy (buf, &linebuf[pos], lastpos - pos + 1);
+  buf[lastpos - pos + 1] = '\0';
+
+  pos = lastpos + 1;
+  return buf;
+}
+
+/* Match an optionally negated decimal integer starting at pos and return
+   its value, or MININT on failure.  On success pos is advanced past the
+   last digit.  If a '-' is present but no digit follows, MININT is
+   returned with pos left just past the '-'.
+   NOTE(review): the malloc result is used without a null check.  */
+static int
+match_integer ()
+{
+  int startpos = pos;
+  if (linebuf[pos] == '-')
+    safe_inc_pos ();
+
+  /* lastpos tracks the index of the last digit seen.  The <ctype.h>
+     classifiers need an unsigned char value, hence the cast.  */
+  int lastpos = pos - 1;
+  while (isdigit ((unsigned char) linebuf[lastpos + 1]))
+    if (++lastpos >= LINELEN - 1)
+      {
+        (*diag) ("line length overrun in match_integer.\n");
+        exit (EC_INTERR);
+      }
+
+  if (lastpos < pos)
+    return MININT;
+
+  /* Copy the sign (if any) and digits into a terminated buffer so that
+     they can be converted.  */
+  pos = lastpos + 1;
+  char *buf = (char *) malloc (lastpos - startpos + 2);
+  memcpy (buf, &linebuf[startpos], lastpos - startpos + 1);
+  buf[lastpos - startpos + 1] = '\0';
+
+  int x;
+  sscanf (buf, "%d", &x);
+  /* The text copy was only needed for the conversion.  */
+  free (buf);
+  return x;
+}
-- 
2.17.1



[PATCH 02/14] Add stubs for input files. These will grow much larger.

2020-02-03 Thread Bill Schmidt
This patch adds a subset of the builtin and overload descriptions.
I've also started annotating the old-style descriptions in rs6000-c.c
where I'm deliberately not planning to support new versions of them.
We may have to have some discussion around these at some point, but
this helps me track this as I move through the transition.

2020-02-03  Bill Schmidt  

* config/rs6000/rs6000-bif.def: New file.
* config/rs6000/rs6000-call.c (altivec_overloaded_builtins):
Annotate some deprecated and bogus entries.
* config/rs6000/rs6000-overload.def: New file.
---
 gcc/config/rs6000/rs6000-bif.def  | 187 ++
 gcc/config/rs6000/rs6000-call.c   |  35 +
 gcc/config/rs6000/rs6000-overload.def |   5 +
 3 files changed, 227 insertions(+)
 create mode 100644 gcc/config/rs6000/rs6000-bif.def
 create mode 100644 gcc/config/rs6000/rs6000-overload.def

diff --git a/gcc/config/rs6000/rs6000-bif.def b/gcc/config/rs6000/rs6000-bif.def
new file mode 100644
index 000..85196400993
--- /dev/null
+++ b/gcc/config/rs6000/rs6000-bif.def
@@ -0,0 +1,187 @@
+[TARGET_ALTIVEC]
+  math vf __builtin_altivec_vmaddfp (vf, vf, vf);
+VMADDFP fmav4sf4 {}
+  vss __builtin_altivec_vmhaddshs (vss, vss, vss);
+VMHADDSHS altivec_vmhaddshs {}
+  vss __builtin_altivec_vmhraddshs (vss, vss, vss);
+VMHRADDSHS altivec_vmhraddshs {}
+  const vss __builtin_altivec_vmladduhm (vss, vss, vss);
+VMLADDUHM fmav8hi4 {}
+  const vui __builtin_altivec_vmsumubm (vuc, vuc, vui);
+VMSUMUBM altivec_vmsumubm {}
+  const vsi __builtin_altivec_vmsummbm (vsc, vuc, vsi);
+VMSUMMBM altivec_vmsummbm {}
+  const vui __builtin_altivec_vmsumuhm (vus, vus, vui);
+VMSUMUHM altivec_vmsumuhm {}
+  const vsi __builtin_altivec_vmsumshm (vss, vss, vsi);
+VMSUMSHM altivec_vmsumshm {}
+  vui __builtin_altivec_vmsumuhs (vus, vus, vui);
+VMSUMUHS altivec_vmsumuhs {}
+  vsi __builtin_altivec_vmsumshs (vss, vss, vsi);
+VMSUMSHS altivec_vmsumshs {}
+  math vf __builtin_altivec_vnmsubfp (vf, vf, vf);
+VNMSUBFP nfmsv4sf4 {}
+  const vsq __builtin_altivec_vperm_1ti (vsq, vsq, vuc);
+VPERM_1TI altivec_vperm_v1ti {}
+  const vf __builtin_altivec_vperm_4sf (vf, vf, vuc);
+VPERM_4SF altivec_vperm_v4sf {}
+  const vsi __builtin_altivec_vperm_4si (vsi, vsi, vuc);
+VPERM_4SI altivec_vperm_v4si {}
+  const vss __builtin_altivec_vperm_8hi (vss, vss, vuc);
+VPERM_8HI altivec_vperm_v8hi {}
+  const vsc __builtin_altivec_vperm_16qi (vsc, vsc, vuc);
+VPERM_16QI altivec_vperm_v16qi {}
+  const vuq __builtin_altivec_vperm_1ti_uns (vuq, vuq, vuc);
+VPERM_1TI_UNS altivec_vperm_v1ti_uns {}
+  const vui __builtin_altivec_vperm_4si_uns (vui, vui, vuc);
+VPERM_4SI_UNS altivec_vperm_v4si_uns {}
+  const vus __builtin_altivec_vperm_8hi_uns (vus, vus, vuc);
+VPERM_8HI_UNS altivec_vperm_v8hi_uns {}
+  const vuc __builtin_altivec_vperm_16qi_uns (vuc, vuc, vuc);
+VPERM_16QI_UNS altivec_vperm_v16qi_uns {}
+  const vf __builtin_altivec_vsel_4sf (vf, vf, vbi);
+VSEL_4SF_B vector_select_v4sf {}
+  const vf __builtin_altivec_vsel_4sf (vf, vf, vui);
+VSEL_4SF_U vector_select_v4sf {}
+  const vsi __builtin_altivec_vsel_4si (vsi, vsi, vbi);
+VSEL_4SI_B vector_select_v4si {}
+  const vsi __builtin_altivec_vsel_4si (vsi, vsi, vui);
+VSEL_4SI_U vector_select_v4si {}
+  const vui __builtin_altivec_vsel_4si (vui, vui, vbi);
+VSEL_4SI_UB vector_select_v4si {}
+  const vui __builtin_altivec_vsel_4si (vui, vui, vui);
+VSEL_4SI_UU vector_select_v4si {}
+  const vbi __builtin_altivec_vsel_4si (vbi, vbi, vbi);
+VSEL_4SI_BB vector_select_v4si {}
+  const vbi __builtin_altivec_vsel_4si (vbi, vbi, vui);
+VSEL_4SI_BU vector_select_v4si {}
+  const vss __builtin_altivec_vsel_8hi (vss, vss, vbs);
+VSEL_8HI_B vector_select_v8hi {}
+  const vss __builtin_altivec_vsel_8hi (vss, vss, vus);
+VSEL_8HI_U vector_select_v8hi {}
+  const vus __builtin_altivec_vsel_8hi (vus, vus, vbs);
+VSEL_8HI_UB vector_select_v8hi {}
+  const vus __builtin_altivec_vsel_8hi (vus, vus, vus);
+VSEL_8HI_UU vector_select_v8hi {}
+  const vbs __builtin_altivec_vsel_8hi (vbs, vbs, vbs);
+VSEL_8HI_BB vector_select_v8hi {}
+  const vbs __builtin_altivec_vsel_8hi (vbs, vbs, vus);
+VSEL_8HI_BU vector_select_v8hi {}
+  const vsc __builtin_altivec_vsel_16qi (vsc, vsc, vbc);
+VSEL_16QI_B vector_select_v16qi {}
+  const vsc __builtin_altivec_vsel_16qi (vsc, vsc, vuc);
+VSEL_16QI_U vector_select_v16qi {}
+  const vuc __builtin_altivec_vsel_16qi (vuc, vuc, vbc);
+VSEL_16QI_UB vector_select_v16qi {}
+  const vuc __builtin_altivec_vsel_16qi (vuc, vuc, vuc);
+VSEL_16QI_UU vector_select_v16qi {}
+  const vbc __builtin_altivec_vsel_16qi (vbc, vbc, vbc);
+VSEL_16QI_BB vector_select_v16qi {}
+  const vbc __builtin_altivec_vsel_16qi (vbc, vbc, vuc);
+VSEL_16QI_BU vector_select_v16qi {}
+  const vsq __builtin_altivec_vsel_1ti (vsq, vsq, vuq);
+VSEL_1TI

[PATCH 01/14] Initial create of rs6000-genbif.c.

2020-02-03 Thread Bill Schmidt
Includes header documentation and initial set of include directives.

2020-02-03  Bill Schmidt  

* config/rs6000/rs6000-genbif.c: New file.
---
 gcc/config/rs6000/rs6000-genbif.c | 124 ++
 1 file changed, 124 insertions(+)
 create mode 100644 gcc/config/rs6000/rs6000-genbif.c

diff --git a/gcc/config/rs6000/rs6000-genbif.c 
b/gcc/config/rs6000/rs6000-genbif.c
new file mode 100644
index 000..a53209ed040
--- /dev/null
+++ b/gcc/config/rs6000/rs6000-genbif.c
@@ -0,0 +1,124 @@
+/* Generate built-in function initialization and recognition for Power.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   Contributed by Bill Schmidt, IBM 
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+/* This program generates built-in function initialization and
+   recognition code for Power targets, based on text files that
+   describe the built-in functions and vector overloads:
+
+ rs6000-bif.def   Table of built-in functions
+ rs6000-overload.def  Table of overload functions
+
+   Both files group similar functions together in "stanzas," as
+   described below.
+
+   Each stanza in the built-in function file starts with a line
+   identifying the target mask for which the group of functions is
+   permitted, with the mask in square brackets.  This is the only
+   information allowed on the stanza header line, other than
+   whitespace.  Following the stanza header are two lines for each
+   function: the prototype line and the attributes line.  The
+   prototype line has this format, where the square brackets
+   indicate optional information and angle brackets indicate
+   required information:
+
+     [kind] <return-type> <bif-name> (<argument-list>);
+
+   Here [kind] can be one of "const", "pure", or "math";
+   <return-type> is a legal type for a built-in function result;
+   <bif-name> is the name by which the function can be called;
+   and <argument-list> is a comma-separated list of legal types
+   for built-in function arguments.  The argument list may be
+   empty, but the parentheses and semicolon are required.
+
+   The attributes line looks like this:
+
+     <bif-id> <bif-pattern> {<attribute-list>}
+
+   Here <bif-id> is a unique internal identifier for the built-in
+   function that will be used as part of an enumeration of all
+   built-in functions; <bif-pattern> is the define_expand or
+   define_insn that will be invoked when the call is expanded;
+   and <attribute-list> is a comma-separated list of special
+   conditions that apply to the built-in function.  The attribute
+   list may be empty, but the braces are required.
+
+   Attributes are strings, such as these:
+
+     init     Process as a vec_init function
+     set      Process as a vec_set function
+     ext      Process as a vec_extract function
+     nosoft   Not valid with -msoft-float
+     ldv      Needs special handling for vec_ld semantics
+     stv      Needs special handling for vec_st semantics
+     reve     Needs special handling for element reversal
+     abs      Needs special handling for absolute value
+     pred     Needs special handling for comparison predicates
+     htm      Needs special handling for transactional memory
+
+   An example stanza might look like this:
+
+[TARGET_ALTIVEC]
+  const vector signed char __builtin_altivec_abs_v16qi (vector signed char);
+ABS_V16QI absv16qi2 {abs}
+  const vector signed short __builtin_altivec_abs_v8hi (vector signed short);
+ABS_V8HI absv8hi2 {abs}
+
+   Note the use of indentation, which is recommended but not required.
+
+   The overload file has more complex stanza headers.  Here the stanza
+   represents all functions with the same overloaded function name:
+
+     [<overload-id>, <abi-name>, <builtin-name>]
+
+   Here the square brackets are part of the syntax, <overload-id> is a
+   unique internal identifier for the overload that will be used as part
+   of an enumeration of all overloaded functions; <abi-name> is the
+   name that will appear as a #define in altivec.h; and <builtin-name>
+   is the name that is overloaded in the back end.
+
+   Each function entry again has two lines.  The first line is again a
+   prototype line (this time without [kind]):
+
+     <return-type> <internal-name> (<argument-list>);
+
+   The second line contains only one token: the <bif-id> that this
+   particular instance of the overloaded function maps to.  It must
+   match a token that appears in the bif file.
+
+   An example stanza might look like this:
+
+[VEC_ABS, vec_abs, __builtin_vec_abs]
+  vector signed char __builtin_vec_abs (vector signe

[PATCH 00/14] rs6000: Begin replacing built-in support

2020-02-03 Thread Bill Schmidt
The current built-in support in the rs6000 back end requires at least
a master's degree in spelunking to comprehend.  It's full of cruft,
redundancy, and unused bits of code, and long overdue for a
replacement.  This is the first part of my project to do that.

My intent is to make adding new built-in functions as simple as adding
a few lines to a couple of files, and automatically generating as much
of the initialization, overload resolution, and expansion logic as
possible.  This patch series establishes the format of the input files
and creates a new program (rs6000-genbif) to:

 * Parse the input files into an internal representation;
 * Generate a file of #defines (rs6000-vecdefines.h) for eventual
   inclusion into altivec.h; and
 * Generate an initialization file to create and initialize tables of
   built-in functions and overloads.

Note that none of the code in this patch set affects GCC's operation
at all, with the exception of patch #14.  Patch 14 causes the program
rs6000-genbif to be built and executed, producing the output files,
and linking rs6000-bif.o into the executable.  However, none of the
code in rs6000-bif.o is called, so the only effect is to make the gcc
executable larger.

I'd like to consider at least patches 1-13 as stage 4 material for the
current release.  I'd prefer to also include patch 14 for convenience,
but I understand if that's not deemed acceptable.

I've attempted to break this up into logical pieces for easy
consumption, but some of the patches may still be a bit large.  Please
let me know if you'd like me to break any of them up.

Thanks in advance for the review!

Bill Schmidt (14):
  Initial create of rs6000-genbif.c.
  Add stubs for input files.  These will grow much larger.
  Add file support and functions for diagnostic support.
  Support functions to parse whitespace, lines, identifiers, integers.
  Add support functions for matching types.
  Red-black tree implementation for balanced tree search.
  Add main function with stub functions for parsing and output.
  Add support for parsing rs6000-bif.def.
  Add parsing support for rs6000-overload.def.
  Build function type identifiers and store them.
  Write #defines to rs6000-vecdefines.h.
  Write code to rs6000-bif.h.
  Write code to rs6000-bif.c.
  Incorporate new code into the build machinery.

 gcc/config.gcc|3 +-
 gcc/config/rs6000/rbtree.c|  233 +++
 gcc/config/rs6000/rbtree.h|   51 +
 gcc/config/rs6000/rs6000-bif.def  |  187 ++
 gcc/config/rs6000/rs6000-call.c   |   35 +
 gcc/config/rs6000/rs6000-genbif.c | 2295 +
 gcc/config/rs6000/rs6000-overload.def |5 +
 gcc/config/rs6000/t-rs6000|   22 +
 8 files changed, 2830 insertions(+), 1 deletion(-)
 create mode 100644 gcc/config/rs6000/rbtree.c
 create mode 100644 gcc/config/rs6000/rbtree.h
 create mode 100644 gcc/config/rs6000/rs6000-bif.def
 create mode 100644 gcc/config/rs6000/rs6000-genbif.c
 create mode 100644 gcc/config/rs6000/rs6000-overload.def

-- 
2.17.1



[PATCH 03/14] Add file support and functions for diagnostic support.

2020-02-03 Thread Bill Schmidt
2020-02-03  Bill Schmidt  

* config/rs6000/rs6000-genbif.c (bif_file): New filescope
variable.
(ovld_file): Likewise.
(header_file): Likewise.
(init_file): Likewise.
(defines_file): Likewise.
(pgm_path): Likewise.
(bif_path): Likewise.
(ovld_path): Likewise.
(header_path): Likewise.
(init_path): Likewise.
(defines_path): Likewise.
(LINELEN): New defined constant.
(linebuf): New filescope variable.
(line): Likewise.
(pos): Likewise.
(diag): Likewise.
(bif_diag): New function.
(ovld_diag): New function.
---
 gcc/config/rs6000/rs6000-genbif.c | 47 +++
 1 file changed, 47 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-genbif.c 
b/gcc/config/rs6000/rs6000-genbif.c
index a53209ed040..3fb13cb11d6 100644
--- a/gcc/config/rs6000/rs6000-genbif.c
+++ b/gcc/config/rs6000/rs6000-genbif.c
@@ -122,3 +122,50 @@ along with GCC; see the file COPYING3.  If not see
 #include 
 #include 
 #include 
+
+/* Input and output file descriptors and pathnames.  */
+static FILE *bif_file;
+static FILE *ovld_file;
+static FILE *header_file;
+static FILE *init_file;
+static FILE *defines_file;
+
+static const char *pgm_path;
+static const char *bif_path;
+static const char *ovld_path;
+static const char *header_path;
+static const char *init_path;
+static const char *defines_path;
+
+/* Position information.  Note that "pos" is zero-indexed, but users
+   expect one-indexed column information, so representations of "pos"
+   as columns in diagnostic messages must be adjusted.  */
+#define LINELEN 1024
+static char linebuf[LINELEN];
+static int line;
+static int pos;
+
+/* Pointer to a diagnostic function.  */
+void (*diag) (const char *, ...) __attribute__ ((format (printf, 1, 2)))
+  = NULL;
+
+/* Custom diagnostics.  */
+static void __attribute__ ((format (printf, 1, 2)))
+bif_diag (const char * fmt, ...)
+{
+  va_list args;
+  fprintf (stderr, "%s:%d: ", bif_path, line);
+  va_start (args, fmt);
+  vfprintf (stderr, fmt, args);
+  va_end (args);
+}
+
+static void __attribute__ ((format (printf, 1, 2)))
+ovld_diag (const char * fmt, ...)
+{
+  va_list args;
+  fprintf (stderr, "%s:%d: ", ovld_path, line);
+  va_start (args, fmt);
+  vfprintf (stderr, fmt, args);
+  va_end (args);
+}
-- 
2.17.1



Re: [rfc PATCH] rs6000: Updated constraint documentation

2020-01-31 Thread Bill Schmidt

On 1/31/20 9:42 AM, Segher Boessenkool wrote:

Hi Bill,

Thanks a lot for looking at this!  :-)

On Fri, Jan 31, 2020 at 08:49:21AM -0600, Bill Schmidt wrote:

+(define_register_constraint "wa"
"rs6000_constraints[RS6000_CONSTRAINT_wa]"
+  "A VSX register (VSR), @code{vs0}@dots{}@code{vs63}.  Either a @code{d}
+   or a @code{v} register.")

Not quite true, as the "d" register is only half of a VSX register.  It
may or may not be worth including a picture of register overlaps...

No, the "d" registers are the actual full registers, all 128 bits of it.
You often use them in a mode that uses only 64 bits, sure.



Perhaps that would be worth a few words when describing the "d" 
constraint, then.  This is not at all obvious to the casual user. Thanks!




I was planning to update this to

(define_register_constraint "wa" "rs6000_constraints[RS6000_CONSTRAINT_wa]"
   "A VSX register (VSR), @code{vs0}@dots{}@code{vs63}.  This is either an
FPR (@code{d}) or a VR (@code{v}).")

Does that improve it?



Yes, sure.



The numbering thing is also mentioned in the %x output modifier stuff.
There must be a better way to present this, but I don't see it yet.  Hrm.



I honestly thought that was pretty good as is.

Thanks again!
Bill




  (define_register_constraint "we"
  "rs6000_constraints[RS6000_CONSTRAINT_we]"
-  "VSX register if the -mpower9-vector -m64 options were used or
NO_REGS.")
+  "@internal VSX register if the -mpower9-vector -m64 options were used
+   or NO_REGS.")

Suggest changing "used or" to "used, else".

Or just "used."; this is internals documentation only, and all similar
constraints will ideally go away at some point (it just didn't fit in
easily with the "enabled" attribute yet; it probably should be just "p9"
for "isa" and test the TARGET_64BIT in the insn condition, something
like that.  Or maybe there shouldn't be separate handling for 64-bit
at all here).


  (define_register_constraint "wr"
  "rs6000_constraints[RS6000_CONSTRAINT_wr]"
-  "General purpose register if 64-bit instructions are enabled or
NO_REGS.")
+  "@internal General purpose register if 64-bit instructions are enabled
+   or NO_REGS.")

Similar here.

Yup.  I didn't change this, fwiw, just synched up md.texi and
constraints.md where they diverged.


  (define_memory_constraint "es"
-  "A ``stable'' memory operand; that is, one which does not include any
-automodification of the base register.  Unlike @samp{m}, this constraint
-can be used in @code{asm} statements that might access the operand
-several times, or that might not access it at all."
+  "@internal
+   A ``stable'' memory operand; that is, one which does not include any
+   automodification of the base register.  This used to be useful when
+   @code{m} allowed automodification of the base register, but as those

Trailing whitespace here.

Yeah, I don't know how I missed that, git tends to shout about it.
Fixed.


  @item wa
-Any VSX register if the @option{-mvsx} option was used or NO_REGS.
+A VSX register (VSR), @code{vs0}@dots{}@code{vs63}.  Either a @code{d} or
a @code{v}
+register.

Same concern as above.

It is literally the same text now (unless I messed up the c'n'p).


+@ifset INTERNALS
+@item h
+@code{vrsave}, @code{ctr}, or @code{lr}.
+@end ifset


I don't see vrsave elsewhere in either document (should have noted this
in constraints.md also).

There is no other constraint for vrsave.  constraints.md says

(define_register_constraint "h" "SPECIAL_REGS"
   "@internal @code{vrsave}, @code{ctr}, or @code{lr}.")

(Same text, as should be).  It ends up only in gccint.*, not in gcc.* .


  @item we
-VSX register if the @option{-mcpu=power9} and @option{-m64} options
-were used or NO_REGS.
+VSX register if the -mpower9-vector -m64 options were used or NO_REGS.

As above.  I won't call out the rest of these.

Since this is not new text, and it now only ends up in the internals
documentation, and a lot of it should go away in the short term anyway,
and importantly I don't know a good simple way to write what it does
anyway (because it *isn't* simple), I hoped I could just keep this for
now.

Hrm, I lost markup there, will fix.


+@item wZ
+Indexed or indirect memory operand, ignoring the bottom 4 bits.
+@end ifset

For consistency, "An indexed..." ?

Yes, thanks!


+@item Z
+A memory operand that is an indexed or indirect from a register.

"indexed or indirect access"?

And s/from a register// yeah.


Great improvements!

Thanks :-)

Somewhere it should say (in the gcc.* doc) that there are other
constraints and output modifiers as well, and some are even supported
for backwards compatibility, but here only the ones you should use are
mentioned.  Not sure where to do that.


Segher


Re: [rfc PATCH] rs6000: Updated constraint documentation

2020-01-31 Thread Bill Schmidt



On 1/30/20 6:17 PM, Segher Boessenkool wrote:

This is my current work-in-progress version.  There still are rough
edges, and not much is done for the output modifiers yet, but it should
be in much better shape wrt the user manual now.  The internals manual
also is a bit better I think.

md.texi is not automatically kept in synch with constraints.md (let
alone generated from it), so the two diverged.  I tried to correct
that, too.

Please let me know if you have any ideas how to improve it further, or
if I did something terribly wrong, or anything else.  Thanks,


Segher

---
  gcc/config/rs6000/constraints.md | 159 +++--
  gcc/doc/md.texi  | 188 +++
  2 files changed, 182 insertions(+), 165 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 398c894..bafc22a 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -21,192 +21,214 @@

  ;; Register constraints

-(define_register_constraint "f" "rs6000_constraints[RS6000_CONSTRAINT_f]"
-  "@internal")
-
-(define_register_constraint "d" "rs6000_constraints[RS6000_CONSTRAINT_d]"
-  "@internal")
+; Actually defined in common.md:
+; (define_register_constraint "r" "GENERAL_REGS"
+;   "A general purpose register (GPR), @code{r0}@dots{}@code{r31}.")

  (define_register_constraint "b" "BASE_REGS"
-  "@internal")
+  "A base register.  Like @code{r}, but @code{r0} is not allowed, so
+   @code{r1}@dots{}@code{r31}.")

-(define_register_constraint "h" "SPECIAL_REGS"
-  "@internal")
+(define_register_constraint "f" "rs6000_constraints[RS6000_CONSTRAINT_f]"
+  "A floating point register (FPR), @code{f0}@dots{}@code{f31}.")

-(define_register_constraint "c" "CTR_REGS"
-  "@internal")
-
-(define_register_constraint "l" "LINK_REGS"
-  "@internal")
+(define_register_constraint "d" "rs6000_constraints[RS6000_CONSTRAINT_d]"
+  "A floating point register.  This is the same as @code{f} nowadays;
+   historically @code{f} was for single-precision and @code{d} was for
+   double-precision floating point.")

  (define_register_constraint "v" "ALTIVEC_REGS"
-  "@internal")
+  "An Altivec vector register (VR), @code{v0}@dots{}@code{v31}.")
+
+(define_register_constraint "wa" "rs6000_constraints[RS6000_CONSTRAINT_wa]"
+  "A VSX register (VSR), @code{vs0}@dots{}@code{vs63}.  Either a @code{d}
+   or a @code{v} register.")



Not quite true, as the "d" register is only half of a VSX register.  It 
may or may not be worth including a picture of register overlaps...



+
+(define_register_constraint "h" "SPECIAL_REGS"
+  "@internal @code{vrsave}, @code{ctr}, or @code{lr}.")
+
+(define_register_constraint "c" "CTR_REGS"
+  "The count register, @code{ctr}.")
+
+(define_register_constraint "l" "LINK_REGS"
+  "The link register, @code{lr}.")

  (define_register_constraint "x" "CR0_REGS"
-  "@internal")
+  "Condition register field 0, @code{cr0}.")

  (define_register_constraint "y" "CR_REGS"
-  "@internal")
+  "Any condition register field, @code{cr0}@dots{}@code{cr7}.")

  (define_register_constraint "z" "CA_REGS"
-  "@internal")
-
-;; Use w as a prefix to add VSX modes
-;; any VSX register
-(define_register_constraint "wa" "rs6000_constraints[RS6000_CONSTRAINT_wa]"
-  "Any VSX register if the -mvsx option was used or NO_REGS.")
+  "@internal The carry bit, @code{XER[CA]}.")

  ;; NOTE: For compatibility, "wc" is reserved to represent individual CR bits.
  ;; It is currently used for that purpose in LLVM.

  (define_register_constraint "we" "rs6000_constraints[RS6000_CONSTRAINT_we]"
-  "VSX register if the -mpower9-vector -m64 options were used or NO_REGS.")
+  "@internal VSX register if the -mpower9-vector -m64 options were used
+   or NO_REGS.")



Suggest changing "used or" to "used, else".



  ;; NO_REGs register constraint, used to merge mov{sd,sf}, since movsd can use
  ;; direct move directly, and movsf can't to move between the register sets.
  ;; There is a mode_attr that resolves to wa for SDmode and wn for SFmode
-(define_register_constraint "wn" "NO_REGS" "No register (NO_REGS).")
+(define_register_constraint "wn" "NO_REGS"
+  "@internal No register (NO_REGS).")

  (define_register_constraint "wr" "rs6000_constraints[RS6000_CONSTRAINT_wr]"
-  "General purpose register if 64-bit instructions are enabled or NO_REGS.")
+  "@internal General purpose register if 64-bit instructions are enabled
+   or NO_REGS.")



Similar here.



  (define_register_constraint "wx" "rs6000_constraints[RS6000_CONSTRAINT_wx]"
-  "Floating point register if the STFIWX instruction is enabled or NO_REGS.")
+  "@internal Floating point register if the STFIWX instruction is enabled
+   or NO_REGS.")



And here.



  (define_register_constraint "wA" "rs6000_constraints[RS6000_CONSTRAINT_wA]"
-  "BASE_REGS if 64-bit instructions are enabled or NO_REGS.")
+  "@internal BASE_REGS if 64-bit instructions are enabled or NO_REGS.")



Etc.



  ;; wB 

Re: New repository location

2020-01-19 Thread Bill Schmidt
I apologize, I sent this to the wrong mailing list; this was meant to be 
internal.  But thank you very much for the information!  It appears we 
have some adjustments to make.


Thanks!
Bill

On 1/19/20 8:46 AM, H.J. Lu wrote:

On Sun, Jan 19, 2020 at 6:33 AM Bill Schmidt  wrote:

Question:  Is the new gcc git repository at gcc.gnu.org/git/gcc.git
using the same location as the earlier git mirror did?  I'm curious
whether our repository on pike is still syncing with the new master, or
whether we need to make some adjustments before we next rebase pu
against master.


2 repos are different.  I renamed my old mirror and created a new one:

https://gitlab.com/x86-gcc



New repository location

2020-01-19 Thread Bill Schmidt
Question:  Is the new gcc git repository at gcc.gnu.org/git/gcc.git 
using the same location as the earlier git mirror did?  I'm curious 
whether our repository on pike is still syncing with the new master, or 
whether we need to make some adjustments before we next rebase pu 
against master.




Re: [PATCH, rs6000] Adjust vectorization cost for scalar COND_EXPR

2019-12-11 Thread Bill Schmidt

Hi!

I can't approve this, but for what it's worth it looks fine to me.

Bill

On 12/11/19 6:31 AM, Kewen.Lin wrote:

Hi,

We found that the vectorization cost modeling on scalar COND_EXPR is a bit off
on rs6000.  One typical case is 548.exchange2_r, -Ofast -mcpu=power9 -mrecip
-fvect-cost-model=unlimited is better than -Ofast -mcpu=power9 -mrecip (the
default is -fvect-cost-model=dynamic) by 1.94%.  Scalar COND_EXPR is expanded
into compare + branch or compare + isel normally, either of them should be
priced more than the simple FXU operation.  This patch is to add additional
vectorization cost onto scalar COND_EXPR on top of builtin_vectorization_cost.
The idea to use additional cost value 2 instead of the others: 1) try various
possible value candidates from 1 to 5, 2 is the best measured on Power9.  2)
from latency view, compare takes 3 cycles and isel takes 2 on Power9, it's
2.5 times of simple FXU instruction which takes cost 1 in the current
modeling, it's close.  3) get fine SPEC2017 ratio on Power8 as well.

The SPEC2017 performance evaluation on Power9 with explicit unrolling shows
548.exchange2_r +2.35% gains, but 526.blender_r -1.99% degradation, the others
are trivial.  By further investigation on 526.blender_r, the assembly of 10
hottest functions are unchanged, the impact should be due to some side effects.
SPECINT geomean +0.16%, SPECFP geomean -0.16% (mainly due to blender_r).
Without explicit unrolling, 548.exchange2_r +1.78% gains and the others are
trivial.  SPECINT geomean +0.19%, SPECFP geomean +0.06%.

While the SPEC2017 performance evaluation on Power8 shows 500.perlbench_r
+1.32% gain and 511.povray_r +2.03% gain, the others are trivial.  SPECINT
geomean +0.08%, SPECFP geomean +0.18%.

Bootstrapped and regress tested on powerpc64le-linux-gnu.
Is it OK for trunk?

BR,
Kewen
---

gcc/ChangeLog

2019-12-11  Kewen Lin  

* config/rs6000/rs6000.c (adjust_vectorization_cost): New function.
(rs6000_add_stmt_cost): Call adjust_vectorization_cost and update
stmt_cost.



[PATCH] rs6000: Make rs6000_invalid_builtin static (committed)

2019-12-02 Thread Bill Schmidt

Hi,

I noticed this function should have been made static in the recent separation of
rs6000-call.c from rs6000.c.  Bootstrapped and tested on powerpc64le-linux-gnu,
committed.

Thanks!
Bill


Make rs6000_invalid_builtin static.

2019-12-02  Bill Schmidt  

* config/rs6000/rs6000-call.c (rs6000_invalid_builtin): Make
static.
* config/rs6000/rs6000-internal.h (rs6000_invalid_builtin): Remove
decl.


diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 7280a4ed9c8..9c9da09af5e 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -5087,7 +5087,7 @@ rs6000_builtin_is_supported_p (enum rs6000_builtins 
fncode)
 /* Raise an error message for a builtin function that is called without the
appropriate target options being set.  */
 
-void

+static void
 rs6000_invalid_builtin (enum rs6000_builtins fncode)
 {
   size_t uns_fncode = (size_t) fncode;
diff --git a/gcc/config/rs6000/rs6000-internal.h 
b/gcc/config/rs6000/rs6000-internal.h
index baccfb3f887..51eb3e053cf 100644
--- a/gcc/config/rs6000/rs6000-internal.h
+++ b/gcc/config/rs6000/rs6000-internal.h
@@ -138,7 +138,6 @@ extern void rs6000_output_mi_thunk (FILE *file,
tree function);
 extern bool rs6000_output_addr_const_extra (FILE *file, rtx x);
 extern bool rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi);
-extern void rs6000_invalid_builtin (enum rs6000_builtins fncode);
 extern tree rs6000_build_builtin_va_list (void);
 extern void rs6000_va_start (tree valist, rtx nextarg);
 extern tree rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,



[PATCH] rs6000: Fix PR92093

2019-10-15 Thread Bill Schmidt

The test case added for PR91275 fails on big-endian because 
__builtin_crypto_vpmsumd
is not a bi-endian intrinsic; source code must account for endian differences 
when
calling this intrinsic.  Fixed this and a type issue that only shows up on 
32-bit.
I thought I had previously tested this on P8 BE, but clearly not.

Tested on powerpc64-unknown-linux-gnu, committed as obvious.  Will backport 
soon.

Thanks,
Bill


2019-10-15  Bill Schmidt  

PR target/92093
* gcc.target/powerpc/pr91275.c: Fix type and endian issues.


Index: gcc/testsuite/gcc.target/powerpc/pr91275.c
===
--- gcc/testsuite/gcc.target/powerpc/pr91275.c  (revision 276968)
+++ gcc/testsuite/gcc.target/powerpc/pr91275.c  (working copy)
@@ -10,12 +10,17 @@ int main() {
   const unsigned long long r0l = 0x8e7dfceac070e3a0;
   vector unsigned long long r0 = (vector unsigned long long) {r0l, 0}, v;
   const vector unsigned long long pd
-= (vector unsigned long) {0xc2LLU << 56, 0};
+= (vector unsigned long long) {0xc2LLU << 56, 0};
 
   v = __builtin_crypto_vpmsumd ((vector unsigned long long) {r0[0], 0}, pd);
 
+#if __LITTLE_ENDIAN__

   if (v[0] != 0x4000 || v[1] != 0x65bd7ab605a4a8ff)
 __builtin_abort ();
+#else
+  if (v[1] != 0x4000 || v[0] != 0x65bd7ab605a4a8ff)
+__builtin_abort ();
+#endif
 
   return 0;

 }



[PATCH] rs6000: Fix PR91275

2019-09-30 Thread Bill Schmidt

Hi,

PR91275 observes that __builtin_crypto_vpmsumd fails to work properly
with -O1 or higher with -mcpu=power8.  That combination spells swap
optimization.  Indeed, all vpmsum* instructions were being accepted
as swappable operations.  This is correct for all of them but vpmsumd,
which creates a 128-bit result.

The -std=gnu11 in the testcase is there to avoid errors about long long
not being accepted with pure ANSI.  The "11" part is arbitrary.  The
testcase is modified from the original bug report.

This patch disallows swap optimization in the presence of vpmsumd.
Bootstrapped and tested on powerpc64le-unknown-linux-gnu.  Is this okay
for trunk, and for backport to all active branches after an appropriate
waiting period?

Thanks,
Bill


[gcc]

2019-09-30  Bill Schmidt  

* config/rs6000/rs6000-p8swap.c (rtx_is_swappable_p): Don't swap
vpmsumd.

[gcc/testsuite]

2019-09-30  Bill Schmidt  

* gcc.target/powerpc/pr91275.c: New.


Index: gcc/config/rs6000/rs6000-p8swap.c
===
--- gcc/config/rs6000/rs6000-p8swap.c   (revision 276360)
+++ gcc/config/rs6000/rs6000-p8swap.c   (working copy)
@@ -791,6 +791,11 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
  case UNSPEC_REDUC_PLUS:
  case UNSPEC_REDUC:
return 1;
+ case UNSPEC_VPMSUM:
+   /* vpmsumd is not swappable, but vpmsum[bhw] are.  */
+   if (GET_MODE (op) == V2DImode)
+ return 0;
+   break;
  }
   }
 
Index: gcc/testsuite/gcc.target/powerpc/pr91275.c

===
--- gcc/testsuite/gcc.target/powerpc/pr91275.c  (nonexistent)
+++ gcc/testsuite/gcc.target/powerpc/pr91275.c  (working copy)
@@ -0,0 +1,21 @@
+/* Test that we generate vpmsumd correctly without a swap error.  */
+
+/* { dg-do run { target { p8vector_hw } } } */
+/* { dg-options "-O2 -std=gnu11" } */
+
+#include <altivec.h>
+
+int main() {
+
+  const unsigned long long r0l = 0x8e7dfceac070e3a0;
+  vector unsigned long long r0 = (vector unsigned long long) {r0l, 0}, v;
+  const vector unsigned long long pd
+= (vector unsigned long) {0xc2LLU << 56, 0};
+
+  v = __builtin_crypto_vpmsumd ((vector unsigned long long) {r0[0], 0}, pd);
+
+  if (v[0] != 0x4000 || v[1] != 0x65bd7ab605a4a8ff)
+__builtin_abort ();
+
+  return 0;
+}



Re: [PATCH, rs6000] Update powerpc vector load builtins with PURE attribute

2019-09-26 Thread Bill Schmidt

On 9/26/19 12:00 PM, Segher Boessenkool wrote:

Hi Will,

On Thu, Sep 26, 2019 at 10:40:29AM -0500, will schmidt wrote:

Update our (rs6000) vector load built-ins with the PURE attribute.  These
were previously given the MEM attribute, which meant that redundant loads
surrounding the built-in calls could not be eliminated in earlier passes
since they were defined as having the potential to touch memory.
2019-09-26  Will Schmidt 
* config/rs6000/rs6000-builtin.def: ( LVSL LVSR LVEBX LVEHX

  ^--- stray space

Please put commas between the items, too?

The patch is okay for trunk.  Thank you!

I wonder whether we should also consider a backport to GCC 9, which is when
we started expanding these built-ins earlier.  Thoughts?


Bill


Segher


Re: [PATCH] Fix PR91790

2019-09-19 Thread Bill Schmidt



On 9/19/19 1:34 PM, Segher Boessenkool wrote:

Hi!

On Tue, Sep 17, 2019 at 09:45:54AM +0200, Richard Biener wrote:

The following fixes an old vectorizer issue with realignment support
(thus only powerpc is affected) and BB vectorization.  The realignment
token is set up from the wrong data-ref which causes an SSA verification
failure but in other circumstances might simply generate wrong code.

Bootstrap running on x86_64-unknown-linux-gnu, I'll install this
as obvious on trunk.

PPC folks - you know best how to appropriately test a target
where we use the re-alignment optimization.  IIRC on later
powerpc hardware this isn't exercised at all since we can use
unaligned accesses.

The issue is at least present on the GCC 9 branch as well but I'd
appreciate testing where it exercises the path before considering
a backport.

Is there a testcase?



Richard, can you turn the PR's reported test into a torture test case?  
We post P7 big-endian results frequently to gcc-testresults, and this 
bug hasn't fired on anything there, so it's not covered by existing 
tests.  Nothing has turned up on the testers since your patch went in, 
so having the new test added should be sufficient, I'd think.  P7 or 
older running big-endian is what's needed to test realignment support.


Thanks,

Bill



You can use -malign-natural to get stricter alignment requirements,
that might help.

Cc:ing Bill, this is vectorizer :-)


Segher


Re: [PATCH], Patch #3 of 10, Add prefixed addressing support

2019-08-15 Thread Bill Schmidt
On 8/14/19 5:06 PM, Michael Meissner wrote:
> This patch adds prefixed memory support to all offsettable instructions.
>
> Unlike previous versions of the patch, this patch combines all of the
> modifications for addressing to one patch.  Previously, I had 3 separate
> patches (one for PADDI, one for scalar types, and one for vector types).
>
> 2019-08-14   Michael Meissner  
>
>   * config/rs6000/predicates.md (add_operand): Add support for the
>   PADDI instruction.
>   (non_add_cint_operand): Add support for the PADDI instruction.
>   (lwa_operand): Add support for the prefixed PLWA instruction.
>   * config/rs6000/rs6000.c (rs6000_hard_regno_mode_ok_uncached):
>   Only treat modes < 16 bytes as scalars.
>   (rs6000_debug_print_mode): Print whether the mode supports
>   prefixed addressing.
>   (setup_insn_form): Enable prefixed addressing for all modes whose
>   default instruction form includes offset addressing.
>   (num_insns_constant_gpr): Add support for the PADDI instruction.
>   (quad_address_p): Add support for prefixed addressing.
>   (mem_operand_gpr): Add support for prefixed addressing.
>   (mem_operand_ds_form): Add support for prefixed addressing.
>   (rs6000_legitimate_offset_address_p): Add support for prefixed
>   addressing.
>   (rs6000_legitimate_address_p): Add support for prefixed
>   addressing.
>   (rs6000_mode_dependent_address): Add support for prefixed
>   addressing.
>   (rs6000_rtx_costs): Make PADDI cost the same as ADDI or ADDIS.
>   * config/rs6000/rs6000.md (add3): Add support for PADDI.
>   (movsi_internal1): Add support for prefixed addressing, and using
>   PADDI to load up large integers.
>   (movsi splitter): Do not split up a PADDI instruction.
>   (mov_64bit_dm): Add support for prefixed addressing.
>   (movtd_64bit_nodm): Add support for prefixed addressing.
>   (movdi_internal64): Add support for prefixed addressing, and using
>   PADDI to load up large integers.
>   (movdi splitter): Update comment about PADDI.
>   (stack_protect_setdi): Add support for prefixed addressing.
>   (stack_protect_testdi): Add support for prefixed addressing.
>   * config/rs6000/vsx.md (vsx_mov_64bit): Add support for
>   prefixed addressing.
>   (vsx_extract___load): Add support for prefixed
>   addressing.
>   (vsx_extract___load): Add support for prefixed
>   addressing.
>
> Index: gcc/config/rs6000/predicates.md
> ===
> --- gcc/config/rs6000/predicates.md   (revision 274174)
> +++ gcc/config/rs6000/predicates.md   (working copy)
> @@ -839,7 +839,8 @@
>  (define_predicate "add_operand"
>(if_then_else (match_code "const_int")
>  (match_test "satisfies_constraint_I (op)
> -  || satisfies_constraint_L (op)")
> +  || satisfies_constraint_L (op)
> +  || satisfies_constraint_eI (op)")
>  (match_operand 0 "gpc_reg_operand")))
>
>  ;; Return 1 if the operand is either a non-special register, or 0, or -1.
> @@ -852,7 +853,8 @@
>  (define_predicate "non_add_cint_operand"
>(and (match_code "const_int")
> (match_test "!satisfies_constraint_I (op)
> - && !satisfies_constraint_L (op)")))
> + && !satisfies_constraint_L (op)
> + && !satisfies_constraint_eI (op)")))
>
>  ;; Return 1 if the operand is a constant that can be used as the operand
>  ;; of an AND, OR or XOR.
> @@ -933,6 +935,13 @@
>  return false;
>
>addr = XEXP (inner, 0);
> +
> +  /* The LWA instruction uses the DS-form format where the bottom two bits of
> + the offset must be 0.  The prefixed PLWA does not have this
> + restriction.  */
> +  if (prefixed_local_addr_p (addr, mode, INSN_FORM_DS))
> +return true;
> +
>if (GET_CODE (addr) == PRE_INC
>|| GET_CODE (addr) == PRE_DEC
>|| (GET_CODE (addr) == PRE_MODIFY
> Index: gcc/config/rs6000/rs6000.c
> ===
> --- gcc/config/rs6000/rs6000.c(revision 274175)
> +++ gcc/config/rs6000/rs6000.c(working copy)
> @@ -1828,7 +1828,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, mac
>
>if (ALTIVEC_REGNO_P (regno))
>   {
> -   if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
> +   if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
>   return 0;

Unrelated change?  I don't quite understand why it was changed, either. 
Is this to do with vector_pair support?  If so, maybe it belongs with a
different patch?
>
> return ALTIVEC_REGNO_P (last_regno);
> @@ -2146,6 +2146,11 @@ rs6000_debug_print_mode (ssize_t m)
>rs6000_debug_insn_form (reg_addr[m].insn_form[RELOAD_REG_FPR]),
>rs6000_debug_insn_form (reg_addr[m].insn_form[RELOAD_REG_VMX]));
>
> +  if 

Re: [PATCH], Patch #1 replacement (fix issues with future TLS patches)

2019-08-15 Thread Bill Schmidt
Hi Mike, just a couple of points from me...

On 8/15/19 4:19 PM, Michael Meissner wrote:


> Index: gcc/config/rs6000/rs6000.c
> ===
> --- gcc/config/rs6000/rs6000.c(revision 274172)
> +++ gcc/config/rs6000/rs6000.c(working copy)
> @@ -369,8 +369,11 @@ struct rs6000_reg_addr {
>enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR.  */
>enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX.  */
>enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR.  */
> +  enum insn_form default_insn_form;  /* Default format for offsets.  */
> +  enum insn_form insn_form[(int)N_RELOAD_REG]; /* Register insn format.  */
>addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
>bool scalar_in_vmx_p;  /* Scalar value can go in VMX.  
> */
> +  bool prefixed_memory_p;/* We can use prefixed memory.  */
>  };
>
>  static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
> @@ -2053,6 +2056,28 @@ rs6000_debug_vector_unit (enum rs6000_ve
>return ret;
>  }
>
> +/* Return a character that can be printed out to describe an instruction
> +   format.  */
> +
> +DEBUG_FUNCTION char
> +rs6000_debug_insn_form (enum insn_form iform)
> +{
> +  char ret;
> +
> +  switch (iform)
> +{
> +case INSN_FORM_UNKNOWN:  ret = '-'; break;
> +case INSN_FORM_D:ret = 'd'; break;
> +case INSN_FORM_DS:   ret = 's'; break;
> +case INSN_FORM_DQ:   ret = 'q'; break;
> +case INSN_FORM_X:ret = 'x'; break;
> +case INSN_FORM_PREFIXED: ret = 'p'; break;
> +default: ret = '?'; break;
> +}
> +
> +  return ret;
> +}
> +
>  /* Inner function printing just the address mask for a particular reload
> register class.  */
>  DEBUG_FUNCTION char *
> @@ -2115,6 +2140,12 @@ rs6000_debug_print_mode (ssize_t m)
>  fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
>rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
>
> +  fprintf (stderr, "  Format: %c:%c%c%c",
> +  rs6000_debug_insn_form (reg_addr[m].default_insn_form),
> +  rs6000_debug_insn_form (reg_addr[m].insn_form[RELOAD_REG_GPR]),
> +  rs6000_debug_insn_form (reg_addr[m].insn_form[RELOAD_REG_FPR]),
> +  rs6000_debug_insn_form (reg_addr[m].insn_form[RELOAD_REG_VMX]));
> +
>if ((reg_addr[m].reload_store != CODE_FOR_nothing)
>|| (reg_addr[m].reload_load != CODE_FOR_nothing))
>  {
> @@ -2668,6 +2699,153 @@ rs6000_setup_reg_addr_masks (void)
>  }
>  }
>
> +/* Set up the instruction format for each mode and register type from the
> +   addr_mask.  */
> +
> +static void
> +setup_insn_form (void)
> +{
> +  for (ssize_t m = 0; m < NUM_MACHINE_MODES; ++m)
> +{
> +  machine_mode scalar_mode = (machine_mode) m;
> +
> +  /* Convert complex and IBM double double/_Decimal128 into their scalar
> +  parts that the registers will be split into for doing load or
> +  store.  */
> +  if (COMPLEX_MODE_P (scalar_mode))
> + scalar_mode = GET_MODE_INNER (scalar_mode);
> +
> +  if (FLOAT128_2REG_P (scalar_mode))
> + scalar_mode = DFmode;
> +
> +  for (ssize_t rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; 
> rc++)
> + {
> +   machine_mode single_reg_mode = scalar_mode;
> +   size_t msize = GET_MODE_SIZE (scalar_mode);
> +   addr_mask_type addr_mask = reg_addr[scalar_mode].addr_mask[rc];
> +   enum insn_form iform = INSN_FORM_UNKNOWN;
> +
> +   /* Is the mode permitted in the GPR/FPR/Altivec registers?  */
> +   if ((addr_mask & RELOAD_REG_VALID) != 0)

To help with readability and maintainability, may I suggest factoring
the following into a separate function...
> + {
> +   /* The addr_mask does not have the offsettable or indexed bits
> +  set for modes that are split into multiple registers (like
> +  IFmode).  It doesn't need this set, since typically by time it
> +  is used in secondary reload, the modes are split into
> +  component parts.
> +
> +  The instruction format however can be used earlier in the
> +  compilation, so we need to setup what kind of instruction can
> +  be generated for the modes that are split.  */
> +   if ((addr_mask & (RELOAD_REG_MULTIPLE
> + | RELOAD_REG_OFFSET
> + | RELOAD_REG_INDEXED)) == RELOAD_REG_MULTIPLE)
> + {
> +   /* Multiple register types in GPRs depend on whether we can
> +  use DImode in a single register or SImode.  */
> +   if (rc == RELOAD_REG_GPR)
> + {
> +   if (TARGET_POWERPC64)
> + {
> +   gcc_assert ((msize % 8) == 0);
> +   single_reg_mode = DImode;
> +

<    2   3   4   5   6   7   8   9   10   11   >