Re: libgo patch committed: Add S/390 support to internal/cpu package

2019-02-15 Thread Jakub Jelinek
On Fri, Feb 15, 2019 at 08:59:29PM +0100, Matthias Klose wrote:
> On 15.02.19 15:52, Ian Lance Taylor wrote:
> > This patch by Robin Dapp adds S/390 support to the internal/cpu
> > package.  This partially addresses PR 89123.  I bootstrapped it on
> > x86_64-pc-linux-gnu, which means little.  Committed to mainline.
> 
> fails in the -m31 multilib variant with

Indeed.  Given that there is just
libgo/go/internal/cpu/cpu_s390x.go
libgo/go/internal/cpu/cpu_s390x_test.go
(note, no s390), I think the easiest fix is:

--- libgo/go/internal/cpu/cpu_gccgo.c.jj2019-02-16 07:57:27.882179972 
+0100
+++ libgo/go/internal/cpu/cpu_gccgo.c   2019-02-16 08:36:37.241900882 +0100
@@ -71,7 +71,7 @@ struct xgetbv_ret xgetbv(void) {
 
 #endif /* defined(__i386__) || defined(__x86_64__)  */
 
-#ifdef __s390__
+#ifdef __s390x__
 
 struct facilityList {
uint64_t bits[4];
@@ -184,4 +184,4 @@ struct queryResult klmdQuery() {
 return ret;
 }
 
-#endif /* defined(__s390__)  */
+#endif /* defined(__s390x__)  */

If cpu_s390.go is ever added, this can be changed again and there can be say
#ifdef __s390x__
#define LHI "lghi"
#else
#define LHI "lhi"
#endif
and replace "lghi ... in the inline asm with LHI "...

Jakub


[PATCH] Decrease {i386,sse}.md global state by 12KB

2019-02-15 Thread Jakub Jelinek
Hi!

This is something I've noticed in a s390 change I'll post soon (where it was
even completely unnecessary), but it applies to i386 backend too.
Seems we have lots of .bss global state, 66x 64-byte and 61x 128-byte long
static buffers.  Instead of doing
  static char buf[128];
  ...
  s{,n}printf (buf, ...);
  ...
  return buf;
in the insn templates we can do:
  char buf[128];
  ...
  s{,n}printf (buf, ...);
  ...
  output_asm_insn (buf, operands);
  return "";
and avoid that way the global state.  The only problem with that is
that final.c does something in between:
1) if return from the template is NULL, not this case
2) if return from the template is "#", not this case
3) if (targetm.asm_out.unwind_emit_before_insn
&& targetm.asm_out.unwind_emit)
  targetm.asm_out.unwind_emit (asm_out_file, insn);
   while cygming.h has
#define TARGET_ASM_UNWIND_EMIT  i386_pe_seh_unwind_emit
#define TARGET_ASM_UNWIND_EMIT_BEFORE_INSN  false
   it is ok too (and other i386 subtargets don't do either,
   so unwind_emit_before_insn is true (the default) and unwind_emit
   NULL
4) rtx_call_insn *call_insn = dyn_cast  (insn);
if (call_insn != NULL)
   that is for calls only, the patch doesn't change any calls
Those 4 spots are in between get_insn_template and
output_asm_insn (templ, recog_data.operand);
which starts with:
  /* An insn may return a null string template
 in a case where no assembler code is needed.  */
  if (*templ == 0)
return;
so I think the patch doesn't make it more costly, there is just
one output_asm_insn extra call and the old one will return immediately.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-02-16  Jakub Jelinek  

* config/i386/i386.md (*movqi_internal): Remove static from
buf variable.  Use output_asm_insn (buf, operands); return "";
instead of return buf;.
* config/i386/sse.md (_andnot3,
*3, *andnot3, *andnottf3, *3,
*tf3, 3): Likewise.

--- gcc/config/i386/i386.md.jj  2019-02-12 21:48:53.183072497 +0100
+++ gcc/config/i386/i386.md 2019-02-15 23:25:36.198589133 +0100
@@ -2531,7 +2531,7 @@ (define_insn "*movqi_internal"
"Q ,R,r,n,m,q,rn, m,qn,r,k,k,k,m,C,BC"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
-  static char buf[128];
+  char buf[128];
   const char *ops;
   const char *suffix;
 
@@ -2564,7 +2564,8 @@ (define_insn "*movqi_internal"
   suffix = (get_attr_mode (insn) == MODE_HI) ? "w" : "b";
 
   snprintf (buf, sizeof (buf), ops, suffix);
-  return buf;
+  output_asm_insn (buf, operands);
+  return "";
 
 case TYPE_MSKLOG:
   if (operands[1] == const0_rtx)
--- gcc/config/i386/sse.md.jj   2019-02-14 08:06:39.446519415 +0100
+++ gcc/config/i386/sse.md  2019-02-15 23:28:54.305366640 +0100
@@ -3198,7 +3198,7 @@ (define_insn "_andnot3"
 {
-  static char buf[128];
+  char buf[128];
   const char *ops;
   const char *suffix;
 
@@ -3233,7 +3233,8 @@ (define_insn "_andnot3_andnot3_andnot3|%%0, 
%%1, %%2}",
ops, suffix);
-  return buf;
+  output_asm_insn (buf, operands);
+  return "";
 }
   [(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -3314,7 +3316,7 @@ (define_insn "*3"
   "TARGET_SSE && 
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
 {
-  static char buf[128];
+  char buf[128];
   const char *ops;
   const char *suffix;
 
@@ -3349,7 +3351,8 @@ (define_insn "*3"
 }
 
   snprintf (buf, sizeof (buf), ops, suffix);
-  return buf;
+  output_asm_insn (buf, operands);
+  return "";
 }
   [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
(set_attr "type" "sselog")
@@ -3378,7 +3381,7 @@ (define_insn "*3"
  (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
   "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
 {
-  static char buf[128];
+  char buf[128];
   const char *ops;
   const char *suffix;
 
@@ -3395,7 +3398,8 @@ (define_insn "*3"
   snprintf (buf, sizeof (buf),
   "v%s%s\t{%%2, %%1, %%0|%%0, 
%%1, %%2}",
   ops, suffix);
-  return buf;
+  output_asm_insn (buf, operands);
+  return "";
 }
   [(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -3449,7 +3453,7 @@ (define_insn "*andnot3"
(match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
   "SSE_FLOAT_MODE_P (mode)"
 {
-  static char buf[128];
+  char buf[128];
   const char *ops;
   const char *suffix
 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "";
@@ -3485,7 +3489,8 @@ (define_insn "*andnot3"
 }
 
   snprintf (buf, sizeof (buf), ops, suffix);
-  return buf;
+  output_asm_insn (buf, operands);
+  return "";
 }
   [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
(set_attr "type" "sselog")
@@ -3516,7 +3521,7 @@ (define_insn "*andnottf3"
  (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
   "TARGET_SSE"
 {
-  static char buf[128];
+  char buf[128];
   const char *ops;
   const char *tmp
 = 

[PATCH] Improve mem = STRING_CST expansion (PR rtl-optimization/66152)

2019-02-15 Thread Jakub Jelinek
Hi!

On the following testcase, we've regressed in bar since 8.x, in 8.x
store merging came up with mem = 64-bit constant, but starting with the
change to transform {0,1,2,3,4,5,6,7} char initializers into STRING_CSTs,
we don't do that anymore.  The mem = STRING_CST expansion can do that,
but only if there are no embedded zeros.  The following patch improves
it even for embedded zeros, by using a new callback for the
can_store_by_pieces/store_by_pieces calls which knows how to handle
STRING_CST.  We don't need strlen in that case, can use TREE_STRING_CST
instead.  Additionally, if the STRING_CST is slightly shorter than the
destination region, it might generate better code by trying to
store_by_pieces it all in one go (bytes from STRING_CST until the last one,
followed by artificially added zeros) and only if that doesn't seem to be
beneficial (e.g. very small STRING_CST followed by kilobytes of zeros)
goes for the store_by_pieces of STRING_CST (rounded up to next
STORE_MAX_PIECES) followed by a clear_storage.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-02-16  Jakub Jelinek  

PR rtl-optimization/66152
* builtins.h (c_readstr): Declare.
* builtins.c (c_readstr): Remove forward declaration.  Add
null_terminated_p argument, if false, read all bytes from the
string instead of stopping after '\0'.
* expr.c (string_cst_read_str): New function.
(store_expr): Use string_cst_read_str instead of
builtin_strncpy_read_str.  Try to store by pieces the whole
exp_len first, and only if that fails, split it up into
store by pieces followed by clear_storage.  Formatting fix.

* gcc.target/i386/pr66152.c: New test.

--- gcc/builtins.h.jj   2019-02-14 08:06:37.878546571 +0100
+++ gcc/builtins.h  2019-02-15 11:33:50.208180171 +0100
@@ -103,6 +103,7 @@ struct c_strlen_data
 };
 
 extern tree c_strlen (tree, int, c_strlen_data * = NULL, unsigned = 1);
+extern rtx c_readstr (const char *, scalar_int_mode, bool = true);
 extern void expand_builtin_setjmp_setup (rtx, rtx);
 extern void expand_builtin_setjmp_receiver (rtx);
 extern void expand_builtin_update_setjmp_buf (rtx);
--- gcc/builtins.c.jj   2019-02-11 20:58:48.509965578 +0100
+++ gcc/builtins.c  2019-02-15 11:37:00.046029652 +0100
@@ -95,7 +95,6 @@ builtin_info_type builtin_info[(int)END_
 /* Non-zero if __builtin_constant_p should be folded right away.  */
 bool force_folding_builtin_constant_p;
 
-static rtx c_readstr (const char *, scalar_int_mode);
 static int target_char_cast (tree, char *);
 static rtx get_memory_rtx (tree, tree);
 static int apply_args_size (void);
@@ -802,10 +801,14 @@ c_strlen (tree src, int only_value, c_st
 }
 
 /* Return a constant integer corresponding to target reading
-   GET_MODE_BITSIZE (MODE) bits from string constant STR.  */
-
-static rtx
-c_readstr (const char *str, scalar_int_mode mode)
+   GET_MODE_BITSIZE (MODE) bits from string constant STR.  If
+   NULL_TERMINATED_P, reading stops after '\0' character, all further ones
+   are assumed to be zero, otherwise it reads as many characters
+   as needed.  */
+
+rtx
+c_readstr (const char *str, scalar_int_mode mode,
+  bool null_terminated_p/*=true*/)
 {
   HOST_WIDE_INT ch;
   unsigned int i, j;
@@ -830,7 +833,7 @@ c_readstr (const char *str, scalar_int_m
j = j + UNITS_PER_WORD - 2 * (j % UNITS_PER_WORD) - 1;
   j *= BITS_PER_UNIT;
 
-  if (ch)
+  if (ch || !null_terminated_p)
ch = (unsigned char) str[i];
   tmp[j / HOST_BITS_PER_WIDE_INT] |= ch << (j % HOST_BITS_PER_WIDE_INT);
 }
--- gcc/expr.c.jj   2019-02-08 20:00:40.309835608 +0100
+++ gcc/expr.c  2019-02-15 11:37:18.715719809 +0100
@@ -5453,6 +5453,30 @@ emit_storent_insn (rtx to, rtx from)
   return maybe_expand_insn (code, 2, ops);
 }
 
+/* Helper function for store_expr storing of STRING_CST.  */
+
+static rtx
+string_cst_read_str (void *data, HOST_WIDE_INT offset, scalar_int_mode mode)
+{
+  tree str = (tree) data;
+
+  gcc_assert (offset >= 0);
+  if (offset >= TREE_STRING_LENGTH (str))
+return const0_rtx;
+
+  if ((unsigned HOST_WIDE_INT) offset + GET_MODE_SIZE (mode)
+  > (unsigned HOST_WIDE_INT) TREE_STRING_LENGTH (str))
+{
+  char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
+  size_t l = TREE_STRING_LENGTH (str) - offset;
+  memcpy (p, TREE_STRING_POINTER (str) + offset, l);
+  memset (p + l, '\0', GET_MODE_SIZE (mode) - l);
+  return c_readstr (p, mode, false);
+}
+
+  return c_readstr (TREE_STRING_POINTER (str) + offset, mode, false);
+}
+
 /* Generate code for computing expression EXP,
and storing the value into TARGET.
 
@@ -5472,7 +5496,7 @@ emit_storent_insn (rtx to, rtx from)
 
 rtx
 store_expr (tree exp, rtx target, int call_param_p,
-   bool nontemporal, bool reverse)
+   bool nontemporal, bool reverse)
 {
   rtx temp;
   rtx alt_rtl = NULL_RTX;
@@ -5606,36 +5630,32 @@ 

[PATCH] Teach evrp that main's argc argument is always non-negative for C family (PR tree-optimization/89350)

2019-02-15 Thread Jakub Jelinek
Hi!

Both the C and C++ standard guarantee that the argc argument to main is
non-negative, the following patch sets (or adjusts) the corresponding
SSA_NAME_RANGE_INFO.  While main is just one, with IPA VRP it can also
propagate etc.  I had to change one testcase because it started optimizing
it better (the test has been folded away), so no sinking was done.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-02-16  Jakub Jelinek  

PR tree-optimization/89350
* gimple-ssa-evrp.c: Include tree-dfa.h and langhooks.h.
(maybe_set_main_argc_range): New function.
(execute_early_vrp): Call it.

* gcc.dg/tree-ssa/vrp122.c: New test.
* gcc.dg/tree-ssa/ssa-sink-3.c (main): Rename to ...
(bar): ... this.

--- gcc/gimple-ssa-evrp.c.jj2019-01-01 12:37:15.712998659 +0100
+++ gcc/gimple-ssa-evrp.c   2019-02-15 09:49:56.768534668 +0100
@@ -41,6 +41,8 @@ along with GCC; see the file COPYING3.
 #include "tree-cfgcleanup.h"
 #include "vr-values.h"
 #include "gimple-ssa-evrp-analyze.h"
+#include "tree-dfa.h"
+#include "langhooks.h"
 
 class evrp_folder : public substitute_and_fold_engine
 {
@@ -291,6 +293,39 @@ evrp_dom_walker::cleanup (void)
   evrp_folder.vr_values->cleanup_edges_and_switches ();
 }
 
+/* argc in main in C/C++ is guaranteed to be non-negative.  Adjust the
+   range info for it.  */
+
+static void
+maybe_set_main_argc_range (void)
+{
+  if (!DECL_ARGUMENTS (current_function_decl)
+  || !(lang_GNU_C () || lang_GNU_CXX () || lang_GNU_OBJC ()))
+return;
+
+  tree argc = DECL_ARGUMENTS (current_function_decl);
+  if (TYPE_MAIN_VARIANT (TREE_TYPE (argc)) != integer_type_node)
+return;
+
+  argc = ssa_default_def (cfun, argc);
+  if (argc == NULL_TREE)
+return;
+
+  wide_int min, max;
+  value_range_kind kind = get_range_info (argc, , );
+  if (kind == VR_VARYING)
+{
+  min = wi::zero (TYPE_PRECISION (integer_type_node));
+  max = wi::to_wide (TYPE_MAX_VALUE (integer_type_node));
+}
+  else if (kind == VR_RANGE && wi::neg_p (min) && !wi::neg_p (max))
+min = wi::zero (TYPE_PRECISION (integer_type_node));
+  else
+return;
+
+  set_range_info (argc, VR_RANGE, min, max);
+}
+
 /* Main entry point for the early vrp pass which is a simplified non-iterative
version of vrp where basic blocks are visited in dominance order.  Value
ranges discovered in early vrp will also be used by ipa-vrp.  */
@@ -307,6 +342,10 @@ execute_early_vrp ()
   scev_initialize ();
   calculate_dominance_info (CDI_DOMINATORS);
 
+  /* argc in main in C/C++ is guaranteed to be non-negative.  */
+  if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
+maybe_set_main_argc_range ();
+
   /* Walk stmts in dominance order and propagate VRP.  */
   evrp_dom_walker walker;
   walker.walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
--- gcc/testsuite/gcc.dg/tree-ssa/vrp122.c.jj   2019-02-15 09:54:07.016357759 
+0100
+++ gcc/testsuite/gcc.dg/tree-ssa/vrp122.c  2019-02-15 09:53:59.299486561 
+0100
@@ -0,0 +1,14 @@
+/* PR tree-optimization/89350 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-not "link_error \\\(" "optimized" } } */
+
+extern void link_error (void);
+
+int
+main (int argc, const char *argv[])
+{
+  if (argc < 0)
+link_error ();
+  return 0;
+}
--- gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-3.c.jj   2015-05-29 
15:03:44.947546711 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-3.c  2019-02-16 08:04:29.951126611 
+0100
@@ -2,7 +2,7 @@
 /* { dg-options "-O2 -fdump-tree-sink-stats" } */
 extern void foo(int a);
 int
-main (int argc)
+bar (int argc)
 {
   int a;
   a = argc + 1;

Jakub


[Bug preprocessor/89373] macro expansion not counting braces correctly

2019-02-15 Thread pinskia at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89373

Andrew Pinski  changed:

   What|Removed |Added

 Status|UNCONFIRMED |RESOLVED
  Component|c   |preprocessor
 Resolution|--- |INVALID

--- Comment #1 from Andrew Pinski  ---
In the C preprocessor without a '(' everything after the ',' is the next
argument.

[Bug c/89373] New: macro expansion not counting braces correctly

2019-02-15 Thread mdblack98 at yahoo dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89373

Bug ID: 89373
   Summary: macro expansion not counting braces correctly
   Product: gcc
   Version: unknown
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: c
  Assignee: unassigned at gcc dot gnu.org
  Reporter: mdblack98 at yahoo dot com
  Target Milestone: ---

include 

#define MYMACRO1(m) MYLIST;

#define MYMACRO(r,m) \
   MYMACRO##r(m) 


#define MYLIST {1,2,3,4,5}

// too many args -- does not allow for braces such as used for array
initialization
int ii[32] = MYMACRO(1,{1,2,3,4,5});

// too many args -- does not allow for braces such as used for array
initialization
int jj[32] = MYMACRO(1,MYLIST);

// this works
int kk[32] = MYLIST;

int
main()
{
  // all 3 of these should print 1,2,3,4,5
  for(int i=0;i<5;++i) printf("ii %d,%d\n",i,ii[i]);
  for(int i=0;i<5;++i) printf("jj %d,%d\n",i,jj[i]);
  for(int i=0;i<5;++i) printf("kk %d,%d\n",i,kk[i]);
  return 0;
}

Re: Parallelize the compilation using Threads

2019-02-15 Thread Oleg Endo
On Tue, 2019-02-12 at 15:12 +0100, Richard Biener wrote:
> On Mon, Feb 11, 2019 at 10:46 PM Giuliano Belinassi
>  wrote:
> > 
> > Hi,
> > 
> > I was just wondering what API should I use to spawn threads and
> > control
> > its flow. Should I use OpenMP, pthreads, or something else?
> > 
> > My point what if we break compatibility with something. If we use
> > OpenMP, I'm afraid that we will break compatibility with compilers
> > not
> > supporting it. On the other hand, If we use pthread, we will break
> > compatibility with non-POSIX systems (Windows).
> 
> I'm not sure we have a thread abstraction for the host - we do have
> one for the target via libgcc gthr.h though.  For prototyping I'd
> resort
> to this same interface and fixup the host != target case as needed.

Or maybe, in the year 2019, we could assume that most c++ compilers
which are used to compile GCC support c++11 and come with an adequate
 implementation...  yeah, I know, sounds jacked :)

Cheers,
Oleg



Demoussage de toiture, couverture, ramonage

2019-02-15 Thread Société d’artisans LEON
Découvrez la société d’artisansLEONsituée à NOZAY en Essonne. Nous 
sommes les spécialistes régionaux de la toiture. Nous exerçons dans ce domaine 
de père en fils depuis 4 générations. Les tuiles, liteaux, gouttières n’ont 
aucuns secrets pour nous, pas plus que le zinc ou l’étanchéisation.Notre équipe 
est uniquement familiale, ce qui explique NOS PRIX ABORDABLES, sans sacrifier 
la qualité!Quelques unes de nos prestations:- Ramonage- Couverture- 
Peinture- Isolation- Nettoyage et démoussage de toitures- Peinture- 
RavalementBien évidemment, nous avons la garantie décennale.A 
bientôt.Appelez-nous au 01.60.14.44.43Ecrivez àlobry. leon / a/ 
aliceadsl.frRm 453607913 RM910MaçonnerieSi vous souhaitez vous désinscrire de 
notre newsletter, vous pouvez répondre en indiquant le mot stopNous avons 
volontairement modifié la forme du numéro de téléphone pour des raisons liées à 
la bonne délivrance du messageE-mailing envoyé par FOP KUKSA KIEV 
-office 195, #24 Chavdar Str., Kyiv02140,Ukraine -reg. 
num. 2065000  049975

Re: C++ PATCH for c++/89217 - ICE with list-initialization in range-based for loop

2019-02-15 Thread Jason Merrill

On 2/11/19 6:03 PM, Marek Polacek wrote:

On Mon, Feb 11, 2019 at 01:43:36PM -0500, Jason Merrill wrote:

On 2/7/19 6:02 PM, Marek Polacek wrote:

Since r268321 we can call digest_init even in a template, when the compound
literal isn't instantiation-dependent.


Right.  And since digest_init modifies the CONSTRUCTOR in place, that means
the template trees are digested rather than the original parse trees that we
try to use.  If we're going to use digest_init, we should probably save
another CONSTRUCTOR with the original trees.


I tried unsharing the constructor and even its contents but only then did I
realize that this cannot work.


Why wouldn't going back to saving {*((struct S *) this)->r} work?


It's not digest_init that adds the problematic
INDIRECT_REF via convert_from_reference, it's instantiate_pending_templates
-> tsubst_expr -> ... -> finish_non_static_data_member.

So the problem isn't sharing the contents of the CONSTRUCTOR, but rather what
finish_non_static_data_member does with the

   {.r=(struct R &) (struct R *) ((struct S *) this)->r}

expression.  The same problem would appear even before r268321 changes if we
called tsubst_* twice on the CONSTRUCTOR above.


Yes, it sounds like there's a bug in that path as well.  Perhaps 
tsubst_copy_and_build/COMPONENT_REF should strip a REFERENCE_REF_P if t 
was already a reference.



Do you still think digest_init and/or finish_compound_literal need tweaking?


I imagine that saving post-digest trees might cause other problems, but 
perhaps not.  Perhaps we ought to move away more generally from trying 
to save the original parse trees for non-dependent expressions and 
messing with NON_DEPENDENT_EXPR.


Jason


Re: [PATCH] v2: Fix excess warnings from -Wtype-limits with location wrappers (PR c++/88680)

2019-02-15 Thread Jason Merrill

On 2/14/19 4:20 PM, David Malcolm wrote:

On Thu, 2019-02-14 at 17:32 +0100, Jakub Jelinek wrote:

On Thu, Feb 14, 2019 at 11:26:15AM -0500, David Malcolm wrote:

There's an asymmetry in the warning; it's looking for a comparison
of a
LHS expression against an RHS constant 0, spelled as "0".

If we fold_for_warn on the RHS, then that folding introduces a
warning
for expressions that aren't spelled as "0" but can be folded to 0,
e.g., with:

enum { FOO, BAR };


So, shouldn't it be made symmetric?  Check if one argument is literal
0
before folding, and only if it is, fold_for_warn the other argument?

Jakub


The reference to symmetry in my earlier email was somewhat
misleading, sorry.

The test happens after a canonicalization of the ordering happens
here, near the top of shorten_compare:

   /* If first arg is constant, swap the args (changing operation
  so value is preserved), for canonicalization.  Don't do this if
  the second arg is 0.  */

so this already gives us symmetry.

Here's an updated version of the patch which add the fold_for_warn in
a slightly later place, and adds a comment, and some more test cases.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.

OK for trunk?


OK.

Jason



Re: [C++ PATCH] preview: Fix braces around scalar initializer (C++/88572) Inbox x

2019-02-15 Thread Jason Merrill

On 2/14/19 7:09 PM, will wray wrote:

Thanks Jason.
Adding this 'else if' condition afterwards seems to work:

  else if (BRACE_ENCLOSED_INITIALIZER_P (CONSTRUCTOR_ELT
(stripped_init,0)->value))
{
   if (complain & tf_error)
  error ("too many braces around scalar initializer for
type %qT", type);
   init = error_mark_node;
 }

I'll regtest that and run through the rest of the reshape logic again.


I think the first_initializer_p check should be part of this condition 
rather than the C++98 condition.

What do you think about the fact that this patch now rejects empty
brace inits like int{{}}
that was previously accepted? It's a breaking change for any code that
was incorrectly doing that.


The change makes sense to me; I would hope that such code is rare.

Jason


On Thu, Feb 14, 2019 at 6:02 PM Jason Merrill  wrote:


On 2/12/19 6:04 PM, will wray wrote:

A proposed patch for Bug 88572 is attached to the bug report along
with a short description and Change Log (a link there gives a pretty
diff of the patch):

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88572#c15

I'd appreciate any review of this patch, as well as testing on more
platforms. The patch with updated tests passes for me on x86_64.

There's also test code in bug comment #1 that demonstrates SFINAE
based on the nesting of braces. It could also be added to the
testsuite - I'm not sure how to do that or if it is needed.



+ if (cxx_dialect < cxx11 || first_initializer_p)


I would expect this to miss the error in

struct A { int i; } a = {{{42}}};

I see that we end up complaining about this in convert_like_real because
implicit_conversion catches the problem here, but I think we ought to
catch it in reshape_init_r as well.  So, also complain if the element of
the CONSTRUCTOR is also BRACE_ENCLOSED_INITIALIZER_P.

Jason




Re: [PATCH] document __builtin_is_constant_evaluated

2019-02-15 Thread Sandra Loosemore

On 2/13/19 4:33 PM, Martin Sebor wrote:


Index: gcc/doc/extend.texi
===
--- gcc/doc/extend.texi (revision 268856)
+++ gcc/doc/extend.texi (working copy)
@@ -12890,6 +12890,22 @@ built-in in this case, because it has no opportuni
 optimization.
 @end deftypefn
 
+@deftypefn {Built-in Function} bool __builtin_is_constant_evaluated ()

+The @code{__builtin_is_constant_evaluated} function is available only
+in C++.  Its main use case is to determine whether a @code{constexpr}
+function is being called in a @code{constexpr} context.  A call to
+the function evaluates to a core constant expression with the value
+@code{true} if and only if it occurs within the evaluation of an expression
+or conversion that is manifestly constant-evaluated as defined in the C++
+standard.  Manifestly constant-evaluated contexts include constant-expressions,
+the conditions of @code{constexpr if} statements, constraint-expresions, and


s/expresions/expressions/


+initializers of variables usable in constant expressions.  The built-in is
+intended to be used by implementations of the @code{std::is_constant_evaluated}
+C++ function.  Programs should make use of the latter function rather than
+invoking the built-in directly.  For more details refer to the latest revision
+of the C++ standard.
+@end deftypefn
+
 @deftypefn {Built-in Function} long __builtin_expect (long @var{exp}, long 
@var{c})
 @opindex fprofile-arcs
 You may use @code{__builtin_expect} to provide the compiler with


I think this is generally reasonable (and I agree with the rationale for 
documenting this at all), but I'd like to see this rearranged and 
rephrased to put the most important point (it's an internal hook to 
implement std::is_constant_evaluated and shouldn't be called directly) 
before the technical details, with a paragraph break in between.


-Sandra


Re: [PATCH] document __has_attribute and __has_include

2019-02-15 Thread Sandra Loosemore

On 2/13/19 2:46 PM, Martin Sebor wrote:

The attached patch adds documentation for the __has_attribute (and
__has_cpp_attribute) and __has_include operators added in r215752.


Thanks!


I was a little unsure where to add this, whether the preprocessor
manual or the GCC manual, or both.  It seems that it belongs in
the preprocessor manual but since more users read the GCC manual,
it's likely to be overlooked there.


I think the preprocessor manual is the right place.  A while back I 
brought up the idea of consolidating the preprocessor docs into the GCC 
manual but the consensus seemed to be for retaining a separate 
preprocessor manual.


My comments on this patch are mostly trivial markup things.


@@ -3422,6 +3425,99 @@ condition succeeds after the original @samp{#if} a
 @samp{#else} is allowed after any number of @samp{#elif} directives, but
 @samp{#elif} may not follow @samp{#else}.
 
+@node __has_attribute

+@subsection __has_attribute


Please use @code markup in the @subsection.


+@cindex @code{__has_attribute}
+
+The special operator @code{__has_attribute (operand)} may be used in


@code{__has_attribute (@var{operand})}


+@samp{#if} and @samp{#elif} expressions to test whether the attribute
+referenced by its argument is recognized by GCC.  Using the operator
+in other contexts is not valid.  In C code, @var{operand} must be
+a valid identifier.  In C++ code, @var{operand} may be optionally
+introduced by the @code{attribute-scope::} prefix.


I think "attribute-scope" is not a literal part of the prefix, so

@code{@var{attribute-scope}::}


+The @code{attribute-scope} prefix identifies the ``namespace'' within


And @var markup here, too.


+which the attribute is recognized.  The scope of GCC attributes is
+@samp{gnu} or @samp{__gnu__}.  The operator by itself, without any


The @code{__has_attribute} operator by itself


+@var{operand} or parentheses, acts as a predefined macro so that support
+for it can be tested in portable code.  Thus, the recommended use of
+the operator is as follows:
+
+@smallexample
+#if defined __has_attribute
+#  if __has_attribute (nonnull)
+#define ATTR_NONNULL __attribute__ ((nonnull))
+#  endif
+#endif
+@end smallexample
+
+The first @samp{#if} test succeeds only when the operator is supported
+by the version of GCC (or another compiler) being used.  Only when that
+test succeeds is it valid to use @code{__has_attribute} as a preprocessor
+operator.  As a result, combining the two tests into a single expression as
+shown below would only be valid with a compiler that supports the operator
+but not with others that don't.
+
+@smallexample
+#if defined __has_attribute && __has_attribute (nonnull)   /* not portable */
+@dots{}
+#endif
+@end smallexample
+
+@node __has_cpp_attribute
+@subsection __has_cpp_attribute


@code markup in the @subsection title, again.


+@cindex @code{__has_cpp_attribute}
+
+The special operator @code{__has_cpp_attribute (operand)} may be used


@var{operand} markup again.


+in @samp{#if} and @samp{#elif} expressions in C++ code to test whether
+the attribute referenced by its argument is recognized by GCC.
+@code{__has_cpp_attribute (operand)} is equivalent to
+@code{__has_attribute (operand)} except that when @code{operand}


The 3 instances above too.


+designates a supported standard attribute it evaluates to an integer
+constant of the form @code{MM} indicating the year and month when
+the attribute was first introduced into the C++ standard.  For additional
+information including the dates of the introduction of current standard
+attributes, see 
@w{@uref{https://isocpp.org/std/standing-documents/sd-6-sg10-feature-test-recommendations/,
+SD-6: SG10 Feature Test Recommendations}}.
+
+@node __has_include
+@subsection __has_include


@code markup in title again


+@cindex @code{__has_include}
+ > +The special operator @code{__has_include (operand)} may be used in 

@samp{#if}

@var{operand}


+and @samp{#elif} expressions to test whether the header referenced by its
+@var{operand} can be included using the @samp{#include} directive.  Using
+the operator in other contexts is not valid.  The @var{operand} takes
+the same form as the file in the @samp{#include} directive (@xref{Include
+Syntax}) and evaluates to a nonzero value if the header can be included and
+to zero otherwise.  Note that that the ability to include a header doesn't
+imply that the header doesn't contain invalid constructs or @samp{#error}
+directives that would cause the preprocessor to fail.
+
+The @code{__has_include} operator by itself, without any @var{operand} or
+parentheses, acts as a predefined macro so that support for it can be tested
+in portable code.  Thus, the recommended use of the operator is as follows:
+
+@smallexample
+#if defined __has_include
+#  if __has_include ()
+#include 
+#  endif
+#endif
+@end smallexample
+
+The first @samp{#if} test succeeds only when the operator is supported
+by the version of GCC (or another 

Re: [PATCH doc] correct/expand -Wreturn-type

2019-02-15 Thread Sandra Loosemore

On 2/6/19 11:15 AM, Martin Sebor wrote:

[snip]
But whatever.  Attached is a change with the subsentences reversed.


This version of the patch is OK.

-Sandra


Re: [PATCH doc] correct/improve -Wmissing-attributes and -Wattribute-alias

2019-02-15 Thread Sandra Loosemore

On 2/6/19 9:16 AM, Martin Sebor wrote:

The manual documents the -Wno-missing-attributes form of the option
as if it was enabled by default, even though it's enabled by -Wall
(I can't get this -Wno- convention straight in my head).  I also
got private comments on the documentation of the option suggesting
to add cross-references, and to list the attributes
-Wattribute-alias considers (the same ones as -Wmissing-attributes).

The attached patch makes these changes.


I found the discussion of both options incomprehensible even with this 
patch.  :-(  The defaults are incorrect, there are typos, awkward 
wording and confusing paragraph organization, etc.  So I consulted the 
sources and came up with the attached alternative patch.  Can you review 
this for correctness and generally making sense?


-Sandra
2019-02-15  Sandra Loosemore  
	Martin Sebor  

	gcc/
	* c-family/c.opt (Wmissing-attributes): Clean up doc string.
	* common.opt (Wattribute-alias): Likewise.
	* doc/invoke.texi (Option Summary): List general form of
	-Wattribute-alias=.  List positive form of -Wmissing-attributes.
	(-Wmissing-attributes): Invert entry, rewrite and correct default.
	Add cross-references.
	(-Wattribute-alias): Rewrite and correct default.  Mention
	considered attributes (same as for -Wmissing-attributes).
Index: gcc/c-family/c.opt
===
--- gcc/c-family/c.opt	(revision 268948)
+++ gcc/c-family/c.opt	(working copy)
@@ -818,7 +818,7 @@ Warn on primary template declaration.
 Wmissing-attributes
 C ObjC C++ ObjC++ Var(warn_missing_attributes) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall)
 Warn about declarations of entities that may be missing attributes
-that related entities have been declared with it.
+that related entities have been declared with.
 
 Wmissing-format-attribute
 C ObjC C++ ObjC++ Warning Alias(Wsuggest-attribute=format)
Index: gcc/common.opt
===
--- gcc/common.opt	(revision 268948)
+++ gcc/common.opt	(working copy)
@@ -552,11 +552,11 @@ Warn about inappropriate attribute usage
 
 Wattribute-alias
 Common Alias(Wattribute_alias=, 1, 0) Warning
-Warn about type safety and similar errors and mismatches in attribute alias and related.
+Warn about type safety and similar errors and mismatches in declarations with alias attributes.
 
 Wattribute-alias=
 Common Joined RejectNegative UInteger Var(warn_attribute_alias) Init(1) Warning IntegerRange(0, 2)
-Warn about type safety and similar errors and mismatches in attribute alias and related.
+Warn about type safety and similar errors and mismatches in declarations with alias attributes.
 
 Wcannot-profile
 Common Var(warn_cannot_profile) Init(1) Warning
Index: gcc/doc/invoke.texi
===
--- gcc/doc/invoke.texi	(revision 268948)
+++ gcc/doc/invoke.texi	(working copy)
@@ -288,7 +288,7 @@ Objective-C and Objective-C++ Dialects}.
 -Walloc-zero  -Walloc-size-larger-than=@var{byte-size} @gol
 -Walloca  -Walloca-larger-than=@var{byte-size} @gol
 -Wno-aggressive-loop-optimizations  -Warray-bounds  -Warray-bounds=@var{n} @gol
--Wno-attributes  -Wno-attribute-alias @gol
+-Wno-attributes  -Wattribute-alias=@var{n}  @gol
 -Wbool-compare  -Wbool-operation @gol
 -Wno-builtin-declaration-mismatch @gol
 -Wno-builtin-macro-redefined  -Wc90-c99-compat  -Wc99-c11-compat @gol
@@ -322,7 +322,7 @@ Objective-C and Objective-C++ Dialects}.
 -Winvalid-pch  -Wlarger-than=@var{byte-size} @gol
 -Wlogical-op  -Wlogical-not-parentheses  -Wlong-long @gol
 -Wmain  -Wmaybe-uninitialized  -Wmemset-elt-size  -Wmemset-transposed-args @gol
--Wmisleading-indentation  -Wno-missing-attributes  -Wmissing-braces @gol
+-Wmisleading-indentation  -Wmissing-attributes  -Wmissing-braces @gol
 -Wmissing-field-initializers  -Wmissing-format-attribute @gol
 -Wmissing-include-dirs  -Wmissing-noreturn  -Wmissing-profile @gol
 -Wno-multichar  -Wmultistatement-macros  -Wnonnull  -Wnonnull-compare @gol
@@ -5056,7 +5056,7 @@ about the layout of the file that the di
 
 This warning is enabled by @option{-Wall} in C and C++.
 
-@item -Wno-missing-attributes
+@item -Wmissing-attributes
 @opindex Wmissing-attributes
 @opindex Wno-missing-attributes
 Warn when a declaration of a function is missing one or more attributes
@@ -5064,10 +5064,10 @@ that a related function is declared with
 affect the correctness or efficiency of generated code.  For example,
 the warning is issued for declarations of aliases that use attributes
 to specify less restrictive requirements than those of their targets.
-This typically represents a potential optimization oportunity rather
-than a hidden bug.  The @option{-Wattribute-alias} option controls warnings
-issued for mismatches between declarations of aliases and their targets
-that might be indicative of code generation bugs.
+This typically represents a potential optimization opportunity.
+By contrast, 

[Bug go/89123] Too many go test failures on s390x-linux

2019-02-15 Thread ian at airs dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89123

Ian Lance Taylor  changed:

   What|Removed |Added

 CC||doko at debian dot org

--- Comment #15 from Ian Lance Taylor  ---
*** Bug 89277 has been marked as a duplicate of this bug. ***

[Bug go/89277] [9 Regression] libgo memory hogs in libgo testsuite (at least on s390x-linux-gnu)

2019-02-15 Thread ian at airs dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89277

Ian Lance Taylor  changed:

   What|Removed |Added

 Status|UNCONFIRMED |RESOLVED
 Resolution|--- |DUPLICATE

--- Comment #2 from Ian Lance Taylor  ---
Closing as a duplicate per earlier comment.  Please comment if you disagree.

*** This bug has been marked as a duplicate of bug 89123 ***

[Bug go/89171] FAIL: go/build

2019-02-15 Thread ian at airs dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89171

--- Comment #1 from Ian Lance Taylor  ---
I don't know what is happening here and I don't have access to a riscv64
machine to test on (there doesn't seem to be one in the compile farm).  The
errors are incorrect in that these are not expected dependencies, not
unexpected ones.  This may be a bug in the generation of top level composite
literals, though I can't think of any reason that would be specific to riscv64,
and of course it is working on other systems.

Do you have any suggestions for how I could debug this?

[PATCH 32/42] i386: Emulate MMX pshufb with SSE version

2019-02-15 Thread H.J. Lu
Emulate MMX version of pshufb with SSE version by masking out the bit 3
of the shuffle control byte.  Only SSE register source operand is allowed.

PR target/89021
* config/i386/sse.md (ssse3_pshufbv8qi3): Changed to
define_insn_and_split.  Also allow TARGET_MMX_WITH_SSE.  Add
SSE emulation.
---
 gcc/config/i386/sse.md | 46 +-
 1 file changed, 37 insertions(+), 9 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 80b1a46f507..704e211c0b8 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15697,17 +15697,45 @@
(set_attr "btver2_decode" "vector")
(set_attr "mode" "")])
 
-(define_insn "ssse3_pshufbv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-   (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
-UNSPEC_PSHUFB))]
-  "TARGET_SSSE3"
-  "pshufb\t{%2, %0|%0, %2}";
-  [(set_attr "type" "sselog1")
+(define_insn_and_split "ssse3_pshufbv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
+   (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
+ (match_operand:V8QI 2 "register_mmxmem_operand" 
"ym,x,Yv")]
+UNSPEC_PSHUFB))
+   (clobber (match_scratch:V4SI 3 "=X,x,Yv"))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+  "@
+   pshufb\t{%2, %0|%0, %2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(set (match_dup 3) (match_dup 5))
+   (set (match_dup 3)
+   (and:V4SI (match_dup 3) (match_dup 2)))
+   (set (match_dup 0)
+   (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))]
+{
+  /* Emulate MMX version of pshufb with SSE version by masking out the
+ bit 3 of the shuffle control byte.  */
+  operands[0] = lowpart_subreg (V16QImode, operands[0],
+   GET_MODE (operands[0]));
+  operands[1] = lowpart_subreg (V16QImode, operands[1],
+   GET_MODE (operands[1]));
+  operands[2] = lowpart_subreg (V4SImode, operands[2],
+   GET_MODE (operands[2]));
+  operands[4] = lowpart_subreg (V16QImode, operands[3],
+   GET_MODE (operands[3]));
+  rtvec par = gen_rtvec (4, GEN_INT (0xf7f7f7f7),
+GEN_INT (0xf7f7f7f7),
+GEN_INT (0xf7f7f7f7),
+GEN_INT (0xf7f7f7f7));
+  rtx vec_const = gen_rtx_CONST_VECTOR (V4SImode, par);
+  operands[5] = force_const_mem (V4SImode, vec_const);
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "_psign3"
   [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
-- 
2.20.1



Re: [PATCH 00/40] V6: Emulate MMX intrinsics with SSE

2019-02-15 Thread H.J. Lu
On Fri, Feb 15, 2019 at 9:50 AM Uros Bizjak  wrote:
>
> On Fri, Feb 15, 2019 at 2:58 PM H.J. Lu  wrote:
> >
> > On x86-64, since __m64 is returned and passed in XMM registers, we can
> > emulate MMX intrinsics with SSE instructions. To support it, we added
> >
> >  #define TARGET_MMX_WITH_SSE (TARGET_64BIT && TARGET_SSE2)
> >
> > ;; Define instruction set of MMX instructions
> > (define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx"
> >   (const_string "base"))
> >
> >  (eq_attr "mmx_isa" "native")
> >(symbol_ref "!TARGET_MMX_WITH_SSE")
> >  (eq_attr "mmx_isa" "x64")
> >(symbol_ref "TARGET_MMX_WITH_SSE")
> >  (eq_attr "mmx_isa" "x64_avx")
> >(symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
> >  (eq_attr "mmx_isa" "x64_noavx")
> >(symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")
> >
> > We added SSE emulation to MMX patterns and disabled MMX alternatives with
> > TARGET_MMX_WITH_SSE.
> >
> > Most of MMX instructions have equivalent SSE versions and results of some
> > SSE versions need to be reshuffled to the right order for MMX.  Thee are
> > couple tricky cases:
> >
> > 1. MMX maskmovq and SSE2 maskmovdqu aren't equivalent.  We emulate MMX
> > maskmovq with SSE2 maskmovdqu by zeroing out the upper 64 bits of the
> > mask operand and handle unmapped bits 64:127 at memory address by
> > adjusting source and mask operands together with memory address.
> >
> > 2. MMX movntq is emulated with SSE2 DImode movnti, which is available
> > in 64-bit mode.
> >
> > 3. MMX pshufb takes a 3-bit index while SSE pshufb takes a 4-bit index.
> > SSE emulation must clear the bit 4 in the shuffle control mask.
> >
> > 4. To emulate MMX cvtpi2p with SSE2 cvtdq2ps, we must properly preserve
> > the upper 64 bits of destination XMM register.
> >
> > Tests are also added to check each SSE emulation of MMX intrinsics.
> >
> > There are no regressions on i686 and x86-64.  For x86-64, GCC is also
> > tested with
> >
> > --with-arch=native --with-cpu=native
> >
> > on AVX2 and AVX512F machines.
>
> I went through the code again, and looks OK in general, modulo
> mmx_nonimmediate_operand issue and a couple of minor issues.
>
> Please substitute nonimmediate_operand predicate with
> mmx_nonimmediate_operand in expanders and insn patterns. Please note
> that the proposed convention is to name the operand
> register_mmxmem_operand (c.f. register_ssemem_operand), so I suggest
> we name the predicate in this way.
>
> There is an issue with a change to emms pattern.
>
> And let's remove _mm_empty () calls from testcases; they complicate
> things too much for no apparent benefit.
>
> With those issues fixed, the patchset is OK for gcc-10 when it opens.

The new patch set starts at

https://gcc.gnu.org/ml/gcc-patches/2019-02/msg01275.html

including

https://gcc.gnu.org/ml/gcc-patches/2019-02/msg01271.html

for

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89372

-- 
H.J.


[PATCH 40/42] i386: Allow MMX intrinsic emulation with SSE

2019-02-15 Thread H.J. Lu
Allow MMX intrinsic emulation with SSE/SSE2/SSSE3.  Don't enable MMX ISA
by default with TARGET_MMX_WITH_SSE.

For pr82483-1.c and pr82483-2.c, "-mssse3 -mno-mmx" compiles in 64-bit
mode since MMX intrinsics can be emulated wit SSE.

gcc/

PR target/89021
* config/i386/i386-builtin.def: Enable MMX intrinsics with
SSE/SSE2/SSSE3.
* config/i386/i386.c (ix86_init_mmx_sse_builtins): Likewise.
(ix86_expand_builtin): Allow SSE/SSE2/SSSE3 to emulate MMX
intrinsics with TARGET_MMX_WITH_SSE.
* config/i386/mmintrin.h: Only require SSE2 if __MMX_WITH_SSE__
is defined.

gcc/testsuite/

PR target/89021
* gcc.target/i386/pr82483-1.c: Error only on ia32.
* gcc.target/i386/pr82483-2.c: Likewise.
---
 gcc/config/i386/i386-builtin.def  | 126 +++---
 gcc/config/i386/i386.c|  29 -
 gcc/config/i386/mmintrin.h|  12 ++-
 gcc/testsuite/gcc.target/i386/pr82483-1.c |   2 +-
 gcc/testsuite/gcc.target/i386/pr82483-2.c |   2 +-
 5 files changed, 101 insertions(+), 70 deletions(-)

diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 88005f4687f..10a9d631f29 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -100,7 +100,7 @@ BDESC (0, 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", 
IX86_BUILTIN_FNSTSW, UNKN
 BDESC (0, 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, 
UNKNOWN, (int) VOID_FTYPE_VOID)
 
 /* MMX */
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_emms, "__builtin_ia32_emms", 
IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_emms, 
"__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
 
 /* 3DNow! */
 BDESC (OPTION_MASK_ISA_3DNOW, 0, CODE_FOR_mmx_femms, "__builtin_ia32_femms", 
IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
@@ -442,68 +442,68 @@ BDESC (0, 0, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", 
IX86_BUILTIN_RORQI, UNKNO
 BDESC (0, 0, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, 
UNKNOWN, (int) UINT16_FTYPE_UINT16_INT)
 
 /* MMX */
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", 
IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", 
IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", 
IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", 
IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", 
IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", 
IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv8qi3, 
"__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) 
V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv4hi3, 
"__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv8qi3, 
"__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) 
V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv4hi3, 
"__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv8qi3, 
"__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) 
V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv4hi3, 
"__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv8qi3, 
"__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) 
V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv4hi3, 
"__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", 
IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_smulv4hi3_highpart, 
"__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", 
IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andnotv2si3, 
"__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", 
IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", 
IX86_BUILTIN_PXOR, UNKNOWN, (int) 

[PATCH 26/42] i386: Emulate MMX umulv1siv1di3 with SSE2

2019-02-15 Thread H.J. Lu
Emulate MMX umulv1siv1di3 with SSE2.  Only SSE register source operand
is allowed.

PR target/89021
* config/i386/mmx.md (sse2_umulv1siv1di3): Add SSE emulation
support.
(*sse2_umulv1siv1di3): Add SSE2 emulation.
---
 gcc/config/i386/mmx.md | 26 --
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 993ad99a36e..9cf0251293a 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -905,30 +905,36 @@
 (mult:V1DI
  (zero_extend:V1DI
(vec_select:V1SI
- (match_operand:V2SI 1 "nonimmediate_operand")
+ (match_operand:V2SI 1 "register_mmxmem_operand")
  (parallel [(const_int 0)])))
  (zero_extend:V1DI
(vec_select:V1SI
- (match_operand:V2SI 2 "nonimmediate_operand")
+ (match_operand:V2SI 2 "register_mmxmem_operand")
  (parallel [(const_int 0)])]
-  "TARGET_SSE2"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE2"
   "ix86_fixup_binary_operands_no_copy (MULT, V2SImode, operands);")
 
 (define_insn "*sse2_umulv1siv1di3"
-  [(set (match_operand:V1DI 0 "register_operand" "=y")
+  [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yv")
 (mult:V1DI
  (zero_extend:V1DI
(vec_select:V1SI
- (match_operand:V2SI 1 "nonimmediate_operand" "%0")
+ (match_operand:V2SI 1 "register_mmxmem_operand" "%0,0,Yv")
  (parallel [(const_int 0)])))
  (zero_extend:V1DI
(vec_select:V1SI
- (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")
  (parallel [(const_int 0)])]
-  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2SImode, operands)"
-  "pmuludq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && TARGET_SSE2
+   && ix86_binary_operator_ok (MULT, V2SImode, operands)"
+  "@
+   pmuludq\t{%2, %0|%0, %2}
+   pmuludq\t{%2, %0|%0, %2}
+   vpmuludq\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,ssemul,ssemul")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_v4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 22/42] i386: Emulate MMX mmx_uavgv8qi3 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mmx_uavgv8qi3 with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_uavgv8qi3): Also check TARGET_MMX
and TARGET_MMX_WITH_SSE.
(*mmx_uavgv8qi3): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 25 +++--
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index d78c6a31962..570153521a1 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1678,50 +1678,55 @@
(plus:V8HI
  (plus:V8HI
(zero_extend:V8HI
- (match_operand:V8QI 1 "nonimmediate_operand"))
+ (match_operand:V8QI 1 "register_mmxmem_operand"))
(zero_extend:V8HI
- (match_operand:V8QI 2 "nonimmediate_operand")))
+ (match_operand:V8QI 2 "register_mmxmem_operand")))
  (const_vector:V8HI [(const_int 1) (const_int 1)
  (const_int 1) (const_int 1)
  (const_int 1) (const_int 1)
  (const_int 1) (const_int 1)]))
(const_int 1]
-  "TARGET_SSE || TARGET_3DNOW"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
   "ix86_fixup_binary_operands_no_copy (PLUS, V8QImode, operands);")
 
 (define_insn "*mmx_uavgv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
+  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
(truncate:V8QI
  (lshiftrt:V8HI
(plus:V8HI
  (plus:V8HI
(zero_extend:V8HI
- (match_operand:V8QI 1 "nonimmediate_operand" "%0"))
+ (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yv"))
(zero_extend:V8HI
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")))
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")))
  (const_vector:V8HI [(const_int 1) (const_int 1)
  (const_int 1) (const_int 1)
  (const_int 1) (const_int 1)
  (const_int 1) (const_int 1)]))
(const_int 1]
-  "(TARGET_SSE || TARGET_3DNOW)
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (PLUS, V8QImode, operands)"
 {
   /* These two instructions have the same operation, but their encoding
  is different.  Prefer the one that is de facto standard.  */
-  if (TARGET_SSE || TARGET_3DNOW_A)
+  if (TARGET_MMX_WITH_SSE && TARGET_AVX)
+return "vpavgb\t{%2, %1, %0|%0, %1, %2}";
+  else if (TARGET_SSE || TARGET_3DNOW_A)
 return "pavgb\t{%2, %0|%0, %2}";
   else
 return "pavgusb\t{%2, %0|%0, %2}";
 }
-  [(set_attr "type" "mmxshft")
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sseiadd,sseiadd")
(set (attr "prefix_extra")
  (if_then_else
(not (ior (match_test "TARGET_SSE")
 (match_test "TARGET_3DNOW_A")))
(const_string "1")
(const_string "*")))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_uavgv4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 28/42] i386: Emulate MMX ssse3_phwv4hi3 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX ssse3_phwv4hi3 with SSE by moving bits
64:95 to bits 32:63 in SSE register.  Only SSE register source operand
is allowed.

PR target/89021
* config/i386/sse.md (ssse3_phwv4hi3):
Changed to define_insn_and_split to support SSE emulation.
---
 gcc/config/i386/sse.md | 34 ++
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 06c9b5b58f1..38b83c57ffc 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15232,13 +15232,13 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
 
-(define_insn "ssse3_phwv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "ssse3_phwv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(vec_concat:V4HI
  (vec_concat:V2HI
(ssse3_plusminus:HI
  (vec_select:HI
-   (match_operand:V4HI 1 "register_operand" "0")
+   (match_operand:V4HI 1 "register_operand" "0,0,Yv")
(parallel [(const_int 0)]))
  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
(ssse3_plusminus:HI
@@ -15247,19 +15247,37 @@
  (vec_concat:V2HI
(ssse3_plusminus:HI
  (vec_select:HI
-   (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+   (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")
(parallel [(const_int 0)]))
  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
(ssse3_plusminus:HI
  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))]
-  "TARGET_SSSE3"
-  "phw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+  "@
+   phw\t{%2, %0|%0, %2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+{
+  /* Generate SSE version of the operation.  */
+  rtx op0 = lowpart_subreg (V8HImode, operands[0],
+   GET_MODE (operands[0]));
+  rtx op1 = lowpart_subreg (V8HImode, operands[1],
+   GET_MODE (operands[1]));
+  rtx op2 = lowpart_subreg (V8HImode, operands[2],
+   GET_MODE (operands[2]));
+  emit_insn (gen_ssse3_phwv8hi3 (op0, op1, op2));
+  ix86_move_vector_high_sse_to_mmx (op0);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseiadd")
(set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "avx2_phdv8si3"
   [(set (match_operand:V8SI 0 "register_operand" "=x")
-- 
2.20.1



[PATCH 27/42] i386: Make _mm_empty () as NOP when MMX is disabled

2019-02-15 Thread H.J. Lu
With SSE emulation of MMX intrinsics, we should make _mm_empty () as NOP
when MMX is disabled.

PR target/89021
* config/i386/mmx.md (mmx_): Renamed to ...
(mmx__1): This.
(mmx_): New expander.
---
 gcc/config/i386/mmx.md | 29 -
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 9cf0251293a..0f925c0b1ea 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1848,7 +1848,34 @@
   [(UNSPECV_EMMS "emms")
(UNSPECV_FEMMS "femms")])
 
-(define_insn "mmx_"
+(define_expand "mmx_"
+  [(unspec_volatile [(const_int 0)] EMMS)
+   (clobber (reg:XF ST0_REG))
+   (clobber (reg:XF ST1_REG))
+   (clobber (reg:XF ST2_REG))
+   (clobber (reg:XF ST3_REG))
+   (clobber (reg:XF ST4_REG))
+   (clobber (reg:XF ST5_REG))
+   (clobber (reg:XF ST6_REG))
+   (clobber (reg:XF ST7_REG))
+   (clobber (reg:DI MM0_REG))
+   (clobber (reg:DI MM1_REG))
+   (clobber (reg:DI MM2_REG))
+   (clobber (reg:DI MM3_REG))
+   (clobber (reg:DI MM4_REG))
+   (clobber (reg:DI MM5_REG))
+   (clobber (reg:DI MM6_REG))
+   (clobber (reg:DI MM7_REG))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+{
+   if (TARGET_MMX)
+ emit_insn (gen_mmx__1 ());
+   else
+ emit_insn (gen_nop ());
+   DONE;
+})
+
+(define_insn "mmx__1"
   [(unspec_volatile [(const_int 0)] EMMS)
(clobber (reg:XF ST0_REG))
(clobber (reg:XF ST1_REG))
-- 
2.20.1



[PATCH 12/42] i386: Emulate MMX vec_dupv2si with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX vec_dupv2si with SSE.  Add the "Yw" constraint to allow
broadcast from integer register for AVX512BW with TARGET_AVX512VL.
Only SSE register source operand is allowed.

PR target/89021
* config/i386/constraints.md (Yw): New constraint.
* config/i386/mmx.md (*vec_dupv2si): Changed to
define_insn_and_split and also allow TARGET_MMX_WITH_SSE to
support SSE emulation.
---
 gcc/config/i386/constraints.md |  6 ++
 gcc/config/i386/mmx.md | 24 +---
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index 16075b4acf3..c546b20d9dc 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -110,6 +110,8 @@
 ;;  v  any EVEX encodable SSE register for AVX512VL target,
 ;; otherwise any SSE register
 ;;  h  EVEX encodable SSE register with number factor of four
+;;  w  any EVEX encodable SSE register for AVX512BW with TARGET_AVX512VL
+;; target.
 
 (define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
  "First SSE register (@code{%xmm0}).")
@@ -146,6 +148,10 @@
  "TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
  "@internal For AVX512VL, any EVEX encodable SSE register 
(@code{%xmm0-%xmm31}), otherwise any SSE register.")
 
+(define_register_constraint "Yw"
+ "TARGET_AVX512BW && TARGET_AVX512VL ? ALL_SSE_REGS : NO_REGS"
+ "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW 
with TARGET_AVX512VL target.")
+
 ;; We use the B prefix to denote any number of internal operands:
 ;;  f  FLAGS_REG
 ;;  g  GOT memory operand.
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index b0c6a8c8077..d568a534956 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1381,14 +1381,24 @@
(set_attr "length_immediate" "1")
(set_attr "mode" "DI")])
 
-(define_insn "*vec_dupv2si"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "*vec_dupv2si"
+  [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv,Yw")
(vec_duplicate:V2SI
- (match_operand:SI 1 "register_operand" "0")))]
-  "TARGET_MMX"
-  "punpckldq\t%0, %0"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+ (match_operand:SI 1 "register_operand" "0,0,Yv,r")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpckldq\t%0, %0
+   #
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(set (match_dup 0)
+   (vec_duplicate:V4SI (match_dup 1)))]
+  "operands[0] = lowpart_subreg (V4SImode, operands[0],
+GET_MODE (operands[0]));"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx,x64_avx")
+   (set_attr "type" "mmxcvt,ssemov,ssemov,ssemov")
+   (set_attr "mode" "DI,TI,TI,TI")])
 
 (define_insn "*mmx_concatv2si"
   [(set (match_operand:V2SI 0 "register_operand" "=y,y")
-- 
2.20.1



[PATCH 37/42] Prevent allocation of MMX registers with TARGET_MMX_WITH_SSE

2019-02-15 Thread H.J. Lu
From: Uros Bizjak 

2019-02-14  Uroš Bizjak  

PR target/89021
* config/i386/i386.md (*zero_extendsidi2): Add mmx_isa attribute.
* config/i386/sse.md (*vec_concatv2sf_sse4_1): Ditto.
(*vec_concatv2sf_sse): Ditto.
(*vec_concatv2si_sse4_1): Ditto.
(*vec_concatv2si): Ditto.
(*vec_concatv4si_0): Ditto.
(*vec_concatv2di_0): Ditto.
---
 gcc/config/i386/i386.md |  4 
 gcc/config/i386/sse.md  | 16 ++--
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e1727676deb..22172fd77a8 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -3682,6 +3682,10 @@
  (const_string "avx512bw")
   ]
   (const_string "*")))
+   (set (attr "mmx_isa")
+ (if_then_else (eq_attr "alternative" "5,6")
+  (const_string "native")
+  (const_string "*")))
(set (attr "type")
  (cond [(eq_attr "alternative" "0,1,2,4")
  (const_string "multi")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 379da16615d..b6196b088fd 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -7201,6 +7201,10 @@
  (const_string "mmxmov")
   ]
   (const_string "sselog")))
+   (set (attr "mmx_isa")
+ (if_then_else (eq_attr "alternative" "7,8")
+  (const_string "native")
+  (const_string "*")))
(set (attr "prefix_data16")
  (if_then_else (eq_attr "alternative" "3,4")
   (const_string "1")
@@ -7236,7 +7240,8 @@
movss\t{%1, %0|%0, %1}
punpckldq\t{%2, %0|%0, %2}
movd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
+  [(set_attr "mmx_isa" "*,*,native,native")
+   (set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
(set_attr "mode" "V4SF,SF,DI,DI")])
 
 (define_insn "*vec_concatv4sf"
@@ -14509,6 +14514,10 @@
punpckldq\t{%2, %0|%0, %2}
movd\t{%1, %0|%0, %1}"
   [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
+   (set (attr "mmx_isa")
+ (if_then_else (eq_attr "alternative" "8,9")
+  (const_string "native")
+  (const_string "*")))
(set (attr "type")
  (cond [(eq_attr "alternative" "7")
  (const_string "ssemov")
@@ -14546,6 +14555,7 @@
punpckldq\t{%2, %0|%0, %2}
movd\t{%1, %0|%0, %1}"
   [(set_attr "isa" "sse2,sse2,*,*,*,*")
+   (set_attr "mmx_isa" "*,*,*,*,native,native")
(set_attr "type" "sselog,ssemov,sselog,ssemov,mmxcvt,mmxmov")
(set_attr "mode" "TI,TI,V4SF,SF,DI,DI")])
 
@@ -14575,7 +14585,8 @@
   "@
%vmovq\t{%1, %0|%0, %1}
movq2dq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
+  [(set_attr "mmx_isa" "*,native")
+   (set_attr "type" "ssemov")
(set_attr "prefix" "maybe_vex,orig")
(set_attr "mode" "TI")])
 
@@ -14650,6 +14661,7 @@
%vmovq\t{%1, %0|%0, %1}
movq2dq\t{%1, %0|%0, %1}"
   [(set_attr "isa" "x64,*,*")
+   (set_attr "mmx_isa" "*,*,native")
(set_attr "type" "ssemov")
(set_attr "prefix_rex" "1,*,*")
(set_attr "prefix" "maybe_vex,maybe_vex,orig")
-- 
2.20.1



[PATCH 39/42] i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE

2019-02-15 Thread H.J. Lu
PR target/89021
* config/i386/i386.c (ix86_expand_vector_init_duplicate): Set
mmx_ok to true if TARGET_MMX_WITH_SSE is true.
(ix86_expand_vector_init_one_nonzero): Likewise.
(ix86_expand_vector_init_one_var): Likewise.
(ix86_expand_vector_init_general): Likewise.
(ix86_expand_vector_init): Likewise.
(ix86_expand_vector_set): Likewise.
(ix86_expand_vector_extract): Likewise.
* config/i386/mmx.md (*vec_dupv2sf): Changed to
define_insn_and_split to support SSE emulation.
(*vec_extractv2sf_0): Likewise.
(*vec_extractv2sf_1): Likewise.
(*vec_extractv2si_0): Likewise.
(*vec_extractv2si_1): Likewise.
(*vec_extractv2si_zext_mem): Likewise.
(vec_setv2sf): Also allow TARGET_MMX_WITH_SSE.
(vec_extractv2sf_1 splitter): Likewise.
(vec_extractv2sfsf): Likewise.
(vec_setv2si): Likewise.
(vec_extractv2si_1 splitter): Likewise.
(vec_extractv2sisi): Likewise.
(vec_setv4hi): Likewise.
(vec_extractv4hihi): Likewise.
(vec_setv8qi): Likewise.
(vec_extractv8qiqi): Likewise.
---
 gcc/config/i386/i386.c |  8 +
 gcc/config/i386/mmx.md | 69 +++---
 2 files changed, 52 insertions(+), 25 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index a76c17beece..25e0dc43a9e 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -42620,6 +42620,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, 
machine_mode mode,
 {
   bool ok;
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
 {
 case E_V2SImode:
@@ -42779,6 +42780,7 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, 
machine_mode mode,
   bool use_vector_set = false;
   rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL;
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
 {
 case E_V2DImode:
@@ -42972,6 +42974,7 @@ ix86_expand_vector_init_one_var (bool mmx_ok, 
machine_mode mode,
   XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
   const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
 {
 case E_V2DFmode:
@@ -43357,6 +43360,7 @@ ix86_expand_vector_init_general (bool mmx_ok, 
machine_mode mode,
   machine_mode quarter_mode = VOIDmode;
   int n, i;
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
 {
 case E_V2SFmode:
@@ -43556,6 +43560,8 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx 
vals)
   int i;
   rtx x;
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
+
   /* Handle first initialization from vector elts.  */
   if (n_elts != XVECLEN (vals, 0))
 {
@@ -43655,6 +43661,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx 
val, int elt)
   machine_mode mmode = VOIDmode;
   rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
 {
 case E_V2SFmode:
@@ -44010,6 +44017,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, 
rtx vec, int elt)
   bool use_vec_extr = false;
   rtx tmp;
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
 {
 case E_V2SImode:
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index a21e11c8dfb..fa0b0126e91 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -555,14 +555,23 @@
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
 
-(define_insn "*vec_dupv2sf"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
+(define_insn_and_split "*vec_dupv2sf"
+  [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
(vec_duplicate:V2SF
- (match_operand:SF 1 "register_operand" "0")))]
-  "TARGET_MMX"
-  "punpckldq\t%0, %0"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+ (match_operand:SF 1 "register_operand" "0,0,Yv")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpckldq\t%0, %0
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(set (match_dup 0)
+   (vec_duplicate:V4SF (match_dup 1)))]
+  "operands[0] = lowpart_subreg (V4SFmode, operands[0],
+GET_MODE (operands[0]));"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,ssemov,ssemov")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "*mmx_concatv2sf"
   [(set (match_operand:V2SF 0 "register_operand" "=y,y")
@@ -580,7 +589,7 @@
   [(match_operand:V2SF 0 "register_operand")
(match_operand:SF 1 "register_operand")
(match_operand 2 "const_int_operand")]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_set (false, operands[0], operands[1],
  INTVAL (operands[2]));
@@ -594,11 +603,13 @@
(vec_select:SF
  (match_operand:V2SF 1 "nonimmediate_operand" " xm,x,ym,y,m,m")
  (parallel [(const_int 0)])))]
-  "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && 

[PATCH 41/42] i386: Enable TM MMX intrinsics with SSE2

2019-02-15 Thread H.J. Lu
This pach enables TM MMX intrinsics with SSE2 when MMX is disabled.

PR target/89021
* config/i386/i386.c (bdesc_tm): Enable MMX intrinsics with
SSE2.
---
 gcc/config/i386/i386.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 073a2534d1f..319a98f824a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -31065,13 +31065,13 @@ static const struct builtin_description 
bdesc_##kind[] =  \
we're lazy.  Add casts to make them fit.  */
 static const struct builtin_description bdesc_tm[] =
 {
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum 
ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum 
ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum 
ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum 
ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum 
ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum 
ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum 
ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, 
VOID_FTYPE_PV2SI_V2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, 
UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, 
UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, 
V2SI_FTYPE_PCV2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, 
UNKNOWN, V2SI_FTYPE_PCV2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, 
UNKNOWN, V2SI_FTYPE_PCV2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, 
UNKNOWN, V2SI_FTYPE_PCV2SI },
 
   { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum 
ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
   { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum 
ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
@@ -31089,7 +31089,7 @@ static const struct builtin_description bdesc_tm[] =
   { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum 
ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
   { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum 
ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
 
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum 
ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, 
VOID_FTYPE_PCVOID },
   { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum 
ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
   { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum 
ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
 };
-- 
2.20.1



[PATCH 21/42] i386: Emulate MMX maskmovq with SSE2 maskmovdqu

2019-02-15 Thread H.J. Lu
Emulate MMX maskmovq with SSE2 maskmovdqu for TARGET_MMX_WITH_SSE by
zero-extending source and mask operands to 128 bits.  Handle unmapped
bits 64:127 at memory address by adjusting source and mask operands
together with memory address.

PR target/89021
* config/i386/xmmintrin.h: Emulate MMX maskmovq with SSE2
maskmovdqu for __MMX_WITH_SSE__.
---
 gcc/config/i386/xmmintrin.h | 61 +
 1 file changed, 61 insertions(+)

diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index 58284378514..a915f6c87d7 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -1165,7 +1165,68 @@ _m_pshufw (__m64 __A, int const __N)
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
 {
+#ifdef __MMX_WITH_SSE__
+  /* Emulate MMX maskmovq with SSE2 maskmovdqu and handle unmapped bits
+ 64:127 at address __P.  */
+  typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+  typedef char __v16qi __attribute__ ((__vector_size__ (16)));
+  /* Zero-extend __A and __N to 128 bits.  */
+  __v2di __A128 = __extension__ (__v2di) { ((__v1di) __A)[0], 0 };
+  __v2di __N128 = __extension__ (__v2di) { ((__v1di) __N)[0], 0 };
+
+  /* Check the alignment of __P.  */
+  __SIZE_TYPE__ offset = ((__SIZE_TYPE__) __P) & 0xf;
+  if (offset)
+{
+  /* If the misalignment of __P > 8, subtract __P by 8 bytes.
+Otherwise, subtract __P by the misalignment.  */
+  if (offset > 8)
+   offset = 8;
+  __P = (char *) (((__SIZE_TYPE__) __P) - offset);
+
+  /* Shift __A128 and __N128 to the left by the adjustment.  */
+  switch (offset)
+   {
+   case 1:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 8);
+ break;
+   case 2:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 2 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 2 * 8);
+ break;
+   case 3:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 3 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 3 * 8);
+ break;
+   case 4:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 4 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 4 * 8);
+ break;
+   case 5:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 5 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 5 * 8);
+ break;
+   case 6:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 6 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 6 * 8);
+ break;
+   case 7:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 7 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 7 * 8);
+ break;
+   case 8:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 8 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 8 * 8);
+ break;
+   default:
+ break;
+   }
+}
+  __builtin_ia32_maskmovdqu ((__v16qi)__A128, (__v16qi)__N128, __P);
+#else
   __builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P);
+#endif
 }
 
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
-- 
2.20.1



[PATCH 25/42] i386: Emulate MMX movntq with SSE2 movntidi

2019-02-15 Thread H.J. Lu
Emulate MMX movntq with SSE2 movntidi.  Only register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (sse_movntq): Add SSE2 emulation.
---
 gcc/config/i386/mmx.md | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index bcce7c06c4f..993ad99a36e 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -214,12 +214,16 @@
 })
 
 (define_insn "sse_movntq"
-  [(set (match_operand:DI 0 "memory_operand" "=m")
-   (unspec:DI [(match_operand:DI 1 "register_operand" "y")]
+  [(set (match_operand:DI 0 "memory_operand" "=m,m")
+   (unspec:DI [(match_operand:DI 1 "register_operand" "y,r")]
   UNSPEC_MOVNTQ))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "movntq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxmov")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
+  "@
+   movntq\t{%1, %0|%0, %1}
+   movnti\t{%1, %0|%0, %1}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "mmxmov,ssemov")
(set_attr "mode" "DI")])
 
 ;
-- 
2.20.1



[PATCH 35/42] i386: Emulate MMX abs2 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX abs2 with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/sse.md (abs2): Add SSE emulation.
---
 gcc/config/i386/sse.md | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ec68b5dc2ce..92f5ad17156 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15942,16 +15942,19 @@
 })
 
 (define_insn "abs2"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yv")
(abs:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
-  "TARGET_SSSE3"
-  "pabs\t{%1, %0|%0, %1}";
-  [(set_attr "type" "sselog1")
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand" "ym,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+  "@
+   pabs\t{%1, %0|%0, %1}
+   %vpabs\t{%1, %0|%0, %1}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "sselog1")
(set_attr "prefix_rep" "0")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI")])
 
 ;
 ;;
-- 
2.20.1



[PATCH 24/42] i386: Emulate MMX mmx_psadbw with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mmx_psadbw with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_psadbw): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 19 ---
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index b8983e1755a..bcce7c06c4f 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1770,14 +1770,19 @@
(set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_psadbw"
-  [(set (match_operand:V1DI 0 "register_operand" "=y")
-(unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
+  [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yv")
+(unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
+ (match_operand:V8QI 2 "register_mmxmem_operand" 
"ym,x,Yv")]
 UNSPEC_PSADBW))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "psadbw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
+  "@
+   psadbw\t{%2, %0|%0, %2}
+   psadbw\t{%2, %0|%0, %2}
+   vpsadbw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn_and_split "mmx_pmovmskb"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
-- 
2.20.1



[PATCH 31/42] i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX ssse3_pmulhrswv4hi3 with SSE.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/sse.md (*ssse3_pmulhrswv4hi3): Add SSE emulation.
---
 gcc/config/i386/sse.md | 20 +---
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index dc07173cb1c..80b1a46f507 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15652,25 +15652,31 @@
(set_attr "mode" "")])
 
 (define_insn "*ssse3_pmulhrswv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(truncate:V4HI
  (lshiftrt:V4SI
(plus:V4SI
  (lshiftrt:V4SI
(mult:V4SI
  (sign_extend:V4SI
-   (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+   (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
  (sign_extend:V4SI
-   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+   (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
(const_int 14))
  (match_operand:V4HI 3 "const1_operand"))
(const_int 1]
-  "TARGET_SSSE3 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
-  "pmulhrsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseimul")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && TARGET_SSSE3
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   pmulhrsw\t{%2, %0|%0, %2}
+   pmulhrsw\t{%2, %0|%0, %2}
+   vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseimul")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "_pshufb3"
   [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
-- 
2.20.1



[PATCH 38/42] i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE

2019-02-15 Thread H.J. Lu
PR target/89021
* config/i386/mmx.md (MMXMODE:mov): Also allow
TARGET_MMX_WITH_SSE.
(MMXMODE:*mov_internal): Likewise.
(MMXMODE:movmisalign): Likewise.
---
 gcc/config/i386/mmx.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 0f925c0b1ea..a21e11c8dfb 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -70,7 +70,7 @@
 (define_expand "mov"
   [(set (match_operand:MMXMODE 0 "nonimmediate_operand")
(match_operand:MMXMODE 1 "nonimmediate_operand"))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_move (mode, operands);
   DONE;
@@ -81,7 +81,7 @@
 "=r ,o ,r,r ,m ,?!y,!y,?!y,m  ,r  ,?!y,v,v,v,m,r,v,!y,*x")
(match_operand:MMXMODE 1 "nonimm_or_0_operand"
 "rCo,rC,C,rm,rC,C  ,!y,m  ,?!y,?!y,r  ,C,v,m,v,v,r,*x,!y"))]
-  "TARGET_MMX
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
@@ -207,7 +207,7 @@
 (define_expand "movmisalign"
   [(set (match_operand:MMXMODE 0 "nonimmediate_operand")
(match_operand:MMXMODE 1 "nonimmediate_operand"))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_move (mode, operands);
   DONE;
-- 
2.20.1



[PATCH 36/42] i386: Correct _pmulhrsw3[_mask]

2019-02-15 Thread H.J. Lu
There is no V4HI pmulhrsw in AVX512BW and V4HI/V8HI pmulhrsw don't require
AVX2.  To support TARGET_MMX_WITH_SSE, replace nonimmediate_operand with
register_pmulhrswmem_operand in _pmulhrsw3.

PR target/89372
* config/i386/predicates.md (register_pmulhrswmem_operand): New.
* config/i386/sse.md (PMULHRSW): Remove V4HI.
(PMULHRSW_MMX): New.
(_pmulhrsw3): Replace PMULHRSW with
PMULHRSW_MMX.  Require TARGET_SSSE3, not TARGET_AVX2.  Replace
nonimmediate_operand with register_pmulhrswmem_operand.
---
 gcc/config/i386/predicates.md |  7 +++
 gcc/config/i386/sse.md| 15 +--
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index f3c2f72de54..b7cb26a81fe 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -56,6 +56,13 @@
(and (not (match_test "TARGET_MMX_WITH_SSE"))
(match_operand 0 "memory_operand"
 
+;; Match register operands, but include memory operands for
+;; !(TARGET_MMX_WITH_SSE && mode == V4HImode).
+(define_predicate "register_pmulhrswmem_operand"
+  (ior (match_operand 0 "register_operand")
+   (and (not (match_test "TARGET_MMX_WITH_SSE && mode == V4HImode"))
+   (match_operand 0 "memory_operand"
+
 ;; True if the operand is an SSE register.
 (define_predicate "sse_reg_operand"
   (and (match_code "reg")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 92f5ad17156..379da16615d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15579,7 +15579,7 @@
(set_attr "mode" "DI,TI,TI")])
 
 (define_mode_iterator PMULHRSW
-  [V4HI V8HI (V16HI "TARGET_AVX2")])
+  [V8HI (V16HI "TARGET_AVX2")])
 
 (define_expand "_pmulhrsw3_mask"
   [(set (match_operand:PMULHRSW 0 "register_operand")
@@ -15604,21 +15604,24 @@
   ix86_fixup_binary_operands_no_copy (MULT, mode, operands);
 })
 
+(define_mode_iterator PMULHRSW_MMX
+  [V4HI V8HI (V16HI "TARGET_AVX2")])
+
 (define_expand "_pmulhrsw3"
-  [(set (match_operand:PMULHRSW 0 "register_operand")
-   (truncate:PMULHRSW
+  [(set (match_operand:PMULHRSW_MMX 0 "register_operand")
+   (truncate:PMULHRSW_MMX
  (lshiftrt:
(plus:
  (lshiftrt:
(mult:
  (sign_extend:
-   (match_operand:PMULHRSW 1 "nonimmediate_operand"))
+   (match_operand:PMULHRSW_MMX 1 
"register_pmulhrswmem_operand"))
  (sign_extend:
-   (match_operand:PMULHRSW 2 "nonimmediate_operand")))
+   (match_operand:PMULHRSW_MMX 2 
"register_pmulhrswmem_operand")))
(const_int 14))
  (match_dup 3))
(const_int 1]
-  "TARGET_AVX2"
+  "TARGET_SSSE3"
 {
   operands[3] = CONST1_RTX(mode);
   ix86_fixup_binary_operands_no_copy (MULT, mode, operands);
-- 
2.20.1



[PATCH 30/42] i386: Emulate MMX ssse3_pmaddubsw with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX ssse3_pmaddubsw with SSE.  Only SSE register source operand
is allowed.

PR target/89021
* config/i386/sse.md (ssse3_pmaddubsw): Add SSE emulation.
---
 gcc/config/i386/sse.md | 18 +++---
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 0565ddc177f..dc07173cb1c 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15544,17 +15544,17 @@
(set_attr "mode" "TI")])
 
 (define_insn "ssse3_pmaddubsw"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(ss_plus:V4HI
  (mult:V4HI
(zero_extend:V4HI
  (vec_select:V4QI
-   (match_operand:V8QI 1 "register_operand" "0")
+   (match_operand:V8QI 1 "register_operand" "0,0,Yv")
(parallel [(const_int 0) (const_int 2)
   (const_int 4) (const_int 6)])))
(sign_extend:V4HI
  (vec_select:V4QI
-   (match_operand:V8QI 2 "nonimmediate_operand" "ym")
+   (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
(parallel [(const_int 0) (const_int 2)
   (const_int 4) (const_int 6)]
  (mult:V4HI
@@ -15566,13 +15566,17 @@
  (vec_select:V4QI (match_dup 2)
(parallel [(const_int 1) (const_int 3)
   (const_int 5) (const_int 7)]))]
-  "TARGET_SSSE3"
-  "pmaddubsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+  "@
+   pmaddubsw\t{%2, %0|%0, %2}
+   pmaddubsw\t{%2, %0|%0, %2}
+   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseiadd")
(set_attr "atom_unit" "simul")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_mode_iterator PMULHRSW
   [V4HI V8HI (V16HI "TARGET_AVX2")])
-- 
2.20.1



[PATCH 29/42] i386: Emulate MMX ssse3_phdv2si3 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX ssse3_phdv2si3 with SSE by moving bits
64:95 to bits 32:63 in SSE register.  Only SSE register source operand
is allowed.

PR target/89021
* config/i386/sse.md (ssse3_phdv2si3):
Changed to define_insn_and_split to support SSE emulation.
---
 gcc/config/i386/sse.md | 34 ++
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 38b83c57ffc..0565ddc177f 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15356,26 +15356,44 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
 
-(define_insn "ssse3_phdv2si3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "ssse3_phdv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
(vec_concat:V2SI
  (plusminus:SI
(vec_select:SI
- (match_operand:V2SI 1 "register_operand" "0")
+ (match_operand:V2SI 1 "register_operand" "0,0,Yv")
  (parallel [(const_int 0)]))
(vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
  (plusminus:SI
(vec_select:SI
- (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")
  (parallel [(const_int 0)]))
(vec_select:SI (match_dup 2) (parallel [(const_int 1)])]
-  "TARGET_SSSE3"
-  "phd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+  "@
+   phd\t{%2, %0|%0, %2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+{
+  /* Generate SSE version of the operation.  */
+  rtx op0 = lowpart_subreg (V4SImode, operands[0],
+   GET_MODE (operands[0]));
+  rtx op1 = lowpart_subreg (V4SImode, operands[1],
+   GET_MODE (operands[1]));
+  rtx op2 = lowpart_subreg (V4SImode, operands[2],
+   GET_MODE (operands[2]));
+  emit_insn (gen_ssse3_phdv4si3 (op0, op1, op2));
+  ix86_move_vector_high_sse_to_mmx (op0);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseiadd")
(set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "avx2_pmaddubsw256"
   [(set (match_operand:V16HI 0 "register_operand" "=x,v")
-- 
2.20.1



[PATCH 33/42] i386: Emulate MMX ssse3_psign3 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX ssse3_psign3 with SSE.  Only SSE register source operand
is allowed.

PR target/89021
* config/i386/sse.md (ssse3_psign3): Add SSE emulation.
---
 gcc/config/i386/sse.md | 18 +++---
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 704e211c0b8..c2dbd59049a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15755,17 +15755,21 @@
(set_attr "mode" "")])
 
 (define_insn "ssse3_psign3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
(unspec:MMXMODEI
- [(match_operand:MMXMODEI 1 "register_operand" "0")
-  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
+ [(match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")
+  (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")]
  UNSPEC_PSIGN))]
-  "TARGET_SSSE3"
-  "psign\t{%2, %0|%0, %2}";
-  [(set_attr "type" "sselog1")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+  "@
+   psign\t{%2, %0|%0, %2}
+   psign\t{%2, %0|%0, %2}
+   vpsign\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sselog1")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "_palignr_mask"
   [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
-- 
2.20.1



[PATCH 34/42] i386: Emulate MMX ssse3_palignrdi with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX version of palignrq with SSE version by concatenating 2
64-bit MMX operands into a single 128-bit SSE operand, followed by
SSE psrldq.  Only SSE register source operand is allowed.

PR target/89021
* config/i386/sse.md (ssse3_palignrdi): Changed to
define_insn_and_split to support SSE emulation.
---
 gcc/config/i386/sse.md | 58 ++
 1 file changed, 48 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index c2dbd59049a..ec68b5dc2ce 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15824,23 +15824,61 @@
(set_attr "prefix" "orig,vex,evex")
(set_attr "mode" "")])
 
-(define_insn "ssse3_palignrdi"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-   (unspec:DI [(match_operand:DI 1 "register_operand" "0")
-   (match_operand:DI 2 "nonimmediate_operand" "ym")
-   (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
+(define_insn_and_split "ssse3_palignrdi"
+  [(set (match_operand:DI 0 "register_operand" "=y,x,Yv")
+   (unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yv")
+   (match_operand:DI 2 "register_mmxmem_operand" "ym,x,Yv")
+   (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
   UNSPEC_PALIGNR))]
-  "TARGET_SSSE3"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
 {
-  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
-  return "palignr\t{%3, %2, %0|%0, %2, %3}";
+  switch (which_alternative)
+{
+case 0:
+  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
+  return "palignr\t{%3, %2, %0|%0, %2, %3}";
+case 1:
+case 2:
+  return "#";
+default:
+  gcc_unreachable ();
+}
 }
-  [(set_attr "type" "sseishft")
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(set (match_dup 0)
+   (lshiftrt:V1TI (match_dup 0) (match_dup 3)))]
+{
+  /* Emulate MMX palignrdi with SSE psrldq.  */
+  rtx op0 = lowpart_subreg (V2DImode, operands[0],
+   GET_MODE (operands[0]));
+  rtx insn;
+  if (TARGET_AVX)
+insn = gen_vec_concatv2di (op0, operands[2], operands[1]);
+  else
+{
+  /* NB: SSE can only concatenate OP0 and OP1 to OP0.  */
+  insn = gen_vec_concatv2di (op0, operands[1], operands[2]);
+  emit_insn (insn);
+  /* Swap bits 0:63 with bits 64:127.  */
+  rtx mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (2),
+ GEN_INT (3),
+ GEN_INT (0),
+ GEN_INT (1)));
+  rtx op1 = lowpart_subreg (V4SImode, op0, GET_MODE (op0));
+  rtx op2 = gen_rtx_VEC_SELECT (V4SImode, op1, mask);
+  insn = gen_rtx_SET (op1, op2);
+}
+  emit_insn (insn);
+  operands[0] = lowpart_subreg (V1TImode, op0, GET_MODE (op0));
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseishft")
(set_attr "atom_unit" "sishuf")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 ;; Mode iterator to handle singularity w/ absence of V2DI and V4DI
 ;; modes for abs instruction on pre AVX-512 targets.
-- 
2.20.1



[PATCH 19/42] i386: Emulate MMX mmx_pmovmskb with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mmx_pmovmskb with SSE by zero-extending result of SSE pmovmskb
from QImode to SImode.  Only SSE register source operand is allowed.

PR target/89021
* config/i386/mmx.md (mmx_pmovmskb): Changed to
define_insn_and_split to support SSE emulation.
---
 gcc/config/i386/mmx.md | 30 +++---
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 058791e01e6..9c552f929f1 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1762,14 +1762,30 @@
   [(set_attr "type" "mmxshft")
(set_attr "mode" "DI")])
 
-(define_insn "mmx_pmovmskb"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-   (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")]
+(define_insn_and_split "mmx_pmovmskb"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+   (unspec:SI [(match_operand:V8QI 1 "register_operand" "y,x")]
   UNSPEC_MOVMSK))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pmovmskb\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
+  "@
+   pmovmskb\t{%1, %0|%0, %1}
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(set (match_dup 0)
+(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
+   (set (match_dup 0)
+   (zero_extend:SI (match_dup 2)))]
+{
+  /* Generate SSE pmovmskb and zero-extend from QImode to SImode.  */
+  operands[1] = lowpart_subreg (V16QImode, operands[1],
+   GET_MODE (operands[1]));
+  operands[2] = lowpart_subreg (QImode, operands[0],
+   GET_MODE (operands[0]));
+}
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "mmxcvt,ssemov")
+   (set_attr "mode" "DI,TI")])
 
 (define_expand "mmx_maskmovq"
   [(set (match_operand:V8QI 0 "memory_operand")
-- 
2.20.1



[PATCH 15/42] i386: Emulate MMX sse_cvtpi2ps with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX sse_cvtpi2ps with SSE2 cvtdq2ps, preserving upper 64 bits of
destination XMM register.  Only SSE register source operand is allowed.

PR target/89021
* config/i386/sse.md (sse_cvtpi2ps): Changed to
define_insn_and_split.  Also allow TARGET_MMX_WITH_SSE.  Add
SSE emulation.
---
 gcc/config/i386/sse.md | 64 --
 1 file changed, 56 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 70e3669d115..06c9b5b58f1 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4561,16 +4561,64 @@
 ;;
 ;
 
-(define_insn "sse_cvtpi2ps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
+(define_insn_and_split "sse_cvtpi2ps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x,x,Yv")
(vec_merge:V4SF
  (vec_duplicate:V4SF
-   (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
- (match_operand:V4SF 1 "register_operand" "0")
- (const_int 3)))]
-  "TARGET_SSE"
-  "cvtpi2ps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
+   (float:V2SF (match_operand:V2SI 2 "register_mmxmem_operand" 
"ym,x,Yv")))
+ (match_operand:V4SF 1 "register_operand" "0,0,Yv")
+ (const_int 3)))
+   (clobber (match_scratch:V4SF 3 "=X,x,Yv"))]
+  "TARGET_SSE || TARGET_MMX_WITH_SSE"
+  "@
+   cvtpi2ps\t{%2, %0|%0, %2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+{
+  rtx op2 = lowpart_subreg (V4SImode, operands[2],
+   GET_MODE (operands[2]));
+  /* Generate SSE2 cvtdq2ps.  */
+  rtx insn = gen_floatv4siv4sf2 (operands[3], op2);
+  emit_insn (insn);
+
+  /* Merge operands[3] with operands[0].  */
+  rtx mask, op1;
+  if (TARGET_AVX)
+{
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (0), GEN_INT (1),
+ GEN_INT (6), GEN_INT (7)));
+  op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[3], operands[1]);
+  op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
+  insn = gen_rtx_SET (operands[0], op2);
+}
+  else
+{
+  /* NB: SSE can only concatenate OP0 and OP3 to OP0.  */
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+ GEN_INT (4), GEN_INT (5)));
+  op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[0], operands[3]);
+  op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
+  insn = gen_rtx_SET (operands[0], op2);
+  emit_insn (insn);
+
+  /* Swap bits 0:63 with bits 64:127.  */
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+ GEN_INT (0), GEN_INT (1)));
+  rtx dest = lowpart_subreg (V4SImode, operands[0],
+GET_MODE (operands[0]));
+  op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+  insn = gen_rtx_SET (dest, op1);
+}
+  emit_insn (insn);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")])
 
 (define_insn "sse_cvtps2pi"
-- 
2.20.1



[PATCH 17/42] i386: Emulate MMX mmx_pinsrw with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mmx_pinsrw with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_pinsrw): Also check TARGET_MMX and
TARGET_MMX_WITH_SSE.
(*mmx_pinsrw): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 33 +++--
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 22547c7da6f..1e68d1bb338 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1282,32 +1282,45 @@
 (match_operand:SI 2 "nonimmediate_operand"))
  (match_operand:V4HI 1 "register_operand")
   (match_operand:SI 3 "const_0_to_3_operand")))]
-  "TARGET_SSE || TARGET_3DNOW_A"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
 {
   operands[2] = gen_lowpart (HImode, operands[2]);
   operands[3] = GEN_INT (1 << INTVAL (operands[3]));
 })
 
 (define_insn "*mmx_pinsrw"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
 (vec_merge:V4HI
   (vec_duplicate:V4HI
-(match_operand:HI 2 "nonimmediate_operand" "rm"))
- (match_operand:V4HI 1 "register_operand" "0")
+(match_operand:HI 2 "nonimmediate_operand" "rm,rm,rm"))
+ (match_operand:V4HI 1 "register_operand" "0,0,Yv")
   (match_operand:SI 3 "const_int_operand")))]
-  "(TARGET_SSE || TARGET_3DNOW_A)
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
< GET_MODE_NUNITS (V4HImode))"
 {
   operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
-  if (MEM_P (operands[2]))
-return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
+  if (TARGET_MMX_WITH_SSE && TARGET_AVX)
+{
+  if (MEM_P (operands[2]))
+   return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+  else
+   return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
+}
   else
-return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+{
+  if (MEM_P (operands[2]))
+   return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
+  else
+   return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+}
 }
-  [(set_attr "type" "mmxcvt")
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,sselog,sselog")
(set_attr "length_immediate" "1")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_pextrw"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
-- 
2.20.1



[PATCH 20/42] i386: Emulate MMX mmx_umulv4hi3_highpart with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mmx_umulv4hi3_highpart with SSE.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/mmx.md (mmx_umulv4hi3_highpart): Also check
TARGET_MMX and TARGET_MMX_WITH_SSE.
(*mmx_umulv4hi3_highpart): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 26 --
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 9c552f929f1..d78c6a31962 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -781,28 +781,34 @@
  (lshiftrt:V4SI
(mult:V4SI
  (zero_extend:V4SI
-   (match_operand:V4HI 1 "nonimmediate_operand"))
+   (match_operand:V4HI 1 "register_mmxmem_operand"))
  (zero_extend:V4SI
-   (match_operand:V4HI 2 "nonimmediate_operand")))
+   (match_operand:V4HI 2 "register_mmxmem_operand")))
(const_int 16]
-  "TARGET_SSE || TARGET_3DNOW_A"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
   "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
 
 (define_insn "*mmx_umulv4hi3_highpart"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(truncate:V4HI
  (lshiftrt:V4SI
(mult:V4SI
  (zero_extend:V4SI
-   (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+   (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
  (zero_extend:V4SI
-   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+   (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
  (const_int 16]
-  "(TARGET_SSE || TARGET_3DNOW_A)
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (MULT, V4HImode, operands)"
-  "pmulhuw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  "@
+   pmulhuw\t{%2, %0|%0, %2}
+   pmulhuw\t{%2, %0|%0, %2}
+   vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,ssemul,ssemul")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_pmaddwd"
   [(set (match_operand:V2SI 0 "register_operand")
-- 
2.20.1



[PATCH 13/42] i386: Emulate MMX pshufw with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX pshufw with SSE.  Only SSE register source operand is allowed.

PR target/89021
* config/i386/mmx.md (mmx_pshufw): Also check TARGET_MMX and
TARGET_MMX_WITH_SSE.
(mmx_pshufw_1): Add SSE emulation.
(*vec_dupv4hi): Changed to define_insn_and_split and also allow
TARGET_MMX_WITH_SSE to support SSE emulation.
---
 gcc/config/i386/mmx.md | 81 +-
 1 file changed, 65 insertions(+), 16 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index d568a534956..43f85064cd9 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1323,9 +1323,10 @@
 
 (define_expand "mmx_pshufw"
   [(match_operand:V4HI 0 "register_operand")
-   (match_operand:V4HI 1 "nonimmediate_operand")
+   (match_operand:V4HI 1 "register_mmxmem_operand")
(match_operand:SI 2 "const_int_operand")]
-  "TARGET_SSE || TARGET_3DNOW_A"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
 {
   int mask = INTVAL (operands[2]);
   emit_insn (gen_mmx_pshufw_1 (operands[0], operands[1],
@@ -1337,14 +1338,15 @@
 })
 
 (define_insn "mmx_pshufw_1"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yv")
 (vec_select:V4HI
-  (match_operand:V4HI 1 "nonimmediate_operand" "ym")
+  (match_operand:V4HI 1 "register_mmxmem_operand" "ym,Yv")
   (parallel [(match_operand 2 "const_0_to_3_operand")
  (match_operand 3 "const_0_to_3_operand")
  (match_operand 4 "const_0_to_3_operand")
  (match_operand 5 "const_0_to_3_operand")])))]
-  "TARGET_SSE || TARGET_3DNOW_A"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
 {
   int mask = 0;
   mask |= INTVAL (operands[2]) << 0;
@@ -1353,11 +1355,20 @@
   mask |= INTVAL (operands[5]) << 6;
   operands[2] = GEN_INT (mask);
 
-  return "pshufw\t{%2, %1, %0|%0, %1, %2}";
+  switch (which_alternative)
+{
+case 0:
+  return "pshufw\t{%2, %1, %0|%0, %1, %2}";
+case 1:
+  return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
+default:
+  gcc_unreachable ();
+}
 }
-  [(set_attr "type" "mmxcvt")
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "mmxcvt,sselog")
(set_attr "length_immediate" "1")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI")])
 
 (define_insn "mmx_pswapdv2si2"
   [(set (match_operand:V2SI 0 "register_operand" "=y")
@@ -1370,16 +1381,54 @@
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
 
-(define_insn "*vec_dupv4hi"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "*vec_dupv4hi"
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yv,Yw")
(vec_duplicate:V4HI
  (truncate:HI
-   (match_operand:SI 1 "register_operand" "0"]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pshufw\t{$0, %0, %0|%0, %0, 0}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "length_immediate" "1")
-   (set_attr "mode" "DI")])
+   (match_operand:SI 1 "register_operand" "0,Yv,r"]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
+  "@
+   pshufw\t{$0, %0, %0|%0, %0, 0}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+{
+  rtx op;
+  operands[0] = lowpart_subreg (V8HImode, operands[0],
+   GET_MODE (operands[0]));
+  if (TARGET_AVX2)
+{
+  operands[1] = lowpart_subreg (HImode, operands[1],
+   GET_MODE (operands[1]));
+  op = gen_rtx_VEC_DUPLICATE (V8HImode, operands[1]);
+}
+  else
+{
+  operands[1] = lowpart_subreg (V8HImode, operands[1],
+   GET_MODE (operands[1]));
+  rtx mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (8,
+ GEN_INT (0),
+ GEN_INT (0),
+ GEN_INT (0),
+ GEN_INT (0),
+ GEN_INT (4),
+ GEN_INT (5),
+ GEN_INT (6),
+ GEN_INT (7)));
+
+  op = gen_rtx_VEC_SELECT (V8HImode, operands[1], mask);
+}
+  rtx insn = gen_rtx_SET (operands[0], op);
+  emit_insn (insn);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64,x64_avx")
+   (set_attr "type" "mmxcvt,sselog1,ssemov")
+   (set_attr "length_immediate" "1,1,0")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn_and_split "*vec_dupv2si"
   [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv,Yw")
-- 
2.20.1



[PATCH 14/42] i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE.

PR target/89021
* config/i386/sse.md (sse_cvtps2pi): Add SSE emulation.
(sse_cvttps2pi): Likewise.
---
 gcc/config/i386/sse.md | 30 ++
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index c8e0133560a..70e3669d115 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4574,26 +4574,32 @@
(set_attr "mode" "V4SF")])
 
 (define_insn "sse_cvtps2pi"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+  [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
(vec_select:V2SI
- (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ (unspec:V4SI [(match_operand:V4SF 1 "register_mmxmem_operand" 
"xm,YvBm")]
   UNSPEC_FIX_NOTRUNC)
  (parallel [(const_int 0) (const_int 1)])))]
-  "TARGET_SSE"
-  "cvtps2pi\t{%1, %0|%0, %q1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "unit" "mmx")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
+  "@
+   cvtps2pi\t{%1, %0|%0, %q1}
+   %vcvtps2dq\t{%1, %0|%0, %1}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "ssecvt")
+   (set_attr "unit" "mmx,*")
(set_attr "mode" "DI")])
 
 (define_insn "sse_cvttps2pi"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+  [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
(vec_select:V2SI
- (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+ (fix:V4SI (match_operand:V4SF 1 "register_mmxmem_operand" "xm,YvBm"))
  (parallel [(const_int 0) (const_int 1)])))]
-  "TARGET_SSE"
-  "cvttps2pi\t{%1, %0|%0, %q1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "unit" "mmx")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
+  "@
+   cvttps2pi\t{%1, %0|%0, %q1}
+   %vcvttps2dq\t{%1, %0|%0, %1}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "ssecvt")
+   (set_attr "unit" "mmx,*")
(set_attr "prefix_rep" "0")
(set_attr "mode" "SF")])
 
-- 
2.20.1



[PATCH 23/42] i386: Emulate MMX mmx_uavgv4hi3 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mmx_uavgv4hi3 with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_uavgv4hi3): Also check TARGET_MMX and
TARGET_MMX_WITH_SSE.
(*mmx_uavgv4hi3): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 26 --
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 570153521a1..b8983e1755a 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1735,33 +1735,39 @@
(plus:V4SI
  (plus:V4SI
(zero_extend:V4SI
- (match_operand:V4HI 1 "nonimmediate_operand"))
+ (match_operand:V4HI 1 "register_mmxmem_operand"))
(zero_extend:V4SI
- (match_operand:V4HI 2 "nonimmediate_operand")))
+ (match_operand:V4HI 2 "register_mmxmem_operand")))
  (const_vector:V4SI [(const_int 1) (const_int 1)
  (const_int 1) (const_int 1)]))
(const_int 1]
-  "TARGET_SSE || TARGET_3DNOW_A"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
   "ix86_fixup_binary_operands_no_copy (PLUS, V4HImode, operands);")
 
 (define_insn "*mmx_uavgv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(truncate:V4HI
  (lshiftrt:V4SI
(plus:V4SI
  (plus:V4SI
(zero_extend:V4SI
- (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
(zero_extend:V4SI
- (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
  (const_vector:V4SI [(const_int 1) (const_int 1)
  (const_int 1) (const_int 1)]))
(const_int 1]
-  "(TARGET_SSE || TARGET_3DNOW_A)
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (PLUS, V4HImode, operands)"
-  "pavgw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
+  "@
+   pavgw\t{%2, %0|%0, %2}
+   pavgw\t{%2, %0|%0, %2}
+   vpavgw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_psadbw"
   [(set (match_operand:V1DI 0 "register_operand" "=y")
-- 
2.20.1



[PATCH 05/42] i386: Emulate MMX mulv4hi3 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mulv4hi3 with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_mulv4hi3): Also allow
TARGET_MMX_WITH_SSE.
(mulv4hi3): New.
(*mmx_mulv4hi3): Also allow TARGET_MMX_WITH_SSE.  Add SSE
support.
---
 gcc/config/i386/mmx.md | 32 ++--
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 517c3283963..cdb0f698001 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -716,19 +716,31 @@
 
 (define_expand "mmx_mulv4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-(mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand")
-  (match_operand:V4HI 2 "nonimmediate_operand")))]
-  "TARGET_MMX"
+(mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand")
+  (match_operand:V4HI 2 "register_mmxmem_operand")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
+
+(define_expand "mulv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand")
+(mult:V4HI (match_operand:V4HI 1 "register_operand")
+  (match_operand:V4HI 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
 
 (define_insn "*mmx_mulv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-(mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0")
-  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
-  "pmullw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+(mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")
+  (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+  "@
+   pmullw\t{%2, %0|%0, %2}
+   pmullw\t{%2, %0|%0, %2}
+   vpmullw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,ssemul,ssemul")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_smulv4hi3_highpart"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 06/42] i386: Emulate MMX smulv4hi3_highpart with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mulv4hi3 with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_smulv4hi3_highpart): Also allow
TARGET_MMX_WITH_SSE.
(*mmx_smulv4hi3_highpart): Also allow TARGET_MMX_WITH_SSE. Add
SSE support.
---
 gcc/config/i386/mmx.md | 25 +++--
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index cdb0f698001..3a7964d52bb 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -748,27 +748,32 @@
  (lshiftrt:V4SI
(mult:V4SI
  (sign_extend:V4SI
-   (match_operand:V4HI 1 "nonimmediate_operand"))
+   (match_operand:V4HI 1 "register_mmxmem_operand"))
  (sign_extend:V4SI
-   (match_operand:V4HI 2 "nonimmediate_operand")))
+   (match_operand:V4HI 2 "register_mmxmem_operand")))
(const_int 16]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
 
 (define_insn "*mmx_smulv4hi3_highpart"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(truncate:V4HI
  (lshiftrt:V4SI
(mult:V4SI
  (sign_extend:V4SI
-   (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+   (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
  (sign_extend:V4SI
-   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+   (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
(const_int 16]
-  "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
-  "pmulhw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+  "@
+   pmulhw\t{%2, %0|%0, %2}
+   pmulhw\t{%2, %0|%0, %2}
+   vpmulhw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,ssemul,ssemul")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_umulv4hi3_highpart"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 18/42] i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/mmx.md (mmx_v4hi3): Also check TARGET_MMX
and TARGET_MMX_WITH_SSE.
(mmx_v8qi3): Likewise.
(smaxmin:v4hi3): New.
(umaxmin:v8qi3): Likewise.
(smaxmin:*mmx_v4hi3): Add SSE emulation.
(umaxmin:*mmx_v8qi3): Likewise.
---
 gcc/config/i386/mmx.md | 68 +-
 1 file changed, 48 insertions(+), 20 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 1e68d1bb338..058791e01e6 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -923,40 +923,68 @@
 (define_expand "mmx_v4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
 (smaxmin:V4HI
- (match_operand:V4HI 1 "nonimmediate_operand")
- (match_operand:V4HI 2 "nonimmediate_operand")))]
-  "TARGET_SSE || TARGET_3DNOW_A"
+ (match_operand:V4HI 1 "register_mmxmem_operand")
+ (match_operand:V4HI 2 "register_mmxmem_operand")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
+  "ix86_fixup_binary_operands_no_copy (, V4HImode, operands);")
+
+(define_expand "v4hi3"
+  [(set (match_operand:V4HI 0 "register_operand")
+(smaxmin:V4HI
+ (match_operand:V4HI 1 "register_operand")
+ (match_operand:V4HI 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (, V4HImode, operands);")
 
 (define_insn "*mmx_v4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
 (smaxmin:V4HI
- (match_operand:V4HI 1 "nonimmediate_operand" "%0")
- (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "(TARGET_SSE || TARGET_3DNOW_A)
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (, V4HImode, operands)"
-  "pw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+  "@
+   pw\t{%2, %0|%0, %2}
+   pw\t{%2, %0|%0, %2}
+   vpw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_v8qi3"
   [(set (match_operand:V8QI 0 "register_operand")
 (umaxmin:V8QI
- (match_operand:V8QI 1 "nonimmediate_operand")
- (match_operand:V8QI 2 "nonimmediate_operand")))]
-  "TARGET_SSE || TARGET_3DNOW_A"
+ (match_operand:V8QI 1 "register_mmxmem_operand")
+ (match_operand:V8QI 2 "register_mmxmem_operand")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
+  "ix86_fixup_binary_operands_no_copy (, V8QImode, operands);")
+
+(define_expand "v8qi3"
+  [(set (match_operand:V8QI 0 "register_operand")
+(umaxmin:V8QI
+ (match_operand:V8QI 1 "register_operand")
+ (match_operand:V8QI 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (, V8QImode, operands);")
 
 (define_insn "*mmx_v8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
+  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
 (umaxmin:V8QI
- (match_operand:V8QI 1 "nonimmediate_operand" "%0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
-  "(TARGET_SSE || TARGET_3DNOW_A)
+ (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (, V8QImode, operands)"
-  "pb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+  "@
+   pb\t{%2, %0|%0, %2}
+   pb\t{%2, %0|%0, %2}
+   vpb\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_ashr3"
   [(set (match_operand:MMXMODE24 0 "register_operand" "=y,x,Yv")
-- 
2.20.1



[PATCH 16/42] i386: Emulate MMX mmx_pextrw with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mmx_pextrw with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_pextrw): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 18 +++---
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 43f85064cd9..22547c7da6f 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1310,16 +1310,20 @@
(set_attr "mode" "DI")])
 
 (define_insn "mmx_pextrw"
-  [(set (match_operand:SI 0 "register_operand" "=r")
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
 (zero_extend:SI
  (vec_select:HI
-   (match_operand:V4HI 1 "register_operand" "y")
-   (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pextrw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "mmxcvt")
+   (match_operand:V4HI 1 "register_operand" "y,Yv")
+   (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")]]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
+  "@
+   pextrw\t{%2, %1, %0|%0, %1, %2}
+   %vpextrw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "mmxcvt,sselog1")
(set_attr "length_immediate" "1")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI")])
 
 (define_expand "mmx_pshufw"
   [(match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 11/42] i386: Emulate MMX mmx_eq/mmx_gt3 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mmx_eq/mmx_gt3 with SSE.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/mmx.md (mmx_eq3): Also allow
TARGET_MMX_WITH_SSE.
(*mmx_eq3): Also allow TARGET_MMX_WITH_SSE.  Add SSE
support.
(mmx_gt3): Likewise.
---
 gcc/config/i386/mmx.md | 43 +-
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 510d453f0fd..b0c6a8c8077 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1017,30 +1017,39 @@
 (define_expand "mmx_eq3"
   [(set (match_operand:MMXMODEI 0 "register_operand")
 (eq:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand")
- (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
-  "TARGET_MMX"
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (EQ, mode, operands);")
 
 (define_insn "*mmx_eq3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
 (eq:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX && ix86_binary_operator_ok (EQ, mode, operands)"
-  "pcmpeq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "DI")])
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (EQ, mode, operands)"
+  "@
+   pcmpeq\t{%2, %0|%0, %2}
+   pcmpeq\t{%2, %0|%0, %2}
+   vpcmpeq\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcmp,ssecmp,ssecmp")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_gt3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
 (gt:MMXMODEI
- (match_operand:MMXMODEI 1 "register_operand" "0")
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pcmpgt\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "DI")])
+ (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   pcmpgt\t{%2, %0|%0, %2}
+   pcmpgt\t{%2, %0|%0, %2}
+   vpcmpgt\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcmp,ssecmp,ssecmp")
+   (set_attr "mode" "DI,TI,TI")])
 
 ;
 ;;
-- 
2.20.1



[PATCH 10/42] i386: Emulate MMX mmx_andnot3 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mmx_andnot3 with SSE.  Only SSE register source operand
is allowed.

PR target/89021
* config/i386/mmx.md (mmx_andnot3): Also allow
TARGET_MMX_WITH_SSE.  Add SSE support.
---
 gcc/config/i386/mmx.md | 18 +++---
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 7e2d40313c3..510d453f0fd 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1049,14 +1049,18 @@
 ;
 
 (define_insn "mmx_andnot3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
(and:MMXMODEI
- (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0"))
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pandn\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+ (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv"))
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   pandn\t{%2, %0|%0, %2}
+   pandn\t{%2, %0|%0, %2}
+   vpandn\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_3"
   [(set (match_operand:MMXMODEI 0 "register_operand")
-- 
2.20.1



[PATCH 07/42] i386: Emulate MMX mmx_pmaddwd with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX pmaddwd with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_pmaddwd): Also allow TARGET_MMX_WITH_SSE.
(*mmx_pmaddwd): Also allow TARGET_MMX_WITH_SSE.  Add SSE support.
---
 gcc/config/i386/mmx.md | 25 +++--
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 3a7964d52bb..9f0311badca 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -810,11 +810,11 @@
  (mult:V2SI
(sign_extend:V2SI
  (vec_select:V2HI
-   (match_operand:V4HI 1 "nonimmediate_operand")
+   (match_operand:V4HI 1 "register_mmxmem_operand")
(parallel [(const_int 0) (const_int 2)])))
(sign_extend:V2SI
  (vec_select:V2HI
-   (match_operand:V4HI 2 "nonimmediate_operand")
+   (match_operand:V4HI 2 "register_mmxmem_operand")
(parallel [(const_int 0) (const_int 2)]
  (mult:V2SI
(sign_extend:V2SI
@@ -823,20 +823,20 @@
(sign_extend:V2SI
  (vec_select:V2HI (match_dup 2)
(parallel [(const_int 1) (const_int 3)]))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
 
 (define_insn "*mmx_pmaddwd"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+  [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
 (plus:V2SI
  (mult:V2SI
(sign_extend:V2SI
  (vec_select:V2HI
-   (match_operand:V4HI 1 "nonimmediate_operand" "%0")
+   (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")
(parallel [(const_int 0) (const_int 2)])))
(sign_extend:V2SI
  (vec_select:V2HI
-   (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+   (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")
(parallel [(const_int 0) (const_int 2)]
  (mult:V2SI
(sign_extend:V2SI
@@ -845,10 +845,15 @@
(sign_extend:V2SI
  (vec_select:V2HI (match_dup 2)
(parallel [(const_int 1) (const_int 3)]))]
-  "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
-  "pmaddwd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+  "@
+   pmaddwd\t{%2, %0|%0, %2}
+   pmaddwd\t{%2, %0|%0, %2}
+   vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_pmulhrwv4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 09/42] i386: Emulate MMX 3 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX 3 with SSE.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/mmx.md (any_logic:mmx_3): Also allow
TARGET_MMX_WITH_SSE.
(any_logic:3): New.
(any_logic:*mmx_3): Also allow TARGET_MMX_WITH_SSE.
Add SSE support.
---
 gcc/config/i386/mmx.md | 33 +++--
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 240e0188a78..7e2d40313c3 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1061,20 +1061,33 @@
 (define_expand "mmx_3"
   [(set (match_operand:MMXMODEI 0 "register_operand")
(any_logic:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand")
- (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
-  "TARGET_MMX"
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (, mode, operands);")
+
+(define_expand "3"
+  [(set (match_operand:MMXMODEI 0 "register_operand")
+   (any_logic:MMXMODEI
+ (match_operand:MMXMODEI 1 "register_operand")
+ (match_operand:MMXMODEI 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (, mode, operands);")
 
 (define_insn "*mmx_3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
 (any_logic:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX && ix86_binary_operator_ok (, mode, operands)"
-  "p\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (, mode, operands)"
+  "@
+   p\t{%2, %0|%0, %2}
+   p\t{%2, %0|%0, %2}
+   vp\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
 ;
 ;;
-- 
2.20.1



[PATCH 08/42] i386: Emulate MMX ashr3/3 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX ashr3/3 with SSE.  Only SSE register
source operand is allowed.

PR target/89021
* config/i386/mmx.md (mmx_ashr3): Also allow
TARGET_MMX_WITH_SSE.  Add SSE emulation.
(mmx_3): Likewise.
(ashr3): New.
(3): Likewise.
---
 gcc/config/i386/mmx.md | 50 ++
 1 file changed, 36 insertions(+), 14 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 9f0311badca..240e0188a78 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -959,32 +959,54 @@
(set_attr "mode" "DI")])
 
 (define_insn "mmx_ashr3"
-  [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODE24 0 "register_operand" "=y,x,Yv")
 (ashiftrt:MMXMODE24
- (match_operand:MMXMODE24 1 "register_operand" "0")
- (match_operand:DI 2 "nonmemory_operand" "yN")))]
-  "TARGET_MMX"
-  "psra\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
+ (match_operand:MMXMODE24 1 "register_operand" "0,0,Yv")
+ (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   psra\t{%2, %0|%0, %2}
+   psra\t{%2, %0|%0, %2}
+   vpsra\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sseishft,sseishft")
(set (attr "length_immediate")
  (if_then_else (match_operand 2 "const_int_operand")
(const_string "1")
(const_string "0")))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
+
+(define_expand "ashr3"
+  [(set (match_operand:MMXMODE24 0 "register_operand")
+(ashiftrt:MMXMODE24
+ (match_operand:MMXMODE24 1 "register_operand")
+ (match_operand:DI 2 "nonmemory_operand")))]
+  "TARGET_MMX_WITH_SSE")
 
 (define_insn "mmx_3"
-  [(set (match_operand:MMXMODE248 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODE248 0 "register_operand" "=y,x,Yv")
 (any_lshift:MMXMODE248
- (match_operand:MMXMODE248 1 "register_operand" "0")
- (match_operand:DI 2 "nonmemory_operand" "yN")))]
-  "TARGET_MMX"
-  "p\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
+ (match_operand:MMXMODE248 1 "register_operand" "0,0,Yv")
+ (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   p\t{%2, %0|%0, %2}
+   p\t{%2, %0|%0, %2}
+   vp\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sseishft,sseishft")
(set (attr "length_immediate")
  (if_then_else (match_operand 2 "const_int_operand")
(const_string "1")
(const_string "0")))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
+
+(define_expand "3"
+  [(set (match_operand:MMXMODE248 0 "register_operand")
+(any_lshift:MMXMODE248
+ (match_operand:MMXMODE248 1 "register_operand")
+ (match_operand:DI 2 "nonmemory_operand")))]
+  "TARGET_MMX_WITH_SSE")
 
 ;
 ;;
-- 
2.20.1



[PATCH 02/42] i386: Emulate MMX packsswb/packssdw/packuswb with SSE2

2019-02-15 Thread H.J. Lu
Emulate MMX packsswb/packssdw/packuswb with SSE packsswb/packssdw/packuswb
plus moving bits 64:95 to bits 32:63 in SSE register.  Only SSE register
source operand is allowed.

2019-02-08  H.J. Lu  
Uros Bizjak  

PR target/89021
* config/i386/i386-protos.h (ix86_move_vector_high_sse_to_mmx):
New prototype.
(ix86_split_mmx_pack): Likewise.
* config/i386/i386.c (ix86_move_vector_high_sse_to_mmx): New
function.
(ix86_split_mmx_pack): Likewise.
* config/i386/i386.md (mmx_isa): New.
(enabled): Also check mmx_isa.
* config/i386/mmx.md (any_s_truncate): New code iterator.
(s_trunsuffix): New code attr.
(mmx_packsswb): Removed.
(mmx_packssdw): Likewise.
(mmx_packuswb): Likewise.
(mmx_packswb): New define_insn_and_split to emulate
MMX packsswb/packuswb with SSE2.
(mmx_packssdw): Likewise.
* config/i386/predicates.md (register_mmxmem_operand): New.
---
 gcc/config/i386/i386-protos.h |  3 ++
 gcc/config/i386/i386.c| 54 
 gcc/config/i386/i386.md   | 13 +++
 gcc/config/i386/mmx.md| 67 +++
 gcc/config/i386/predicates.md |  7 
 5 files changed, 114 insertions(+), 30 deletions(-)

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 27f5cc13abf..a53b48438ec 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -202,6 +202,9 @@ extern void ix86_expand_vecop_qihi (enum rtx_code, rtx, 
rtx, rtx);
 
 extern rtx ix86_split_stack_guard (void);
 
+extern void ix86_move_vector_high_sse_to_mmx (rtx);
+extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
+
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
 #endif /* TREE_CODE  */
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7d7dd80930e..d31b69d9a82 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -20221,6 +20221,60 @@ ix86_expand_vector_move_misalign (machine_mode mode, 
rtx operands[])
 gcc_unreachable ();
 }
 
+/* Move bits 64:95 to bits 32:63.  */
+
+void
+ix86_move_vector_high_sse_to_mmx (rtx op)
+{
+  rtx mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (0), GEN_INT (2),
+ GEN_INT (0), GEN_INT (0)));
+  rtx dest = lowpart_subreg (V4SImode, op, GET_MODE (op));
+  op = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+  rtx insn = gen_rtx_SET (dest, op);
+  emit_insn (insn);
+}
+
+/* Split MMX pack with signed/unsigned saturation with SSE/SSE2.  */
+
+void
+ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+
+  machine_mode dmode = GET_MODE (op0);
+  machine_mode smode = GET_MODE (op1);
+  machine_mode inner_dmode = GET_MODE_INNER (dmode);
+  machine_mode inner_smode = GET_MODE_INNER (smode);
+
+  /* Get the corresponding SSE mode for destination.  */
+  int nunits = 16 / GET_MODE_SIZE (inner_dmode);
+  machine_mode sse_dmode = mode_for_vector (GET_MODE_INNER (dmode),
+   nunits).require ();
+  machine_mode sse_half_dmode = mode_for_vector (GET_MODE_INNER (dmode),
+nunits / 2).require ();
+
+  /* Get the corresponding SSE mode for source.  */
+  nunits = 16 / GET_MODE_SIZE (inner_smode);
+  machine_mode sse_smode = mode_for_vector (GET_MODE_INNER (smode),
+   nunits).require ();
+
+  /* Generate SSE pack with signed/unsigned saturation.  */
+  rtx dest = lowpart_subreg (sse_dmode, op0, GET_MODE (op0));
+  op1 = lowpart_subreg (sse_smode, op1, GET_MODE (op1));
+  op2 = lowpart_subreg (sse_smode, op2, GET_MODE (op2));
+
+  op1 = gen_rtx_fmt_e (code, sse_half_dmode, op1);
+  op2 = gen_rtx_fmt_e (code, sse_half_dmode, op2);
+  rtx insn = gen_rtx_SET (dest, gen_rtx_VEC_CONCAT (sse_dmode,
+   op1, op2));
+  emit_insn (insn);
+
+  ix86_move_vector_high_sse_to_mmx (op0);
+}
+
 /* Helper function of ix86_fixup_binary_operands to canonicalize
operand order.  Returns true if the operands should be swapped.  */
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 40ed93dc804..e1727676deb 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -792,6 +792,10 @@
avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw"
   (const_string "base"))
 
+;; Define instruction set of MMX instructions
+(define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx"
+  (const_string "base"))
+
 (define_attr "enabled" ""
   (cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT")
 (eq_attr "isa" "x64_sse2")
@@ -830,6 +834,15 @@
 (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
 (eq_attr "isa" "avx512vl") 

[PATCH 04/42] i386: Emulate MMX plusminus/sat_plusminus with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX plusminus/sat_plusminus with SSE.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/mmx.md (MMXMODEI8): Require TARGET_SSE2 for V1DI.
(plusminus:mmx_3): Check
TARGET_MMX_WITH_SSE.
(sat_plusminus:mmx_3): Likewise.
(3): New.
(*mmx_3): Add SSE emulation.
(*mmx_3): Likewise.
---
 gcc/config/i386/mmx.md | 59 +++---
 1 file changed, 38 insertions(+), 21 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 34fecd6a745..517c3283963 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -45,7 +45,7 @@
 
 ;; 8 byte integral modes handled by MMX (and by extension, SSE)
 (define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
-(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI V1DI])
+(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")])
 
 ;; All 8-byte vector modes handled by MMX
 (define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF])
@@ -663,39 +663,56 @@
 (define_expand "mmx_3"
   [(set (match_operand:MMXMODEI8 0 "register_operand")
(plusminus:MMXMODEI8
- (match_operand:MMXMODEI8 1 "nonimmediate_operand")
- (match_operand:MMXMODEI8 2 "nonimmediate_operand")))]
-  "TARGET_MMX || (TARGET_SSE2 && mode == V1DImode)"
+ (match_operand:MMXMODEI8 1 "register_mmxmem_operand")
+ (match_operand:MMXMODEI8 2 "register_mmxmem_operand")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (, mode, operands);")
+
+(define_expand "3"
+  [(set (match_operand:MMXMODEI 0 "register_operand")
+   (plusminus:MMXMODEI
+ (match_operand:MMXMODEI 1 "register_operand")
+ (match_operand:MMXMODEI 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (, mode, operands);")
 
 (define_insn "*mmx_3"
-  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,Yv")
 (plusminus:MMXMODEI8
- (match_operand:MMXMODEI8 1 "nonimmediate_operand" "0")
- (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))]
-  "(TARGET_MMX || (TARGET_SSE2 && mode == V1DImode))
+ (match_operand:MMXMODEI8 1 "register_mmxmem_operand" "0,0,Yv")
+ (match_operand:MMXMODEI8 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& ix86_binary_operator_ok (, mode, operands)"
-  "p\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+  "@
+   p\t{%2, %0|%0, %2}
+   p\t{%2, %0|%0, %2}
+   vp\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sseadd,sseadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_3"
   [(set (match_operand:MMXMODE12 0 "register_operand")
(sat_plusminus:MMXMODE12
- (match_operand:MMXMODE12 1 "nonimmediate_operand")
- (match_operand:MMXMODE12 2 "nonimmediate_operand")))]
-  "TARGET_MMX"
+ (match_operand:MMXMODE12 1 "register_mmxmem_operand")
+ (match_operand:MMXMODE12 2 "register_mmxmem_operand")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (, mode, operands);")
 
 (define_insn "*mmx_3"
-  [(set (match_operand:MMXMODE12 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODE12 0 "register_operand" "=y,x,Yv")
 (sat_plusminus:MMXMODE12
- (match_operand:MMXMODE12 1 "nonimmediate_operand" "0")
- (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX && ix86_binary_operator_ok (, mode, operands)"
-  "p\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+ (match_operand:MMXMODE12 1 "register_mmxmem_operand" "0,0,Yv")
+ (match_operand:MMXMODE12 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (, mode, operands)"
+  "@
+   p\t{%2, %0|%0, %2}
+   p\t{%2, %0|%0, %2}
+   vp\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sseadd,sseadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_mulv4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 01/42] i386: Allow MMX register modes in SSE registers

2019-02-15 Thread H.J. Lu
In 64-bit mode, SSE2 can be used to emulate MMX instructions without
3DNOW.  We can use SSE2 to support MMX register modes.

PR target/89021
* config/i386/i386-c.c (ix86_target_macros_internal): Define
__MMX_WITH_SSE__ for TARGET_MMX_WITH_SSE.
* config/i386/i386.c (ix86_set_reg_reg_cost): Add support for
TARGET_MMX_WITH_SSE with VALID_MMX_REG_MODE.
(ix86_vector_mode_supported_p): Likewise.
* config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
---
 gcc/config/i386/i386-c.c | 2 ++
 gcc/config/i386/i386.c   | 5 +++--
 gcc/config/i386/i386.h   | 2 ++
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 5e7e46fcebe..213e1b56c6b 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -548,6 +548,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
 def_or_undef (parse_in, "__CLDEMOTE__");
   if (isa_flag2 & OPTION_MASK_ISA_PTWRITE)
 def_or_undef (parse_in, "__PTWRITE__");
+  if (TARGET_MMX_WITH_SSE)
+def_or_undef (parse_in, "__MMX_WITH_SSE__");
   if (TARGET_IAMCU)
 {
   def_or_undef (parse_in, "__iamcu");
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 3e5f52175d2..7d7dd80930e 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -40490,7 +40490,8 @@ ix86_set_reg_reg_cost (machine_mode mode)
  || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
- || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
+ || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && VALID_MMX_REG_MODE (mode)))
units = GET_MODE_SIZE (mode);
 }
 
@@ -44316,7 +44317,7 @@ ix86_vector_mode_supported_p (machine_mode mode)
 return true;
   if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
 return true;
-  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
+  if ((TARGET_MMX ||TARGET_MMX_WITH_SSE) && VALID_MMX_REG_MODE (mode))
 return true;
   if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
 return true;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 4fd8bc40a34..91b233022c2 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -201,6 +201,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  
If not, see
 #define TARGET_16BIT   TARGET_CODE16
 #define TARGET_16BIT_P(x)  TARGET_CODE16_P(x)
 
+#define TARGET_MMX_WITH_SSE(TARGET_64BIT && TARGET_SSE2)
+
 #include "config/vxworks-dummy.h"
 
 #include "config/i386/i386-opts.h"
-- 
2.20.1



[PATCH 00/42] V7: Emulate MMX intrinsics with SSE

2019-02-15 Thread H.J. Lu
On x86-64, since __m64 is returned and passed in XMM registers, we can
emulate MMX intrinsics with SSE instructions. To support it, we added

 #define TARGET_MMX_WITH_SSE (TARGET_64BIT && TARGET_SSE2)

;; Define instruction set of MMX instructions
(define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx"
  (const_string "base"))

 (eq_attr "mmx_isa" "native")
   (symbol_ref "!TARGET_MMX_WITH_SSE")
 (eq_attr "mmx_isa" "x64")
   (symbol_ref "TARGET_MMX_WITH_SSE")
 (eq_attr "mmx_isa" "x64_avx")
   (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
 (eq_attr "mmx_isa" "x64_noavx")
   (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")

We added SSE emulation to MMX patterns and disabled MMX alternatives with
TARGET_MMX_WITH_SSE.

Most of MMX instructions have equivalent SSE versions and results of some
SSE versions need to be reshuffled to the right order for MMX.  Thee are
couple tricky cases:

1. MMX maskmovq and SSE2 maskmovdqu aren't equivalent.  We emulate MMX
maskmovq with SSE2 maskmovdqu by zeroing out the upper 64 bits of the
mask operand and handle unmapped bits 64:127 at memory address by
adjusting source and mask operands together with memory address.

2. MMX movntq is emulated with SSE2 DImode movnti, which is available
in 64-bit mode.

3. MMX pshufb takes a 3-bit index while SSE pshufb takes a 4-bit index.
SSE emulation must clear the bit 4 in the shuffle control mask.

4. To emulate MMX cvtpi2p with SSE2 cvtdq2ps, we must properly preserve
the upper 64 bits of destination XMM register.

Tests are also added to check each SSE emulation of MMX intrinsics.

There are no regressions on i686 and x86-64.  For x86-64, GCC is also
tested with

--with-arch=native --with-cpu=native

on AVX2 and AVX512F machines.

H.J. Lu (41):
  i386: Allow MMX register modes in SSE registers
  i386: Emulate MMX packsswb/packssdw/packuswb with SSE2
  i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX
  i386: Emulate MMX plusminus/sat_plusminus with SSE
  i386: Emulate MMX mulv4hi3 with SSE
  i386: Emulate MMX smulv4hi3_highpart with SSE
  i386: Emulate MMX mmx_pmaddwd with SSE
  i386: Emulate MMX ashr3/3 with SSE
  i386: Emulate MMX 3 with SSE
  i386: Emulate MMX mmx_andnot3 with SSE
  i386: Emulate MMX mmx_eq/mmx_gt3 with SSE
  i386: Emulate MMX vec_dupv2si with SSE
  i386: Emulate MMX pshufw with SSE
  i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE
  i386: Emulate MMX sse_cvtpi2ps with SSE
  i386: Emulate MMX mmx_pextrw with SSE
  i386: Emulate MMX mmx_pinsrw with SSE
  i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE
  i386: Emulate MMX mmx_pmovmskb with SSE
  i386: Emulate MMX mmx_umulv4hi3_highpart with SSE
  i386: Emulate MMX maskmovq with SSE2 maskmovdqu
  i386: Emulate MMX mmx_uavgv8qi3 with SSE
  i386: Emulate MMX mmx_uavgv4hi3 with SSE
  i386: Emulate MMX mmx_psadbw with SSE
  i386: Emulate MMX movntq with SSE2 movntidi
  i386: Emulate MMX umulv1siv1di3 with SSE2
  i386: Make _mm_empty () as NOP when MMX is disabled
  i386: Emulate MMX ssse3_phwv4hi3 with SSE
  i386: Emulate MMX ssse3_phdv2si3 with SSE
  i386: Emulate MMX ssse3_pmaddubsw with SSE
  i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE
  i386: Emulate MMX pshufb with SSE version
  i386: Emulate MMX ssse3_psign3 with SSE
  i386: Emulate MMX ssse3_palignrdi with SSE
  i386: Emulate MMX abs2 with SSE
  i386: Correct _pmulhrsw3[_mask]
  i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE
  i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE
  i386: Allow MMX intrinsic emulation with SSE
  i386: Enable TM MMX intrinsics with SSE2
  i386: Add tests for MMX intrinsic emulations with SSE

Uros Bizjak (1):
  Prevent allocation of MMX registers with TARGET_MMX_WITH_SSE

 gcc/config/i386/constraints.md|   6 +
 gcc/config/i386/i386-builtin.def  | 126 +--
 gcc/config/i386/i386-c.c  |   2 +
 gcc/config/i386/i386-protos.h |   4 +
 gcc/config/i386/i386.c| 189 +++-
 gcc/config/i386/i386.h|   2 +
 gcc/config/i386/i386.md   |  17 +
 gcc/config/i386/mmintrin.h|  12 +-
 gcc/config/i386/mmx.md| 986 --
 gcc/config/i386/predicates.md |  14 +
 gcc/config/i386/sse.md| 368 +--
 gcc/config/i386/xmmintrin.h   |  61 ++
 gcc/testsuite/gcc.target/i386/mmx-vals.h  |  77 ++
 gcc/testsuite/gcc.target/i386/pr82483-1.c |   2 +-
 gcc/testsuite/gcc.target/i386/pr82483-2.c |   2 +-
 gcc/testsuite/gcc.target/i386/sse2-mmx-10.c   |  43 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-11.c   |  39 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-12.c   |  42 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-13.c   |  40 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-14.c   |  31 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-15.c   |  36 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-16.c   

[PATCH 03/42] i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX

2019-02-15 Thread H.J. Lu
Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX.  For MMX punpckhXX,
move bits 64:127 to bits 0:63 in SSE register.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/i386-protos.h (ix86_split_mmx_punpck): New
prototype.
* config/i386/i386.c (ix86_split_mmx_punpck): New function.
* config/i386/mmx.m (mmx_punpckhbw): Changed to
define_insn_and_split to support SSE emulation.
(mmx_punpcklbw): Likewise.
(mmx_punpckhwd): Likewise.
(mmx_punpcklwd): Likewise.
(mmx_punpckhdq): Likewise.
(mmx_punpckldq): Likewise.
---
 gcc/config/i386/i386-protos.h |   1 +
 gcc/config/i386/i386.c|  77 +++
 gcc/config/i386/mmx.md| 138 ++
 3 files changed, 168 insertions(+), 48 deletions(-)

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index a53b48438ec..37581837a32 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -204,6 +204,7 @@ extern rtx ix86_split_stack_guard (void);
 
 extern void ix86_move_vector_high_sse_to_mmx (rtx);
 extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
+extern void ix86_split_mmx_punpck (rtx[], bool);
 
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index d31b69d9a82..a76c17beece 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -20275,6 +20275,83 @@ ix86_split_mmx_pack (rtx operands[], enum rtx_code 
code)
   ix86_move_vector_high_sse_to_mmx (op0);
 }
 
+/* Split MMX punpcklXX/punpckhXX with SSE punpcklXX.  */
+
+void
+ix86_split_mmx_punpck (rtx operands[], bool high_p)
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+  machine_mode mode = GET_MODE (op0);
+  rtx mask;
+  /* The corresponding SSE mode.  */
+  machine_mode sse_mode, double_sse_mode;
+
+  switch (mode)
+{
+case E_V8QImode:
+  sse_mode = V16QImode;
+  double_sse_mode = V32QImode;
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (16,
+ GEN_INT (0), GEN_INT (16),
+ GEN_INT (1), GEN_INT (17),
+ GEN_INT (2), GEN_INT (18),
+ GEN_INT (3), GEN_INT (19),
+ GEN_INT (4), GEN_INT (20),
+ GEN_INT (5), GEN_INT (21),
+ GEN_INT (6), GEN_INT (22),
+ GEN_INT (7), GEN_INT (23)));
+  break;
+
+case E_V4HImode:
+  sse_mode = V8HImode;
+  double_sse_mode = V16HImode;
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (8,
+ GEN_INT (0), GEN_INT (8),
+ GEN_INT (1), GEN_INT (9),
+ GEN_INT (2), GEN_INT (10),
+ GEN_INT (3), GEN_INT (11)));
+  break;
+
+case E_V2SImode:
+  sse_mode = V4SImode;
+  double_sse_mode = V8SImode;
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4,
+ GEN_INT (0), GEN_INT (4),
+ GEN_INT (1), GEN_INT (5)));
+  break;
+
+default:
+  gcc_unreachable ();
+}
+
+  /* Generate SSE punpcklXX.  */
+  rtx dest = lowpart_subreg (sse_mode, op0, GET_MODE (op0));
+  op1 = lowpart_subreg (sse_mode, op1, GET_MODE (op1));
+  op2 = lowpart_subreg (sse_mode, op2, GET_MODE (op2));
+
+  op1 = gen_rtx_VEC_CONCAT (double_sse_mode, op1, op2);
+  op2 = gen_rtx_VEC_SELECT (sse_mode, op1, mask);
+  rtx insn = gen_rtx_SET (dest, op2);
+  emit_insn (insn);
+
+  if (high_p)
+{
+  /* Move bits 64:127 to bits 0:63.  */
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+ GEN_INT (0), GEN_INT (0)));
+  dest = lowpart_subreg (V4SImode, dest, GET_MODE (dest));
+  op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+  insn = gen_rtx_SET (dest, op1);
+  emit_insn (insn);
+}
+}
+
 /* Helper function of ix86_fixup_binary_operands to canonicalize
operand order.  Returns true if the operands should be swapped.  */
 
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index dbb2baa74d7..34fecd6a745 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1064,87 +1064,129 @@
(set_attr "type" "mmxshft,sselog,sselog")
(set_attr "mode" "DI,TI,TI")])
 
-(define_insn "mmx_punpckhbw"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhbw"
+  [(set (match_operand:V8QI 0 

Re: Fortran vector math header

2019-02-15 Thread Steve Kargl
On Tue, Feb 05, 2019 at 01:47:57PM +0100, Martin Liška wrote:
> 
> gcc/fortran/ChangeLog:
> 
> 2019-01-24  Martin Liska  
> 
>   * decl.c (gfc_match_gcc_builtin): Add support for filtering
>   of builtin directive based on multilib ABI name.
> 
> gcc/testsuite/ChangeLog:
> 
> 2019-01-24  Martin Liska  
> 
>   * gfortran.dg/simd-builtins-7.f90: New test.
>   * gfortran.dg/simd-builtins-7.h: New test.

The Fortran bits look ok to me.

-- 
steve


Go patch committed: Use __builtin_dwarf_cfa for getcallersp

2019-02-15 Thread Ian Lance Taylor
This patch by Cherry Zhang changes the Go compiler and runtime to use
__builtin_dwarf_cfa for getcallersp.  Currently, the compiler lowers
runtime.getcallersp to __builtin_frame_address(1).  In the C side of
the runtime, getcallersp is defined as __builtin_frame_address(0).
They don't match.  Further, neither of them actually returns the
caller's SP.  On x86_64, __builtin_frame_address(0) just returns the
frame pointer.  __builtin_frame_address(1) returns the memory content
where the frame pointer points to, which is typically the caller's
frame pointer but can also be garbage if the frame pointer is not
enabled.

This patch changes getcallersp to use __builtin_dwarf_cfa(), which
returns the caller's SP at the call site.  This matches the SP we get
from unwinding the stack.

Currently getcallersp is not used for anything real. It will be used
for precise stack scan.

Bootstrapped and ran Go testsuite on x86_64-pc-linux-gnu.  Committed
to mainline.

Ian


2019-02-15  Cherry Zhang  

* go-gcc.cc (Gcc_backend::Gcc_backend): Define __builtin_dwarf_cfa
instead of __builtin_frame_address.
Index: gcc/go/go-gcc.cc
===
--- gcc/go/go-gcc.cc(revision 268369)
+++ gcc/go/go-gcc.cc(working copy)
@@ -734,8 +734,9 @@ Gcc_backend::Gcc_backend()
   this->define_builtin(BUILT_IN_RETURN_ADDRESS, "__builtin_return_address",
   NULL, t, false, false);
 
-  // The runtime calls __builtin_frame_address for runtime.getcallersp.
-  this->define_builtin(BUILT_IN_FRAME_ADDRESS, "__builtin_frame_address",
+  // The runtime calls __builtin_dwarf_cfa for runtime.getcallersp.
+  t = build_function_type_list(ptr_type_node, NULL_TREE);
+  this->define_builtin(BUILT_IN_DWARF_CFA, "__builtin_dwarf_cfa",
   NULL, t, false, false);
 
   // The runtime calls __builtin_extract_return_addr when recording
Index: gcc/go/gofrontend/MERGE
===
--- gcc/go/gofrontend/MERGE (revision 268948)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-1a74b8a22b2ff7f430729aa87ecb8cea7b5cdd70
+9605c2efd99aa9c744652a9153e208e0653b8596
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: gcc/go/gofrontend/expressions.cc
===
--- gcc/go/gofrontend/expressions.cc(revision 268923)
+++ gcc/go/gofrontend/expressions.cc(working copy)
@@ -9903,17 +9903,18 @@ Call_expression::do_lower(Gogo* gogo, Na
  && n == "getcallerpc")
{
  static Named_object* builtin_return_address;
+  int arg = 0;
  return this->lower_to_builtin(_return_address,
"__builtin_return_address",
-   0);
+   );
}
  else if ((this->args_ == NULL || this->args_->size() == 0)
   && n == "getcallersp")
{
- static Named_object* builtin_frame_address;
- return this->lower_to_builtin(_frame_address,
-   "__builtin_frame_address",
-   1);
+ static Named_object* builtin_dwarf_cfa;
+ return this->lower_to_builtin(_dwarf_cfa,
+   "__builtin_dwarf_cfa",
+   NULL);
}
}
 }
@@ -10031,21 +10032,24 @@ Call_expression::lower_varargs(Gogo* gog
   this->varargs_are_lowered_ = true;
 }
 
-// Return a call to __builtin_return_address or __builtin_frame_address.
+// Return a call to __builtin_return_address or __builtin_dwarf_cfa.
 
 Expression*
 Call_expression::lower_to_builtin(Named_object** pno, const char* name,
- int arg)
+ int* arg)
 {
   if (*pno == NULL)
-*pno = Gogo::declare_builtin_rf_address(name);
+*pno = Gogo::declare_builtin_rf_address(name, arg != NULL);
 
   Location loc = this->location();
 
   Expression* fn = Expression::make_func_reference(*pno, NULL, loc);
-  Expression* a = Expression::make_integer_ul(arg, NULL, loc);
   Expression_list *args = new Expression_list();
-  args->push_back(a);
+  if (arg != NULL)
+{
+  Expression* a = Expression::make_integer_ul(*arg, NULL, loc);
+  args->push_back(a);
+}
   Expression* call = Expression::make_call(fn, args, false, loc);
 
   // The builtin functions return void*, but the Go functions return uintptr.
Index: gcc/go/gofrontend/expressions.h
===
--- gcc/go/gofrontend/expressions.h (revision 268369)
+++ gcc/go/gofrontend/expressions.h (working copy)
@@ -2356,7 +2356,7 @@ class Call_expression : public Expressio
   

[testsuite] Couple of g++.dg/asan tweaks

2019-02-15 Thread Eric Botcazou
One of the tests in g++.dg/asan/asan_oob_test.cc uses unaligned memory 
accesses and g++.dg/asan/function-argument-3.C assumes a specific kind of 
calling conventions for vectors.

Tested on SPARC64/Linux, applied on the mainline.


2019-02-15  Eric Botcazou  

* g++.dg/asan/asan_oob_test.cc: Skip OOB_int on SPARC.
* g++.dg/asan/function-argument-3.C: Tweak for 32-bit SPARC.

-- 
Eric BotcazouIndex: g++.dg/asan/asan_oob_test.cc
===
--- g++.dg/asan/asan_oob_test.cc	(revision 268849)
+++ g++.dg/asan/asan_oob_test.cc	(working copy)
@@ -68,9 +68,13 @@ TEST(AddressSanitizer, OOB_char) {
   OOBTest();
 }
 
+// The following test uses unaligned memory accesses
+
+#if !defined(__sparc__)
 TEST(AddressSanitizer, OOB_int) {
   OOBTest();
 }
+#endif
 
 TEST(AddressSanitizer, OOBRightTest) {
   for (size_t access_size = 1; access_size <= 8; access_size *= 2) {
Index: g++.dg/asan/function-argument-3.C
===
--- g++.dg/asan/function-argument-3.C	(revision 268849)
+++ g++.dg/asan/function-argument-3.C	(working copy)
@@ -2,7 +2,16 @@
 // { dg-shouldfail "asan" }
 // { dg-additional-options "-Wno-psabi" }
 
+// On SPARC 32-bit, only vectors up to 8 bytes are passed in registers
+#if defined(__sparc__) && !defined(__sparcv9) && !defined(__arch64__)
+#define SMALL_VECTOR
+#endif
+
+#ifdef SMALL_VECTOR
+typedef int v4si __attribute__ ((vector_size (8)));
+#else
 typedef int v4si __attribute__ ((vector_size (16)));
+#endif
 
 static __attribute__ ((noinline)) int
 goo (v4si *a)
@@ -19,10 +28,14 @@ foo (v4si arg)
 int
 main ()
 {
+#ifdef SMALL_VECTOR
+  v4si v = {1,2};
+#else
   v4si v = {1,2,3,4};
+#endif
   return foo (v);
 }
 
 // { dg-output "ERROR: AddressSanitizer: stack-buffer-overflow on address.*(\n|\r\n|\r)" }
 // { dg-output "READ of size . at.*" }
-// { dg-output ".*'arg' \\(line 14\\) <== Memory access at offset \[0-9\]* overflows this variable.*" }
+// { dg-output ".*'arg' \\(line 23\\) <== Memory access at offset \[0-9\]* overflows this variable.*" }


[SPARC] Small ASAN fixes

2019-02-15 Thread Eric Botcazou
This automatically passes -funwind-tables when ASAN is used on Linux, as done 
for other architectures, and also adjusts the shadow offset in 64-bit mode.

Tested on SPARC64/Linux, applied on the mainline.


2019-02-15  Eric Botcazou  

* config/sparc/linux.h (ASAN_CC1_SPEC): Define.
(CC1_SPEC): Use GNU_USER_TARGET_CC1_SPEC and ASAN_CC1_SPEC.
* config/sparc/linux64.h (ASAN_CC1_SPEC): Likewise.
(CC1_SPEC): Likewise.
* config/sparc/sparc.c (sparc_asan_shadow_offset): Adjust for 64-bit.

-- 
Eric BotcazouIndex: config/sparc/linux.h
===
--- config/sparc/linux.h	(revision 268849)
+++ config/sparc/linux.h	(working copy)
@@ -54,10 +54,11 @@ extern const char *host_detect_local_cpu
 
 #define DRIVER_SELF_SPECS MCPU_MTUNE_NATIVE_SPECS
 
-/* This is for -profile to use -lc_p instead of -lc.  */
-#undef	CC1_SPEC
-#define	CC1_SPEC "%{profile:-p} \
-"
+#undef  ASAN_CC1_SPEC
+#define ASAN_CC1_SPEC "%{%:sanitize(address):-funwind-tables}"
+
+#undef  CC1_SPEC
+#define CC1_SPEC GNU_USER_TARGET_CC1_SPEC ASAN_CC1_SPEC
 
 #undef SIZE_TYPE
 #define SIZE_TYPE "unsigned int"
Index: config/sparc/linux64.h
===
--- config/sparc/linux64.h	(revision 268849)
+++ config/sparc/linux64.h	(working copy)
@@ -143,24 +143,25 @@ extern const char *host_detect_local_cpu
 
 #define DRIVER_SELF_SPECS MCPU_MTUNE_NATIVE_SPECS
 
-#undef	CC1_SPEC
+#undef  ASAN_CC1_SPEC
+#define ASAN_CC1_SPEC "%{%:sanitize(address):-funwind-tables}"
+
+#undef  CC1_SPEC
 #if DEFAULT_ARCH32_P
-#define CC1_SPEC "%{profile:-p} \
-%{m32:%{m64:%emay not use both -m32 and -m64}} \
+#define CC1_SPEC GNU_USER_TARGET_CC1_SPEC ASAN_CC1_SPEC \
+"%{m32:%{m64:%emay not use both -m32 and -m64}} \
 %{m64:-mptr64 -mstack-bias -mlong-double-128 \
   %{!mcpu*:-mcpu=ultrasparc} \
-  %{!mno-vis:%{!mcpu=v9:-mvis}}} \
-"
+  %{!mno-vis:%{!mcpu=v9:-mvis}}}"
 #else
-#define CC1_SPEC "%{profile:-p} \
-%{m32:%{m64:%emay not use both -m32 and -m64}} \
+#define CC1_SPEC GNU_USER_TARGET_CC1_SPEC ASAN_CC1_SPEC \
+"%{m32:%{m64:%emay not use both -m32 and -m64}} \
 %{m32:-mptr32 -mno-stack-bias %{!mlong-double-128:-mlong-double-64} \
   %{!mcpu*:-mcpu=cypress}} \
 %{mv8plus:-mptr32 -mno-stack-bias %{!mlong-double-128:-mlong-double-64} \
   %{!mcpu*:-mcpu=v9}} \
 %{!m32:%{!mcpu*:-mcpu=ultrasparc}} \
-%{!mno-vis:%{!m32:%{!mcpu=v9:-mvis}}} \
-"
+%{!mno-vis:%{!m32:%{!mcpu=v9:-mvis}}}"
 #endif
 
 /* Support for a compile-time default CPU, et cetera.  The rules are:
Index: config/sparc/sparc.c
===
--- config/sparc/sparc.c	(revision 268849)
+++ config/sparc/sparc.c	(working copy)
@@ -12524,7 +12524,7 @@ sparc_init_machine_status (void)
 static unsigned HOST_WIDE_INT
 sparc_asan_shadow_offset (void)
 {
-  return TARGET_ARCH64 ? HOST_WIDE_INT_C (0x7fff8000) : (HOST_WIDE_INT_1 << 29);
+  return TARGET_ARCH64 ? (HOST_WIDE_INT_1 << 43) : (HOST_WIDE_INT_1 << 29);
 }
 
 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.


Re: [PATCH] Avoid assuming valid_constant_size_p argument is a constant expression (PR 89294)

2019-02-15 Thread Martin Sebor

On 2/15/19 3:46 PM, Eric Botcazou wrote:

I'm ready to commit the patch once it's approved, and have been since
the day the problem was reported.


Maybe CCing whoever approved the previous patch would help?


I just pinged the patch a few minutes ago and CC'd Jason.  Sorry
about any trouble this has caused.

Martin


Re: [PATCH] Avoid assuming valid_constant_size_p argument is a constant expression (PR 89294)

2019-02-15 Thread Eric Botcazou
> I'm ready to commit the patch once it's approved, and have been since
> the day the problem was reported.

Maybe CCing whoever approved the previous patch would help?

-- 
Eric Botcazou


gcc-8-20190215 is now available

2019-02-15 Thread gccadmin
Snapshot gcc-8-20190215 is now available on
  ftp://gcc.gnu.org/pub/gcc/snapshots/8-20190215/
and on various mirrors, see http://gcc.gnu.org/mirrors.html for details.

This snapshot has been generated from the GCC 8 SVN branch
with the following options: svn://gcc.gnu.org/svn/gcc/branches/gcc-8-branch 
revision 268949

You'll find:

 gcc-8-20190215.tar.xzComplete GCC

  SHA256=27a88eec101063e31b67057d6870cc0641639191a94c3aa36a497b5f746fc20e
  SHA1=95873a3d6e64f4d0f2ec3f73ef7ffda270d79931

Diffs from 8-20190208 are available in the diffs/ subdirectory.

When a particular snapshot is ready for public consumption the LATEST-8
link is updated and a message is sent to the gcc list.  Please do not use
a snapshot before it has been announced that way.


Re: [PATCH] Avoid assuming valid_constant_size_p argument is a constant expression (PR 89294)

2019-02-15 Thread Martin Sebor

Ping: https://gcc.gnu.org/ml/gcc-patches/2019-02/msg00857.html

Jason, since you approved the original patch, can you please also
review this one?  Due to the Ada test breakage there seems to be
some anxiety about getting the problem corrected soon.

Thanks
Martin

On 2/11/19 6:13 PM, Martin Sebor wrote:

The attached patch removes the assumption introduced earlier today
in my fix for bug 87996 that the valid_constant_size_p argument is
a constant expression.  I couldn't come up with a C/C++ test case
where this isn't true but apparently it can happen in Ada which I
inadvertently didn't build.  I still haven't figured out what
I have to do to build it on my Fedora 29 machine so I tested
this change by hand (besides bootstrapping w/o Ada).

The first set of instructions Google gives me don't seem to do
it:

   https://fedoraproject.org/wiki/Features/Ada_developer_tools

and neither does dnf install gcc-gnat as explained on our Wiki:

   https://gcc.gnu.org/wiki/GNAT

If someone knows the magic chant I would be grateful (it might
be helpful to also update the Wiki page -- the last change to
it was made in 2012; I volunteer to do that).

Martin




[Bug middle-end/89371] New: missed vectorisation with "#pragma omp simd collapse(2)"

2019-02-15 Thread arnaud02 at users dot sourceforge.net
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89371

Bug ID: 89371
   Summary: missed vectorisation with "#pragma omp simd
collapse(2)"
   Product: gcc
   Version: 8.2.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: middle-end
  Assignee: unassigned at gcc dot gnu.org
  Reporter: arnaud02 at users dot sourceforge.net
  Target Milestone: ---

void ff(double* res, double const* a, double const* b, int ncell, int neq)
{
#pragma omp simd collapse(2)
  for(int icell=0; icell < ncell; ++icell)
  {
  for(int ieq=0; ieq

[PATCH, og8] Don't rescan "attach" node for dereferenced struct member

2019-02-15 Thread Julian Brown
Hi,

The following (og8 branch) patch added support for
attaching/detaching from dereferenced struct members:

https://gcc.gnu.org/ml/gcc-patches/2019-01/msg01778.html

Unfortunately I made a mistake in the portion of that patch that
inserts new alloc and firstprivate_pointer nodes for the struct base,
meaning that the node rewritten to an attach operation would be
scanned again. This is both unnecessary, and can cause problems in some
circumstances.

Tested with offloading to nvptx, no regressions and the new test passes.
I will apply (to the og8 branch) shortly.

Thanks,

Julian

ChangeLog

gcc/
* gimplify.c (gimplify_scan_omp_clauses): Avoid scanning
'c' again after creating base-pointer nodes for
dereferenced struct.

gcc/testsuite/
* gfortran.dg/goacc/derived-types-2.f90: New.
commit e374d415801588435d62ac214e0313ffd3ef2198
Author: Julian Brown 
Date:   Thu Feb 14 16:40:21 2019 -0800

[og8] Don't rescan "attach" node for dereferenced struct member

gcc/
* gimplify.c (gimplify_scan_omp_clauses): Avoid scanning 'c' again
after creating base-pointer nodes for dereferenced struct.

gcc/testsuite/
* gfortran.dg/goacc/derived-types-2.f90: New.

diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 8bf11eb659e..2ff5b68e0cc 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -8289,8 +8289,6 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 		  *list_p = c2;
 		  OMP_CLAUSE_CHAIN (c2) = c3;
 		  OMP_CLAUSE_CHAIN (c3) = c;
-		  c = c3;
-		  list_p = _CLAUSE_CHAIN (c3);
 
 		  struct_deref_set->add (decl);
 		}
diff --git a/gcc/testsuite/gfortran.dg/goacc/derived-types-2.f90 b/gcc/testsuite/gfortran.dg/goacc/derived-types-2.f90
new file mode 100644
index 000..d01583fac89
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/derived-types-2.f90
@@ -0,0 +1,14 @@
+module bar
+  type :: type1
+ real(8), pointer, public :: p(:) => null()
+  end type
+  type :: type2
+ class(type1), pointer :: p => null()
+  end type
+end module
+
+subroutine foo (var)
+   use bar
+   type(type2), intent(inout) :: var
+   !$acc enter data create(var%p%p)
+end subroutine


[Bug c++/88512] Too much STL in error output

2019-02-15 Thread redi at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88512

--- Comment #10 from Jonathan Wakely  ---
I've created bug 89370 to request showing the type as std::string.

[Bug c++/88512] Too much STL in error output

2019-02-15 Thread redi at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88512

--- Comment #9 from Jonathan Wakely  ---
It's easy for you to say that after looking at the reason it failed and knowing
what the code is trying to do (obviously "insert" modifies the string, so it
can't be const). But the compiler has to try every overload, and doesn't know
what your intention is, or what the word "insert" means.

The problem here is (as before) that std::string has eight overloads of insert
(and just as many for assign, and begin, and replace etc), and there's nothing
the compiler can do about that.

Potentially the compiler could be changed so that if all the overload
candidates are non-const and the object is const, it just says that. But that's
another fairly specialized diagnostic just for this case. Most classes simply
don't have eight overloads of the same function.

Re: [patch] Disable store merging in asan_expand_mark_ifn

2019-02-15 Thread Eric Botcazou
> > OK, revised patch attached.  I have manually verified that it yields the
> > expected result for an array of long doubles on 64-bit SPARC.
> > 
> > 
> > 2019-02-12  Eric Botcazou  
> > 
> > * asan.c (asan_expand_mark_ifn): Take into account the alignment of
> > the object to pick the size of stores on strict-alignment platforms.
> 
> Ok, thanks.

Glad you insisted in the end, because I have ASAN working on SPARC64/Linux, 
but only after fixing another bug on 64-bit strict-alignment platforms:

  /* Align base if target is STRICT_ALIGNMENT.  */
  if (STRICT_ALIGNMENT)
base = expand_binop (Pmode, and_optab, base,
 gen_int_mode (-((GET_MODE_ALIGNMENT (SImode)
  << ASAN_SHADOW_SHIFT)
 / BITS_PER_UNIT), Pmode), NULL_RTX,
 1, OPTAB_DIRECT);

GET_MODE_ALIGNMENT is unsigned int so this zero-extends to unsigned long...

Tested on 32-bit and 64-bit SPARC/Linux, applied on mainline as obvious.


2019-02-15  Eric Botcazou  

* asan.c (asan_emit_stack_protection): Use full-sized mask to align
the base address on 64-bit strict-alignment platforms.

-- 
Eric BotcazouIndex: asan.c
===
--- asan.c	(revision 268849)
+++ asan.c	(working copy)
@@ -1440,13 +1441,15 @@ asan_emit_stack_protection (rtx base, rt
 	base_align_bias = ((asan_frame_size + alignb - 1)
 			   & ~(alignb - HOST_WIDE_INT_1)) - asan_frame_size;
 }
+
   /* Align base if target is STRICT_ALIGNMENT.  */
   if (STRICT_ALIGNMENT)
-base = expand_binop (Pmode, and_optab, base,
-			 gen_int_mode (-((GET_MODE_ALIGNMENT (SImode)
-	  << ASAN_SHADOW_SHIFT)
-	 / BITS_PER_UNIT), Pmode), NULL_RTX,
-			 1, OPTAB_DIRECT);
+{
+  const HOST_WIDE_INT align
+	= (GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT) << ASAN_SHADOW_SHIFT;
+  base = expand_binop (Pmode, and_optab, base, gen_int_mode (-align, Pmode),
+			   NULL_RTX, 1, OPTAB_DIRECT);
+}
 
   if (use_after_return_class == -1 && pbase)
 emit_move_insn (pbase, base);
@ -1534,7 +1548,7 @@ asan_emit_stack_protection (rtx base, rt
   shadow_mem = gen_rtx_MEM (SImode, shadow_base);
   set_mem_alias_set (shadow_mem, asan_shadow_set);
   if (STRICT_ALIGNMENT)
-set_mem_align (shadow_mem, (GET_MODE_ALIGNMENT (SImode)));
+set_mem_align (shadow_mem, GET_MODE_ALIGNMENT (SImode));
   prev_offset = base_offset;
 
   asan_redzone_buffer rz_buffer (shadow_mem, prev_offset);


[Bug tree-optimization/89350] [9 Regression] Wrong -Wstringop-overflow= warning since r261518

2019-02-15 Thread msebor at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89350

Martin Sebor  changed:

   What|Removed |Added

 Status|NEW |ASSIGNED
   Assignee|unassigned at gcc dot gnu.org  |msebor at gcc dot 
gnu.org

--- Comment #9 from Martin Sebor  ---
Let me put together a patch for the negative offsets.  The argc patch is useful
on its own, independent of this bug.

Re: [PATCH] Avoid assuming valid_constant_size_p argument is a constant expression (PR 89294)

2019-02-15 Thread Martin Sebor

On 2/15/19 12:24 AM, Eric Botcazou wrote:

The attached patch removes the assumption introduced earlier today
in my fix for bug 87996 that the valid_constant_size_p argument is
a constant expression.  I couldn't come up with a C/C++ test case
where this isn't true but apparently it can happen in Ada which I
inadvertently didn't build.


Can we do something here?  Our internal testers have been down for 3 days
because of this blunder...


I'm ready to commit the patch once it's approved, and have been since
the day the problem was reported.


Martin




[Bug go/89368] [9 regression] ICE in go/gofrontend/expressions.cc:4669 after r268923

2019-02-15 Thread ian at airs dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89368

Ian Lance Taylor  changed:

   What|Removed |Added

 Status|UNCONFIRMED |RESOLVED
 Resolution|--- |FIXED

--- Comment #2 from Ian Lance Taylor  ---
Thanks for the report.  Fixed.

[Bug c++/84916] Tweaks to template type elision

2019-02-15 Thread dmalcolm at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84916

David Malcolm  changed:

   What|Removed |Added

   Target Milestone|9.0 |10.0

--- Comment #4 from David Malcolm  ---
I have a patch for this, queuing for gcc 10 stage 1.

[Bug c++/60920] Crash on double template header due to default template parameter

2019-02-15 Thread paolo.carlini at oracle dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=60920

Paolo Carlini  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED
   Target Milestone|--- |8.0

--- Comment #4 from Paolo Carlini  ---
Fixed by the patch which fixed c++/85264.

[Bug sanitizer/80953] Support libsanitizer on Solaris

2019-02-15 Thread ebotcazou at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80953

--- Comment #35 from Eric Botcazou  ---
> Right: 64-bit Solaris/SPARC uses the full 64-bit address space:
> 
> https://docs.oracle.com/cd/E37838_01/html/E66175/advanced-2.
> html#SSFDGadvanced-5
> 
> The gap between the low and high parts cannot be determined
> programmatically right now and varies between different sparc machines.

Linux uses an equivalent VM layout (although the primary stack is in the low
part instead of the high part).  I think that on Linux the hole is fully
determined by the number of bits available for virtual addresses.

> Even if that could be fixed (there's been talk about adding an interface
> to determine that information at runtime), the current shadow address
> calculation cannot cope with such a discontinuous address space.

Right, but you can tweak it once the position of the hole is determined.  I
have done so for a 52-bit VM layout (T4 and later apparently) and, modulo fixes
all over the place (libsanitizer, libbacktrace, compiler, testsuite), I now
have a 100% clean ASAN testsuite on both 32-bit and 64-bit SPARC/Linux.

Trying on Solaris now...

[Bug go/89368] [9 regression] ICE in go/gofrontend/expressions.cc:4669 after r268923

2019-02-15 Thread ian at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89368

--- Comment #1 from ian at gcc dot gnu.org  ---
Author: ian
Date: Fri Feb 15 21:04:58 2019
New Revision: 268948

URL: https://gcc.gnu.org/viewcvs?rev=268948=gcc=rev
Log:
PR go/89368
compiler: write barrier check nil-check policy tweak

Tweak the recipe for generating writeBarrier loads to insure that the
dereference expr is marked as not requiring a nil check (not needed
for gccgo, but needed for gollvm).

Fixes https://gcc.gnu.org/PR89368

Reviewed-on: https://go-review.googlesource.com/c/162904

Modified:
trunk/gcc/go/gofrontend/MERGE
trunk/gcc/go/gofrontend/wb.cc

Go patch committed: Don't use a nil check for the write barrier

2019-02-15 Thread Ian Lance Taylor
This patch to the Go frontend by Than McIntosh tweaks the recipe for
generating writeBarrier loads to insure that the dereference expr is
marked as not requiring a nil check.  This should fix gcc PR 89368.
Bootstrapped and ran Go testsuite on x86_64-pc-linux-gnu.  Committed
to mainline.

Ian
Index: gcc/go/gofrontend/MERGE
===
--- gcc/go/gofrontend/MERGE (revision 268941)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-0563f2d018cdb2cd685c254bac5ceb38396d0a27
+1a74b8a22b2ff7f430729aa87ecb8cea7b5cdd70
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: gcc/go/gofrontend/wb.cc
===
--- gcc/go/gofrontend/wb.cc (revision 268923)
+++ gcc/go/gofrontend/wb.cc (working copy)
@@ -904,7 +904,8 @@ Gogo::check_write_barrier(Block* enclosi
   ref = Expression::make_unary(OPERATOR_AND, ref, loc);
   ref = Expression::make_cast(unsafe_pointer_type, ref, loc);
   ref = Expression::make_cast(puint32_type, ref, loc);
-  ref = Expression::make_unary(OPERATOR_MULT, ref, loc);
+  ref = Expression::make_dereference(ref,
+ Expression::NIL_CHECK_NOT_NEEDED, loc);
   Expression* zero = Expression::make_integer_ul(0, ref->type(), loc);
   Expression* cond = Expression::make_binary(OPERATOR_EQEQ, ref, zero, loc);
 


Re: GCC 8.3 Release Candidate available from gcc.gnu.org

2019-02-15 Thread Bill Seurer

On 02/15/19 10:13, Jakub Jelinek wrote:

The first release candidate for GCC 8.3 is available from

  https://gcc.gnu.org/pub/gcc/snapshots/8.3.0-RC-20190215/
  ftp://gcc.gnu.org/pub/gcc/snapshots/8.3.0-RC-20190215/

and shortly its mirrors.  It has been generated from SVN revision 268935.

I have so far bootstrapped and tested the release candidate on
x86_64-linux and i686-linux.  Please test it and report any issues to
bugzilla.

If all goes well, I'd like to release 8.3 on Friday, February 22nd.



I bootstrapped and tested on powerpc64.  power 7 BE, power 8 BE, power 8 
LE, and power 9 LE all went well.


--

-Bill Seurer



Re: GCC missing -flto optimizations? SPEC lbm benchmark

2019-02-15 Thread Eric Botcazou
> Hasn't GNAT sorted Ada elements in records (e.g. structures) by size
> since near its initial addition to GCC in the mid-90s? This results in the
> largest elements up front and minimizes the need for alignment gaps.

No, that's a serious misconception, since one of the features of GNAT is to be 
compatible with C by default as much as possible.  But we started to do some 
reordering recently when the records don't have (direct) equivalents in C.

-- 
Eric Botcazou


[Bug c++/84536] [7/8/9 Regression] ICE with non-type template parameter

2019-02-15 Thread paolo.carlini at oracle dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84536

Paolo Carlini  changed:

   What|Removed |Added

   Priority|P4  |P3
 Status|UNCONFIRMED |NEW
   Last reconfirmed||2019-02-15
 CC||paolo.carlini at oracle dot com
 Ever confirmed|0   |1

--- Comment #1 from Paolo Carlini  ---
I would argue for P3, because, if we slightly tweak it to:

template auto foo(N...); // -> void {}

void bar()
{
  foo<>();
}

we don't emit any diagnostics before ICEing.

[Bug fortran/89077] ICE using * as len specifier for character parameter

2019-02-15 Thread anlauf at gmx dot de
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89077

--- Comment #17 from Harald Anlauf  ---
(In reply to Harald Anlauf from comment #16)
> Regarding the unwanted padding with \0, the following patch seems to
> solve the issue with transfer.

Regtested cleanly and submitted here:

https://gcc.gnu.org/ml/fortran/2019-02/msg00126.html

[PR fortran/89077, patch, part 3] - ICE using * as len specifier for character parameter

2019-02-15 Thread Harald Anlauf
The attached patch is the third in a series for the above PR.
This one fixes erroneous padding with garbage characters in some
declaration and initialization expressions.

The issue here was that expr->representation is set when either
Hollerith strings are used or a TRANSFER statement is involved.
As a result, the original string could be used with trailing
garbage instead of the properly space-padded string.  The patch
simply clears expr->representation in that case.

Regtested on x86_64-pc-linux-gnu.

OK for trunk?

Thanks,
Harald

2019-02-15  Harald Anlauf  

PR fortran/89077
* decl.c (gfc_set_constant_character_len): Clear original string
representation after padding has been performed to target length.

2019-02-15  Harald Anlauf  

PR fortran/89077
* gfortran.dg/transfer_simplify_12.f90: New test.

Index: gcc/fortran/decl.c
===
--- gcc/fortran/decl.c  (revision 268946)
+++ gcc/fortran/decl.c  (working copy)
@@ -1754,6 +1754,14 @@
   free (expr->value.character.string);
   expr->value.character.string = s;
   expr->value.character.length = len;
+  /* If explicit representation was given, clear it
+as it is no longer needed after padding.  */
+  if (expr->representation.length)
+   {
+ expr->representation.length = 0;
+ free (expr->representation.string);
+ expr->representation.string = NULL;
+   }
 }
 }
 
Index: gcc/testsuite/gfortran.dg/transfer_simplify_12.f90
===
--- gcc/testsuite/gfortran.dg/transfer_simplify_12.f90  (nonexistent)
+++ gcc/testsuite/gfortran.dg/transfer_simplify_12.f90  (working copy)
@@ -0,0 +1,27 @@
+! { dg-do run }
+! { dg-options "-O -std=legacy" }
+!
+! Test fixes for some findings while resolving PR fortran/89077
+
+program test
+  implicit none
+  integer :: i
+  character(*)  ,parameter :: s =  'abcdef'   ! Length will be 6
+  character(*)  ,parameter :: h = 6Habcdef! Length will be 8 (Hollerith!)
+  character(10) ,parameter :: k = 6Habcdef
+  character(10) ,parameter :: t = transfer (s, s)
+  character(10) ,save  :: u = transfer (s, s)
+  character(10) ,parameter :: v = transfer (h, h)
+  character(10) ,save  :: w = transfer (h, h)
+  character(10) ,parameter :: x = transfer ([(s(i:i),i=len(s),1,-1)], s)
+  character(10) ,save  :: y = transfer ([(s(i:i),i=len(s),1,-1)], s)
+  if (len (h) /= 8) stop 1
+  if (h /= s) stop 2
+  if (k /= s) stop 3
+  if (t /= s) stop 4
+  if (u /= s) stop 5
+  if (v /= s) stop 6
+  if (w /= s) stop 7
+  if (x /= "fedcba") stop 8
+  if (y /= x) stop 9
+end program test


[PATCH] i386: Fix ')' in VALID_MMX_REG_MODE

2019-02-15 Thread H.J. Lu
Replace "(MODE == V1DImode)" with "(MODE) == V1DImode".

* config/i386/i386.h (VALID_MMX_REG_MODE): Correct the misplaced
')'.
---
 gcc/ChangeLog  | 5 +
 gcc/config/i386/i386.h | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d1083735e26..96f8679e8f9 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2019-02-15  H.J. Lu  
+
+   * config/i386/i386.h (VALID_MMX_REG_MODE): Correct the misplaced
+   ')'.
+
 2019-02-15  Uroš Bizjak  
 
* config/i386/darwin.h (TARGET_FPMATH_DEFAULT_P): New define.
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index d9039060997..4fd8bc40a34 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1158,7 +1158,7 @@ extern const char *host_detect_local_cpu (int argc, const 
char **argv);
   ((MODE) == V2SFmode || (MODE) == SFmode)
 
 #define VALID_MMX_REG_MODE(MODE)   \
-  ((MODE == V1DImode) || (MODE) == DImode  \
+  ((MODE) == V1DImode || (MODE) == DImode  \
|| (MODE) == V2SImode || (MODE) == SImode   \
|| (MODE) == V4HImode || (MODE) == V8QImode)
 
-- 
2.20.1



[Bug target/89372] New: Incorrect PMULHRSW

2019-02-15 Thread hjl.tools at gmail dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89372

Bug ID: 89372
   Summary: Incorrect PMULHRSW
   Product: gcc
   Version: 9.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: target
  Assignee: unassigned at gcc dot gnu.org
  Reporter: hjl.tools at gmail dot com
CC: ubizjak at gmail dot com
  Target Milestone: ---
Target: i386,x86-64

sse.md has

(define_mode_iterator PMULHRSW
  [V4HI V8HI (V16HI "TARGET_AVX2")])

(define_expand "_pmulhrsw3_mask"
  [(set (match_operand:PMULHRSW 0 "register_operand")
(vec_merge:PMULHRSW
  (truncate:PMULHRSW
(lshiftrt:
  (plus:
(lshiftrt:
  (mult:
(sign_extend:
  (match_operand:PMULHRSW 1 "nonimmediate_operand"))
(sign_extend:
  (match_operand:PMULHRSW 2 "nonimmediate_operand")))
  (const_int 14))
(match_dup 5))
  (const_int 1)))
  (match_operand:PMULHRSW 3 "register_operand")
  (match_operand: 4 "register_operand")))]
  "TARGET_AVX512BW && TARGET_AVX512VL"
{
  operands[5] = CONST1_RTX(mode);
  ix86_fixup_binary_operands_no_copy (MULT, mode, operands);
})

But AVX512BW doesn't support V4HI.

(define_expand "_pmulhrsw3"
  [(set (match_operand:PMULHRSW 0 "register_operand")
(truncate:PMULHRSW
  (lshiftrt:
(plus:
  (lshiftrt:
(mult:
  (sign_extend:
(match_operand:PMULHRSW 1 "nonimmediate_operand"))
  (sign_extend:
(match_operand:PMULHRSW 2 "nonimmediate_operand")))
(const_int 14))
  (match_dup 3))
(const_int 1]
  "TARGET_AVX2"
{
  operands[3] = CONST1_RTX(mode);
  ix86_fixup_binary_operands_no_copy (MULT, mode, operands);
})


But V4HI V8HI doesn't require AVX2.

[Bug c/88944] Suggested alternative C stdbool.h

2019-02-15 Thread dmalcolm at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88944

David Malcolm  changed:

   What|Removed |Added

 Status|UNCONFIRMED |ASSIGNED
   Last reconfirmed||2019-02-15
   Assignee|unassigned at gcc dot gnu.org  |dmalcolm at gcc dot 
gnu.org
   Target Milestone|--- |10.0
 Ever confirmed|0   |1

--- Comment #1 from David Malcolm  ---
Thanks.

FWIW trunk (gcc 9) no longer suggests "_Bool".

I have a patch pending (for gcc 10) which implements the header suggestion.

[Bug c++/89367] Constexpr expression is not constexpr in template, but is constexpr in non-template.

2019-02-15 Thread jakub at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89367

Jakub Jelinek  changed:

   What|Removed |Added

 CC||hubicka at gcc dot gnu.org,
   ||jakub at gcc dot gnu.org,
   ||jason at gcc dot gnu.org,
   ||mpolacek at gcc dot gnu.org,
   ||redi at gcc dot gnu.org

--- Comment #3 from Jakub Jelinek  ---
With -fpic it actually rejects both static_asserts.
In all the cases, it is symtab_node::equal_address_to that attempts to decide
if the two VAR_DECLs need to be the same or distinct or unknown.
Without templates and without -fpic that function returns true, because both
are defined locally and we can easily decide that they must be different, but
with -fpic, the generic code allows them to be interposed and fails.  Even
without -fpic, for templates the variables are DECL_COMDAT and thus not really
known if defined by the current or some other TU.

No idea if C++ standard says anything here and constexpr evaluation shouldn't
use some different rules (or in addition to what the generic code says).

Re: libgo patch committed: Add S/390 support to internal/cpu package

2019-02-15 Thread Matthias Klose
On 15.02.19 15:52, Ian Lance Taylor wrote:
> This patch by Robin Dapp adds S/390 support to the internal/cpu
> package.  This partially addresses PR 89123.  I bootstrapped it on
> x86_64-pc-linux-gnu, which means little.  Committed to mainline.

fails in the -m31 multilib variant with

libtool: compile:  /<>/build/./gcc/xgcc
-B/<>/build/./gcc/ -B/usr/s390x-linux-gnu/bin/
-B/usr/s390x-linux-gnu/lib/ -isystem /usr/s390x-linux-gnu/include -isystem
/usr/s390x-linux-gnu/sys-include -isys
tem /<>/build/sys-include -m31 -DHAVE_CONFIG_H -I.
-I../../../../src/libgo -I ../../../../src/libgo/
runtime -I../../../../src/libgo/../libffi/include -I../libffi/include -pthread
-L../libatomic/.libs -fexceptions
-fnon-call-exceptions -fno-stack-protector -fsplit-stack -Wall -Wextra
-Wwrite-strings -Wcast-qual -D_GNU_SOURCE
-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -I ../../../../src/libgo/../libgcc -I
../../../../src/libgo/../libback
trace -I ../../../gcc/include -g -O2 -m31 -c
../../../../src/libgo/go/internal/cpu/cpu_gccgo.c  -fPIC -DPIC -o in
ternal/cpu/.libs/cpu_gccgo.o

../../../../src/libgo/go/internal/cpu/cpu_gccgo.c: Assembler messages:
../../../../src/libgo/go/internal/cpu/cpu_gccgo.c:91: Error: Unrecognized
opcode: `lghi'
../../../../src/libgo/go/internal/cpu/cpu_gccgo.c:105: Error: Unrecognized
opcode: `lghi'
../../../../src/libgo/go/internal/cpu/cpu_gccgo.c:119: Error: Unrecognized
opcode: `lghi'
../../../../src/libgo/go/internal/cpu/cpu_gccgo.c:134: Error: Unrecognized
opcode: `lghi'
../../../../src/libgo/go/internal/cpu/cpu_gccgo.c:149: Error: Unrecognized
opcode: `lghi'
../../../../src/libgo/go/internal/cpu/cpu_gccgo.c:164: Error: Unrecognized
opcode: `lghi'
../../../../src/libgo/go/internal/cpu/cpu_gccgo.c:179: Error: Unrecognized
opcode: `lghi'
make[10]: *** [Makefile:2899: internal/cpu/cpu_gccgo.lo] Error 1

make[10]: *** Waiting for unfinished jobs
make[10]: Leaving directory '/<>/build/s390x-linux-gnu/32/libgo'
make[9]: *** [Makefile:2242: all-recursive] Error 1
make[9]: Leaving directory '/<>/build/s390x-linux-gnu/32/libgo'
make[8]: *** [Makefile:1167: all] Error 2
make[8]: Leaving directory '/<>/build/s390x-linux-gnu/32/libgo'
make[7]: *** [Makefile:3062: multi-do] Error 1

using binutils 2.32


[Bug c++/89370] Output std::string in diagnostics instead of std::__cxx11::basic_string<_CharT, _Traits, _Alloc>

2019-02-15 Thread jg at jguk dot org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89370

Jonny Grant  changed:

   What|Removed |Added

 CC||jg at jguk dot org

--- Comment #1 from Jonny Grant  ---
Many thanks!
Happy to pay $200 bounty for this feature to implementer or FSF/GNU etc

[Bug c++/88512] Too much STL in error output

2019-02-15 Thread jg at jguk dot org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88512

--- Comment #12 from Jonny Grant  ---
(In reply to Jonathan Wakely from comment #9)
> It's easy for you to say that after looking at the reason it failed and
> knowing what the code is trying to do (obviously "insert" modifies the
> string, so it can't be const). But the compiler has to try every overload,
> and doesn't know what your intention is, or what the word "insert" means.
> 
> The problem here is (as before) that std::string has eight overloads of
> insert (and just as many for assign, and begin, and replace etc), and
> there's nothing the compiler can do about that.
> 
> Potentially the compiler could be changed so that if all the overload
> candidates are non-const and the object is const, it just says that. But
> that's another fairly specialized diagnostic just for this case. Most
> classes simply don't have eight overloads of the same function.

Sounds good, those insert methods aren't 'const' so at least the first line
could say that...

eg:
$ g++ -Wall -c -o int int.cpp
int.cpp: In function ‘int main()’:
int.cpp:13:21: error: no matching function for call to
‘std::string::insert(size_t, const string&) const’
   str.insert(4, str2);
 ^
   Note, ‘str’ is const and all ‘std::string::insert()’ methods are non-const

[Bug middle-end/89371] missed vectorisation with "#pragma omp simd collapse(2)"

2019-02-15 Thread jakub at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89371

Jakub Jelinek  changed:

   What|Removed |Added

   Keywords||missed-optimization, openmp
 CC||jakub at gcc dot gnu.org

--- Comment #1 from Jakub Jelinek  ---
Well, in this case I don't see any advantage of using collapsed loops, it won't
make the loop more readable and is much harder to vectorize, so why not to
write it that way?
Yes, there is certainly room for improvement to undo this kind of stuff.

Re: riscv64 dep. computation

2019-02-15 Thread Paulo Matos



On 15/02/2019 19:15, Jim Wilson wrote:
> On Thu, Feb 14, 2019 at 11:33 PM Paulo Matos  wrote:
>> Are global variables not supposed to alias each other?
>> If I indeed do that, gcc still won't group loads and stores:
>> https://cx.rv8.io/g/rFjGLa
> 
> I meant something like
> struct foo_t x, y;
> and now they clearly don't alias.  As global pointers they may still alias.
> 

Ah ok, of course. Like that it makes sense they don't alias.

Thanks,

-- 
Paulo Matos


[Bug c++/88512] Too much STL in error output

2019-02-15 Thread redi at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88512

--- Comment #11 from Jonathan Wakely  ---
There's also Bug 53281

[Bug c++/89370] New: Output std::string in diagnostics instead of std::__cxx11::basic_string<_CharT, _Traits, _Alloc>

2019-02-15 Thread redi at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89370

Bug ID: 89370
   Summary: Output std::string in diagnostics instead of
std::__cxx11::basic_string<_CharT, _Traits, _Alloc>
   Product: gcc
   Version: 9.0
Status: UNCONFIRMED
  Keywords: diagnostic
  Severity: normal
  Priority: P3
 Component: c++
  Assignee: unassigned at gcc dot gnu.org
  Reporter: redi at gcc dot gnu.org
  Target Milestone: ---

Errors involving std::string are unreadable (see Bug 88512 comment 8). Nobody
cares that it's:

'std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::iterator
std::__cxx11::basic_string<_CharT, _Traits,
_Alloc>::insert(std::__cxx11::basic_string<_CharT, _Traits,
_Alloc>::__const_iterator, _CharT) [with _CharT = char; _Traits =
std::char_traits; _Alloc = std::allocator;
std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::iterator =
__gnu_cxx::__normal_iterator >;
typename __gnu_cxx::__alloc_traits::rebind<_CharT>::other>::pointer = char*;
std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::__const_iterator =
__gnu_cxx::__normal_iterator >;
typename __gnu_cxx::__alloc_traits::rebind<_CharT>::other>::const_pointer =
const char*]'
 1745 |   insert(__const_iterator __p, _CharT __c)
  |   ^~


The type std::__cxx11::basic_string<_CharT, _Traits, _Alloc> should be shown as
std::string. That alone makes it far more readable. And if we don't show it as
a template specialization maybe we don't need the entire "[with ...]" part at
all, but if we do need it, we should reduce it ...

I don't think expanding the return type or the __const_iterator type in the
"[with ...]" part is very useful:

std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::iterator =
__gnu_cxx::__normal_iterator >;

std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::iterator =
__gnu_cxx::__normal_iterator >;


And why on Earth is it telling us what the pointer and const_pointer types
are?! They don't appear in the member function signature at all!

typename __gnu_cxx::__alloc_traits::rebind<_CharT>::other>::pointer = char*;

 typename __gnu_cxx::__alloc_traits::rebind<_CharT>::other>::const_pointer =
const char*

Cool story bro. Those types are not relevant.

[Bug lto/87525] [7/8/9 Regression] infinite loop generated for fread() if enabling -flto and -D_FORTIFY_SOURCE=2

2019-02-15 Thread jamborm at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87525

Martin Jambor  changed:

   What|Removed |Added

 CC||jamborm at gcc dot gnu.org

--- Comment #19 from Martin Jambor  ---
(In reply to Jan Hubicka from comment #14)
> Martin,
> it seems to me that ipa-cp should not clone extern inline functions unless
> it sees that it helps to clone some real functions called from it. Why the
> costmodel thinks it is profitable?

It does not special-case that kind of situation.  Still it is not
easy, because the cloning decisions are made on the final sweep over
the call graph from callers to callees and whether any callees would
be cloned will only be decided in the future.

Still, the cost model should probably evaluate these cases
differently, perhaps like the patch below.  To the extent to which I
am able to reproduce the issue (only on gcc 8 and in the "correct"
case it segfaults as opposed to endless loop), the adjustment helps,
but it is of course only a heuristics.

diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
index e868b9c2623..f148ceef393 100644
--- a/gcc/ipa-cp.c
+++ b/gcc/ipa-cp.c
@@ -4685,15 +4685,23 @@ decide_about_value (struct cgraph_node *node, int
index, HOST_WIDE_INT offset,
   fprintf (dump_file, " (caller_count: %i)\n", caller_count);
 }

-  if (!good_cloning_opportunity_p (node, val->local_time_benefit,
-  freq_sum, count_sum,
-  val->local_size_cost)
-  && !good_cloning_opportunity_p (node,
- val->local_time_benefit
- + val->prop_time_benefit,
- freq_sum, count_sum,
- val->local_size_cost
- + val->prop_size_cost))
+  if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
+{
+  if (!good_cloning_opportunity_p (node, val->prop_time_benefit,
+  freq_sum, count_sum,
+  val->local_size_cost
+  + val->prop_size_cost))
+   return false;
+}
+  else if (!good_cloning_opportunity_p (node, val->local_time_benefit,
+   freq_sum, count_sum,
+   val->local_size_cost)
+  && !good_cloning_opportunity_p (node,
+  val->local_time_benefit
+  + val->prop_time_benefit,
+  freq_sum, count_sum,
+  val->local_size_cost
+  + val->prop_size_cost))
 return false;

   if (dump_file)

Re: [PATCH 00/40] V6: Emulate MMX intrinsics with SSE

2019-02-15 Thread Uros Bizjak
On Fri, Feb 15, 2019 at 7:20 PM H.J. Lu  wrote:
> > I went through the code again, and looks OK in general, modulo
> > mmx_nonimmediate_operand issue and a couple of minor issues.
> >
> > Please substitute nonimmediate_operand predicate with
> > mmx_nonimmediate_operand in expanders and insn patterns. Please note
>
> Can we keep nonimmediate_operand in expanders, like

No, expander should also be changed. The way expanders are called is -
if the operand can't satisfy the predicate, then move it to a
register. So, for TARGET_MMX_WITH_SSE, we allow memory operand which
isn't allowed by relevant insn pattern -> ICE.

There is nothing RA can do here. Operand type, produced by expander
must match predicate in the insn pattern to satisfy insn pattern.
Otherwise, the compiler will ICE way before RA comes into play. Also,
in the insn pattern, the constraints must allow a subset of an operand
predicate if we want RA to fixup the operand.

Uros.

> (define_expand "3"
>   [(set (match_operand:MMXMODEI 0 "register_operand")
> (plusminus:MMXMODEI
>   (match_operand:MMXMODEI 1 "nonimmediate_operand")
>   (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
>   "TARGET_MMX_WITH_SSE"
>   "ix86_fixup_binary_operands_no_copy (, mode, operands);")
>
> (define_insn "*mmx_3"
>   [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,Yv")
> (plusminus:MMXMODEI8
>   (match_operand:MMXMODEI8 1 "register_mmxmem_operand" "0,0,Yv")
>   (match_operand:MMXMODEI8 2 "register_mmxmem_operand" "ym,x,Yv")))]
>   "(TARGET_MMX || TARGET_MMX_WITH_SSE)
>&& ix86_binary_operator_ok (, mode, operands)"
>   "@
>p\t{%2, %0|%0, %2}
>p\t{%2, %0|%0, %2}
>vp\t{%2, %1, %0|%0, %1, %2}"
>   [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
>(set_attr "type" "mmxadd,sseadd,sseadd")
>(set_attr "mode" "DI,TI,TI")])
>
> Can RA do the right thing?
>
> > that the proposed convention is to name the operand
> > register_mmxmem_operand (c.f. register_ssemem_operand), so I suggest
> > we name the predicate in this way.
>
> I will rename it to register_mmxmem_operand.
>
> > There is an issue with a change to emms pattern.
> >
> > And let's remove _mm_empty () calls from testcases; they complicate
> > things too much for no apparent benefit.
>
> Will do.
>
> > With those issues fixed, the patchset is OK for gcc-10 when it opens.
> >
> > Uros.
> >
> > > H.J. Lu (41):
> > >   i386: Allow MMX register modes in SSE registers
> > >   i386: Add mmx_nonimmediate_operand
> > >   i386: Emulate MMX packsswb/packssdw/packuswb with SSE2
> > >   i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX
> > >   i386: Emulate MMX plusminus/sat_plusminus with SSE
> > >   i386: Emulate MMX mulv4hi3 with SSE
> > >   i386: Emulate MMX smulv4hi3_highpart with SSE
> > >   i386: Emulate MMX mmx_pmaddwd with SSE
> > >   i386: Emulate MMX ashr3/3 with SSE
> > >   i386: Emulate MMX 3 with SSE
> > >   i386: Emulate MMX mmx_andnot3 with SSE
> > >   i386: Emulate MMX mmx_eq/mmx_gt3 with SSE
> > >   i386: Emulate MMX vec_dupv2si with SSE
> > >   i386: Emulate MMX pshufw with SSE
> > >   i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE
> > >   i386: Emulate MMX sse_cvtpi2ps with SSE
> > >   i386: Emulate MMX mmx_pextrw with SSE
> > >   i386: Emulate MMX mmx_pinsrw with SSE
> > >   i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE
> > >   i386: Emulate MMX mmx_pmovmskb with SSE
> > >   i386: Emulate MMX mmx_umulv4hi3_highpart with SSE
> > >   i386: Emulate MMX maskmovq with SSE2 maskmovdqu
> > >   i386: Emulate MMX mmx_uavgv8qi3 with SSE
> > >   i386: Emulate MMX mmx_uavgv4hi3 with SSE
> > >   i386: Emulate MMX mmx_psadbw with SSE
> > >   i386: Emulate MMX movntq with SSE2 movntidi
> > >   i386: Emulate MMX umulv1siv1di3 with SSE2
> > >   i386: Make _mm_empty () as NOP when MMX is disabled
> > >   i386: Emulate MMX ssse3_phwv4hi3 with SSE
> > >   i386: Emulate MMX ssse3_phdv2si3 with SSE
> > >   i386: Emulate MMX ssse3_pmaddubsw with SSE
> > >   i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE
> > >   i386: Emulate MMX pshufb with SSE version
> > >   i386: Emulate MMX ssse3_psign3 with SSE
> > >   i386: Emulate MMX ssse3_palignrdi with SSE
> > >   i386: Emulate MMX abs2 with SSE
> > >   i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE
> > >   i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE
> > >   i386: Allow MMX intrinsic emulation with SSE
> > >   i386: Enable TM MMX intrinsics with SSE2
> > >   i386: Add tests for MMX intrinsic emulations with SSE
> > >
> > > Uros Bizjak (1):
> > >   Prevent allocation of MMX registers with TARGET_MMX_WITH_SSE
> > >
> > >  gcc/config/i386/constraints.md|   6 +
> > >  gcc/config/i386/i386-builtin.def  | 126 +--
> > >  gcc/config/i386/i386-c.c  |   2 +
> > >  gcc/config/i386/i386-protos.h |   4 +
> > >  gcc/config/i386/i386.c| 189 +++-
> > >  gcc/config/i386/i386.h   

  1   2   3   >