From b1865c39a3e4f43e66c221a7eb803a084c13434e Mon Sep 17 00:00:00 2001
From: Sebastian Peryt <sebastian.peryt@intel.com>
Date: Wed, 9 May 2018 13:49:43 +0300
Subject: [PATCH] PTWRITE intrinsics

---
 gcc/common/config/i386/i386-common.c      | 15 +++++++++++++++
 gcc/config/i386/cpuid.h                   |  4 ++++
 gcc/config/i386/driver-i386.c             | 13 ++++++++++++-
 gcc/config/i386/i386-builtin.def          |  2 ++
 gcc/config/i386/i386-c.c                  |  2 ++
 gcc/config/i386/i386.c                    | 22 ++++++++++++++++++++--
 gcc/config/i386/i386.h                    |  2 ++
 gcc/config/i386/i386.md                   | 10 ++++++++++
 gcc/config/i386/i386.opt                  |  4 ++++
 gcc/config/i386/immintrin.h               | 24 ++++++++++++++++++++++++
 gcc/doc/extend.texi                       |  8 ++++++++
 gcc/doc/invoke.texi                       | 11 +++++++----
 gcc/testsuite/gcc.target/i386/ptwrite-1.c | 16 ++++++++++++++++
 13 files changed, 126 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/ptwrite-1.c

diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c
index 2629ae6..d8c687d 100644
--- a/gcc/common/config/i386/i386-common.c
+++ b/gcc/common/config/i386/i386-common.c
@@ -152,6 +152,7 @@ along with GCC; see the file COPYING3.  If not see
 #define OPTION_MASK_ISA_VPCLMULQDQ_SET OPTION_MASK_ISA_VPCLMULQDQ
 #define OPTION_MASK_ISA_MOVDIRI_SET OPTION_MASK_ISA_MOVDIRI
 #define OPTION_MASK_ISA_MOVDIR64B_SET OPTION_MASK_ISA_MOVDIR64B
+#define OPTION_MASK_ISA_PTWRITE_SET OPTION_MASK_ISA_PTWRITE
 
 /* Define a set of ISAs which aren't available when a given ISA is
    disabled.  MMX and SSE ISAs are handled separately.  */
@@ -228,6 +229,7 @@ along with GCC; see the file COPYING3.  If not see
 #define OPTION_MASK_ISA_VPCLMULQDQ_UNSET OPTION_MASK_ISA_VPCLMULQDQ
 #define OPTION_MASK_ISA_MOVDIRI_UNSET OPTION_MASK_ISA_MOVDIRI
 #define OPTION_MASK_ISA_MOVDIR64B_UNSET OPTION_MASK_ISA_MOVDIR64B
+#define OPTION_MASK_ISA_PTWRITE_UNSET OPTION_MASK_ISA_PTWRITE
 
 /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should the same
    as -mno-sse4.1. */
@@ -1082,6 +1084,19 @@ ix86_handle_option (struct gcc_options *opts,
 	}
       return true;
 
+    case OPT_mptwrite:
+      if (value)
+	{
+	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_PTWRITE_SET;
+	  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_PTWRITE_SET;
+	}
+      else
+	{
+	  opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_PTWRITE_UNSET;
+	  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_PTWRITE_UNSET;
+	}
+      return true;
+
     case OPT_mf16c:
       if (value)
 	{
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index a26c5e4..151788c 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -124,6 +124,10 @@
 #define bit_XSAVEC	(1 << 1)
 #define bit_XSAVES	(1 << 3)
 
+/* PT sub leaf (%eax == 14, %ecx == 0) */
+/* %ebx.  */
+#define bit_PTWRITE	(1 << 4)
+
 /* Signatures for different CPU implementations as returned in uses
    of cpuid with level 0.  */
 #define signature_AMD_ebx	0x68747541
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index 88cf6ea..ec433ee 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -425,6 +425,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
   unsigned int has_vpclmulqdq = 0;
   unsigned int has_movdiri = 0, has_movdir64b = 0;
 
+  unsigned int has_ptwrite = 0;
+
   bool arch;
 
   unsigned int l2sizekb = 0;
@@ -538,6 +540,13 @@ const char *host_detect_local_cpu (int argc, const char **argv)
       has_xsaves = eax & bit_XSAVES;
     }
 
+  if (max_level >= 0x14)
+    {
+      __cpuid_count (0x14, 0, eax, ebx, ecx, edx);
+
+      has_ptwrite = ebx & bit_PTWRITE;
+    }
+
   /* Check cpuid level of extended features.  */
   __cpuid (0x80000000, ext_level, ebx, ecx, edx);
 
@@ -1108,6 +1117,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
       const char *avx512bitalg = has_avx512bitalg ? " -mavx512bitalg" : " -mno-avx512bitalg";
       const char *movdiri = has_movdiri ? " -mmovdiri" : " -mno-movdiri";
       const char *movdir64b = has_movdir64b ? " -mmovdir64b" : " -mno-movdir64b";
+      const char *ptwrite = has_ptwrite ? " -mptwrite" : " -mno-ptwrite";
+
       options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
 			sse4a, cx16, sahf, movbe, aes, sha, pclmul,
 			popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2,
@@ -1120,7 +1131,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
 			avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw,
 			clwb, mwaitx, clzero, pku, rdpid, gfni, shstk,
 			avx512vbmi2, avx512vnni, vaes, vpclmulqdq,
-			avx512bitalg, movdiri, movdir64b, NULL);
+			avx512bitalg, movdiri, movdir64b, ptwrite, NULL);
     }
 
 done:
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 1b9c63a..6e131cd 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -2846,6 +2846,8 @@ BDESC_END (ARGS2, SPECIAL_ARGS2)
 BDESC_FIRST (special_args2, SPECIAL_ARGS2,
 	OPTION_MASK_ISA_WBNOINVD, CODE_FOR_wbnoinvd, "__builtin_ia32_wbnoinvd", IX86_BUILTIN_WBNOINVD, UNKNOWN, (int) VOID_FTYPE_VOID)
 BDESC (OPTION_MASK_ISA_MOVDIR64B, CODE_FOR_nothing, "__builtin_ia32_movdir64b", IX86_BUILTIN_MOVDIR64B, UNKNOWN, (int) VOID_FTYPE_PVOID_PCVOID)
+BDESC (OPTION_MASK_ISA_PTWRITE, CODE_FOR_ptwritesi, "__builtin_ia32_ptwrite32", IX86_BUILTIN_PTWRITE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED)
+BDESC (OPTION_MASK_ISA_PTWRITE, CODE_FOR_ptwritedi, "__builtin_ia32_ptwrite64", IX86_BUILTIN_PTWRITE64, UNKNOWN, (int) VOID_FTYPE_UINT64)
 
 BDESC_END (SPECIAL_ARGS2, MPX)
 
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 3df599c..2bedd56 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -516,6 +516,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
     def_or_undef (parse_in, "__MOVDIRI__");
   if (isa_flag2 & OPTION_MASK_ISA_MOVDIR64B)
     def_or_undef (parse_in, "__MOVDIR64B__");
+  if (isa_flag2 & OPTION_MASK_ISA_PTWRITE)
+    def_or_undef (parse_in, "__PTWRITE__");
   if (TARGET_IAMCU)
     {
       def_or_undef (parse_in, "__iamcu");
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 70e87fb..fbee338 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2772,7 +2772,8 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
     { "-mmovbe",	OPTION_MASK_ISA_MOVBE },
     { "-mclzero",	OPTION_MASK_ISA_CLZERO },
     { "-mmwaitx",	OPTION_MASK_ISA_MWAITX },
-    { "-mmovdir64b",	OPTION_MASK_ISA_MOVDIR64B }
+    { "-mmovdir64b",	OPTION_MASK_ISA_MOVDIR64B },
+    { "-mptwrite",	OPTION_MASK_ISA_PTWRITE }
   };
   static struct ix86_target_opts isa_opts[] =
   {
@@ -3455,6 +3456,7 @@ ix86_option_override_internal (bool main_args_p,
   const wide_int_bitmask PTA_RDPID (0, HOST_WIDE_INT_1U << 6);
   const wide_int_bitmask PTA_PCONFIG (0, HOST_WIDE_INT_1U << 7);
   const wide_int_bitmask PTA_WBNOINVD (0, HOST_WIDE_INT_1U << 8);
+  /* Add PTA_PTWRITE here if it has a cpu.  */
 
   const wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
     | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR;
@@ -5387,6 +5389,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
     IX86_ATTR_ISA ("vpclmulqdq", OPT_mvpclmulqdq),
     IX86_ATTR_ISA ("movdiri", OPT_mmovdiri),
     IX86_ATTR_ISA ("movdir64b", OPT_mmovdir64b),
+    IX86_ATTR_ISA ("ptwrite",  OPT_mptwrite),
 
     /* enum options */
     IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),
@@ -31298,6 +31301,7 @@ ix86_init_mmx_sse_builtins (void)
   const struct builtin_description * d;
   enum ix86_builtin_func_type ftype;
   size_t i;
+  tree decl;
 
   /* Add all special builtins with variable number of operands.  */
   for (i = 0, d = bdesc_special_args;
@@ -31325,7 +31329,21 @@ ix86_init_mmx_sse_builtins (void)
 	continue;
 
       ftype = (enum ix86_builtin_func_type) d->flag;
-      def_builtin2 (d->mask, d->name, ftype, d->code);
+      decl = def_builtin2 (d->mask, d->name, ftype, d->code);
+
+      /* Avoid edges for ptwrites generated by vartrace pass.  */
+      if (decl)
+	{
+	  DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+						    NULL_TREE);
+	  TREE_NOTHROW (decl) = 1;
+	}
+      else
+	{
+	  ix86_builtins_isa[(int)d->code].leaf_p = true;
+	  ix86_builtins_isa[(int)d->code].nothrow_p = true;
+	}
+
     }
   BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS2_LAST,
 		 IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST,
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 787bf9f..c4f518a 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -189,6 +189,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define TARGET_MOVDIRI_P(x) TARGET_ISA_MOVDIRI_P(x)
 #define TARGET_MOVDIR64B	TARGET_ISA_MOVDIR64B
 #define TARGET_MOVDIR64B_P(x) TARGET_ISA_MOVDIR64B_P(x)
+#define TARGET_PTWRITE	TARGET_ISA_PTWRITE
+#define TARGET_PTWRITE_P(x)	TARGET_ISA_PTWRITE_P(x)
 
 #define TARGET_LP64	TARGET_ABI_64
 #define TARGET_LP64_P(x)	TARGET_ABI_64_P(x)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 97b1ca3..a3e04b2 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -291,6 +291,8 @@
   UNSPECV_CLRSSBSY
   UNSPECV_MOVDIRI
   UNSPECV_MOVDIR64B
+
+  UNSPECV_PTWRITE
 ])
 
 ;; Constants to represent rounding modes in the ROUND instruction
@@ -20976,6 +20978,14 @@
   "movdir64b\t{%1, %0|%0, %1}"
   [(set_attr "type" "other")])
 
+(define_insn "ptwrite<mode>"
+  [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
+		    UNSPECV_PTWRITE)]
+  "TARGET_PTWRITE"
+  "ptwrite\t%0"
+  [(set_attr "type" "other")
+   (set_attr "prefix_extra" "2")])
+
 (include "mmx.md")
 (include "sse.md")
 (include "sync.md")
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index d9bd909a..a4824b0 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -909,6 +909,10 @@ mrdrnd
 Target Report Mask(ISA_RDRND) Var(ix86_isa_flags) Save
 Support RDRND built-in functions and code generation.
 
+mptwrite
+Target Report Mask(ISA_PTWRITE) Var(ix86_isa_flags2) Save
+Support PTWRITE built-in functions and code generation.
+
 mf16c
 Target Report Mask(ISA_F16C) Var(ix86_isa_flags) Save
 Support F16C built-in functions and code generation.
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index 63c5e59..1e649a6 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -243,4 +243,28 @@ _rdrand64_step (unsigned long long *__P)
 
 #endif /* __x86_64__  */
 
+#ifndef __PTWRITE__
+#pragma GCC push_options
+#pragma GCC target("ptwrite")
+#define __DISABLE_PTWRITE__
+#endif
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_ptwrite64 (unsigned long long __B)
+{
+  __builtin_ia32_ptwrite64 (__B);
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_ptwrite32 (unsigned __B)
+{
+  __builtin_ia32_ptwrite32 (__B);
+}
+#ifdef __DISABLE_PTWRITE__
+#undef __DISABLE_PTWRITE__
+#pragma GCC pop_options
+#endif /* __DISABLE_PTWRITE__ */
+
 #endif /* _IMMINTRIN_H_INCLUDED */
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 9d08584..7dad71c 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -21478,6 +21478,14 @@ unsigned int __builtin_ia32_rdrand32_step (unsigned int *)
 unsigned int __builtin_ia32_rdrand64_step (unsigned long long *)
 @end smallexample
 
+The following built-in function is available when @option{-mptwrite} is
+used.  All of them generate the machine instruction that is part of the
+name.
+
+@smallexample
+void __builtin_ia32_ptwrite32 (unsigned) void __builtin_ia32_ptwrite64
+(unsigned long long) @end smallexample
+
 The following built-in functions are available when @option{-msse4a} is used.
 All of them generate the machine instruction that is part of the name.
 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 6019e1f..ed462ae 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1254,8 +1254,8 @@ See RS/6000 and PowerPC Options.
 -mmmx  -msse  -msse2  -msse3  -mssse3  -msse4.1  -msse4.2  -msse4  -mavx @gol
 -mavx2  -mavx512f  -mavx512pf  -mavx512er  -mavx512cd  -mavx512vl @gol
 -mavx512bw  -mavx512dq  -mavx512ifma  -mavx512vbmi  -msha  -maes @gol
--mpclmul  -mfsgsbase  -mrdrnd  -mf16c  -mfma -mpconfig -mwbnoinvd @gol
--mprefetchwt1  -mclflushopt  -mxsavec  -mxsaves @gol
+-mpclmul  -mfsgsbase  -mptwrite  -mrdrnd  -mf16c  -mfma @gol
+-mpconfig -mwbnoinvd -mprefetchwt1  -mclflushopt  -mxsavec  -mxsaves @gol
 -msse4a  -m3dnow  -m3dnowa  -mpopcnt  -mabm  -mbmi  -mtbm  -mfma4  -mxop @gol
 -mlzcnt  -mbmi2  -mfxsr  -mxsave  -mxsaveopt  -mrtm  -mlwp  -mmpx  @gol
 -mmwaitx  -mclzero  -mpku  -mthreads -mgfni  -mvaes  @gol
@@ -27157,6 +27157,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
 @itemx -mfsgsbase
 @opindex mfsgsbase
 @need 200
+@itemx -mptwrite
+@opindex mptwrite
+@need 200
 @itemx -mrdrnd
 @opindex mrdrnd
 @need 200
@@ -27262,8 +27265,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
 @opindex mavx512vpopcntdq
 These switches enable the use of instructions in the MMX, SSE,
 SSE2, SSE3, SSSE3, SSE4.1, AVX, AVX2, AVX512F, AVX512PF, AVX512ER, AVX512CD,
-SHA, AES, PCLMUL, FSGSBASE, RDRND, F16C, FMA, SSE4A, FMA4, XOP, LWP, ABM,
-AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, BMI, BMI2, VAES,
+SHA, AES, PCLMUL, FSGSBASE, PTWRITE, RDRND, F16C, FMA, SSE4A, FMA4, XOP, LWP,
+ABM, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, BMI, BMI2, VAES,
 FXSR, XSAVE, XSAVEOPT, LZCNT, RTM, MPX, MWAITX, PKU, IBT, SHSTK, AVX512VBMI2,
 GFNI, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B,
 AVX512VPOPCNTDQ3DNow!@: or enhanced 3DNow!@: extended instruction sets.
diff --git a/gcc/testsuite/gcc.target/i386/ptwrite-1.c b/gcc/testsuite/gcc.target/i386/ptwrite-1.c
new file mode 100644
index 0000000..e689e4a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/ptwrite-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mptwrite" } */
+/* { dg-final { scan-assembler-times "ptwrite\[ \\t\]" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "ptwrite\[ \\t\]" 2 { target { ! ia32} } } } */
+
+#include "immintrin.h"
+
+void
+test_ptwrite (unsigned __A, unsigned long long __B)
+{
+  _ptwrite32 (__A);
+
+#ifdef __x86_64__
+  _ptwrite64 (__B);
+#endif
+}
-- 
2.5.5

