This adds an OpenACC-specific thread synchronization builtin and the nvptx pattern it expands to. Committed to gomp-4_0-branch.

Bernd
Index: gcc/ChangeLog.gomp
===================================================================
--- gcc/ChangeLog.gomp	(revision 223869)
+++ gcc/ChangeLog.gomp	(working copy)
@@ -1,5 +1,13 @@
 2015-05-29  Bernd Schmidt  <ber...@codesourcery.com>
 
+	* config/nvptx/nvptx.md (UNSPECV_BARSYNC): New constant.
+	(oacc_threadbarrier): New expander.
+	(threadbarrier_insn): New pattern.
+	* config/nvptx/nvptx.c (nvptx_cannot_copy_insn_p): Handle threadbarrier_insn.
+	* omp-builtins.def (BUILT_IN_GOACC_THREADBARRIER): Define.
+	* builtins.c (expand_oacc_threadbarrier): Expand it.
+	(expand_builtin): Handle it.
+
 	* gimple.def (GIMPLE_OMP_ENTRY_END): New code.
 	* gimple.h (gimple_build_omp_entry_end): Declare.
 	(CASE_GIMPLE_OMP): Add GIMPLE_OMP_ENTRY_END.
Index: gcc/builtins.c
===================================================================
--- gcc/builtins.c	(revision 223867)
+++ gcc/builtins.c	(working copy)
@@ -5946,6 +5946,19 @@ expand_builtin_acc_on_device (tree exp,
 #endif
 }
 
+/* Emit an OpenACC thread barrier; a no-op unless HAVE_oacc_threadbarrier.  */
+static void
+expand_oacc_threadbarrier (void)
+{
+#ifdef HAVE_oacc_threadbarrier
+  rtx insn = GEN_FCN (CODE_FOR_oacc_threadbarrier) ();
+  if (insn != NULL_RTX)
+    {
+      emit_insn (insn);
+    }
+#endif
+}
+
 
 /* Expand a thread-id/thread-count builtin for OpenACC.  */
 static rtx
@@ -7217,6 +7230,10 @@ expand_builtin (tree exp, rtx target, rt
     case BUILT_IN_GOACC_THREAD_BROADCAST_LL:
       return expand_builtin_oacc_thread_broadcast (exp, target);
 
+    case BUILT_IN_GOACC_THREADBARRIER:
+      expand_oacc_threadbarrier ();
+      return const0_rtx;
+
     default:	/* just do library call, if unknown builtin */
       break;
     }
Index: gcc/config/nvptx/nvptx.c
===================================================================
--- gcc/config/nvptx/nvptx.c	(revision 223867)
+++ gcc/config/nvptx/nvptx.c	(working copy)
@@ -2123,14 +2123,20 @@ nvptx_vector_alignment (const_tree type)
   return MIN (align, BIGGEST_ALIGNMENT);
 }
 
+/* Implement TARGET_CANNOT_COPY_INSN_P: return true if INSN must not be
+   duplicated.  This is true for insns that generate a unique id.  To be
+   on the safe side, we also exclude instructions that have to be
+   executed simultaneously by all threads in a warp.  */
+
 static bool
 nvptx_cannot_copy_insn_p (rtx_insn *insn)
 {
   if (recog_memoized (insn) == CODE_FOR_oacc_thread_broadcastsi)
     return true;
+  if (recog_memoized (insn) == CODE_FOR_threadbarrier_insn)
+    return true;
   return false;
 }
-
 
 /* Record a symbol for mkoffload to enter into the mapping table.  */
 
@@ -2255,7 +2261,7 @@ nvptx_file_end (void)
 #undef TARGET_VECTOR_ALIGNMENT
 #define TARGET_VECTOR_ALIGNMENT nvptx_vector_alignment
 
-#undef  TARGET_CANNOT_COPY_INSN_P
+#undef TARGET_CANNOT_COPY_INSN_P
 #define TARGET_CANNOT_COPY_INSN_P nvptx_cannot_copy_insn_p
 
 struct gcc_target targetm = TARGET_INITIALIZER;
Index: gcc/config/nvptx/nvptx.md
===================================================================
--- gcc/config/nvptx/nvptx.md	(revision 223867)
+++ gcc/config/nvptx/nvptx.md	(working copy)
@@ -62,6 +62,7 @@ (define_c_enum "unspecv" [
    UNSPECV_CAS
    UNSPECV_XCHG
    UNSPECV_WARP_BCAST
+   UNSPECV_BARSYNC
 ])
 
 (define_attr "subregs_ok" "false,true"
@@ -1457,3 +1458,16 @@ (define_insn "atomic_fetch_<logic><mode>
 	(match_dup 1))]
   "<MODE>mode == SImode || TARGET_SM35"
   "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;")
+
+;; Thread barrier, output as bar.sync.  ??? Mark as not predicable later?
+(define_insn "threadbarrier_insn"
+  [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")] UNSPECV_BARSYNC)]
+  ""
+  "bar.sync\\t%0;")
+
+(define_expand "oacc_threadbarrier"
+  [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")] UNSPECV_BARSYNC)]
+  ""
+{
+  operands[0] = const0_rtx;
+})
Index: gcc/omp-builtins.def
===================================================================
--- gcc/omp-builtins.def	(revision 223867)
+++ gcc/omp-builtins.def	(working copy)
@@ -81,6 +81,8 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_THREAD
 		   BT_FN_UINT_UINT, ATTR_NOTHROW_LEAF_LIST)
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_THREAD_BROADCAST_LL, "GOACC_thread_broadcast_ll",
 		   BT_FN_ULONGLONG_ULONGLONG, ATTR_NOTHROW_LEAF_LIST)
+DEF_GOACC_BUILTIN (BUILT_IN_GOACC_THREADBARRIER, "GOACC_threadbarrier",
+		   BT_FN_VOID, ATTR_NOTHROW_LEAF_LIST)
 
 DEF_GOACC_BUILTIN_COMPILER (BUILT_IN_ACC_ON_DEVICE, "acc_on_device",
 			    BT_FN_INT_INT, ATTR_CONST_NOTHROW_LEAF_LIST)

Reply via email to