On 03/02/2018 05:55 PM, Cesar Philippidis wrote:
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 28ae263c867..ac2731233dd 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -1418,10 +1418,16 @@
[(set_attr "atomic" "true")])
(define_insn "nvptx_barsync"
- [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")]
+ [(unspec_volatile [(match_operand:SI 0 "nvptx_nonmemory_operand" "Ri")
+ (match_operand:SI 1 "const_int_operand")]
UNSPECV_BARSYNC)]
""
- "\\tbar.sync\\t%0;"
+ {
+ if (!REG_P (operands[0]))
+ return "\\tbar.sync\\t%0;";
+ else
+ return "\\tbar.sync\\t%0, %1;";
+ }
[(set_attr "predicable" "false")])
This is wrong. The first operand (the barrier number) can be a register or
a constant, and the second operand (the thread count) is independent of it.
Whether or not we print the second operand must not depend on whether the
first is a register.
In this patch I've reserved INTVAL (operands[1]) == 0 for the "no second
operand" case: a thread count of 0 emits "bar.sync %0;", and any other value
emits "bar.sync %0, %1;".
Committed.
Thanks,
- Tom
[nvptx] Add thread count parm to bar.sync
2018-03-23 Tom de Vries <t...@codesourcery.com>
* config/nvptx/nvptx.md (nvptx_barsync): Add and handle operand.
* config/nvptx/nvptx.c (nvptx_cta_sync): Change arguments to take in a
lock and thread count. Update call to gen_nvptx_barsync.
(nvptx_single, nvptx_process_pars): Update calls to nvptx_cta_sync.
---
gcc/config/nvptx/nvptx.c | 22 ++++++++++++++--------
gcc/config/nvptx/nvptx.md | 10 ++++++++--
3 files changed, 29 insertions(+), 10 deletions(-)
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 12441cb..32f2efb 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -3939,13 +3939,14 @@ nvptx_shared_propagate (bool pre_p, bool is_call, basic_block block,
return empty;
}
-/* Emit a CTA-level synchronization barrier. We use different
- markers for before and after synchronizations. */
+/* Emit a CTA-level synchronization barrier (bar.sync). LOCK is the
+ barrier number, which is an integer or a register. THREADS is the
+ number of threads controlled by the barrier. */
static rtx
-nvptx_cta_sync (bool after)
+nvptx_cta_sync (rtx lock, int threads)
{
- return gen_nvptx_barsync (GEN_INT (after));
+ return gen_nvptx_barsync (lock, GEN_INT (threads));
}
#if WORKAROUND_PTXJIT_BUG
@@ -4195,6 +4196,8 @@ nvptx_single (unsigned mask, basic_block from, basic_block to)
/* Includes worker mode, do spill & fill. By construction
we should never have worker mode only. */
broadcast_data_t data;
+ rtx barrier = GEN_INT (0);
+ int threads = 0;
data.base = oacc_bcast_sym;
data.ptr = 0;
@@ -4207,14 +4210,14 @@ nvptx_single (unsigned mask, basic_block from, basic_block to)
false),
before);
/* Barrier so other workers can see the write. */
- emit_insn_before (nvptx_cta_sync (false), tail);
+ emit_insn_before (nvptx_cta_sync (barrier, threads), tail);
data.offset = 0;
emit_insn_before (nvptx_gen_shared_bcast (pvar, PM_write, 0, &data,
false), tail);
/* This barrier is needed to avoid worker zero clobbering
the broadcast buffer before all the other workers have
had a chance to read this instance of it. */
- emit_insn_before (nvptx_cta_sync (false), tail);
+ emit_insn_before (nvptx_cta_sync (barrier, threads), tail);
}
extract_insn (tail);
@@ -4331,12 +4334,15 @@ nvptx_process_pars (parallel *par)
bool empty = nvptx_shared_propagate (true, is_call,
par->forked_block, par->fork_insn,
false);
+ rtx barrier = GEN_INT (0);
+ int threads = 0;
if (!empty || !is_call)
{
/* Insert begin and end synchronizations. */
- emit_insn_before (nvptx_cta_sync (false), par->forked_insn);
- emit_insn_before (nvptx_cta_sync (false), par->join_insn);
+ emit_insn_before (nvptx_cta_sync (barrier, threads),
+ par->forked_insn);
+ emit_insn_before (nvptx_cta_sync (barrier, threads), par->join_insn);
}
}
else if (par->mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR))
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 2b4bcb3a..2609222 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -1421,10 +1421,16 @@
[(set_attr "atomic" "true")])
(define_insn "nvptx_barsync"
- [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")]
+ [(unspec_volatile [(match_operand:SI 0 "nvptx_nonmemory_operand" "Ri")
+ (match_operand:SI 1 "const_int_operand")]
UNSPECV_BARSYNC)]
""
- "\\tbar.sync\\t%0;"
+ {
+ if (INTVAL (operands[1]) == 0)
+ return "\\tbar.sync\\t%0;";
+ else
+ return "\\tbar.sync\\t%0, %1;";
+ }
[(set_attr "predicable" "false")])
(define_insn "nvptx_nounroll"