Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output

2017-10-23 Thread Jeff Law
On 10/16/2017 04:56 AM, Wilco Dijkstra wrote:
> This patch cleans up autopref scheduling.
> 
> The code is greatly simplified.  Sort accesses on the offset first, and
> only if the offsets are the same fall back to other comparisons in
> rank_for_schedule.  This doesn't at all restore the original behaviour
> since we no longer compare the base address, but it now defines a total
> sorting order.  More work will be required to improve the sorting so
> that only loads/stores with the same base are affected.
> 
> AArch64 bootstrap completes.
> 
> OK for commit?
> 
> ChangeLog:
> 2017-10-03  Wilco Dijkstra  
> 
>     PR rtl-optimization/82396
>     * gcc/haifa-sched.c (autopref_multipass_init): Simplify
>     initialization.
>     (autopref_rank_data): Simplify sort order.
>     * gcc/sched-int.h (autopref_multipass_data_): Remove
>     multi_mem_insn_p, min_offset and max_offset.
OK. Sorry for the delay.

jeff


Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output

2017-10-16 Thread Maxim Kuvyrkov
> On Oct 16, 2017, at 1:56 PM, Wilco Dijkstra  wrote:
> 
> This patch cleans up autopref scheduling.
> 
> The code is greatly simplified.  Sort accesses on the offset first, and
> only if the offsets are the same fall back to other comparisons in
> rank_for_schedule.  This doesn't at all restore the original behaviour
> since we no longer compare the base address, but it now defines a total
> sorting order.  More work will be required to improve the sorting so
> that only loads/stores with the same base are affected.
> 
> AArch64 bootstrap completes.
> 
> OK for commit?

Looks good to me.  I'm not an official scheduler maintainer, so wait for an ack 
from a maintainer.

--
Maxim Kuvyrkov
www.linaro.org




[PATCH] Fix PR82396: qsort comparator non-negative on sorted output

2017-10-16 Thread Wilco Dijkstra
This patch cleans up autopref scheduling.

The code is greatly simplified.  Sort accesses on the offset first, and
only if the offsets are the same fall back to other comparisons in
rank_for_schedule.  This doesn't at all restore the original behaviour
since we no longer compare the base address, but it now defines a total
sorting order.  More work will be required to improve the sorting so
that only loads/stores with the same base are affected.

AArch64 bootstrap completes.

OK for commit?

ChangeLog:
2017-10-03  Wilco Dijkstra  

    PR rtl-optimization/82396
    * gcc/haifa-sched.c (autopref_multipass_init): Simplify
    initialization.
    (autopref_rank_data): Simplify sort order.
    * gcc/sched-int.h (autopref_multipass_data_): Remove
    multi_mem_insn_p, min_offset and max_offset.
--

diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c
index 
549e8961411ecd0a04ac3b24ba78b5d53e63258a..77ba8c5292a0801bbbcb35ed61ab3040015f6677
 100644
--- a/gcc/haifa-sched.c
+++ b/gcc/haifa-sched.c
@@ -5568,9 +5568,7 @@ autopref_multipass_init (const rtx_insn *insn, int write)
 
   gcc_assert (data->status == AUTOPREF_MULTIPASS_DATA_UNINITIALIZED);
   data->base = NULL_RTX;
-  data->min_offset = 0;
-  data->max_offset = 0;
-  data->multi_mem_insn_p = false;
+  data->offset = 0;
   /* Set insn entry initialized, but not relevant for auto-prefetcher.  */
   data->status = AUTOPREF_MULTIPASS_DATA_IRRELEVANT;
 
@@ -5585,10 +5583,9 @@ autopref_multipass_init (const rtx_insn *insn, int write)
 {
   int n_elems = XVECLEN (pat, 0);
 
-  int i = 0;
-  rtx prev_base = NULL_RTX;
-  int min_offset = 0;
-  int max_offset = 0;
+  int i, offset;
+  rtx base, prev_base = NULL_RTX;
+  int min_offset = INT_MAX;
 
   for (i = 0; i < n_elems; i++)
 {
@@ -5596,38 +5593,23 @@ autopref_multipass_init (const rtx_insn *insn, int 
write)
   if (GET_CODE (set) != SET)
 return;
 
- rtx base = NULL_RTX;
- int offset = 0;
   if (!analyze_set_insn_for_autopref (set, write, , ))
 return;
 
- if (i == 0)
-   {
- prev_base = base;
- min_offset = offset;
- max_offset = offset;
-   }
   /* Ensure that all memory operations in the PARALLEL use the same
  base register.  */
- else if (REGNO (base) != REGNO (prev_base))
+ if (i > 0 && REGNO (base) != REGNO (prev_base))
 return;
- else
-   {
- min_offset = MIN (min_offset, offset);
- max_offset = MAX (max_offset, offset);
-   }
+ prev_base = base;
+ min_offset = MIN (min_offset, offset);
 }
 
-  /* If we reached here then we have a valid PARALLEL of multiple memory
-    ops with prev_base as the base and min_offset and max_offset
-    containing the offsets range.  */
+  /* If we reached here then we have a valid PARALLEL of multiple memory 
ops
+    with prev_base as the base and min_offset containing the offset.  */
   gcc_assert (prev_base);
   data->base = prev_base;
-  data->min_offset = min_offset;
-  data->max_offset = max_offset;
-  data->multi_mem_insn_p = true;
+  data->offset = min_offset;
   data->status = AUTOPREF_MULTIPASS_DATA_NORMAL;
-
   return;
 }
 
@@ -5637,7 +5619,7 @@ autopref_multipass_init (const rtx_insn *insn, int write)
 return;
 
   if (!analyze_set_insn_for_autopref (set, write, >base,
-  >min_offset))
+  >offset))
 return;
 
   /* This insn is relevant for the auto-prefetcher.
@@ -5646,63 +5628,6 @@ autopref_multipass_init (const rtx_insn *insn, int write)
   data->status = AUTOPREF_MULTIPASS_DATA_NORMAL;
 }
 
-
-/* Helper for autopref_rank_for_schedule.  Given the data of two
-   insns relevant to the auto-prefetcher modelling code DATA1 and DATA2
-   return their comparison result.  Return 0 if there is no sensible
-   ranking order for the two insns.  */
-
-static int
-autopref_rank_data (autopref_multipass_data_t data1,
-    autopref_multipass_data_t data2)
-{
-  /* Simple case when both insns are simple single memory ops.  */
-  if (!data1->multi_mem_insn_p && !data2->multi_mem_insn_p)
-    return data1->min_offset - data2->min_offset;
-
-  /* Two load/store multiple insns.  Return 0 if the offset ranges
- overlap and the difference between the minimum offsets otherwise.  */
-  else if (data1->multi_mem_insn_p && data2->multi_mem_insn_p)
-    {
-  int min1 = data1->min_offset;
-  int max1 = data1->max_offset;
-  int min2 = data2->min_offset;
-  int max2 = data2->max_offset;
-
-  if (max1 < min2 || min1 > max2)
-   return min1 - min2;
-  else
-   return 0;
-    }
-
-  /* The other two cases is a pair of a load/store multiple and
- a simple memory op.  Return 0 if 

Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output

2017-10-06 Thread Wilco Dijkstra
Maxim Kuvyrkov wrote:

Note I've committed: https://gcc.gnu.org/ml/gcc-patches/2017-10/msg00309.html 
which does
change qsort to (qsort) like Jakub proposed.

> I think that this is the best solution so far.  Could you add a comment like 
> the following?
> ==
> Ideally, we would call autopref_rank_data() here, but we can't since it is 
> not guaranteed
> to return transitive results fro multi_mem_insns.  We use an approximation 
> here and rely
> on lookahead_guard below to force instruction order according to 
> autopref_rank_data().
> ==

The issue is that autopref_rank_data() doesn't do anything useful. It checks 
for overlaps
which shouldn't happen much, if at all. And as discussed declaring a comparison 
as
"unordered" is simply not possible. lookahead_guard can't fix things up either, 
so there is
really no point in keeping this function. Similarly all the min/max offset 
calculations are
redundant even if we assume the offsets in a LDP/STP instruction are completely 
random.

Wilco







Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output

2017-10-06 Thread Maxim Kuvyrkov
> On Oct 5, 2017, at 11:28 PM, Alexander Monakov  wrote:
> 
> On Thu, 5 Oct 2017, Maxim Kuvyrkov wrote:
>> I'm still working on analysis, but it appears to me that Alexander's patch
>> (current state of trunk) fails qsort check due to not being symmetric for
>> load/store analysis (write == 0 or write == 1) in comparisons with
>> "irrelevant" instructions.  Wilco's patch does not seem to address that, and,
>> possibly, makes the failure latent (I may be wrong here, it's late and I
>> didn't finish analysis yet).
> 
> Yes, your analysis is incomplete, it should be easy to see that for 
> always-false
> multi_mem_insn_p, autopref_rank_for_schedule implements lexicographical order.
> The problem is that when multi_mem_insn_p may be true, autopref_rank_data is
> not guaranteed to be transitive.

Agree.

> 
> I think your patch loses transitivity in autopref_rank_for_schedule, see 
> Wilco's
> response.

Agree.

> 
> FWIW, this hunk from my patch posted back on Friday is sufficient to restore
> bootstrap as confirmed (again, back on Friday) by Steve.  It avoids the fancy
> non-transitive comparison for qsort (but autopref_rank_data is still used in
> multipref_dfa_lookahead_guard).
> 
> (I'm surprised Kyrill wasn't Cc'ed - adding him now)
> 
> Alexander
> 
>   * haifa-sched.c (autopref_rank_for_schedule): Do not invoke
>   autopref_rank_data, order by min_offset.
> 
> diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c
> index 549e8961411..cea1242e1f1 100644
> --- a/gcc/haifa-sched.c
> +++ b/gcc/haifa-sched.c
> @@ -5725,7 +5669,7 @@ autopref_rank_for_schedule (const rtx_insn *insn1, 
> const rtx_insn *insn2)
>   int irrel2 = data2->status == AUTOPREF_MULTIPASS_DATA_IRRELEVANT;
> 
>   if (!irrel1 && !irrel2)
> - r = autopref_rank_data (data1, data2);
> + r = data1->min_offset - data2->min_offset;
>   else
>   r = irrel2 - irrel1;
> }

I think that this is the best solution so far.  Could you add a comment like 
the following?
==
Ideally, we would call autopref_rank_data() here, but we can't since it is not 
guaranteed to return transitive results fro multi_mem_insns.  We use an 
approximation here and rely on lookahead_guard below to force instruction order 
according to autopref_rank_data().
==

I think that the above patch qualifies as obvious to unbreak the bootstrap.  
Wilco, any objection to the above fix?

Regards,

--
Maxim Kuvyrkov
www.linaro.org







Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output

2017-10-06 Thread Christophe Lyon
On 5 October 2017 at 22:28, Alexander Monakov  wrote:
> On Thu, 5 Oct 2017, Maxim Kuvyrkov wrote:
>> I'm still working on analysis, but it appears to me that Alexander's patch
>> (current state of trunk) fails qsort check due to not being symmetric for
>> load/store analysis (write == 0 or write == 1) in comparisons with
>> "irrelevant" instructions.  Wilco's patch does not seem to address that, and,
>> possibly, makes the failure latent (I may be wrong here, it's late and I
>> didn't finish analysis yet).
>
> Yes, your analysis is incomplete, it should be easy to see that for 
> always-false
> multi_mem_insn_p, autopref_rank_for_schedule implements lexicographical order.
> The problem is that when multi_mem_insn_p may be true, autopref_rank_data is
> not guaranteed to be transitive.
>
> I think your patch loses transitivity in autopref_rank_for_schedule, see 
> Wilco's
> response.
>
> FWIW, this hunk from my patch posted back on Friday is sufficient to restore
> bootstrap as confirmed (again, back on Friday) by Steve.  It avoids the fancy
> non-transitive comparison for qsort (but autopref_rank_data is still used in
> multipref_dfa_lookahead_guard).
>
> (I'm surprised Kyrill wasn't Cc'ed - adding him now)
>
> Alexander
>
> * haifa-sched.c (autopref_rank_for_schedule): Do not invoke
> autopref_rank_data, order by min_offset.
>
> diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c
> index 549e8961411..cea1242e1f1 100644
> --- a/gcc/haifa-sched.c
> +++ b/gcc/haifa-sched.c
> @@ -5725,7 +5669,7 @@ autopref_rank_for_schedule (const rtx_insn *insn1, 
> const rtx_insn *insn2)
>int irrel2 = data2->status == AUTOPREF_MULTIPASS_DATA_IRRELEVANT;
>
>if (!irrel1 && !irrel2)
> -   r = autopref_rank_data (data1, data2);
> +   r = data1->min_offset - data2->min_offset;
>else
> r = irrel2 - irrel1;
>  }

Hi,

FWIW, I've ran validations with this small patch, and it fixes:
- the aarch64-linux-gnu build problem
- a few other regressions (ICEs) that had probably already been
reported (not sure I saw all the regression reports after r253295)

See 
http://people.linaro.org/~christophe.lyon/cross-validation/gcc-test-patches/253456-aarch64-bootstrap.patch/report-build-info.html

Where "REF-BUILDFAILED" for aarch64-linux-gnu means the reference
build failed and the patched build succeeded.
The "REGRESSED" cell for aarch64-none-elf with ilp32 only "restores" a
previous failure of gcc.target/aarch64/aapcs64/func-ret-3.c execution,
 -O3 -g

Thanks,

Christophe


Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output

2017-10-05 Thread Alexander Monakov
On Thu, 5 Oct 2017, Maxim Kuvyrkov wrote:
> I'm still working on analysis, but it appears to me that Alexander's patch
> (current state of trunk) fails qsort check due to not being symmetric for
> load/store analysis (write == 0 or write == 1) in comparisons with
> "irrelevant" instructions.  Wilco's patch does not seem to address that, and,
> possibly, makes the failure latent (I may be wrong here, it's late and I
> didn't finish analysis yet).

Yes, your analysis is incomplete, it should be easy to see that for always-false
multi_mem_insn_p, autopref_rank_for_schedule implements lexicographical order.
The problem is that when multi_mem_insn_p may be true, autopref_rank_data is
not guaranteed to be transitive.

I think your patch loses transitivity in autopref_rank_for_schedule, see Wilco's
response.

FWIW, this hunk from my patch posted back on Friday is sufficient to restore
bootstrap as confirmed (again, back on Friday) by Steve.  It avoids the fancy
non-transitive comparison for qsort (but autopref_rank_data is still used in
multipref_dfa_lookahead_guard).

(I'm surprised Kyrill wasn't Cc'ed - adding him now)

Alexander

* haifa-sched.c (autopref_rank_for_schedule): Do not invoke
autopref_rank_data, order by min_offset.

diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c
index 549e8961411..cea1242e1f1 100644
--- a/gcc/haifa-sched.c
+++ b/gcc/haifa-sched.c
@@ -5725,7 +5669,7 @@ autopref_rank_for_schedule (const rtx_insn *insn1, const 
rtx_insn *insn2)
   int irrel2 = data2->status == AUTOPREF_MULTIPASS_DATA_IRRELEVANT;
 
   if (!irrel1 && !irrel2)
-   r = autopref_rank_data (data1, data2);
+   r = data1->min_offset - data2->min_offset;
   else
r = irrel2 - irrel1;
 }


Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output

2017-10-05 Thread Wilco Dijkstra
Maxim Kuvyrkov wrote:

> What the heck, I'll jump in as well.

Why not - how many engineers does it take to sort?!?

> I'm still working on analysis, but it appears to me that Alexander's patch 
> (current state of trunk) fails qsort check due to not being symmetric for
> load/store analysis (write == 0 or write == 1) in comparisons with 
> "irrelevant"
> instructions.  Wilco's patch  does not seem to address that, and, possibly,
> makes the failure latent (I may be wrong here, it's late and I didn't finish 
> analysis yet).
>
> I'm currently bootstrapping the following patch (on aarch64-linux-gnu, 
> arm-linux-gnueabihf will follow tomorrow), which (like Wilco's patch) seems
> to unbreak bootstrap, but is less invasive and preserves handling of
> multi_mem_insns.  Would please interested  parties help me test it?

So what you're saying is that if we compare a load with a store (or a store
with a load), we should always return 0 and use the original instruction order?
This won't fix the issue of overlaps in autopref_rank_data, but I believe this
creates a new sorting order problem:

A. Load off 8
B. Store off 12
C. Load off 4

So A < B (instruction order), B < C (instruction order) but A > C (offset 
order)...

The key here is that sorting functions should only return zero if the two
objects being compared are really indistinguishable, not as a way to say they
are not comparable(!!!) and then defer to a different comparison order. It is
impossible to combine multiple different comparisons to create a total sorting
order that way. 

However it is feasible to divide things into several classes, order the classes
with respect to each other and use a different sort within each class. If you 
want
to treat loads/stores equally, that's fine, but then they end up in the same 
class
and thus you have to use a comparison that orders both loads and stores.

Wilco

Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output

2017-10-05 Thread Steve Ellcey
On Thu, 2017-10-05 at 20:33 +0300, Maxim Kuvyrkov wrote:

> I'm currently bootstrapping the following patch (on aarch64-linux-
> gnu, arm-linux-gnueabihf will follow tomorrow), which (like Wilco's
> patch) seems to unbreak bootstrap, but is less invasive and preserves
> handling of multi_mem_insns.  Would please interested parties help me
> test it?
> 
> Comments?

This patch is not applying to my ToT.  The code
in autopref_rank_for_schedule doesn't seem to match what I have in my
tree.

Steve Ellcey


Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output

2017-10-05 Thread Maxim Kuvyrkov
> On Oct 5, 2017, at 8:20 PM, Steve Ellcey  wrote:
> 
> On Wed, 2017-10-04 at 13:24 +, Wilco Dijkstra wrote:
>> Richard Sandiford wrote:
>>> 
>>> 
>>> I don't think it's reasonable to commit this as obvious.  You said
>>> yourself in the covering message that "it doesn't at all restore
>>> the original behaviour since we no longer compare the base address".
>>> So even with the bootstrap failure, I think the patch needed review
>>> before going in.
>>> 
>>> Christophe's message doesn't change anything because you knew when you
>>> posted the patch that it fixed the failure.
>> Well my understanding was that it is OK to fix a bootstrap failure. I 
>> believe my
>> patch is trivial since it mostly removes redundant code. Also I took Jakub's
>> review as an OK as there were no technical objections. However since you
>> seem to disagree, I will revert it.
>> 
>> We have now had 5 days of no builds for a major target, which is a huge
>> inconvenience. So I don't think it is reasonable to wait any longer.
>> The alternative is to revert the original patch that caused the bootstrap 
>> failure
>> plus the patch(es) that unexpectedly changed the behaviour of the scheduler
>> (I don't think there was any testing as to what effect those had on the 
>> schedule).
>> 
>> So the question is who will do that and when?
>> 
>> Wilco
> 
> The aarch64 bootstrap is still broken.  I am adding the scheduler
> maintainers to the CC list on this email to see if one on them can
> review/approve Wilco's patch which was applied and then reverted.  If
> not, can one of the global maintainers revert the original patch that
> broke the bootstrap?

What the heck, I'll jump in as well.

I'm still working on analysis, but it appears to me that Alexander's patch 
(current state of trunk) fails qsort check due to not being symmetric for 
load/store analysis (write == 0 or write == 1) in comparisons with "irrelevant" 
instructions.  Wilco's patch does not seem to address that, and, possibly, 
makes the failure latent (I may be wrong here, it's late and I didn't finish 
analysis yet).

I'm currently bootstrapping the following patch (on aarch64-linux-gnu, 
arm-linux-gnueabihf will follow tomorrow), which (like Wilco's patch) seems to 
unbreak bootstrap, but is less invasive and preserves handling of 
multi_mem_insns.  Would please interested parties help me test it?

Comments?

--
Maxim Kuvyrkov
www.linaro.org



0001-Fix-autopref_rank_for_schedule.patch
Description: Binary data


Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output

2017-10-05 Thread Steve Ellcey
On Wed, 2017-10-04 at 13:24 +, Wilco Dijkstra wrote:
> Richard Sandiford wrote:
> > 
> > 
> > I don't think it's reasonable to commit this as obvious.  You said
> > yourself in the covering message that "it doesn't at all restore
> > the original behaviour since we no longer compare the base address".
> > So even with the bootstrap failure, I think the patch needed review
> > before going in.
> > 
> > Christophe's message doesn't change anything because you knew when you
> > posted the patch that it fixed the failure.
> Well my understanding was that it is OK to fix a bootstrap failure. I believe 
> my
> patch is trivial since it mostly removes redundant code. Also I took Jakub's
> review as an OK as there were no technical objections. However since you
> seem to disagree, I will revert it.
> 
> We have now had 5 days of no builds for a major target, which is a huge
> inconvenience. So I don't think it is reasonable to wait any longer.
> The alternative is to revert the original patch that caused the bootstrap 
> failure
> plus the patch(es) that unexpectedly changed the behaviour of the scheduler
> (I don't think there was any testing as to what effect those had on the 
> schedule).
> 
> So the question is who will do that and when?
> 
> Wilco

The aarch64 bootstrap is still broken.  I am adding the scheduler
maintainers to the CC list on this email to see if one on them can
review/approve Wilco's patch which was applied and then reverted.  If
not, can one of the global maintainers revert the original patch that
broke the bootstrap?

Steve Ellcey
sell...@cavium.com


Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output

2017-10-04 Thread Eric Botcazou
> We have now had 5 days of no builds for a major target, which is a huge
> inconvenience. So I don't think it is reasonable to wait any longer.

well, to put things in perspective, you broke the SPARC compiler about one 
month ago and have done anything AFAIK since then to repair the breakage so...

-- 
Eric Botcazou


Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output

2017-10-04 Thread Jakub Jelinek
On Wed, Oct 04, 2017 at 02:12:11PM +0100, Ramana Radhakrishnan wrote:
> On Wed, Oct 4, 2017 at 12:01 PM, Richard Sandiford
>  wrote:
> > Wilco Dijkstra  writes:
> >> Christophe Lyon wrote:
> >>
> >>> FWIW, I confirm that this patch fixes the build failure I reported at:
> >>> https://gcc.gnu.org/ml/gcc-patches/2017-09/msg01980.html
> >>
> >> Thanks, now committed as r253399.
> >
> > I don't think it's reasonable to commit this as obvious.  You said
> > yourself in the covering message that "it doesn't at all restore
> > the original behaviour since we no longer compare the base address".
> > So even with the bootstrap failure, I think the patch needed review
> > before going in.
> 
> I agree that this patch shouldn't have gone in without review from a
> maintainer of the appropriate area regardless of whether this fixes a
> bootstrap failure or not.
> 
> However we need a scheduler maintainer or global reviewer to please
> help review this patch or help come up with an alternative patch. A
> primary platform broken for 5 days with a commit and no public
> response from the original poster is really not appropriate behaviour
> in this community. If not, the original patch should have been
> considered for reverting under the 48 hour rule .

A workaround that could be committed as obvious would be wrapping
some qsort call affected by this into (), i.e. instead of doing
qsort (...); do (qsort) (...);
That way you revert to previous behavior for the problematic qsort and not
change anything else.
The committed patch isn't in any way obvious and really needs to be reviewed
by a scheduler maintainer.

Jakub


Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output

2017-10-04 Thread Ramana Radhakrishnan
On Wed, Oct 4, 2017 at 2:39 PM, Alexander Monakov  wrote:
> On Wed, 4 Oct 2017, Ramana Radhakrishnan wrote:
>> However we need a scheduler maintainer or global reviewer to please
>> help review this patch or help come up with an alternative patch. A
>> primary platform broken for 5 days with a commit and no public
>> response from the original poster is really not appropriate behaviour
>> in this community. If not, the original patch should have been
>> considered for reverting under the 48 hour rule .
>
> I responded within 30 minutes of the original report by Christophe:
> https://gcc.gnu.org/ml/gcc-patches/2017-09/msg01984.html
>

My apologies.

I tried looking in my theading in my gmail and was surprised not to
see a response and somehow I missed this in the archives.

Ramana


Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output

2017-10-04 Thread Alexander Monakov
On Wed, 4 Oct 2017, Ramana Radhakrishnan wrote:
> However we need a scheduler maintainer or global reviewer to please
> help review this patch or help come up with an alternative patch. A
> primary platform broken for 5 days with a commit and no public
> response from the original poster is really not appropriate behaviour
> in this community. If not, the original patch should have been
> considered for reverting under the 48 hour rule .

I responded within 30 minutes of the original report by Christophe:
https://gcc.gnu.org/ml/gcc-patches/2017-09/msg01984.html

I think Wilco's patch is reasonable. A potentially simpler alternative
to just restore bootstrap is to take only the second hunk of my patch
above (i.e. without removal of autopref_rank_data and associated cleanups).

FWIW in the previous thread for a closely related issue I Cc'ed Maxim and
Kyrill: https://gcc.gnu.org/ml/gcc-patches/2017-09/msg01229.html

Alexander


Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output

2017-10-04 Thread Jakub Jelinek
On Wed, Oct 04, 2017 at 01:24:15PM +, Wilco Dijkstra wrote:
> Well my understanding was that it is OK to fix a bootstrap failure. I believe 
> my
> patch is trivial since it mostly removes redundant code. Also I took Jakub's
> review as an OK as there were no technical objections. However since you

I've only commented (off-list) on the ChangeLog nits, not on anything else.

Jakub


Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output

2017-10-04 Thread Wilco Dijkstra
Richard Sandiford wrote:
>
> I don't think it's reasonable to commit this as obvious.  You said
> yourself in the covering message that "it doesn't at all restore
> the original behaviour since we no longer compare the base address".
> So even with the bootstrap failure, I think the patch needed review
> before going in.
>
> Christophe's message doesn't change anything because you knew when you
> posted the patch that it fixed the failure.

Well my understanding was that it is OK to fix a bootstrap failure. I believe my
patch is trivial since it mostly removes redundant code. Also I took Jakub's
review as an OK as there were no technical objections. However since you
seem to disagree, I will revert it.

We have now had 5 days of no builds for a major target, which is a huge
inconvenience. So I don't think it is reasonable to wait any longer.
The alternative is to revert the original patch that caused the bootstrap 
failure
plus the patch(es) that unexpectedly changed the behaviour of the scheduler
(I don't think there was any testing as to what effect those had on the 
schedule).

So the question is who will do that and when?

Wilco

Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output

2017-10-04 Thread Ramana Radhakrishnan
On Wed, Oct 4, 2017 at 12:01 PM, Richard Sandiford
 wrote:
> Wilco Dijkstra  writes:
>> Christophe Lyon wrote:
>>
>>> FWIW, I confirm that this patch fixes the build failure I reported at:
>>> https://gcc.gnu.org/ml/gcc-patches/2017-09/msg01980.html
>>
>> Thanks, now committed as r253399.
>
> I don't think it's reasonable to commit this as obvious.  You said
> yourself in the covering message that "it doesn't at all restore
> the original behaviour since we no longer compare the base address".
> So even with the bootstrap failure, I think the patch needed review
> before going in.

I agree that this patch shouldn't have gone in without review from a
maintainer of the appropriate area regardless of whether this fixes a
bootstrap failure or not.

However we need a scheduler maintainer or global reviewer to please
help review this patch or help come up with an alternative patch. A
primary platform broken for 5 days with a commit and no public
response from the original poster is really not appropriate behaviour
in this community. If not, the original patch should have been
considered for reverting under the 48 hour rule .

regards
Ramana

>
> Christophe's message doesn't change anything because you knew when you
> posted the patch that it fixed the failure.
>
> Richard


Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output

2017-10-04 Thread Richard Sandiford
Wilco Dijkstra  writes:
> Christophe Lyon wrote:
>
>> FWIW, I confirm that this patch fixes the build failure I reported at:
>> https://gcc.gnu.org/ml/gcc-patches/2017-09/msg01980.html
>
> Thanks, now committed as r253399.

I don't think it's reasonable to commit this as obvious.  You said
yourself in the covering message that "it doesn't at all restore
the original behaviour since we no longer compare the base address".
So even with the bootstrap failure, I think the patch needed review
before going in.

Christophe's message doesn't change anything because you knew when you
posted the patch that it fixed the failure.

Richard


Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output

2017-10-04 Thread Wilco Dijkstra
Christophe Lyon wrote:

> FWIW, I confirm that this patch fixes the build failure I reported at:
> https://gcc.gnu.org/ml/gcc-patches/2017-09/msg01980.html

Thanks, now committed as r253399.

Wilco

Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output

2017-10-04 Thread Christophe Lyon
On 3 October 2017 at 18:34, Wilco Dijkstra  wrote:
> r253236 broke AArch64 bootstrap. Earlier revision r253071 changed scheduling
> behaviour on AArch64 as autopref scheduling no longer checks the base.
>
> This patch fixes the bootstrap failure and cleans up autopref scheduling.
> The code is greatly simplified.  Sort accesses on the offset first, and
> only if the offsets are the same fall back to other comparisons in
> rank_for_schedule.  This doesn't at all restore the original behaviour
> since we no longer compare the base address, but it now defines a total
> sorting order.  More work will be required to improve the sorting so
> that only loads/stores with the same base are affected.
>
> AArch64 bootstrap completes. I'd like to commit this ASAP as the AArch64
> bootstrap has been broken for days now.
>
> ChangeLog:
> 2017-10-03  Wilco Dijkstra  
>
> PR rtl-optimization/82396
> * gcc/haifa-sched.c (autopref_multipass_init): Simplify
> initialization.
> (autopref_rank_data): Simplify sort order.
> * gcc/sched-int.h (autopref_multipass_data_): Remove
> multi_mem_insn_p, min_offset and max_offset.
> --
>

Hi,

FWIW, I confirm that this patch fixes the build failure I reported at:
https://gcc.gnu.org/ml/gcc-patches/2017-09/msg01980.html

Thanks,

Christophe

> diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c
> index 
> 549e8961411ecd0a04ac3b24ba78b5d53e63258a..77ba8c5292a0801bbbcb35ed61ab3040015f6677
>  100644
> --- a/gcc/haifa-sched.c
> +++ b/gcc/haifa-sched.c
> @@ -5568,9 +5568,7 @@ autopref_multipass_init (const rtx_insn *insn, int 
> write)
>
>gcc_assert (data->status == AUTOPREF_MULTIPASS_DATA_UNINITIALIZED);
>data->base = NULL_RTX;
> -  data->min_offset = 0;
> -  data->max_offset = 0;
> -  data->multi_mem_insn_p = false;
> +  data->offset = 0;
>/* Set insn entry initialized, but not relevant for auto-prefetcher.  */
>data->status = AUTOPREF_MULTIPASS_DATA_IRRELEVANT;
>
> @@ -5585,10 +5583,9 @@ autopref_multipass_init (const rtx_insn *insn, int 
> write)
>  {
>int n_elems = XVECLEN (pat, 0);
>
> -  int i = 0;
> -  rtx prev_base = NULL_RTX;
> -  int min_offset = 0;
> -  int max_offset = 0;
> +  int i, offset;
> +  rtx base, prev_base = NULL_RTX;
> +  int min_offset = INT_MAX;
>
>for (i = 0; i < n_elems; i++)
> {
> @@ -5596,38 +5593,23 @@ autopref_multipass_init (const rtx_insn *insn, int 
> write)
>   if (GET_CODE (set) != SET)
> return;
>
> - rtx base = NULL_RTX;
> - int offset = 0;
>   if (!analyze_set_insn_for_autopref (set, write, , ))
> return;
>
> - if (i == 0)
> -   {
> - prev_base = base;
> - min_offset = offset;
> - max_offset = offset;
> -   }
>   /* Ensure that all memory operations in the PARALLEL use the same
>  base register.  */
> - else if (REGNO (base) != REGNO (prev_base))
> + if (i > 0 && REGNO (base) != REGNO (prev_base))
> return;
> - else
> -   {
> - min_offset = MIN (min_offset, offset);
> - max_offset = MAX (max_offset, offset);
> -   }
> + prev_base = base;
> + min_offset = MIN (min_offset, offset);
> }
>
> -  /* If we reached here then we have a valid PARALLEL of multiple memory
> -ops with prev_base as the base and min_offset and max_offset
> -containing the offsets range.  */
> +  /* If we reached here then we have a valid PARALLEL of multiple memory 
> ops
> +with prev_base as the base and min_offset containing the offset.  */
>gcc_assert (prev_base);
>data->base = prev_base;
> -  data->min_offset = min_offset;
> -  data->max_offset = max_offset;
> -  data->multi_mem_insn_p = true;
> +  data->offset = min_offset;
>data->status = AUTOPREF_MULTIPASS_DATA_NORMAL;
> -
>return;
>  }
>
> @@ -5637,7 +5619,7 @@ autopref_multipass_init (const rtx_insn *insn, int 
> write)
>  return;
>
>if (!analyze_set_insn_for_autopref (set, write, >base,
> -  >min_offset))
> +  >offset))
>  return;
>
>/* This insn is relevant for the auto-prefetcher.
> @@ -5646,63 +5628,6 @@ autopref_multipass_init (const rtx_insn *insn, int 
> write)
>data->status = AUTOPREF_MULTIPASS_DATA_NORMAL;
>  }
>
> -
> -/* Helper for autopref_rank_for_schedule.  Given the data of two
> -   insns relevant to the auto-prefetcher modelling code DATA1 and DATA2
> -   return their comparison result.  Return 0 if there is no sensible
> -   ranking order for the two insns.  */
> -
> -static int
> -autopref_rank_data (autopref_multipass_data_t data1,
> -autopref_multipass_data_t data2)
> -{
> -  /* Simple case when both insns are simple 

[PATCH] Fix PR82396: qsort comparator non-negative on sorted output

2017-10-03 Thread Wilco Dijkstra
r253236 broke AArch64 bootstrap. Earlier revision r253071 changed scheduling
behaviour on AArch64 as autopref scheduling no longer checks the base.

This patch fixes the bootstrap failure and cleans up autopref scheduling.
The code is greatly simplified.  Sort accesses on the offset first, and
only if the offsets are the same fall back to other comparisons in
rank_for_schedule.  This doesn't at all restore the original behaviour
since we no longer compare the base address, but it now defines a total
sorting order.  More work will be required to improve the sorting so
that only loads/stores with the same base are affected.

AArch64 bootstrap completes. I'd like to commit this ASAP as the AArch64
bootstrap has been broken for days now.

ChangeLog:
2017-10-03  Wilco Dijkstra  

PR rtl-optimization/82396
* gcc/haifa-sched.c (autopref_multipass_init): Simplify
initialization.
(autopref_rank_data): Simplify sort order.
* gcc/sched-int.h (autopref_multipass_data_): Remove
multi_mem_insn_p, min_offset and max_offset.
--

diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c
index 
549e8961411ecd0a04ac3b24ba78b5d53e63258a..77ba8c5292a0801bbbcb35ed61ab3040015f6677
 100644
--- a/gcc/haifa-sched.c
+++ b/gcc/haifa-sched.c
@@ -5568,9 +5568,7 @@ autopref_multipass_init (const rtx_insn *insn, int write)
 
   gcc_assert (data->status == AUTOPREF_MULTIPASS_DATA_UNINITIALIZED);
   data->base = NULL_RTX;
-  data->min_offset = 0;
-  data->max_offset = 0;
-  data->multi_mem_insn_p = false;
+  data->offset = 0;
   /* Set insn entry initialized, but not relevant for auto-prefetcher.  */
   data->status = AUTOPREF_MULTIPASS_DATA_IRRELEVANT;
 
@@ -5585,10 +5583,9 @@ autopref_multipass_init (const rtx_insn *insn, int write)
 {
   int n_elems = XVECLEN (pat, 0);
 
-  int i = 0;
-  rtx prev_base = NULL_RTX;
-  int min_offset = 0;
-  int max_offset = 0;
+  int i, offset;
+  rtx base, prev_base = NULL_RTX;
+  int min_offset = INT_MAX;
 
   for (i = 0; i < n_elems; i++)
{
@@ -5596,38 +5593,23 @@ autopref_multipass_init (const rtx_insn *insn, int 
write)
  if (GET_CODE (set) != SET)
return;
 
- rtx base = NULL_RTX;
- int offset = 0;
  if (!analyze_set_insn_for_autopref (set, write, , ))
return;
 
- if (i == 0)
-   {
- prev_base = base;
- min_offset = offset;
- max_offset = offset;
-   }
  /* Ensure that all memory operations in the PARALLEL use the same
 base register.  */
- else if (REGNO (base) != REGNO (prev_base))
+ if (i > 0 && REGNO (base) != REGNO (prev_base))
return;
- else
-   {
- min_offset = MIN (min_offset, offset);
- max_offset = MAX (max_offset, offset);
-   }
+ prev_base = base;
+ min_offset = MIN (min_offset, offset);
}
 
-  /* If we reached here then we have a valid PARALLEL of multiple memory
-ops with prev_base as the base and min_offset and max_offset
-containing the offsets range.  */
+  /* If we reached here then we have a valid PARALLEL of multiple memory 
ops
+with prev_base as the base and min_offset containing the offset.  */
   gcc_assert (prev_base);
   data->base = prev_base;
-  data->min_offset = min_offset;
-  data->max_offset = max_offset;
-  data->multi_mem_insn_p = true;
+  data->offset = min_offset;
   data->status = AUTOPREF_MULTIPASS_DATA_NORMAL;
-
   return;
 }
 
@@ -5637,7 +5619,7 @@ autopref_multipass_init (const rtx_insn *insn, int write)
 return;
 
   if (!analyze_set_insn_for_autopref (set, write, >base,
-  >min_offset))
+  >offset))
 return;
 
   /* This insn is relevant for the auto-prefetcher.
@@ -5646,63 +5628,6 @@ autopref_multipass_init (const rtx_insn *insn, int write)
   data->status = AUTOPREF_MULTIPASS_DATA_NORMAL;
 }
 
-
-/* Helper for autopref_rank_for_schedule.  Given the data of two
-   insns relevant to the auto-prefetcher modelling code DATA1 and DATA2
-   return their comparison result.  Return 0 if there is no sensible
-   ranking order for the two insns.  */
-
-static int
-autopref_rank_data (autopref_multipass_data_t data1,
-autopref_multipass_data_t data2)
-{
-  /* Simple case when both insns are simple single memory ops.  */
-  if (!data1->multi_mem_insn_p && !data2->multi_mem_insn_p)
-return data1->min_offset - data2->min_offset;
-
-  /* Two load/store multiple insns.  Return 0 if the offset ranges
- overlap and the difference between the minimum offsets otherwise.  */
-  else if (data1->multi_mem_insn_p && data2->multi_mem_insn_p)
-{
-  int min1 = data1->min_offset;
-  int max1 = data1->max_offset;
-  int min2 = data2->min_offset;
-  int