Hi Steve,
On Mon, Apr 09, 2018 at 10:10:13PM +0200, Thomas Koenig wrote:
the attached patch removes the parallel annotation from DO CONCURRENT.
As discussed in the PR, the autoparallellizer currently generates
wrong code. The only feasible way is to disable the annotation for
gcc-8 and work on the wrong-code issues for gcc-9. This is an 8
regression.
Regression-tested. OK for trunk?
Yes.
Thanks for the review!
However, just as I was about to commit, I thought of something else
and, hopefully, better.
The underlying problem is that the annotation does not go together
with -ftree-parallelize-loops - so let's simply not set it if that
flag is set.
This way, we can avoid speed regressions for people who do not use
-ftree-parallelize-loops, and it will be possible to downgrade the
PR to a missed-optimization bug; it is also not necessary to xfail
vect-do-concurrent-1.f90
Regression-tested. OK for trunk, for this version?
2018-04-09 Thomas Koenig <tkoe...@gcc.gnu.org>
PR fortran/83064
* trans-stmt.c (gfc_trans_forall_loop): Remove annotation for
parallell processing of DO CONCURRENT -ftree-parallelize-loops
is set.
2018-04-09 Thomas Koenig <tkoe...@gcc.gnu.org>
PR fortran/83064
* gfortran.dg/do_concurrent_5.f90: New test.
* gfortran.dg/vect/vect-do-concurrent-1.f90: Adjust dg-bogus
message.
Index: fortran/trans-stmt.c
===================================================================
--- fortran/trans-stmt.c (Revision 259222)
+++ fortran/trans-stmt.c (Arbeitskopie)
@@ -3642,7 +3642,10 @@ gfc_trans_forall_loop (forall_info *forall_tmp, tr
/* The exit condition. */
cond = fold_build2_loc (input_location, LE_EXPR, logical_type_node,
count, build_int_cst (TREE_TYPE (count), 0));
- if (forall_tmp->do_concurrent)
+
+ /* PR 83064 means that we cannot use the annotation if the
+ autoparallelizer is active. */
+ if (forall_tmp->do_concurrent && ! flag_tree_parallelize_loops)
cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
build_int_cst (integer_type_node,
annot_expr_parallel_kind),
Index: testsuite/gfortran.dg/vect/vect-do-concurrent-1.f90
===================================================================
--- testsuite/gfortran.dg/vect/vect-do-concurrent-1.f90 (Revision 259222)
+++ testsuite/gfortran.dg/vect/vect-do-concurrent-1.f90 (Arbeitskopie)
@@ -12,4 +12,3 @@ subroutine test(n, a, b, c)
end subroutine test
! { dg-message "loop vectorized" "" { target *-*-* } 0 }
-! { dg-bogus " version\[^\n\r]* alias" "" { target *-*-* } 0 }
! { dg-do run }
! PR 83064 - this used to give wrong results.
! { dg-options "-O3 -ftree-parallelize-loops=2" }
! Original test case by Christian Felter
program main
use, intrinsic :: iso_fortran_env
implicit none
integer, parameter :: nsplit = 4
integer(int64), parameter :: ne = 20000000
integer(int64) :: stride, low(nsplit), high(nsplit), edof(ne), i
real(real64), dimension(nsplit) :: pi
edof(1::4) = 1
edof(2::4) = 2
edof(3::4) = 3
edof(4::4) = 4
stride = ceiling(real(ne)/nsplit)
do i = 1, nsplit
high(i) = stride*i
end do
do i = 2, nsplit
low(i) = high(i-1) + 1
end do
low(1) = 1
high(nsplit) = ne
pi = 0
do concurrent (i = 1:nsplit)
pi(i) = sum(compute( low(i), high(i) ))
end do
if (abs (sum(pi) - atan(1.0d0)) > 1e-5) call abort
contains
pure function compute( low, high ) result( ttt )
integer(int64), intent(in) :: low, high
real(real64), dimension(nsplit) :: ttt
integer(int64) :: j, k
ttt = 0
! Unrolled loop
! do j = low, high, 4
! k = 1
! ttt(k) = ttt(k) + (-1)**(j+1) / real( 2*j-1 )
! k = 2
! ttt(k) = ttt(k) + (-1)**(j+2) / real( 2*j+1 )
! k = 3
! ttt(k) = ttt(k) + (-1)**(j+3) / real( 2*j+3 )
! k = 4
! ttt(k) = ttt(k) + (-1)**(j+4) / real( 2*j+5 )
! end do
! Loop with modulo operation
! do j = low, high
! k = mod( j, nsplit ) + 1
! ttt(k) = ttt(k) + (-1)**(j+1) / real( 2*j-1 )
! end do
! Loop with subscripting via host association
do j = low, high
k = edof(j)
ttt(k) = ttt(k) + (-1.0_real64)**(j+1) / real( 2*j-1 )
end do
end function
end program main