Hi Steve,

On Mon, Apr 09, 2018 at 10:10:13PM +0200, Thomas Koenig wrote:


the attached patch removes the parallel annotation from DO CONCURRENT.
As discussed in the PR, the autoparallellizer currently generates
wrong code. The only feasible way is to disable the annotation for
gcc-8 and work on the wrong-code issues for gcc-9. This is an 8
regression.

Regression-tested. OK for trunk?


Yes.

Thanks for the review!

However, just as I was about to commit, I thought of something else
and, hopefully, better.

The underlying problem is that the annotation does not go together
with -ftree-parallelize-loops - so let's simply not set it if that
flag is set.

This way, we can avoid speed regressions for people who do not use
-ftree-parallelize-loops, and it will be possible to downgrade the
PR to a missed-optimization bug; it is also not necessary to xfail
vect-do-concurrent-1.f90

Regression-tested. OK for trunk, for this version?


2018-04-09  Thomas Koenig  <tkoe...@gcc.gnu.org>

        PR fortran/83064
        * trans-stmt.c (gfc_trans_forall_loop): Remove annotation for
        parallell processing of DO CONCURRENT -ftree-parallelize-loops
        is set.

2018-04-09  Thomas Koenig  <tkoe...@gcc.gnu.org>

        PR fortran/83064
        * gfortran.dg/do_concurrent_5.f90: New test.
        * gfortran.dg/vect/vect-do-concurrent-1.f90: Adjust dg-bogus
        message.
Index: fortran/trans-stmt.c
===================================================================
--- fortran/trans-stmt.c	(Revision 259222)
+++ fortran/trans-stmt.c	(Arbeitskopie)
@@ -3642,7 +3642,10 @@ gfc_trans_forall_loop (forall_info *forall_tmp, tr
       /* The exit condition.  */
       cond = fold_build2_loc (input_location, LE_EXPR, logical_type_node,
 			      count, build_int_cst (TREE_TYPE (count), 0));
-      if (forall_tmp->do_concurrent)
+
+      /* PR 83064 means that we cannot use the annotation if the
+	 autoparallelizer is active.  */
+      if (forall_tmp->do_concurrent && ! flag_tree_parallelize_loops)
 	cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
 		       build_int_cst (integer_type_node,
 				      annot_expr_parallel_kind),
Index: testsuite/gfortran.dg/vect/vect-do-concurrent-1.f90
===================================================================
--- testsuite/gfortran.dg/vect/vect-do-concurrent-1.f90	(Revision 259222)
+++ testsuite/gfortran.dg/vect/vect-do-concurrent-1.f90	(Arbeitskopie)
@@ -12,4 +12,3 @@ subroutine test(n, a, b, c)
 end subroutine test
 
 ! { dg-message "loop vectorized" "" { target *-*-* } 0 }
-! { dg-bogus " version\[^\n\r]* alias" "" { target *-*-* } 0 }
! { dg-do  run }
! PR 83064 - this used to give wrong results.
! { dg-options "-O3 -ftree-parallelize-loops=2" }
! Original test case by Christian Felter

program main
    use, intrinsic :: iso_fortran_env
    implicit none

    integer, parameter :: nsplit = 4
    integer(int64), parameter :: ne = 20000000
    integer(int64) :: stride, low(nsplit), high(nsplit), edof(ne), i
    real(real64), dimension(nsplit) :: pi
    
    edof(1::4) = 1
    edof(2::4) = 2
    edof(3::4) = 3
    edof(4::4) = 4
    
    stride = ceiling(real(ne)/nsplit)
    do i = 1, nsplit
        high(i) = stride*i
    end do
    do i = 2, nsplit
        low(i) = high(i-1) + 1
    end do
    low(1) = 1
    high(nsplit) = ne

    pi = 0
    do concurrent (i = 1:nsplit)
        pi(i) = sum(compute( low(i), high(i) ))
    end do
    if (abs (sum(pi) - atan(1.0d0)) > 1e-5) call abort
    
contains
    
    pure function compute( low, high ) result( ttt )        
        integer(int64), intent(in) :: low, high
        real(real64), dimension(nsplit) :: ttt
        integer(int64) :: j, k
        
        ttt = 0

        ! Unrolled loop
!         do j = low, high, 4
!             k = 1
!             ttt(k) = ttt(k) + (-1)**(j+1) / real( 2*j-1 )                            
!             k = 2
!             ttt(k) = ttt(k) + (-1)**(j+2) / real( 2*j+1 )                            
!             k = 3
!             ttt(k) = ttt(k) + (-1)**(j+3) / real( 2*j+3 )                            
!             k = 4
!             ttt(k) = ttt(k) + (-1)**(j+4) / real( 2*j+5 )                            
!         end do
        
        ! Loop with modulo operation
!         do j = low, high
!             k = mod( j, nsplit ) + 1
!             ttt(k) = ttt(k) + (-1)**(j+1) / real( 2*j-1 )                                        
!         end do
        
        ! Loop with subscripting via host association
        do j = low, high
            k = edof(j)
            ttt(k) = ttt(k) + (-1.0_real64)**(j+1) / real( 2*j-1 )                                        
        end do
    end function
    
end program main

Reply via email to