Hello! > 2014-08-07 Kugan Vivekanandarajah <kug...@linaro.org> > > * calls.c (precompute_arguments): Check > promoted_for_signed_and_unsigned_p and set the promoted mode. > (promoted_for_signed_and_unsigned_p): New function. > (expand_expr_real_1): Check promoted_for_signed_and_unsigned_p > and set the promoted mode. > * expr.h (promoted_for_signed_and_unsigned_p): New function definition. > * cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if > SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.
This patch regresses: Running target unix FAIL: libgomp.fortran/simd7.f90 -O2 execution test FAIL: libgomp.fortran/simd7.f90 -Os execution test on alphaev6-linux-gnu. The problem can be illustrated with attached testcase with a crosscompiler to alphaev68-linux-gnu (-O2 -fopenmp). The problem is in missing SImode extension after DImode shift of SImode subregs for this part: --cut here-- # test.23_12 = PHI <0(37), 1(36)> _242 = ivtmp.181_73 + 2147483645; _240 = _242 * 2; _63 = (integer(kind=4)) _240; if (ubound.6_99 <= 2) goto <bb 39>; else goto <bb 40>; ;; succ: 39 ;; 40 ;; basic block 39, loop depth 1 ;; pred: 38 pretmp_337 = test.23_12 | l_76; goto <bb 45>; ;; succ: 45 ;; basic block 40, loop depth 1 ;; pred: 38 _11 = *c_208[0]; if (_11 != _63) goto <bb 45>; else goto <bb 42>; --cut here-- this expands to: (code_label 592 591 593 35 "" [0 uses]) (note 593 592 0 NOTE_INSN_BASIC_BLOCK) ;; _63 = (integer(kind=4)) _240; (insn 594 593 595 (set (reg:SI 538) (const_int 1073741824 [0x40000000])) -1 (nil)) (insn 595 594 596 (set (reg:SI 539) (plus:SI (reg:SI 538) (const_int 1073741824 [0x40000000]))) -1 (nil)) (insn 596 595 597 (set (reg:SI 537) (plus:SI (reg:SI 539) (const_int -3 [0xfffffffffffffffd]))) -1 (expr_list:REG_EQUAL (const_int 2147483645 [0x7ffffffd]) (nil))) (insn 597 596 598 (set (reg:SI 536 [ D.1700 ]) (plus:SI (subreg/s/v/u:SI (reg:DI 144 [ ivtmp.181 ]) 0) (reg:SI 537))) -1 (nil)) (insn 598 597 599 (set (reg:DI 540) (ashift:DI (subreg:DI (reg:SI 536 [ D.1700 ]) 0) (const_int 1 [0x1]))) -1 (nil)) (insn 599 598 0 (set (reg:DI 145 [ D.1694 ]) (reg:DI 540)) -1 (nil)) ... (note 610 609 0 NOTE_INSN_BASIC_BLOCK) ;; _11 = *c_208[0]; (insn 611 610 0 (set (reg:DI 120 [ D.1694 ]) (sign_extend:DI (mem:SI (reg/v/f:DI 227 [ c ]) [7 *c_208+0 S4 A128]))) simd7.f90:12 -1 (nil)) ;; if (_11 != _63) (insn 612 611 613 40 (set (reg:DI 545) (eq:DI (reg:DI 120 [ D.1694 ]) (reg:DI 145 [ D.1694 ]))) simd7.f90:12 -1 (nil)) (jump_insn 613 612 616 40 (set (pc) (if_then_else (eq (reg:DI 545) (const_int 0 [0])) (label_ref 0) (pc))) simd7.f90:12 -1 (int_list:REG_BR_PROB 450 (nil))) which results in following asm: $L35: addl $25,$7,$2 # 597 addsi3/1 [length = 4] addq $2,$2,$2 # 598 ashldi3/1 [length = 4] <------ here bne $24,$L145 # 601 *bcc_normal [length = 4] lda $4,4($20) # 627 *adddi_internal/2 [length = 4] ldl $8,0($20) # 611 *extendsidi2_1/2 [length = 4] lda $3,3($31) # 74 *movdi/2 [length = 4] cmpeq $8,$2,$2 # 612 *setcc_internal [length = 4] <-- compare bne $2,$L40 # 613 *bcc_normal [length = 4] br $31,$L88 # 2403 jump [length = 4] .align 4 ... Tracking the values with the debugger shows wrong calculation: 0x000000012000108c <+1788>: addl t10,t12,t1 0x0000000120001090 <+1792>: addq t1,t1,t1 ... 0x00000001200010a4 <+1812>: cmpeq t6,t1,t1 0x00000001200010a8 <+1816>: bne t1,0x1200010c0 <foo_+1840> (gdb) si 0x000000012000108c 17 l = l .or. any (b /= 7 + i) (gdb) i r t10 t12 t10 0x7 7 t12 0x7ffffffd 2147483645 (gdb) si 0x0000000120001090 17 l = l .or. any (b /= 7 + i) (gdb) i r t1 t1 0xffffffff80000004 -2147483644 (gdb) si 18 l = l .or. any (c /= 8 + 2 * i) (gdb) i r t1 t1 0xffffffff00000008 -4294967288 At this point, the calculation should zero-extend SImode value to full DImode, since compare operates on DImode values. The problematic insn is (insn 599), which is now a DImode assignment instead of zero-extend, due to: --- a/gcc/cfgexpand.c +++ b/gcc/cfgexpand.c @@ -3309,7 +3309,13 @@ expand_gimple_stmt_1 (gimple stmt) GET_MODE (target), temp, unsignedp); } - convert_move (SUBREG_REG (target), temp, unsignedp); + if ((SUBREG_PROMOTED_GET (target) == SRP_SIGNED_AND_UNSIGNED) + && (GET_CODE (temp) == SUBREG) + && (GET_MODE (target) == GET_MODE (temp)) + && (GET_MODE (SUBREG_REG (target)) == GET_MODE (SUBREG_REG (temp)))) + emit_move_insn (SUBREG_REG (target), SUBREG_REG (temp)); + else + convert_move (SUBREG_REG (target), temp, unsignedp); } else if (nontemporal && emit_storent_insn (target, temp)) ; When compiling this code, we have: lhs = _63 target = (subreg/s/v/u:SI (reg:DI 145 [ D.1694 ]) 0) temp = (subreg:SI (reg:DI 540) 0) So, the code assumes that it is possible to copy (reg:DI 540) directly to (reg:DI 154). However, this is not the case, since we still have garbage in the top 32bits. Reverting the part above fixes the runtime failure, since (insn 599) is now: (insn 599 598 0 (set (reg:DI 145 [ D.1694 ]) (zero_extend:DI (subreg:SI (reg:DI 540) 0))) -1 (nil)) It looks to me that we have also to check the temp with SUBREG_PROMOTED_*. Uros.
subroutine foo (d, e, f, g, m, n) integer :: i, j, b(2:9), c(3:n), d(:), e(2:n), f(2:,3:), n integer, allocatable :: g(:), h(:), k, m logical :: l l = .false. allocate (h(2:7)) i = 4; j = 4; b = 7; c = 8; d = 9; e = 10; f = 11; g = 12; h = 13; k = 14; m = 15 !$omp simd linear(b)linear(c:2)linear(d:3)linear(e:4)linear(f:5)linear(g:6) & !$omp & linear(h:7)linear(k:8)linear(m:9) reduction(.or.:l) do i = 0, 63 l = l .or. any (b /= 7 + i) l = l .or. any (c /= 8 + 2 * i) b = b + 1; c = c + 2 d = d + 3; e = e + 4; f = f + 5; g = g + 6 h = h + 7; k = k + 8; m = m + 9 end do if (l .or. i /= 64) call abort if (any (b /= 7 + 64) .or. any (c /= 8 + 2 * 64)) call abort end subroutine interface subroutine foo (d, e, f, g, m, n) integer :: d(:), e(2:n), f(2:,3:), n integer, allocatable :: g(:), m end subroutine end interface integer, parameter :: n = 8 integer :: d(2:18), e(3:n+1), f(5:6,7:9) integer, allocatable :: g(:), m allocate (g(7:10)) call foo (d, e, f, g, m, n) end