https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78200
--- Comment #1 from Venkataramanan <venkataramanan.kumar at amd dot com> ---
(In reply to Venkataramanan from comment #0)
> Noticed 5% regression with 429.mcf of cpu2006 on x86_64 AVX2 (bdver4) with
> GCC trunk gcc version 7.0.0 20161028 (experimental) (GCC).
>
> Flag used is -O3 -mavx2 -mprefer-avx128
>
> Not seen with GCC 6.1 or with GCC trunk for -O3 -mavx -mprefer-avx128
>
> Assembly difference is observed in hot function primal_bea_mpp of pbeampp.c.
>
> -O3 -mavx -mprefer-avx128             -O3 -mavx2 -mprefer-avx128
>
> .L98:                               | .L98:
> ------------------------------------| jle .L97   <== order of comparison is different.
> cmpl $2, %r9d                       | cmpl $2, %r9d
> jne .L97                            | jne .L97
> testq %rdi, %rdi                    | -----------------------------------
> jle .L97                            | -----------------------------------
> .L99:                               | .L99:
> addq $1, %r13                       | addq $1, %r13
> movq %rdi, %r12                     | movq %rdi, %r12
> movq perm(,%r13,8), %r9             | movq perm(,%r13,8), %r9
> sarq $63, %r12                      | sarq $63, %r12
> movq %rdi, 8(%r9)                   | movq %rdi, 8(%r9)
> + +-- 12 lines: xorq %r12, %rdi-------|+ +-- 12 lines: xorq %r12, %rdi------
> jle .L97                            | jle .L97
> movq 8(%rax), %r14                  | movq 8(%rax), %r14
> movq (%rax), %rdi                   | movq (%rax), %rdi
> subq (%r14), %rdi                   | subq (%r14), %rdi
> movq 16(%rax), %r14                 | movq 16(%rax), %r14
> addq (%r14), %rdi                   | addq (%r14), %rdi
> jns .L98                            | cmpq $0, %rdi
> ------------------------------------| jge .L98
>
>
> Gimple optimized dump shows
>
> GCC trunk -O3 -mavx -mprefer-avx128
> ;; basic block 20, loop depth 2, count 0, freq 1067, maybe hot
> ;; Invalid sum of incoming frequencies 1216, should be 1067
> ;; prev block 19, next block 21, flags: (NEW, REACHABLE, VISITED)
> ;; pred: 18 [64.0%] (FALSE_VALUE,EXECUTABLE)
>   # RANGE [0, 1]
>   _496 = _512 == 2;
>   # RANGE [0, 1]
>   _495 = red_cost_503 > 0;
>   # RANGE [0, 1]
>   _494 = _495 & _496;
>   if (_494 != 0)
>     goto <bb 21>;
>   else
>     goto <bb 22>;
>

For GCC trunk with -O3 -mavx -mprefer-avx128, the optimized dump looks like this:
;; basic block 20, loop depth 2, count 0, freq 1067, maybe hot
;; Invalid sum of incoming frequencies 1216, should be 1067
;; prev block 19, next block 21, flags: (NEW, REACHABLE, VISITED)
;; pred: 18 [64.0%] (FALSE_VALUE,EXECUTABLE)
  # RANGE [0, 1]
  _340 = _23 == 2;
  # RANGE [0, 1]
  _341 = red_cost_86 > 0;
  # RANGE [0, 1]
  _338 = _340 & _341;
  if (_338 != 0)
    goto <bb 21>;
  else
    goto <bb 22>;