https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118360

            Bug ID: 118360
           Summary: [avr] Expensive shift instead of bit test
           Product: gcc
           Version: 15.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: middle-end
          Assignee: unassigned at gcc dot gnu.org
          Reporter: gjl at gcc dot gnu.org
  Target Milestone: ---

$ avr-gcc-15 -Os -mmcu=atmega8 -S -dp

long fun1 (int a, long b)
{
    if (a & 1)
        b ^= 8;
    return b;
}

compiles to:

fun1:
        push r16                 ;  39  [c=4 l=1]  pushqi1/0
        push r17                 ;  40  [c=4 l=1]  pushqi1/0
/* prologue: function */
/* frame size = 0 */
/* stack size = 2 */
.L__stack_usage = 2
        andi r24,lo8(1)  ;  48  [c=4 l=1]  *andqi3/1
        ldi r25,0                ;  49  [c=4 l=1]  movqi_insn/0
        ldi r26,0                ;  50  [c=4 l=1]  movqi_insn/0
        ldi r27,0                ;  51  [c=4 l=1]  movqi_insn/0
        ldi r18,3        ;  47  [c=28 l=7]  *ashlsi3_const/3
        1:      
        lsl r24
        rol r25
        rol r26
        rol r27
        dec r18 
        brne 1b 
        movw r16,r24     ;  60  [c=4 l=1]  *movhi/0
        movw r18,r26     ;  61  [c=4 l=1]  *movhi/0
        eor r16,r20      ;  34  [c=4 l=1]  *xorqi3
        eor r17,r21      ;  35  [c=4 l=1]  *xorqi3
        eor r18,r22      ;  36  [c=4 l=1]  *xorqi3
        eor r19,r23      ;  37  [c=4 l=1]  *xorqi3
        movw r22,r16     ;  62  [c=4 l=1]  *movhi/0
        movw r24,r18     ;  63  [c=4 l=1]  *movhi/0
/* epilogue start */
        pop r17          ;  43  [c=4 l=1]  popqi
        pop r16          ;  44  [c=4 l=1]  popqi
        ret              ;  45  [c=0 l=1]  return_from_epilogue

whereas the very similar (tests the inverted bit)

long fun_not1 (int a, long b)
{
    if (!(a & 1))
        b ^= 8;
    return b;
}

gives

fun_not1:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
        movw r18,r24     ;  52  [c=4 l=1]  *movhi/0
        mov r24,r22      ;  39  [c=4 l=1]  movqi_insn/0
        mov r22,r20      ;  40  [c=4 l=1]  movqi_insn/0
        mov r25,r23      ;  41  [c=4 l=1]  movqi_insn/0
        mov r23,r21      ;  42  [c=4 l=1]  movqi_insn/0
        sbrc r18,0       ;  43  [c=4 l=2]  *sbrx_branchhi
        rjmp .L3        
        ldi r18,lo8(8)   ;  50  [c=4 l=1]  movqi_insn/1
        eor r22,r18      ;  51  [c=4 l=1]  *xorqi3
.L3:
/* epilogue start */
        ret              ;  46  [c=0 l=1]  return

Note that AVR is an 8-bit architecture without a barrel shifter, hence
32-bit shifts are very expensive.

* Some of the middle-end RTL lowering passes and tree optimizers don't even
consider costs.

* Even when costs are considered, they may be a bad proxy for the real costs.
One example is when arithmetic is performed by a library call that appears to
be cheap with -Os ("only" one CALL), but a libcall is still unwanted and too
expensive when a simple bit-test can do.

Reply via email to