http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50256

             Bug #: 50256
           Summary: AVR GCC - several unnecessary register moves
    Classification: Unclassified
           Product: gcc
           Version: 4.3.3
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
        AssignedTo: unassig...@gcc.gnu.org
        ReportedBy: nickpar...@eaton.com


Hi,

AVR GCC seems to generate inefficient code.  The function below multiplies two
unsigned 24-bit values, then effectively shifts the product right by 24 bits.

uint32_t MulU3U3S3(uint32_t a_u3, uint32_t b_u3)
{
    uint32_t answer;

    asm volatile
    (

        "push r0"                  "\n\t"
        "push r1"                  "\n\t"

        "clr r20"                  "\n\t"      // zero register

        // 0 byte shifts
        "mul %A1,%A2"              "\n\t"      // a1a2
        "mov r2,r0"                "\n\t"
        "mov r3,r1"                "\n\t"

        // 1 byte shifts
        "mul %A1,%B2"              "\n\t"
        "add r3,r0"                "\n\t"
        "adc r4,r1"                "\n\t"
        "adc r5,r20"               "\n\t"

        "mul %A2,%B1"              "\n\t"
        "add r3,r0"                "\n\t"
        "adc r4,r1"                "\n\t"
        "adc r5,r20"               "\n\t"

        // 2 byte shifts
        "mul %A1,%C2"              "\n\t"
        "add r4,r0"                "\n\t"
        "adc r5,r1"                "\n\t"
        "adc r6,r20"               "\n\t"

        "mul %A2,%C1"              "\n\t"
        "add r4,r0"                "\n\t"
        "adc r5,r1"                "\n\t"
        "adc r6,r20"               "\n\t"

        "mul %B2,%B1"              "\n\t"
        "add r4,r0"                "\n\t"
        "adc r5,r1"                "\n\t"
        "adc r6,r20"               "\n\t"

        // 3 byte shifts
        "mul %B1,%C2"              "\n\t"
        "add r5,r0"                "\n\t"
        "adc r6,r1"                "\n\t"
        "adc r7,r20"               "\n\t"

        "mul %B2,%C1"              "\n\t"
        "add r5,r0"                "\n\t"
        "adc r6,r1"                "\n\t"
        "adc r7,r20"               "\n\t"

        // 4 byte shifts
        "mul %C2,%C1"              "\n\t"
        "add r6,r0"                "\n\t"
        "adc r7,r1"                "\n\t"

        "mov %A0,r5"               "\n\t"
        "mov %B0,r6"               "\n\t"
        "mov %C0,r7"               "\n\t"
        "clr %D0"                  "\n\t"

        "pop r1"                    "\n\t"
        "pop r0"                    "\n\t"

    : "=&r" (answer)
    : "r" (a_u3), "r" (b_u3)
    : "r0","r1","r2","r3","r4","r5","r6","r7","r20"
    );

    return (answer);
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
Calling code
(note the moves after the call -- why can't the function leave the answer in place?)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 878 040c 6CE5              ldi r22,lo8(167772)
 879 040e 7FE8              ldi r23,hi8(167772)
 880 0410 82E0              ldi r24,hlo8(167772)
 881 0412 90E0              ldi r25,hhi8(167772)
 882 0414 20EA              ldi r18,lo8(100000)
 883 0416 36E8              ldi r19,hi8(100000)
 884 0418 41E0              ldi r20,hlo8(100000)
 885 041a 50E0              ldi r21,hhi8(100000)
 886 041c 0E94 0000         call MulU3U3S3
 887 0420 7B01              movw r14,r22
 888 0422 8C01              movw r16,r24

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
Called code is below. Note that
- one argument is unnecessarily moved to a new location
- at end, result is unnecessarily moved to a new location

This code is also unnecessary:

 283 010e 8901              movw r16,r18
 284 0110 9A01              movw r18,r20

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

263                .global MulU3U3S3
 265                MulU3U3S3:
 266                .LFB8:
 267                .LM19:
 268                .LVL22:
 269 00f6 2F92              push r2
 270 00f8 3F92              push r3
 271 00fa 4F92              push r4
 272 00fc 5F92              push r5
 273 00fe 6F92              push r6
 274 0100 7F92              push r7
 275 0102 CF92              push r12
 276 0104 DF92              push r13
 277 0106 EF92              push r14
 278 0108 FF92              push r15
 279 010a 0F93              push r16
 280 010c 1F93              push r17
 281                /* prologue: function */
 282                /* frame size = 0 */
 283 010e 8901              movw r16,r18
 284 0110 9A01              movw r18,r20
 285                .LM20:
 286 0112 6801              movw r12,r16
 287 0114 7901              movw r14,r18
 288                /* #APP */
 289                 ;  326 "maths_mul.c" 1
 290 0116 0F92              push r0
 291 0118 1F92              push r1
 292 011a 4427              clr r20
 293 011c 6C9D              mul r22,r12
 294 011e 202C              mov r2,r0
 295 0120 312C              mov r3,r1
 296 0122 6D9D              mul r22,r13
 297 0124 300C              add r3,r0
 298 0126 411C              adc r4,r1
 299 0128 541E              adc r5,r20
 300 012a C79E              mul r12,r23
 301 012c 300C              add r3,r0
 302 012e 411C              adc r4,r1
 303 0130 541E              adc r5,r20
 304 0132 6E9D              mul r22,r14
 305 0134 400C              add r4,r0
 306 0136 511C              adc r5,r1
 307 0138 641E              adc r6,r20
 308 013a C89E              mul r12,r24
 309 013c 400C              add r4,r0
 310 013e 511C              adc r5,r1
 311 0140 641E              adc r6,r20
 312 0142 D79E              mul r13,r23
 313 0144 400C              add r4,r0
 314 0146 511C              adc r5,r1
 315 0148 641E              adc r6,r20
 316 014a 7E9D              mul r23,r14
 317 014c 500C              add r5,r0
 318 014e 611C              adc r6,r1
 319 0150 741E              adc r7,r20
 320 0152 D89E              mul r13,r24
 321 0154 500C              add r5,r0
 322 0156 611C              adc r6,r1
 323 0158 741E              adc r7,r20
 324 015a E89E              mul r14,r24
 325 015c 600C              add r6,r0
 326 015e 711C              adc r7,r1
 327 0160 052D              mov r16,r5
 328 0162 162D              mov r17,r6
 329 0164 272D              mov r18,r7
 330 0166 3327              clr r19
 331 0168 1F90              pop r1
 332 016a 0F90              pop r0
 333                    
 334                 ;  0 "" 2
 335                .LVL23:
 336                .LM21:
 337                /* #NOAPP */
 338 016c B801              movw r22,r16
 339                .LVL24:
 340 016e C901              movw r24,r18
 341                .LVL25:
 342                /* epilogue start */
 343 0170 1F91              pop r17
 344 0172 0F91              pop r16
 345 0174 FF90              pop r15
 346 0176 EF90              pop r14
 347 0178 DF90              pop r13
 348 017a CF90              pop r12
 349 017c 7F90              pop r7
 350 017e 6F90              pop r6
 351 0180 5F90              pop r5
 352 0182 4F90              pop r4
 353 0184 3F90              pop r3
 354 0186 2F90              pop r2
 355 0188 0895              ret

Reply via email to