Sean> Does anyone know if using packed when it's not needed
    Sean> results in less efficient code?

Yes, it definitely does on some (non-mainstream) architectures.  I
think we talked about this before...

...ah yes: http://article.gmane.org/gmane.linux.drivers.openib/8396

The assembly there came from compiling with no optimization, but if
anything, the packed version (function d below) of that code:

    struct foo { int a; };
    struct bar { int b; } __attribute__((packed));
    
    int c(struct foo *x) { return x->a; }
    int d(struct bar *x) { return x->b; }

looks worse with -O2.  ia64 compiled with -O2 goes from one bundle to six:

    0000000000000000 <c>:
       0:       13 40 00 40 10 10       [MBB]       ld4 r8=[r32]
       6:       00 00 00 00 10 80                   nop.b 0x0
       c:       08 00 84 00                         br.ret.sptk.many b0;;
    
    0000000000000010 <d>:
      10:       09 70 00 40 00 21       [MMI]       mov r14=r32
      16:       f0 10 80 00 42 00                   adds r15=2,r32
      1c:       34 00 01 84                         adds r32=3,r32;;
      20:       19 80 04 1c 00 14       [MMB]       ld1 r16=[r14],1
      26:       f0 00 3c 00 20 00                   ld1 r15=[r15]
      2c:       00 00 00 20                         nop.b 0x0;;
      30:       09 70 00 1c 00 10       [MMI]       ld1 r14=[r14]
      36:       80 00 80 00 20 e0                   ld1 r8=[r32]
      3c:       f1 78 bd 53                         shl r15=r15,16;;
      40:       01 00 00 00 01 00       [MII]       nop.m 0x0
      46:       e0 70 dc ee 29 00                   shl r14=r14,8
      4c:       81 38 9d 53                         shl r8=r8,24;;
      50:       0b 70 40 1c 0e 20       [MMI]       or r14=r16,r14;;
      56:       f0 70 3c 1c 40 00                   or r15=r14,r15
      5c:       00 00 04 00                         nop.i 0x0;;
      60:       11 00 00 00 01 00       [MIB]       nop.m 0x0
      66:       80 78 20 1c 40 80                   or r8=r15,r8
      6c:       08 00 84 00                         br.ret.sptk.many b0;;

and sparc64 goes similarly crazy:

    0000000000000000 <c>:
       0:       81 c3 e0 08     retl 
       4:       d0 42 00 00     ldsw  [ %o0 ], %o0
       8:       30 68 00 06     b,a   %xcc, 20 <d>

    0000000000000020 <d>:
      20:       c6 0a 00 00     ldub  [ %o0 ], %g3
      24:       c2 0a 20 01     ldub  [ %o0 + 1 ], %g1
      28:       c4 0a 20 02     ldub  [ %o0 + 2 ], %g2
      2c:       87 28 f0 18     sllx  %g3, 0x18, %g3
      30:       d0 0a 20 03     ldub  [ %o0 + 3 ], %o0
      34:       83 28 70 10     sllx  %g1, 0x10, %g1
      38:       82 10 40 03     or  %g1, %g3, %g1
      3c:       85 28 b0 08     sllx  %g2, 8, %g2
      40:       84 10 80 01     or  %g2, %g1, %g2
      44:       90 12 00 02     or  %o0, %g2, %o0
      48:       81 c3 e0 08     retl 
      4c:       91 3a 20 00     sra  %o0, 0, %o0
      50:       30 68 00 04     b,a   %xcc, 60 <d+0x40>

Note that mainstream architectures that handle unaligned accesses
sanely do fine with packed, e.g. ppc64:

    0000000000000000 <.c>:
       0:       e8 63 00 02     lwa     r3,0(r3)
       4:       4e 80 00 20     blr
    
    0000000000000014 <.d>:
      14:       e8 63 00 02     lwa     r3,0(r3)
      18:       4e 80 00 20     blr

x86_64:

    0000000000000000 <c>:
       0:       8b 07                   mov    (%rdi),%eax
       2:       c3                      retq   
    
    0000000000000010 <d>:
      10:       8b 07                   mov    (%rdi),%eax
      12:       c3                      retq   

 - R.
_______________________________________________
openib-general mailing list
[email protected]
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to