The following block of code appears to produce an unneeded memcpy on both
Intel and PowerPC platforms. There is no aliasing or side effect that I can
think of that could possibly force such copying to occur -- the problem seems
to be that gcc is not aware of the lifetime of large structures kept on the stack.

The full source code:

// Test class for the reproducer: two floats plus a large int array, so that
// copying an instance is expensive. With 4-byte float and int the object is
// 2*4 + 444*4 = 1784 bytes, which matches the memcpy length in the listings.
class TV
{
   private:
      float truth;
      float confidence;
      int stuff[444];      // bulk of the object; makes the copy worth noticing
   public:
      TV(void);            // declared only; no definition is given in this test case
      float getT(void);    // declared only; no definition is given in this test case
};

// External functions used by the reproducer. Only their declarations appear
// in this test case, so their bodies are not visible when my_subr is compiled.
extern TV my_tv_maker(float tr);   // returns a TV by value
extern void other(TV *);           // receives a pointer to a TV

// Reproducer for the reported redundant copy: default-constructs a TV, hands
// its address to other(), overwrites it with the value returned by
// my_tv_maker(), and returns getT() of the result.
// The parameter tr is not used; the literal 434.23 is passed to my_tv_maker().
// NOTE(review): other() receives &tv and its body is not shown. If it kept
// that pointer, my_tv_maker() could observe tv while building its return
// value -- confirm this before assuming the temporary can be elided.
float my_subr(float tr)
{
   TV tv;
   other (&tv);  // force constructor TV::TV to run first
   tv = my_tv_maker(434.23);  // over-write previous tv.
   return tv.getT();
}


PowerPC assembly, created with gcc -S -O2 -c

.L._Z7my_subrf:
.LFB2:
   mflr 0
.LCFI0:
   std 28,-32(1)
.LCFI1:
   std 29,-24(1)
.LCFI2:
   std 0,16(1)
.LCFI3:
   stdu 1,-3728(1)    make room for two instances of TV on stack
.LCFI4:
   addi 29,1,112      one instance of TV
   addi 28,1,1904     second instance of TV
   mr 3,29
   bl _ZN2TVC1Ev      call constructor on instance 1
   nop
   mr 3,29
   bl _Z5otherP2TV    call other() on instance 1
   nop
   lfs 1,....@toc(2)
   mr 3,28
   bl _Z11my_tv_makerf  call my_tv_maker on instance 2
   nop
   mr 4,28
   mr 3,29
   li 5,1784
   bl memcpy           copy instance 2 over to 1! waste of CPU!
   nop
   mr 3,29
   bl _ZN2TV4getTEv    call method on instance 1
   nop
   addi 1,1,3728
   ld 0,16(1)
   ld 28,-32(1)
   ld 29,-24(1)
   mtlr 0
   blr

There are two missed optimizations: the second instance of TV on the stack is
not needed, and the memcpy from it to the first instance is not needed either.
For large structures, this can be a significant time-waster.

Exactly the same problem shows up in Intel as well:

_Z7my_subrf:
.LFB2:
   pushl %ebp
.LCFI0:
   movl  %esp, %ebp
.LCFI1:
   subl  $3608, %esp
.LCFI2:
   movl  %ebx, -8(%ebp)
.LCFI3:
   leal  -1792(%ebp), %ebx    instance 1 of TV
   movl  %esi, -4(%ebp)
.LCFI4:
   leal  -3592(%ebp), %esi    instance 2 of TV
   movl  %ebx, (%esp)
   call  _ZN2TVC1Ev           call constructor on instance 1
   movl  %ebx, (%esp)
   call  _Z5otherP2TV          call other() on instance 1
   movl  %esi, (%esp)
   movl  $0x43d91d71, 4(%esp)
   call  _Z11my_tv_makerf      call my_tv_maker on instance 2
   subl  $4, %esp
   movl  %esi, 4(%esp)
   movl  %ebx, (%esp)
   movl  $1784, 8(%esp)
   call  memcpy                 copy instance 2 to instance 1
   movl  %ebx, (%esp)
   call  _ZN2TV4getTEv         call getT() on instance 1
   movl  -8(%ebp), %ebx
   movl  -4(%ebp), %esi
   movl  %ebp, %esp
   popl  %ebp
   ret


-- 
           Summary: missed optimization: un-needed copy of structure.
           Product: gcc
           Version: 4.1.2
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: rtl-optimization
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: linasvepstas at gmail dot com
 GCC build triplet: powerpc64-unknown-linux-gnu
  GCC host triplet: powerpc64-unknown-linux-gnu
GCC target triplet: powerpc64-unknown-linux-gnu


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39081

Reply via email to