------- Comment #2 from tbptbp at gmail dot com  2006-08-11 06:07 -------
Subject: Re:  missed optimization, redundant scalar SSE comparisons

On 11 Aug 2006 05:52:26 -0000, pinskia at gcc dot gnu dot org
<[EMAIL PROTECTED]> wrote:
> Using unsigned char and a temp variable removes the problem of zero extending
> the grabbing of the flags.
Sure, the code looks better with uchars sprinkled in - modulo partial
register stalls - but gcc decidedly doesn't want to fuse those comparisons ;)

00418ed0 <kdlib::lbox_t::set_category(unsigned int, float, kdlib::sideness_t)>:
  418ed0:       sub    $0x10,%esp
  418ed3:       mov    %esi,0x4(%esp)
  418ed7:       mov    0x14(%esp),%esi
  418edb:       mov    %edi,0x8(%esp)
  418edf:       mov    0x18(%esp),%edi
  418ee3:       mov    %ebx,(%esp)
  418ee6:       mov    %ebp,0xc(%esp)
  418eea:       movzbl 0x34(%esi),%edx
  418eee:       movss  0x1c(%esp),%xmm1
  418ef4:       shl    $0x4,%edi
  418ef7:       movzbl 0x18(%esp),%ecx
  418efc:       movss  (%edi,%esi,1),%xmm0
  418f01:       comiss %xmm0,%xmm1
  418f04:       seta   %al
  418f07:       comiss %xmm1,%xmm0
  418f0a:       mov    %eax,%ebp
  418f0c:       mov    %edx,%eax
  418f0e:       sete   %bl
  418f11:       shr    $0x3,%al
  418f14:       and    $0x7,%eax
  418f17:       sar    %cl,%eax
  418f19:       mov    0x20(%esp),%ecx
  418f1d:       and    %eax,%ebx
  418f1f:       comiss 0x8(%edi,%esi,1),%xmm1
  418f24:       setb   %al
  418f27:       and    $0xfffffff8,%edx
  418f2a:       and    %ebp,%eax
  418f2c:       and    $0x1,%ebx
  418f2f:       cmove  %ebp,%ecx
  418f32:       or     %eax,%edx
  418f34:       and    $0x3,%ecx
  418f37:       add    %ecx,%ecx
  418f39:       or     %ecx,%edx
  418f3b:       mov    %dl,0x34(%esi)
  418f3e:       mov    (%esp),%ebx
  418f41:       mov    0x4(%esp),%esi
  418f45:       mov    0x8(%esp),%edi
  418f49:       mov    0xc(%esp),%ebp
  418f4d:       add    $0x10,%esp
  418f50:       ret


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=28691

Reply via email to