https://bugs.llvm.org/show_bug.cgi?id=34563

            Bug ID: 34563
           Summary: [x86] codegen for fcmp oeq is inconsistent
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: All
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Backend: X86
          Assignee: unassignedb...@nondot.org
          Reporter: spatel+l...@rotateright.com
                CC: llvm-bugs@lists.llvm.org

bool fcmp_oeq(double f1, double f2) {
  return f1 == f2;
}

bool fcmp_oeq_twice(double f1, double f2, double f3, double f4) {
  return f1 == f2 && f3 == f4;
}


Or as IR:

define i1 @fcmp_oeq(double %f1, double %f2) {
  %cmp = fcmp oeq double %f1, %f2
  ret i1 %cmp
}

define i1 @fcmp_oeq_twice(double %f1, double %f2, double %f3, double %f4) {
  %cmp1 = fcmp oeq double %f1, %f2
  %cmp2 = fcmp oeq double %f3, %f4
  %and = and i1 %cmp1, %cmp2
  ret i1 %and
}

----------------------------------------------------------------------------

$ ./llc -o - -mtriple=x86_64-unknown-unknown fcmps.ll

fcmp_oeq(double, double):                          # @fcmp_oeq(double, double)
        cmpeqsd %xmm1, %xmm0
        movq    %xmm0, %rax
        andl    $1, %eax
        retq
fcmp_oeq_twice(double, double, double, double):                 # @fcmp_oeq_twice(double, double, double, double)
        ucomisd %xmm1, %xmm0
        setnp   %al
        sete    %cl
        andb    %al, %cl
        ucomisd %xmm3, %xmm2
        setnp   %dl
        sete    %al
        andb    %dl, %al
        andb    %cl, %al
        retq

-----------------------------------------------------------------------------

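x86 doesn't have a single 'setcc' for oeq (?!): 'ucomisd' sets ZF for both
"equal" and "unordered" results and only distinguishes the unordered case via
PF. So if we're using 'ucomisd', we have to do an and-of-setcc (sete + setnp)
to generate that predicate. If we use 'cmpeqsd' as in the first example, we
incur a vector-to-scalar register move, and it's not clear that is as fast.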

The inconsistency here should be investigated. But it's also possible that
we're doing the wrong thing for both cases. In the 2nd example if we use
'cmpeqsd', then we could reduce the instruction count with something like:

        cmpeqsd %xmm1, %xmm0
        cmpeqsd %xmm3, %xmm2
        andps   %xmm2, %xmm0
        movd    %xmm0, %eax
        andl    $1, %eax

-- 
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to