https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80124

            Bug ID: 80124
           Summary: Possible bug in _mm_cmpeq_ps
           Product: gcc
           Version: unknown
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c++
          Assignee: unassigned at gcc dot gnu.org
          Reporter: guille at berkeley dot edu
  Target Milestone: ---

The code below seems to trigger a GCC bug, because the following two expressions:

(A) * _mm_cmpeq_ps(r, r)
(B) * _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(r),
_mm_castps_si128(r)))

which should give the same result, do not.
In particular, (B) seems to give the correct result, but (A) does not.

* I tested it on:
"gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/local/libexec/gcc/x86_64-apple-darwin16.4.0/7.0.1/lto-wrapper
Target: x86_64-apple-darwin16.4.0
Configured with: ../configure --without-multilib --enable-languages=c,c++
Thread model: posix
gcc version 7.0.1 20170129 (experimental) (GCC)"

* no flags are necessary (other than maybe '-msse -msse2')


--------------------------------------------------------------------------------
#include <iostream>
#include <cassert>
#include <xmmintrin.h>
#include <emmintrin.h>


#if !defined(__SSE__) || !defined(__SSE2__)
#error "needs SSE+SSE2"
#else

// Dumps the raw bit pattern of `v` to std::cerr, one line per 32-bit lane.
//
// Each line has the form `name[i] = <32 binary digits>;` with the most
// significant bit printed first.
//
//   name  label written in front of every lane index
//   v     vector whose four 32-bit lanes are printed
static void helper_print(const char name[], const __m128 v)
{
    // Copy the bits into an integer array with an (unaligned) SSE2 store
    // instead of reading the __m128 object through a casted `unsigned int *`.
    unsigned int lanes[4];
    _mm_storeu_si128(reinterpret_cast<__m128i *>(lanes), _mm_castps_si128(v));

    for (int i = 0; i < 4; ++i)
    {
        std::cerr << name << "[" << i << "] = ";
        for (int j = 31; j >= 0; --j) std::cerr << ((lanes[i] >> j) & 1u);
        std::cerr << ";\n";
    }
}

static __m128 ones()
{
    const __m128 u = _mm_undefined_ps();
    const __m128 r = _mm_cmpeq_ps(u, u);

    if (_mm_movemask_ps(_mm_cmpeq_ps(r, r)) != 0xf)                        //
in case of error, print debugging info
    {
        helper_print("r", r);
        const __m128 cmp = _mm_cmpeq_ps(r, r);
        helper_print("cmp", cmp);                                          //
<-- this should be all 1s, but is all 0s
        std::cerr << "_mm_movemask_ps(cmp)=" << _mm_movemask_ps(cmp) << "\n";
    }
// 
assert(_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(r),
_mm_castps_si128(r)))) == 0xf);        // <-- this passes
    assert(_mm_movemask_ps(_mm_cmpeq_ps(r, r)) == 0xf);                        
                                        // <-- this breaks!

    return r;
}
#endif

// Program entry point: calls ones() once and exits with status 0.
int main()
{
    // The returned vector is not needed here, so it is discarded explicitly.
    static_cast<void>(ones());
    return 0;
}

Reply via email to