https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88473
Bug ID: 88473
Summary: AVX512: constant folding on mask does not remove
unnecessary instructions
Product: gcc
Version: 8.2.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: [email protected]
Target Milestone: ---
[code]
#include <immintrin.h>
/*
 * Reproducer for the missed optimization: loads a 128-bit vector from data,
 * builds an 8-bit lane mask, and stores the zero-masked 16-bit sum v + v
 * to data2.
 *
 * The two constant ORs below together set every mask bit
 * (0x0f | 0xf0 == 0xff), so the final mask does not depend on the result of
 * the test intrinsic and the masked add is equivalent to an unmasked one.
 * The mask computation is therefore dead once the add has been simplified.
 */
void test(void* data, void* data2)
{
__m128i v = _mm_load_si128((__m128i const*)data);
/* Initial mask from the vector itself; its value is made irrelevant below. */
__mmask8 m = _mm_testn_epi16_mask(v, v);
/* Force the low four mask bits on. */
m = _kor_mask8(m, 0x0f);
/* Force the high four mask bits on; m is now 0xff for any input. */
m = _kor_mask8(m, 0xf0);
/* With an all-ones mask, no lane is zeroed: this is a plain v + v. */
v = _mm_maskz_add_epi16(m, v, v);
_mm_store_si128((__m128i*)data2, v);
}
[/code]
Code compiled using gcc 8.2 with -O3 -march=skylake-avx512. gcc was able to
fold the constant expressions and simplify the masked vector add to a non-masked
one. However, the original, now unused mask computation (the vptestnmw, kmovb
and korb instructions) is still present in the output:
[asm]
test(void*, void*):
vmovdqa64 xmm0, XMMWORD PTR [rdi]
mov eax, 15
vptestnmw k1, xmm0, xmm0
kmovb k2, eax
vpaddw xmm0, xmm0, xmm0
mov eax, -16
kmovb k3, eax
vmovaps XMMWORD PTR [rsi], xmm0
korb k0, k1, k2
korb k0, k0, k3
ret
[/asm]
clang properly cleaned it up:
[asm]
test(void*, void*): # @test(void*, void*)
vmovdqa xmm0, xmmword ptr [rdi]
vpaddw xmm0, xmm0, xmm0
vmovdqa xmmword ptr [rsi], xmm0
ret
[/asm]