https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120429
Bug ID: 120429
Summary: pcmpeqd isn't used for all 1s in *movv2si_internal
Product: gcc
Version: 16.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: hjl.tools at gmail dot com
CC: liuhongt at gcc dot gnu.org, ubizjak at gmail dot com
Target Milestone: ---

[hjl@gnu-zen4-1 pr117839]$ cat dl-3.c
struct __pthread_mutex_s
{
  int __lock;
  unsigned int __count;
  int __owner;
  unsigned int __nusers;
  int __kind;
  short __spins;
  short __elision;
  void *p[2];
};

typedef union
{
  struct __pthread_mutex_s __data;
  char __size[40];
  long int __align;
} pthread_mutex_t;

typedef struct { pthread_mutex_t mutex; } __rtld_lock_recursive_t;

void
foo (__rtld_lock_recursive_t *lock, int i)
{
  lock[i] = (__rtld_lock_recursive_t) {{ { -1, -1, -1, -1, 1, -1, -1, { ((void *)-1) , ((void *)-1) } } }};
}
[hjl@gnu-zen4-1 pr117839]$ /export/build/gnu/tools-build/gcc-debug/build-x86_64-linux/gcc/xgcc -B/export/build/gnu/tools-build/gcc-debug/build-x86_64-linux/gcc/ -O2 -march=x86-64 -S dl-3.c -m32
[hjl@gnu-zen4-1 pr117839]$ cat dl-3.s
	.file	"dl-3.c"
	.text
	.p2align 4
	.globl	foo
	.type	foo, @function
foo:
.LFB0:
	.cfi_startproc
	movl	8(%esp), %eax
	movl	4(%esp), %edx
	movq	.LC1, %xmm0	<<< pcmpeqd %xmm0, %xmm0 should be used instead
	leal	(%eax,%eax,4), %eax
	leal	(%edx,%eax,8), %eax
	movl	$-1, (%eax)
	movl	$-1, 4(%eax)
	movl	$-1, 8(%eax)
	movl	$-1, 12(%eax)
	movl	$1, 16(%eax)
	movl	$-1, 20(%eax)
	movq	%xmm0, 24(%eax)
	ret
	.cfi_endproc
.LFE0:
	.size	foo, .-foo
	.section	.rodata.cst8,"aM",@progbits,8
	.align 8
.LC1:
	.long	-1
	.long	-1
	.ident	"GCC: (GNU) 16.0.0 20250524 (experimental)"
	.section	.note.GNU-stack,"",@progbits
[hjl@gnu-zen4-1 pr117839]$