https://bugs.llvm.org/show_bug.cgi?id=41248
Bug ID: 41248
Summary: Use paddusb for saturated add/sub
Product: libraries
Version: trunk
Hardware: PC
OS: Linux
Status: NEW
Severity: enhancement
Priority: P
Component: Backend: X86
Assignee: [email protected]
Reporter: [email protected]
CC: [email protected], [email protected],
[email protected], [email protected]
Code from other PR:
/* Adds HI to each of the 16 bytes starting at ptr, clamping every result to 255.
   HI is not defined in this snippet; presumably an integer constant from the
   other PR (TODO confirm). */
void add_clamp_2(unsigned char *ptr) {
for (unsigned i = 0; i != 16; ++i) {
unsigned char s = *ptr;
/* s is promoted before the add, so the > 255 test sees the un-truncated sum
   (assuming HI is an integer constant). */
*ptr++ = (s + HI) > 255 ? 255 : (s + HI);
}
}
/* Adds HI to each of the 16 bytes starting at ptr, clamping every result to 255.
   HI is not defined in this snippet; presumably an integer constant from the
   other PR (TODO confirm). */
void add_clamp_3(unsigned char *ptr) {
for (unsigned i = 0; i != 16; ++i) {
unsigned char s = *ptr;
/* The >= 255 test differs from a > 255 test only when the sum is exactly 255,
   and in that case both arms of the conditional yield 255. */
*ptr++ = (s + HI) >= 255 ? 255 : (s + HI);
}
}
# add_clamp_2 as quoted in the report (presumably LLVM trunk output, per the
# report's Version field): each byte is clamped with pminub, then a plain
# wrapping paddb adds a constant. The contents of the constant-pool vectors
# .LCPI2_0 and .LCPI2_1 are not shown in this report.
add_clamp_2(unsigned char*): # @add_clamp_2(unsigned char*)
movdqu xmm0, xmmword ptr [rdi] # load the 16 bytes at [rdi] (unaligned)
pminub xmm0, xmmword ptr [rip + .LCPI2_0] # per-byte unsigned min against the first constant
paddb xmm0, xmmword ptr [rip + .LCPI2_1] # per-byte wrapping add of the second constant
movdqu xmmword ptr [rdi], xmm0 # store the 16 bytes back in place
ret
# add_clamp_3 as quoted in the report (presumably LLVM trunk output, per the
# report's Version field): each byte is clamped with pminub, then a plain
# wrapping paddb adds a constant. The contents of the constant-pool vectors
# .LCPI3_0 and .LCPI3_1 are not shown in this report.
add_clamp_3(unsigned char*): # @add_clamp_3(unsigned char*)
movdqu xmm0, xmmword ptr [rdi] # load the 16 bytes at [rdi] (unaligned)
pminub xmm0, xmmword ptr [rip + .LCPI3_0] # per-byte unsigned min against the first constant
paddb xmm0, xmmword ptr [rip + .LCPI3_1] # per-byte wrapping add of the second constant
movdqu xmmword ptr [rdi], xmm0 # store the 16 bytes back in place
ret
ICC 19 uses paddusb here:
# ICC 19 listing for add_clamp_2: load 16 bytes from [rdi], apply one
# unsigned-saturating byte add (paddusb) against a constant vector, store back.
# The contents of .L_2il0floatpacket.1 are not shown in this report.
add_clamp_2(unsigned char*):
movdqu xmm0, XMMWORD PTR [rdi] #22.29
paddusb xmm0, XMMWORD PTR .L_2il0floatpacket.1[rip] #22.29
movdqu XMMWORD PTR [rdi], xmm0 #22.10
ret #24.1
# ICC 19 listing for add_clamp_3: load 16 bytes from [rdi], apply one
# unsigned-saturating byte add (paddusb) against a constant vector, store back.
# It references the same constant label, .L_2il0floatpacket.1, as the ICC
# listing for add_clamp_2; the constant's contents are not shown in this report.
add_clamp_3(unsigned char*):
movdqu xmm0, XMMWORD PTR [rdi] #29.30
paddusb xmm0, XMMWORD PTR .L_2il0floatpacket.1[rip] #29.30
movdqu XMMWORD PTR [rdi], xmm0 #29.10
ret
Could LLVM use paddusb here as well?
--
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs