https://bugs.llvm.org/show_bug.cgi?id=41248

            Bug ID: 41248
           Summary: Use paddusb for saturated add/sub
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Linux
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Backend: X86
          Assignee: [email protected]
          Reporter: [email protected]
                CC: [email protected], [email protected],
                    [email protected], [email protected]

Code from other PR:

/*
 * Reproducer: adds HI to each of the 16 bytes starting at ptr and stores
 * the result back in place, writing 255 whenever the sum exceeds 255.
 *
 * NOTE(review): HI is not defined in this snippet; it is presumably a
 * non-negative integer constant from the earlier report -- confirm.
 */
void add_clamp_2(unsigned char *ptr) {
    /* Fixed trip count of 16 bytes. */
    for (unsigned i = 0; i != 16; ++i) {
        unsigned char s = *ptr;
        /* Clamp with a strict '>' compare against 255, then advance ptr. */
        *ptr++ = (s + HI) > 255 ? 255 : (s + HI);
    }
}

/*
 * Reproducer variant of add_clamp_2: same 16-byte in-place clamped add,
 * but the comparison is '>=' instead of '>'. The stored value is the same
 * either way, because a sum of exactly 255 yields 255 on both branches.
 *
 * NOTE(review): HI is not defined in this snippet; it is presumably a
 * non-negative integer constant from the earlier report -- confirm.
 */
void add_clamp_3(unsigned char *ptr) {
    /* Fixed trip count of 16 bytes. */
    for (unsigned i = 0; i != 16; ++i) {
        unsigned char s = *ptr;
        /* Clamp with a non-strict '>=' compare against 255, then advance ptr. */
        *ptr++ = (s + HI) >= 255 ? 255 : (s + HI);
    }
}

add_clamp_2(unsigned char*):                      # @add_clamp_2(unsigned char*)
        movdqu  xmm0, xmmword ptr [rdi]
        pminub  xmm0, xmmword ptr [rip + .LCPI2_0]
        paddb   xmm0, xmmword ptr [rip + .LCPI2_1]
        movdqu  xmmword ptr [rdi], xmm0
        ret

add_clamp_3(unsigned char*):                      # @add_clamp_3(unsigned char*)
        movdqu  xmm0, xmmword ptr [rdi]
        pminub  xmm0, xmmword ptr [rip + .LCPI3_0]
        paddb   xmm0, xmmword ptr [rip + .LCPI3_1]
        movdqu  xmmword ptr [rdi], xmm0
        ret

ICC 19 uses paddusb here
add_clamp_2(unsigned char*):
        movdqu    xmm0, XMMWORD PTR [rdi]                       #22.29
        paddusb   xmm0, XMMWORD PTR .L_2il0floatpacket.1[rip]   #22.29
        movdqu    XMMWORD PTR [rdi], xmm0                       #22.10
        ret                                                     #24.1
add_clamp_3(unsigned char*):
        movdqu    xmm0, XMMWORD PTR [rdi]                       #29.30
        paddusb   xmm0, XMMWORD PTR .L_2il0floatpacket.1[rip]   #29.30
        movdqu    XMMWORD PTR [rdi], xmm0                       #29.10
        ret   

Maybe LLVM could use paddusb here as well?

-- 
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to