Issue 89533
Summary Inverted movemasks result in redundant logic
Labels new issue
Assignees
Reporter Validark
    I wrote this `tokenize` function: (https://zig.godbolt.org/z/oYosTb1zK)

```zig
/// Scans forward from `source` and returns the span covering the leading run
/// of '_' bytes: `start` is `source` itself, `end` points at the first byte
/// that is not '_'.
///
/// NOTE(review): `source` is a many-item pointer with no length, and each
/// iteration loads a full @sizeOf(V)-byte chunk, so the caller must guarantee
/// that memory is readable through the chunk containing the first non-'_' byte.
export fn tokenize(source: [*]const u8) extern struct { start: [*]const u8, end: [*]const u8 } {
    var cur = source[0..];
    const start = cur;

    while (true) {
        // One u8 lane per bit of usize, so a lane-wise comparison result
        // bit-casts exactly into a usize bitmask.
        const V = @Vector(@bitSizeOf(usize), u8);
        // Load the next @sizeOf(V) bytes at the cursor as a vector.
        const vec: V = cur[0..@sizeOf(V)].*;

        // Bit i is set when byte i of the chunk is NOT '_'
        // (equality mask, bit-cast to usize, then inverted).
       const identifier_bitstring = ~(@as(usize, @bitCast(vec == @as(V, @splat('_')))));

        // Advance to the first non-'_' byte. When the mask is 0, @ctz yields
        // the bit width of usize, which skips the whole chunk.
        cur = cur[@ctz(identifier_bitstring)..];
        // A non-zero mask means a non-'_' byte was found in this chunk.
        if (identifier_bitstring != 0) break;
    }

    // our token span is start..end
    const end = cur;
    return .{ .start = start, .end = end };
}
```

Next I made the following change:
```diff
-       const identifier_bitstring = ~(@as(usize, @bitCast(vec == @as(V, @splat('_')))));
+       const identifier_bitstring =  (@as(usize, @bitCast(vec != @as(V, @splat('_')))));
```

Unfortunately, this results in different emitted code, even though the two expressions are logically equivalent.

First version (Zen 4): 

```asm
; tokenize1, Zen 4 (AVX-512): compiler output for the `~(vec == '_')` form.
; In:  rdi = source pointer.
; Out: rax = start (the unmodified source pointer), rdx = end (cursor at the
;      first non-'_' byte).
; Each iteration examines one 64-byte chunk.
.LCPI0_1:
 ; 95 is the ASCII code of '_'
 .byte   95
tokenize1:
        ; zmm0 = '_' broadcast to all 64 byte lanes
        vpbroadcastb    zmm0, byte ptr [rip + .LCPI0_1]
        ; rax = start; never modified afterwards
        mov     rax, rdi
        ; rdx = cursor
        mov     rdx, rdi
.LBB0_1:
        vmovdqu64       zmm1, zmmword ptr [rdx]
 ; rcx = base address of the current chunk
 mov     rcx, rdx
        ; k1 = lanes that differ from '_' (feeds tzcnt below)
        vpcmpneqb       k1, zmm1, zmm0
 ; k0 = lanes equal to '_' (the complement of k1; feeds the loop test)
 vpcmpeqb        k0, zmm1, zmm0 ; do the same work, but this time not inverted, so we can use jb rather than je?
        kmovq   rdx, k1
 ; index of the first non-'_' lane; tzcnt yields 64 when the mask is zero
 tzcnt   rdx, rdx
        ; cursor = chunk base + index
        add     rdx, rcx
        ; kortestq sets CF when k0 is all ones, i.e. every lane was '_'
        kortestq k0, k0
        ; CF set: the whole chunk was '_', keep scanning
        jb      .LBB0_1
        vzeroupper
 ret
```

Second version (Zen 4):

```asm
; tokenize2, Zen 4 (AVX-512): compiler output for the `vec != '_'` form.
; In:  rdi = source pointer.
; Out: rax = start (the unmodified source pointer), rdx = end (cursor at the
;      first non-'_' byte).
; Each iteration examines one 64-byte chunk with a single compare.
.LCPI1_1:
        .byte   95                              ; ASCII '_'
tokenize2:
        vpbroadcastb    zmm0, byte ptr [rip + .LCPI1_1] ; zmm0 = '_' in all 64 lanes
        mov     rax, rdi                        ; rax = start; never modified afterwards
        mov     rdx, rdi                        ; rdx = cursor
.LBB1_1:
        vpcmpneqb       k0, zmm0, zmmword ptr [rdx] ; k0 = lanes that differ from '_'
        mov     rcx, rdx                        ; rcx = base address of the current chunk
        kmovq   rdx, k0
        tzcnt   rdx, rdx                        ; first non-'_' lane; 64 when the mask is zero
        add     rdx, rcx                        ; cursor = chunk base + index
        kortestq        k0, k0                  ; ZF set when k0 is zero, i.e. every lane was '_'
        je      .LBB1_1                         ; whole chunk was '_', keep scanning
        vzeroupper
        ret
```

First version (Zen 3):

```asm
; tokenize1, Zen 3 (AVX2): compiler output for the `~(vec == '_')` form.
; In:  rdi = source pointer.
; Out: rax = start (the unmodified source pointer), rdx = end (cursor at the
;      first non-'_' byte).
; Each iteration examines one 64-byte chunk as two 32-byte halves.
.LCPI0_1:
        ; 95 is the ASCII code of '_'
        .byte 95
tokenize1:
        ; ymm0 = '_' broadcast to all 32 byte lanes
        vpbroadcastb    ymm0, byte ptr [rip + .LCPI0_1]
        ; rax = start; never modified afterwards
        mov     rax, rdi
        ; rdx = cursor
        mov     rdx, rdi
.LBB0_1:
        ; rcx = base address of the current chunk
        mov     rcx, rdx
        ; ymm2 / ymm1 = equality masks for bytes 32..63 / 0..31 of the chunk
        vpcmpeqb        ymm2, ymm0, ymmword ptr [rcx + 32]
        vpcmpeqb        ymm1, ymm0, ymmword ptr [rdx]
        ; esi / edx = one bit per byte lane of the high / low half
        vpmovmskb       esi, ymm2
        vpmovmskb edx, ymm1
        ; rsi = combined 64-bit "equals '_'" mask (high half in bits 32..63)
        shl     rsi, 32
        or      rsi, rdx
 mov     rdx, rsi ; preserve non-inverted rsi so we can cmp against -1 later??
        ; rdx = "differs from '_'" mask
        not     rdx
        ; index of the first non-'_' byte; tzcnt yields 64 when the mask is zero
        tzcnt   rdx, rdx
        ; cursor = chunk base + index
        add rdx, rcx
        ; equality mask all ones: every byte in the chunk was '_'
        cmp     rsi, -1
        je      .LBB0_1
 vzeroupper
        ret
```

Second version (Zen 3):

```asm
; tokenize2, Zen 3 (AVX2): compiler output for the `vec != '_'` form.
; In:  rdi = source pointer.
; Out: rax = start (the unmodified source pointer), rdx = end (cursor at the
;      first non-'_' byte).
; Each iteration examines one 64-byte chunk as two 32-byte halves.
.LCPI1_1:
        .byte   95                              ; ASCII '_'
tokenize2:
        vpbroadcastb    ymm0, byte ptr [rip + .LCPI1_1] ; ymm0 = '_' in all 32 lanes
        mov     rax, rdi                        ; rax = start; never modified afterwards
        mov     rdx, rdi                        ; rdx = cursor
.LBB1_1:
        mov     rcx, rdx                        ; rcx = base address of the current chunk
        vpcmpeqb        ymm2, ymm0, ymmword ptr [rcx + 32] ; equality mask, bytes 32..63
        vpcmpeqb        ymm1, ymm0, ymmword ptr [rdx]      ; equality mask, bytes 0..31
        vpmovmskb       esi, ymm2               ; one bit per lane, high half
        vpmovmskb       edx, ymm1               ; one bit per lane, low half
        not     esi
        not     edx ; do 2 not's before combining these bitstrings instead of just doing 1??
        shl     rsi, 32
        or      rsi, rdx                        ; rsi = 64-bit "differs from '_'" mask
        tzcnt   rdx, rsi                        ; first non-'_' byte; 64 when the mask is zero
        add     rdx, rcx                        ; cursor = chunk base + index
        test    rsi, rsi ; use inverted value instead of preserving the non-inverted value and doing cmp -1??
        je      .LBB1_1                         ; mask zero: whole chunk was '_', keep scanning
        vzeroupper
        ret
```

https://zig.godbolt.org/z/oYosTb1zK
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to