pcc wrote:

I'm fixing the code generation for the test cases that I'm adding 
(inhibit-zext-constant-hoist.ll), which were all extracted from a build of a 
large internal program built with CFI. Previously, f1 looked like this, with 
the zext of the align constant hoisted into the entry block:
```
f1:                                     # @f1
        .cfi_startproc
# %bb.0:
        movl    $__typeid__ZTS1S_align, %eax
        movzbl  %al, %ecx
        movb    $64, %al
        subb    %cl, %al
        movzbl  %al, %eax
        testl   %edi, %edi
        je      .LBB0_3
# %bb.1:
        movq    (%rsi), %r8
        movl    $__typeid__ZTS1S_global_addr, %edx
        movq    %r8, %rdi
        subq    %rdx, %rdi
        movq    %rdi, %rdx
                                        # kill: def $cl killed $cl killed $rcx
        shrq    %cl, %rdx
        movl    %eax, %ecx
        shlq    %cl, %rdi
        orq     %rdx, %rdi
        cmpq    $__typeid__ZTS1S_size_m1@ABS8, %rdi
        jbe     .LBB0_4
.LBB0_2:
        ud1l    2(%eax), %eax
.LBB0_3:
        movq    (%rdx), %r8
        movl    $__typeid__ZTS1S_global_addr, %esi
        movq    %r8, %rdi
        subq    %rsi, %rdi
        movq    %rdi, %rsi
                                        # kill: def $cl killed $cl killed $rcx
        shrq    %cl, %rsi
        movl    %eax, %ecx
        shlq    %cl, %rdi
        orq     %rsi, %rdi
        cmpq    $__typeid__ZTS1S_size_m1@ABS8, %rdi
        movq    %rdx, %rsi
        ja      .LBB0_2
.LBB0_4:
        movq    %rsi, %rdi
        jmpq    *(%r8)                          # TAILCALL
.Lfunc_end0:
        .size   f1, .Lfunc_end0-f1
        .cfi_endproc
```
Now f1 looks like this:
```
f1:                                     # @f1
        .cfi_startproc
# %bb.0:
        testl   %edi, %edi
        je      .LBB0_3
# %bb.1:
        movq    (%rsi), %rax
        movl    $__typeid__ZTS1S_global_addr, %ecx
        movq    %rax, %rdx
        subq    %rcx, %rdx
        rorq    $__typeid__ZTS1S_align, %rdx
        cmpq    $__typeid__ZTS1S_size_m1@ABS8, %rdx
        jbe     .LBB0_4
.LBB0_2:
        ud1l    2(%eax), %eax
.LBB0_3:
        movq    (%rdx), %rax
        movl    $__typeid__ZTS1S_global_addr, %ecx
        movq    %rax, %rsi
        subq    %rcx, %rsi
        rorq    $__typeid__ZTS1S_align, %rsi
        cmpq    $__typeid__ZTS1S_size_m1@ABS8, %rsi
        movq    %rdx, %rsi
        ja      .LBB0_2
.LBB0_4:
        movq    %rsi, %rdi
        jmpq    *(%rax)                         # TAILCALL
.Lfunc_end0:
        .size   f1, .Lfunc_end0-f1
        .cfi_endproc
```
The other cases looked similar before my change.

The poor codegen was introduced (I believe) by 
https://github.com/llvm/llvm-project/pull/71040, which removed the zext 
ConstantExprs that LowerTypeTests was using to keep everything in the same 
basic block for matching.

I think that, because it's the zext being hoisted and not the shifts, it 
wouldn't make a difference to use fshl/fshr, but I can check.

https://github.com/llvm/llvm-project/pull/141326
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to