================
@@ -478,146 +890,232 @@ entry:
 }
 
 define i1 @bcmp_eq_zero(ptr %s1, ptr %s2) nounwind {
-; LA32-LABEL: bcmp_eq_zero:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    ori $a2, $zero, 16
-; LA32-NEXT:    bl bcmp
-; LA32-NEXT:    sltui $a0, $a0, 1
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-UAL-LABEL: bcmp_eq_zero:
+; LA32-UAL:       # %bb.0: # %entry
+; LA32-UAL-NEXT:    ld.w $a2, $a0, 0
+; LA32-UAL-NEXT:    ld.w $a3, $a1, 0
+; LA32-UAL-NEXT:    ld.w $a4, $a0, 4
+; LA32-UAL-NEXT:    ld.w $a5, $a1, 4
+; LA32-UAL-NEXT:    ld.w $a6, $a0, 8
+; LA32-UAL-NEXT:    ld.w $a7, $a1, 8
+; LA32-UAL-NEXT:    ld.w $a0, $a0, 12
+; LA32-UAL-NEXT:    ld.w $a1, $a1, 12
+; LA32-UAL-NEXT:    xor $a2, $a2, $a3
+; LA32-UAL-NEXT:    xor $a3, $a4, $a5
+; LA32-UAL-NEXT:    xor $a4, $a6, $a7
+; LA32-UAL-NEXT:    xor $a0, $a0, $a1
+; LA32-UAL-NEXT:    or $a1, $a2, $a3
+; LA32-UAL-NEXT:    or $a0, $a4, $a0
+; LA32-UAL-NEXT:    or $a0, $a1, $a0
+; LA32-UAL-NEXT:    sltui $a0, $a0, 1
+; LA32-UAL-NEXT:    ret
 ;
-; LA64-LABEL: bcmp_eq_zero:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    ori $a2, $zero, 16
-; LA64-NEXT:    pcaddu18i $ra, %call36(bcmp)
-; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    sltui $a0, $a0, 1
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-UAL-LABEL: bcmp_eq_zero:
+; LA64-UAL:       # %bb.0: # %entry
+; LA64-UAL-NEXT:    ld.d $a2, $a0, 0
+; LA64-UAL-NEXT:    ld.d $a3, $a1, 0
+; LA64-UAL-NEXT:    ld.d $a0, $a0, 8
+; LA64-UAL-NEXT:    ld.d $a1, $a1, 8
+; LA64-UAL-NEXT:    xor $a2, $a2, $a3
+; LA64-UAL-NEXT:    xor $a0, $a0, $a1
+; LA64-UAL-NEXT:    or $a0, $a2, $a0
+; LA64-UAL-NEXT:    sltui $a0, $a0, 1
+; LA64-UAL-NEXT:    ret
+;
+; LA32-NUAL-LABEL: bcmp_eq_zero:
+; LA32-NUAL:       # %bb.0: # %entry
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT:    ori $a2, $zero, 16
+; LA32-NUAL-NEXT:    bl bcmp
+; LA32-NUAL-NEXT:    sltui $a0, $a0, 1
+; LA32-NUAL-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT:    addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT:    ret
+;
+; LA64-NUAL-LABEL: bcmp_eq_zero:
+; LA64-NUAL:       # %bb.0: # %entry
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT:    ori $a2, $zero, 16
+; LA64-NUAL-NEXT:    pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT:    jirl $ra, $ra, 0
+; LA64-NUAL-NEXT:    sltui $a0, $a0, 1
+; LA64-NUAL-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT:    addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT:    ret
 entry:
   %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 16)
   %ret = icmp eq i32 %bcmp, 0
   ret i1 %ret
 }
 
 define i1 @bcmp_lt_zero(ptr %s1, ptr %s2) nounwind {
-; LA32-LABEL: bcmp_lt_zero:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    ori $a2, $zero, 4
-; LA32-NEXT:    bl bcmp
-; LA32-NEXT:    srli.w $a0, $a0, 31
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-UAL-LABEL: bcmp_lt_zero:
+; LA32-UAL:       # %bb.0: # %entry
+; LA32-UAL-NEXT:    move $a0, $zero
+; LA32-UAL-NEXT:    ret
 ;
-; LA64-LABEL: bcmp_lt_zero:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    ori $a2, $zero, 4
-; LA64-NEXT:    pcaddu18i $ra, %call36(bcmp)
-; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    slti $a0, $a0, 0
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-UAL-LABEL: bcmp_lt_zero:
+; LA64-UAL:       # %bb.0: # %entry
+; LA64-UAL-NEXT:    move $a0, $zero
----------------
zhaoqi5 wrote:

After the `expand-memcmp` pass, the original IR is expanded and optimized to:

```
  %0 = load i32, ptr %s1, align 1
  %1 = load i32, ptr %s2, align 1
  %2 = icmp ne i32 %0, %1
  %3 = zext i1 %2 to i32
  ret i1 false
```

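So the result is always `false`: the expanded value `%3` is a zero-extended
`i1` (either 0 or 1), so the "less than zero" comparison can never be true.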

I also noticed that the test `bcmp_ge_zero` always returns `true`, so it seems
to be always assumed that bcmp never returns a negative result? I am not sure
whether this is an assumption made by LLVM or a bug in this pass. I tried the
program below, built with `-O0` (which actually calls bcmp):

```
#include <stdio.h>
#include <string.h>
int main () {
  char *s0 = "0000000";
  char *s1 = "1111111";
  printf("= : %d\n", bcmp(s1, s1, 7));
  printf("> : %d\n", bcmp(s1, s0, 7));
  printf("< : %d\n", bcmp(s0, s1, 7));
  return 0;
}
```

The result is:

```
= : 0
> : 1
< : -1
```

https://github.com/llvm/llvm-project/pull/166526
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to