================
@@ -478,146 +890,232 @@ entry:
}
define i1 @bcmp_eq_zero(ptr %s1, ptr %s2) nounwind {
-; LA32-LABEL: bcmp_eq_zero:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -16
-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT: ori $a2, $zero, 16
-; LA32-NEXT: bl bcmp
-; LA32-NEXT: sltui $a0, $a0, 1
-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 16
-; LA32-NEXT: ret
+; LA32-UAL-LABEL: bcmp_eq_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.w $a4, $a0, 4
+; LA32-UAL-NEXT: ld.w $a5, $a1, 4
+; LA32-UAL-NEXT: ld.w $a6, $a0, 8
+; LA32-UAL-NEXT: ld.w $a7, $a1, 8
+; LA32-UAL-NEXT: ld.w $a0, $a0, 12
+; LA32-UAL-NEXT: ld.w $a1, $a1, 12
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a3, $a4, $a5
+; LA32-UAL-NEXT: xor $a4, $a6, $a7
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a1, $a2, $a3
+; LA32-UAL-NEXT: or $a0, $a4, $a0
+; LA32-UAL-NEXT: or $a0, $a1, $a0
+; LA32-UAL-NEXT: sltui $a0, $a0, 1
+; LA32-UAL-NEXT: ret
;
-; LA64-LABEL: bcmp_eq_zero:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: addi.d $sp, $sp, -16
-; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT: ori $a2, $zero, 16
-; LA64-NEXT: pcaddu18i $ra, %call36(bcmp)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: sltui $a0, $a0, 1
-; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT: addi.d $sp, $sp, 16
-; LA64-NEXT: ret
+; LA64-UAL-LABEL: bcmp_eq_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: ld.d $a0, $a0, 8
+; LA64-UAL-NEXT: ld.d $a1, $a1, 8
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltui $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_eq_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 16
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: sltui $a0, $a0, 1
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_eq_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 16
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: sltui $a0, $a0, 1
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
entry:
%bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 16)
%ret = icmp eq i32 %bcmp, 0
ret i1 %ret
}
define i1 @bcmp_lt_zero(ptr %s1, ptr %s2) nounwind {
-; LA32-LABEL: bcmp_lt_zero:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -16
-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT: ori $a2, $zero, 4
-; LA32-NEXT: bl bcmp
-; LA32-NEXT: srli.w $a0, $a0, 31
-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 16
-; LA32-NEXT: ret
+; LA32-UAL-LABEL: bcmp_lt_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
;
-; LA64-LABEL: bcmp_lt_zero:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: addi.d $sp, $sp, -16
-; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT: ori $a2, $zero, 4
-; LA64-NEXT: pcaddu18i $ra, %call36(bcmp)
-; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: slti $a0, $a0, 0
-; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT: addi.d $sp, $sp, 16
-; LA64-NEXT: ret
+; LA64-UAL-LABEL: bcmp_lt_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: move $a0, $zero
----------------
zhaoqi5 wrote:
After the `expand-memcmp` pass, the original IR is expanded and optimized to:
```
%0 = load i32, ptr %s1, align 1
%1 = load i32, ptr %s2, align 1
%2 = icmp ne i32 %0, %1
%3 = zext i1 %2 to i32
ret i1 false
```
So the result is always `false`.
I also noticed that the test `bcmp_ge_zero` always returns `true`, so it seems
to always be assumed that bcmp never returns a negative result? I am not sure
whether this is an assumption made by LLVM or a mis-processing by this pass. I
tried the program below using `-O0` (which will actually call bcmp):
```
#include <stdio.h>
#include <string.h>
int main () {
char *s0 = "0000000";
char *s1 = "1111111";
printf("= : %d\n", bcmp(s1, s1, 7));
printf("> : %d\n", bcmp(s1, s0, 7));
printf("< : %d\n", bcmp(s0, s1, 7));
return 0;
}
```
The result is:
```
= : 0
> : 1
< : -1
```
https://github.com/llvm/llvm-project/pull/166526
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits