Hi t.p.northover,
Hi Tim and other reviewers,
In the front end CGBuiltin.cpp, we generate incorrect code for scalar right
shift uint64_t by 64, which is incorrectly transferred into "LSR X0, #63".
So when we try to right shift 0xf000000000000000 by 64, the result is 0x1, but
it should be 0x0.
As according to LLVM reference, the right shift amount of i64 should be [0,
63], we can't generate such IR like "lshr i64 %tmp, 64". To fix this problem,
this patch just return 0 result when the shift amount is 64.
Ask for code review.
Thanks,
-Hao
http://reviews.llvm.org/D3755
Files:
lib/CodeGen/CGBuiltin.cpp
test/CodeGen/aarch64-neon-intrinsics.c
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -5580,38 +5580,41 @@
case NEON::BI__builtin_neon_vshld_n_u64: {
llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
return Builder.CreateShl(
- Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
- Amt->getZExtValue())),
- "vshr_n");
+ Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
}
case NEON::BI__builtin_neon_vshrd_n_s64: {
llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
return Builder.CreateAShr(
Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
Amt->getZExtValue())),
- "vshr_n");
+ "shrd_n");
}
case NEON::BI__builtin_neon_vshrd_n_u64: {
llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
- return Builder.CreateLShr(
- Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
- Amt->getZExtValue())),
- "vshr_n");
+ uint64_t ShiftAmt = Amt->getZExtValue();
+ // Right-shifting an unsigned value by its size yields 0.
+ if (ShiftAmt == 64)
+ return ConstantInt::get(Int64Ty, 0);
+ return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
+ "shrd_n");
}
case NEON::BI__builtin_neon_vsrad_n_s64: {
llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
Ops[1] = Builder.CreateAShr(
Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
Amt->getZExtValue())),
- "vshr_n");
+ "shrd_n");
return Builder.CreateAdd(Ops[0], Ops[1]);
}
case NEON::BI__builtin_neon_vsrad_n_u64: {
llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
- Ops[1] = Builder.CreateLShr(
- Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
- Amt->getZExtValue())),
- "vshr_n");
+ uint64_t ShiftAmt = Amt->getZExtValue();
+ // Right-shifting an unsigned value by its size yields 0.
+ // As Op + 0 = Op, return Ops[0] directly.
+ if (ShiftAmt == 64)
+ return Ops[0];
+ Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
+ "shrd_n");
return Builder.CreateAdd(Ops[0], Ops[1]);
}
case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
Index: test/CodeGen/aarch64-neon-intrinsics.c
===================================================================
--- test/CodeGen/aarch64-neon-intrinsics.c
+++ test/CodeGen/aarch64-neon-intrinsics.c
@@ -8572,11 +8572,24 @@
}
uint64_t test_vshrd_n_u64(uint64_t a) {
-// CHECK-LABEL: test_vshrd_n_u64
-// CHECK: {{ushr d[0-9]+, d[0-9]+, #64|lsr x0, x0, #63}}
+// CHECK-AARCH64-LABEL: test_vshrd_n_u64
+// CHECK-AARCH64: {{ushr d[0-9]+, d[0-9]+, #64}}
+
+// CHECK-ARM64-LABEL: test_vshrd_n_u64
+// CHECK-ARM64: mov x0, xzr
return (uint64_t)vshrd_n_u64(a, 64);
}
+uint64_t test_vshrd_n_u64_2() {
+// CHECK-AARCH64-LABEL: test_vshrd_n_u64_2
+// CHECK-AARCH64: {{ushr d[0-9]+, d[0-9]+, #64}}
+
+// CHECK-ARM64-LABEL: test_vshrd_n_u64_2
+// CHECK-ARM64: mov x0, xzr
+ uint64_t a = UINT64_C(0xf000000000000000);
+ return vshrd_n_u64(a, 64);
+}
+
uint64x1_t test_vshr_n_u64(uint64x1_t a) {
// CHECK-LABEL: test_vshr_n_u64
// CHECK: ushr {{d[0-9]+}}, {{d[0-9]+}}, #1
@@ -8625,6 +8638,15 @@
return (uint64_t)vsrad_n_u64(a, b, 63);
}
+uint64_t test_vsrad_n_u64_2(uint64_t a, uint64_t b) {
+// CHECK-AArch64-LABEL: test_vsrad_n_u64_2
+// CHECK-AARCH64: {{usra d[0-9]+, d[0-9]+, #64}}
+
+// CHECK-ARM64-LABEL: test_vsrad_n_u64_2
+// CHECK-ARM64-NOT: add
+ return (uint64_t)vsrad_n_u64(a, b, 64);
+}
+
uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) {
// CHECK-LABEL: test_vsra_n_u64
// CHECK: usra d{{[0-9]+}}, d{{[0-9]+}}, #1
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits