================
@@ -75,25 +75,29 @@
TEST_CONSTEXPR(match_m128(_mm_blendv_ps((__m128)(__v4sf){0.0f, 1.0f, 2.0f, 3.0f}
__m128d test_mm_ceil_pd(__m128d x) {
// CHECK-LABEL: test_mm_ceil_pd
- // CHECK: call {{.*}}<2 x double> @llvm.x86.sse41.round.pd(<2 x double>
%{{.*}}, i32 2)
+ // CHECK %{{.*}} = call <2 x double> @llvm.ceil.v2f64(<2 x double> %{{.*}})
return _mm_ceil_pd(x);
}
__m128 test_mm_ceil_ps(__m128 x) {
// CHECK-LABEL: test_mm_ceil_ps
- // CHECK: call {{.*}}<4 x float> @llvm.x86.sse41.round.ps(<4 x float>
%{{.*}}, i32 2)
+ // CHECK: %{{.*}} = call <4 x float> @llvm.ceil.v4f32(<4 x float> %{{.*}})
return _mm_ceil_ps(x);
}
__m128d test_mm_ceil_sd(__m128d x, __m128d y) {
// CHECK-LABEL: test_mm_ceil_sd
- // CHECK: call {{.*}}<2 x double> @llvm.x86.sse41.round.sd(<2 x double>
%{{.*}}, <2 x double> %{{.*}}, i32 2)
+ // CHECK: %[[A:.*]] = extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: %[[B:.*]] = call double @llvm.ceil.f64(double %[[A:.*]])
+ // CHECK: %{{.*}} = insertelement <2 x double> %0, double %[[B:.*]], i32 0
----------------
stomfaig wrote:
You're right, we do not get the same instruction. We get:
```asm
roundss xmm1, xmm1, 10
blendps xmm0, xmm1, 1
```
I can either try to implement a pattern that folds this into a single `roundss/p`
instruction, or lower directly to X86 here. Which one do you think is better?
https://github.com/llvm/llvm-project/pull/171227
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits