| Issue |
76556
|
| Summary |
[clang][ARM] Vectorized code produces a wrong result at -O1
|
| Labels |
clang
|
| Assignees |
|
| Reporter |
CoTinker
|
demo.c
```
#include<stdio.h>
#include<arm_neon.h>
// File-scope types and data shared with main() in this reproducer.
// NOTE(review): <arm_neon.h> is already included above, yet the scalar and
// vector types are declared again here — presumably left over from reducing
// a preprocessed test case; confirm before cleaning up.
typedef unsigned int __uint32_t;
typedef float float32_t;
// Two-lane vector types built with clang's neon_vector_type attribute.
typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t;
typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
// Receives the result of vcvt_u32_f32() in main().
uint32x2_t vector_res_uint32x2;
// Input vector: loaded from buffer_float32x2, then both lanes are overwritten
// in main().
float32x2_t vector_float32x2;
// Destination of the vector store in main(); compared lane by lane against
// expected_uint32x2.
static uint32_t result_uint32x2[2];
// Values main() expects to find in result_uint32x2 after the conversion.
uint32_t expected_uint32x2 [] = { 0x0, 0x5 };
// Initial contents loaded into vector_float32x2 (lanes 0 and 1 are replaced
// afterwards by -15.3f and 5.3f respectively).
float32_t buffer_float32x2 [] = { (float32_t)-16, (float32_t)-15, };
// Reproducer entry point.
//
// Builds the float vector { -15.3f, 5.3f }, converts it to unsigned 32-bit
// lanes with vcvt_u32_f32(), stores the lanes into result_uint32x2 and
// compares them with expected_uint32x2 ({ 0x0, 0x5 }). For every mismatching
// lane it prints "ERROR" followed by "<index> : <value>" to stderr; when all
// lanes match it prints nothing.
//
// According to the accompanying report, the -O1 build prints "0 : 5" (lane 0
// holds 5 instead of 0) while the -O0 build prints nothing.
int main()
{
// Load both lanes from buffer_float32x2.
// NOTE(review): the trailing integer argument (9 here, 18 in the store
// below) looks like the builtin's element-type code — confirm against
// clang's arm_neon.h.
vector_float32x2 = __extension__ ({
float32x2_t __ret;
__ret = (float32x2_t) __builtin_neon_vld1_v(buffer_float32x2, 9);
__ret;
});
// Replace lane 0 with -15.3f; expected_uint32x2[0] says this lane should
// convert to 0.
vector_float32x2 = __extension__ ({
float32x2_t __ret;
float32_t __s0 = -15.3f;
float32x2_t __s1 = vector_float32x2;
__ret = (float32x2_t) __builtin_neon_vset_lane_f32(__s0, (float32x2_t)__s1, 0);
__ret;
});
// Replace lane 1 with 5.3f; expected_uint32x2[1] says this lane should
// convert to 5.
vector_float32x2 = __extension__ ({
float32x2_t __ret;
float32_t __s0 = 5.3f;
float32x2_t __s1 = vector_float32x2;
__ret = (float32x2_t) __builtin_neon_vset_lane_f32(__s0, (float32x2_t)__s1, 1);
__ret;
});
// printf("%f %f\n", vector_float32x2[0], vector_float32x2[1]);
// Float -> unsigned conversion whose result is under test.
vector_res_uint32x2 = vcvt_u32_f32(vector_float32x2);
// Write the converted lanes out to result_uint32x2 so they can be inspected
// as plain integers.
__extension__ ({
uint32x2_t __s1 = vector_res_uint32x2;
__builtin_neon_vst1_v(result_uint32x2, (int8x8_t)__s1, 18);
});
{
int i;
// Check each of the two lanes; report every mismatch rather than stopping
// at the first one.
for(i=0; i<2 ; i++) {
if (result_uint32x2[i] != expected_uint32x2[i]) {
fprintf(stderr, "ERROR\n");
// NOTE(review): result_uint32x2[i] is a uint32_t but is printed with %d.
fprintf(stderr, "%d : %d\n",i,result_uint32x2[i]);
}
}
}
}
```
Compile command and output with `-O1` (wrong result):
```
% clang demo.c -static -lm -o test -O1 -march=armv8-a
% ./test
ERROR
0 : 5
```
With `-O0` the program prints nothing, as expected:
```
% clang demo.c -static -lm -o test -O0 -march=armv8-a
% ./test
```
I found that the optimization pass that causes this problem is `InlinerPass`:
https://godbolt.org/z/hfr9sPhvr
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs