github-actions[bot] wrote: <!--LLVM CODE FORMAT COMMENT: {clang-format}-->
:warning: C/C++ code formatter, clang-format found issues in your code. :warning: <details> <summary> You can test this locally with the following command: </summary> ``````````bash git-clang-format --diff HEAD~1 HEAD --extensions h,cpp,cl -- clang/test/Driver/opencl-libclc.cl libclc/test/geometric/cross.cl libclc/test/integer/add_sat.cl libclc/test/integer/sub_sat.cl libclc/test/math/cos.cl libclc/test/math/fabs.cl libclc/test/math/rsqrt.cl libclc/test/misc/as_type.cl libclc/test/misc/convert.cl libclc/test/work-item/get_group_id.cl clang/include/clang/Driver/CommonArgs.h clang/lib/Driver/ToolChains/AMDGPU.cpp clang/lib/Driver/ToolChains/CommonArgs.cpp `````````` </details> <details> <summary> View the diff from clang-format here. </summary> ``````````diff diff --git a/libclc/test/geometric/cross.cl b/libclc/test/geometric/cross.cl index 4cb8c53be..1a8462489 100644 --- a/libclc/test/geometric/cross.cl +++ b/libclc/test/geometric/cross.cl @@ -1,4 +1,5 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// UTC_ARGS: --version 5 //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. @@ -7,38 +8,44 @@ // //===----------------------------------------------------------------------===// -// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s +// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - +// --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | +// FileCheck %s // CHECK-LABEL: define protected amdgpu_kernel void @foo( -// CHECK-SAME: ptr addrspace(1) noundef align 16 captures(none) [[F:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[F]], align 16, !tbaa [[TBAA11:![0-9]+]] -// CHECK-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[F]], i64 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr addrspace(1) [[ARRAYIDX1_I]], align 16, !tbaa [[TBAA11]] -// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 -// CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 -// CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 -// CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i64 1 -// CHECK-NEXT: [[TMP6:%.*]] = fneg float [[TMP5]] -// CHECK-NEXT: [[NEG_I_I:%.*]] = fmul float [[TMP4]], [[TMP6]] -// CHECK-NEXT: [[TMP7:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP2]], float [[TMP3]], float [[NEG_I_I]]) +// CHECK-SAME: ptr addrspace(1) noundef align 16 captures(none) [[F:%.*]]) +// local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] +// !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] +// !kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual +// [[META10:![0-9]+]] { CHECK-NEXT: [[ENTRY:.*:]] CHECK-NEXT: [[TMP0:%.*]] = +// load <4 x float>, ptr addrspace(1) [[F]], align 16, !tbaa [[TBAA11:![0-9]+]] +// CHECK-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds nuw i8, ptr +// addrspace(1) [[F]], i64 16 CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, +// ptr addrspace(1) [[ARRAYIDX1_I]], align 16, !tbaa [[TBAA11]] CHECK-NEXT: +// [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 CHECK-NEXT: +// [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 CHECK-NEXT: +// [[TMP4:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 CHECK-NEXT: +// [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i64 1 CHECK-NEXT: +// [[TMP6:%.*]] = fneg float [[TMP5]] CHECK-NEXT: [[NEG_I_I:%.*]] = fmul +// float [[TMP4]], [[TMP6]] CHECK-NEXT: [[TMP7:%.*]] = tail call float +// @llvm.fmuladd.f32(float [[TMP2]], float [[TMP3]], float [[NEG_I_I]]) // CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 // CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 // CHECK-NEXT: [[TMP10:%.*]] = fneg float [[TMP3]] // CHECK-NEXT: [[NEG3_I_I:%.*]] = fmul float [[TMP9]], [[TMP10]] -// CHECK-NEXT: [[TMP11:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP4]], float [[TMP8]], float [[NEG3_I_I]]) -// CHECK-NEXT: [[TMP12:%.*]] = fneg float [[TMP8]] -// CHECK-NEXT: [[NEG6_I_I:%.*]] = fmul float [[TMP2]], [[TMP12]] -// CHECK-NEXT: [[TMP13:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP9]], float [[TMP5]], float [[NEG6_I_I]]) -// CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> <float poison, float poison, float poison, float 0.000000e+00>, float [[TMP7]], i64 0 -// CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP11]], i64 1 -// CHECK-NEXT: [[VECINIT8_I_I:%.*]] = insertelement <4 x float> [[TMP15]], float [[TMP13]], i64 2 -// CHECK-NEXT: store <4 x float> [[VECINIT8_I_I]], ptr addrspace(1) [[F]], align 16, !tbaa [[TBAA11]] -// CHECK-NEXT: ret void +// CHECK-NEXT: [[TMP11:%.*]] = tail call float @llvm.fmuladd.f32(float +// [[TMP4]], float [[TMP8]], float [[NEG3_I_I]]) CHECK-NEXT: [[TMP12:%.*]] = +// fneg float [[TMP8]] CHECK-NEXT: [[NEG6_I_I:%.*]] = fmul float [[TMP2]], +// [[TMP12]] CHECK-NEXT: [[TMP13:%.*]] = tail call float +// @llvm.fmuladd.f32(float [[TMP9]], float [[TMP5]], float [[NEG6_I_I]]) +// CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> <float poison, float +// poison, float poison, float 0.000000e+00>, float [[TMP7]], i64 0 CHECK-NEXT: +// [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP11]], i64 1 +// CHECK-NEXT: [[VECINIT8_I_I:%.*]] = insertelement <4 x float> [[TMP15]], +// float [[TMP13]], i64 2 CHECK-NEXT: store <4 x float> [[VECINIT8_I_I]], ptr +// addrspace(1) [[F]], align 16, !tbaa [[TBAA11]] CHECK-NEXT: ret void // -__kernel void foo(__global float4 *f) { - *f = cross(f[0], f[1]); -} +__kernel void foo(__global float4 *f) { *f = cross(f[0], f[1]); } //. // CHECK: [[META6]] = !{i32 1} // CHECK: [[META7]] = !{!"none"} diff --git a/libclc/test/integer/add_sat.cl b/libclc/test/integer/add_sat.cl index ef5bf77b6..8c16b2371 100644 --- a/libclc/test/integer/add_sat.cl +++ b/libclc/test/integer/add_sat.cl @@ -1,4 +1,5 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// UTC_ARGS: --version 5 //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. @@ -7,15 +8,23 @@ // //===----------------------------------------------------------------------===// -// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s +// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - +// --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | +// FileCheck %s // CHECK-LABEL: define protected amdgpu_kernel void @foo( -// CHECK-SAME: ptr addrspace(1) noundef writeonly align 1 captures(none) initializes((0, 1)) [[A:%.*]], ptr addrspace(1) noundef readonly align 1 captures(none) [[B:%.*]], ptr addrspace(1) noundef readonly align 1 captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(1) [[B]], align 1, !tbaa [[TBAA10:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(1) [[C]], align 1, !tbaa [[TBAA10]] -// CHECK-NEXT: [[ELT_SAT_I_I:%.*]] = tail call noundef i8 @llvm.sadd.sat.i8(i8 [[TMP0]], i8 [[TMP1]]) -// CHECK-NEXT: store i8 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 1, !tbaa [[TBAA10]] +// CHECK-SAME: ptr addrspace(1) noundef writeonly align 1 captures(none) +// initializes((0, 1)) [[A:%.*]], ptr addrspace(1) noundef readonly align 1 +// captures(none) [[B:%.*]], ptr addrspace(1) noundef readonly align 1 +// captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] +// !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual +// [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type +// [[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] { CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(1) [[B]], align 1, !tbaa +// [[TBAA10:![0-9]+]] CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(1) +// [[C]], align 1, !tbaa [[TBAA10]] CHECK-NEXT: [[ELT_SAT_I_I:%.*]] = tail +// call noundef i8 @llvm.sadd.sat.i8(i8 [[TMP0]], i8 [[TMP1]]) CHECK-NEXT: store +// i8 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 1, !tbaa [[TBAA10]] // CHECK-NEXT: ret void // __kernel void foo(__global char *a, __global char *b, __global char *c) { diff --git a/libclc/test/integer/sub_sat.cl b/libclc/test/integer/sub_sat.cl index 7c3f0a3aa..5da7c6f7c 100644 --- a/libclc/test/integer/sub_sat.cl +++ b/libclc/test/integer/sub_sat.cl @@ -1,4 +1,5 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// UTC_ARGS: --version 5 //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. @@ -7,13 +8,20 @@ // //===----------------------------------------------------------------------===// -// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s +// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - +// --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | +// FileCheck %s // CHECK-LABEL: define protected amdgpu_kernel void @test_subsat_char( -// CHECK-SAME: ptr addrspace(1) noundef writeonly align 1 captures(none) initializes((0, 1)) [[A:%.*]], i8 noundef [[X:%.*]], i8 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] { +// CHECK-SAME: ptr addrspace(1) noundef writeonly align 1 captures(none) +// initializes((0, 1)) [[A:%.*]], i8 noundef [[X:%.*]], i8 noundef [[Y:%.*]]) +// local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] +// !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] +// !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[ELT_SAT_I_I:%.*]] = tail call noundef i8 @llvm.ssub.sat.i8(i8 [[X]], i8 [[Y]]) -// CHECK-NEXT: store i8 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 1, !tbaa [[TBAA10:![0-9]+]] +// CHECK-NEXT: [[ELT_SAT_I_I:%.*]] = tail call noundef i8 +// @llvm.ssub.sat.i8(i8 [[X]], i8 [[Y]]) CHECK-NEXT: store i8 +// [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 1, !tbaa [[TBAA10:![0-9]+]] // CHECK-NEXT: ret void // __kernel void test_subsat_char(__global char *a, char x, char y) { @@ -22,10 +30,15 @@ __kernel void test_subsat_char(__global char *a, char x, char y) { } // CHECK-LABEL: define protected amdgpu_kernel void @test_subsat_uchar( -// CHECK-SAME: ptr addrspace(1) noundef writeonly align 1 captures(none) initializes((0, 1)) [[A:%.*]], i8 noundef [[X:%.*]], i8 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META6]] !kernel_arg_access_qual [[META7]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META9]] { +// CHECK-SAME: ptr addrspace(1) noundef writeonly align 1 captures(none) +// initializes((0, 1)) [[A:%.*]], i8 noundef [[X:%.*]], i8 noundef [[Y:%.*]]) +// local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META6]] +// !kernel_arg_access_qual [[META7]] !kernel_arg_type [[META13:![0-9]+]] +// !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META9]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[ELT_SAT_I_I:%.*]] = tail call noundef i8 @llvm.usub.sat.i8(i8 [[X]], i8 [[Y]]) -// CHECK-NEXT: store i8 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 1, !tbaa [[TBAA10]] +// CHECK-NEXT: [[ELT_SAT_I_I:%.*]] = tail call noundef i8 +// @llvm.usub.sat.i8(i8 [[X]], i8 [[Y]]) CHECK-NEXT: store i8 +// [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 1, !tbaa [[TBAA10]] // CHECK-NEXT: ret void // __kernel void test_subsat_uchar(__global uchar *a, uchar x, uchar y) { @@ -34,10 +47,15 @@ __kernel void test_subsat_uchar(__global uchar *a, uchar x, uchar y) { } // CHECK-LABEL: define protected amdgpu_kernel void @test_subsat_long( -// CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[A:%.*]], i64 noundef [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META6]] !kernel_arg_access_qual [[META7]] !kernel_arg_type [[META14:![0-9]+]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META9]] { +// CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) +// initializes((0, 8)) [[A:%.*]], i64 noundef [[X:%.*]], i64 noundef [[Y:%.*]]) +// local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META6]] +// !kernel_arg_access_qual [[META7]] !kernel_arg_type [[META14:![0-9]+]] +// !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META9]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[ELT_SAT_I_I:%.*]] = tail call noundef i64 @llvm.ssub.sat.i64(i64 [[X]], i64 [[Y]]) -// CHECK-NEXT: store i64 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA15:![0-9]+]] +// CHECK-NEXT: [[ELT_SAT_I_I:%.*]] = tail call noundef i64 +// @llvm.ssub.sat.i64(i64 [[X]], i64 [[Y]]) CHECK-NEXT: store i64 +// [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA15:![0-9]+]] // CHECK-NEXT: ret void // __kernel void test_subsat_long(__global long *a, long x, long y) { @@ -46,10 +64,15 @@ __kernel void test_subsat_long(__global long *a, long x, long y) { } // CHECK-LABEL: define protected amdgpu_kernel void @test_subsat_ulong( -// CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[A:%.*]], i64 noundef [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META6]] !kernel_arg_access_qual [[META7]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META9]] { +// CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) +// initializes((0, 8)) [[A:%.*]], i64 noundef [[X:%.*]], i64 noundef [[Y:%.*]]) +// local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META6]] +// !kernel_arg_access_qual [[META7]] !kernel_arg_type [[META17:![0-9]+]] +// !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META9]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[ELT_SAT_I_I:%.*]] = tail call noundef i64 @llvm.usub.sat.i64(i64 [[X]], i64 [[Y]]) -// CHECK-NEXT: store i64 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA15]] +// CHECK-NEXT: [[ELT_SAT_I_I:%.*]] = tail call noundef i64 +// @llvm.usub.sat.i64(i64 [[X]], i64 [[Y]]) CHECK-NEXT: store i64 +// [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA15]] // CHECK-NEXT: ret void // __kernel void test_subsat_ulong(__global ulong *a, ulong x, ulong y) { diff --git a/libclc/test/math/cos.cl b/libclc/test/math/cos.cl index 79272cce4..4a95787e8 100644 --- a/libclc/test/math/cos.cl +++ b/libclc/test/math/cos.cl @@ -1,4 +1,5 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// UTC_ARGS: --version 5 //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. @@ -7,283 +8,393 @@ // //===----------------------------------------------------------------------===// -// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s +// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - +// --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | +// FileCheck %s // CHECK-LABEL: define protected amdgpu_kernel void @foo( -// CHECK-SAME: ptr addrspace(1) noundef align 16 captures(none) [[F:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[F]], align 16, !tbaa [[TBAA11:![0-9]+]] -// CHECK-NEXT: [[ELT_ABS_I_I_I:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) -// CHECK-NEXT: [[CMP_I_I:%.*]] = fcmp olt <4 x float> [[ELT_ABS_I_I_I]], splat (float 0x4160000000000000) -// CHECK-NEXT: [[TMP1:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[ELT_ABS_I_I_I]], <4 x float> splat (float 0x3FE45F3060000000), <4 x float> splat (float 5.000000e-01)) -// CHECK-NEXT: [[ELT_TRUNC_I_I:%.*]] = tail call noundef <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP1]]) -// CHECK-NEXT: [[MUL_I30_I_I_I_I:%.*]] = fmul <4 x float> [[ELT_TRUNC_I_I]], splat (float 0x3FF921FB40000000) -// CHECK-NEXT: [[FNEG_I31_I_I_I_I:%.*]] = fneg <4 x float> [[MUL_I30_I_I_I_I]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[ELT_TRUNC_I_I]], <4 x float> splat (float 0x3FF921FB40000000), <4 x float> [[FNEG_I31_I_I_I_I]]) -// CHECK-NEXT: [[SUB_I_I_I_I:%.*]] = fsub <4 x float> [[ELT_ABS_I_I_I]], [[MUL_I30_I_I_I_I]] -// CHECK-NEXT: [[SUB2_I_I_I_I:%.*]] = fsub <4 x float> [[ELT_ABS_I_I_I]], [[SUB_I_I_I_I]] -// CHECK-NEXT: [[SUB3_I_I_I_I:%.*]] = fsub <4 x float> [[SUB2_I_I_I_I]], [[MUL_I30_I_I_I_I]] -// CHECK-NEXT: [[SUB4_I_I_I_I:%.*]] = fsub <4 x float> [[SUB3_I_I_I_I]], [[TMP2]] -// CHECK-NEXT: [[ADD_I_I_I_I:%.*]] = fadd <4 x float> [[SUB_I_I_I_I]], [[SUB4_I_I_I_I]] -// CHECK-NEXT: [[MUL_I27_I_I_I_I:%.*]] = fmul <4 x float> [[ELT_TRUNC_I_I]], splat (float 0x3E74442D00000000) -// CHECK-NEXT: [[FNEG_I28_I_I_I_I:%.*]] = fneg <4 x float> [[MUL_I27_I_I_I_I]] -// CHECK-NEXT: [[TMP3:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[ELT_TRUNC_I_I]], <4 x float> splat (float 0x3E74442D00000000), <4 x float> [[FNEG_I28_I_I_I_I]]) -// CHECK-NEXT: [[SUB5_I_I_I_I:%.*]] = fsub <4 x float> [[ADD_I_I_I_I]], [[MUL_I27_I_I_I_I]] -// CHECK-NEXT: [[SUB6_I_I_I_I:%.*]] = fsub <4 x float> [[ADD_I_I_I_I]], [[SUB5_I_I_I_I]] -// CHECK-NEXT: [[SUB7_I_I_I_I:%.*]] = fsub <4 x float> [[SUB6_I_I_I_I]], [[MUL_I27_I_I_I_I]] -// CHECK-NEXT: [[SUB8_I_I_I_I:%.*]] = fsub <4 x float> [[SUB7_I_I_I_I]], [[TMP3]] -// CHECK-NEXT: [[ADD9_I_I_I_I:%.*]] = fadd <4 x float> [[SUB5_I_I_I_I]], [[SUB8_I_I_I_I]] -// CHECK-NEXT: [[MUL_I_I_I_I_I:%.*]] = fmul <4 x float> [[ELT_TRUNC_I_I]], splat (float 0x3CF8469880000000) -// CHECK-NEXT: [[FNEG_I_I_I_I_I:%.*]] = fneg <4 x float> [[MUL_I_I_I_I_I]] -// CHECK-NEXT: [[TMP4:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[ELT_TRUNC_I_I]], <4 x float> splat (float 0x3CF8469880000000), <4 x float> [[FNEG_I_I_I_I_I]]) -// CHECK-NEXT: [[SUB10_I_I_I_I:%.*]] = fsub <4 x float> [[ADD9_I_I_I_I]], [[MUL_I_I_I_I_I]] -// CHECK-NEXT: [[SUB11_I_I_I_I:%.*]] = fsub <4 x float> [[ADD9_I_I_I_I]], [[SUB10_I_I_I_I]] -// CHECK-NEXT: [[SUB12_I_I_I_I:%.*]] = fsub <4 x float> [[SUB11_I_I_I_I]], [[MUL_I_I_I_I_I]] -// CHECK-NEXT: [[ADD13_I_I_I_I:%.*]] = fadd <4 x float> [[SUB10_I_I_I_I]], [[SUB12_I_I_I_I]] -// CHECK-NEXT: [[FNEG_I_I_I_I:%.*]] = fneg <4 x float> [[TMP4]] -// CHECK-NEXT: [[CONV_I_I_I:%.*]] = fptosi <4 x float> [[ELT_TRUNC_I_I]] to <4 x i32> -// CHECK-NEXT: [[ASTYPE_I_I_I:%.*]] = bitcast <4 x float> [[ELT_ABS_I_I_I]] to <4 x i32> -// CHECK-NEXT: [[SHR_I_I_I:%.*]] = lshr <4 x i32> [[ASTYPE_I_I_I]], splat (i32 23) -// CHECK-NEXT: [[AND_I11_I_I:%.*]] = and <4 x i32> [[ASTYPE_I_I_I]], splat (i32 8388607) -// CHECK-NEXT: [[OR_I_I_I:%.*]] = or disjoint <4 x i32> [[AND_I11_I_I]], splat (i32 8388608) -// CHECK-NEXT: [[MUL_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -28220501) -// CHECK-NEXT: [[CONV_I1_I27_I_I:%.*]] = zext nneg <4 x i32> [[OR_I_I_I]] to <4 x i64> -// CHECK-NEXT: [[MUL_I28_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 4266746795) -// CHECK-NEXT: [[SHR_I29_I_I:%.*]] = lshr <4 x i64> [[MUL_I28_I_I]], splat (i64 32) -// CHECK-NEXT: [[CONV_I2_I30_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I29_I_I]] to <4 x i32> -// CHECK-NEXT: [[MUL2_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 1011060801) -// CHECK-NEXT: [[ADD_I_I_I:%.*]] = add <4 x i32> [[MUL2_I_I_I]], [[CONV_I2_I30_I_I]] -// CHECK-NEXT: [[MUL_I24_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 1011060801) -// CHECK-NEXT: [[SHR_I25_I_I:%.*]] = lshr <4 x i64> [[MUL_I24_I_I]], splat (i64 32) -// CHECK-NEXT: [[CONV_I2_I26_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I25_I_I]] to <4 x i32> -// CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD_I_I_I]], [[CONV_I2_I30_I_I]] -// CHECK-NEXT: [[SEXT_I_I1_I:%.*]] = zext <4 x i1> [[CMP_I_I_I]] to <4 x i32> -// CHECK-NEXT: [[ADD5_I_I_I:%.*]] = add nuw nsw <4 x i32> [[SEXT_I_I1_I]], [[CONV_I2_I26_I_I]] -// CHECK-NEXT: [[MUL6_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -614296167) -// CHECK-NEXT: [[ADD7_I_I_I:%.*]] = add <4 x i32> [[ADD5_I_I_I]], [[MUL6_I_I_I]] -// CHECK-NEXT: [[MUL_I20_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 3680671129) -// CHECK-NEXT: [[SHR_I21_I_I:%.*]] = lshr <4 x i64> [[MUL_I20_I_I]], splat (i64 32) -// CHECK-NEXT: [[CONV_I2_I22_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I21_I_I]] to <4 x i32> -// CHECK-NEXT: [[CMP9_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD7_I_I_I]], [[ADD5_I_I_I]] -// CHECK-NEXT: [[SEXT10_I_I_I:%.*]] = zext <4 x i1> [[CMP9_I_I_I]] to <4 x i32> -// CHECK-NEXT: [[ADD13_I_I_I:%.*]] = add nuw nsw <4 x i32> [[SEXT10_I_I_I]], [[CONV_I2_I22_I_I]] -// CHECK-NEXT: [[MUL14_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -181084736) -// CHECK-NEXT: [[ADD15_I_I_I:%.*]] = add <4 x i32> [[ADD13_I_I_I]], [[MUL14_I_I_I]] -// CHECK-NEXT: [[MUL_I16_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 4113882560) -// CHECK-NEXT: [[SHR_I17_I_I:%.*]] = lshr <4 x i64> [[MUL_I16_I_I]], splat (i64 32) -// CHECK-NEXT: [[CONV_I2_I18_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I17_I_I]] to <4 x i32> -// CHECK-NEXT: [[CMP17_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD15_I_I_I]], [[ADD13_I_I_I]] -// CHECK-NEXT: [[SEXT18_I_I_I:%.*]] = zext <4 x i1> [[CMP17_I_I_I]] to <4 x i32> -// CHECK-NEXT: [[ADD21_I_I_I:%.*]] = add nuw nsw <4 x i32> [[SEXT18_I_I_I]], [[CONV_I2_I18_I_I]] -// CHECK-NEXT: [[MUL22_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -64530479) -// CHECK-NEXT: [[ADD23_I_I_I:%.*]] = add <4 x i32> [[ADD21_I_I_I]], [[MUL22_I_I_I]] -// CHECK-NEXT: [[MUL_I12_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 4230436817) -// CHECK-NEXT: [[SHR_I13_I_I:%.*]] = lshr <4 x i64> [[MUL_I12_I_I]], splat (i64 32) -// CHECK-NEXT: [[CONV_I2_I14_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I13_I_I]] to <4 x i32> -// CHECK-NEXT: [[CMP25_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD23_I_I_I]], [[ADD21_I_I_I]] -// CHECK-NEXT: [[SEXT26_I_I_I:%.*]] = zext <4 x i1> [[CMP25_I_I_I]] to <4 x i32> -// CHECK-NEXT: [[ADD29_I_I_I:%.*]] = add nuw nsw <4 x i32> [[SEXT26_I_I_I]], [[CONV_I2_I14_I_I]] -// CHECK-NEXT: [[MUL30_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 1313084713) -// CHECK-NEXT: [[ADD31_I_I_I:%.*]] = add <4 x i32> [[ADD29_I_I_I]], [[MUL30_I_I_I]] -// CHECK-NEXT: [[MUL_I8_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 1313084713) -// CHECK-NEXT: [[SHR_I9_I_I:%.*]] = lshr <4 x i64> [[MUL_I8_I_I]], splat (i64 32) -// CHECK-NEXT: [[CONV_I2_I10_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I9_I_I]] to <4 x i32> -// CHECK-NEXT: [[CMP33_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD31_I_I_I]], [[ADD29_I_I_I]] -// CHECK-NEXT: [[SEXT34_I_I_I:%.*]] = zext <4 x i1> [[CMP33_I_I_I]] to <4 x i32> -// CHECK-NEXT: [[ADD37_I_I_I:%.*]] = add nuw nsw <4 x i32> [[SEXT34_I_I_I]], [[CONV_I2_I10_I_I]] -// CHECK-NEXT: [[MUL38_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -1560706194) -// CHECK-NEXT: [[ADD39_I_I_I:%.*]] = add <4 x i32> [[ADD37_I_I_I]], [[MUL38_I_I_I]] -// CHECK-NEXT: [[MUL_I5_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 2734261102) -// CHECK-NEXT: [[SHR_I6_I_I:%.*]] = lshr <4 x i64> [[MUL_I5_I_I]], splat (i64 32) -// CHECK-NEXT: [[CONV_I2_I_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I6_I_I]] to <4 x i32> -// CHECK-NEXT: [[CMP41_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD39_I_I_I]], [[ADD37_I_I_I]] -// CHECK-NEXT: [[SEXT42_I_I_I:%.*]] = zext <4 x i1> [[CMP41_I_I_I]] to <4 x i32> -// CHECK-NEXT: [[ADD45_I_I_I:%.*]] = add nuw nsw <4 x i32> [[SEXT42_I_I_I]], [[CONV_I2_I_I_I]] -// CHECK-NEXT: [[SUB47_I_I_I:%.*]] = add nsw <4 x i32> [[SHR_I_I_I]], splat (i32 -120) -// CHECK-NEXT: [[CMP48_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB47_I_I_I]], splat (i32 31) -// CHECK-NEXT: [[COND51_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD39_I_I_I]], <4 x i32> [[ADD45_I_I_I]] -// CHECK-NEXT: [[COND53_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD31_I_I_I]], <4 x i32> [[ADD39_I_I_I]] -// CHECK-NEXT: [[COND55_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD23_I_I_I]], <4 x i32> [[ADD31_I_I_I]] -// CHECK-NEXT: [[COND57_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD15_I_I_I]], <4 x i32> [[ADD23_I_I_I]] -// CHECK-NEXT: [[COND59_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD7_I_I_I]], <4 x i32> [[ADD15_I_I_I]] -// CHECK-NEXT: [[COND61_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD_I_I_I]], <4 x i32> [[ADD7_I_I_I]] -// CHECK-NEXT: [[COND63_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[MUL_I_I_I]], <4 x i32> [[ADD_I_I_I]] -// CHECK-NEXT: [[DOTNEG_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> splat (i32 -32), <4 x i32> zeroinitializer -// CHECK-NEXT: [[SUB66_I_I_I:%.*]] = add nsw <4 x i32> [[DOTNEG_I_I_I]], [[SUB47_I_I_I]] -// CHECK-NEXT: [[CMP67_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB66_I_I_I]], splat (i32 31) -// CHECK-NEXT: [[COND70_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND53_I_I_I]], <4 x i32> [[COND51_I_I_I]] -// CHECK-NEXT: [[COND72_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND55_I_I_I]], <4 x i32> [[COND53_I_I_I]] -// CHECK-NEXT: [[COND74_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND57_I_I_I]], <4 x i32> [[COND55_I_I_I]] -// CHECK-NEXT: [[COND76_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND59_I_I_I]], <4 x i32> [[COND57_I_I_I]] -// CHECK-NEXT: [[COND78_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND61_I_I_I]], <4 x i32> [[COND59_I_I_I]] -// CHECK-NEXT: [[COND80_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND63_I_I_I]], <4 x i32> [[COND61_I_I_I]] -// CHECK-NEXT: [[DOTNEG379_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> splat (i32 -32), <4 x i32> zeroinitializer -// CHECK-NEXT: [[SUB83_I_I_I:%.*]] = add nsw <4 x i32> [[DOTNEG379_I_I_I]], [[SUB66_I_I_I]] -// CHECK-NEXT: [[CMP84_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB83_I_I_I]], splat (i32 31) -// CHECK-NEXT: [[COND87_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> [[COND72_I_I_I]], <4 x i32> [[COND70_I_I_I]] -// CHECK-NEXT: [[COND89_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> [[COND74_I_I_I]], <4 x i32> [[COND72_I_I_I]] -// CHECK-NEXT: [[COND91_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> [[COND76_I_I_I]], <4 x i32> [[COND74_I_I_I]] -// CHECK-NEXT: [[COND93_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> [[COND78_I_I_I]], <4 x i32> [[COND76_I_I_I]] -// CHECK-NEXT: [[COND95_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> [[COND80_I_I_I]], <4 x i32> [[COND78_I_I_I]] -// CHECK-NEXT: [[DOTNEG380_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> splat (i32 -32), <4 x i32> zeroinitializer -// CHECK-NEXT: [[SUB98_I_I_I:%.*]] = add nsw <4 x i32> [[DOTNEG380_I_I_I]], [[SUB83_I_I_I]] -// CHECK-NEXT: [[CMP99_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB98_I_I_I]], splat (i32 31) -// CHECK-NEXT: [[COND102_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32> [[COND89_I_I_I]], <4 x i32> [[COND87_I_I_I]] -// CHECK-NEXT: [[COND104_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32> [[COND91_I_I_I]], <4 x i32> [[COND89_I_I_I]] -// CHECK-NEXT: [[COND106_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32> [[COND93_I_I_I]], <4 x i32> [[COND91_I_I_I]] -// CHECK-NEXT: [[COND108_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32> [[COND95_I_I_I]], <4 x i32> [[COND93_I_I_I]] -// CHECK-NEXT: [[DOTNEG381_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32> splat (i32 -32), <4 x i32> zeroinitializer -// CHECK-NEXT: [[SUB111_I_I_I:%.*]] = sub nsw <4 x i32> zeroinitializer, [[SUB98_I_I_I]] -// CHECK-NEXT: [[CMP112_NOT_I_I_I:%.*]] = icmp eq <4 x i32> [[DOTNEG381_I_I_I]], [[SUB111_I_I_I]] -// CHECK-NEXT: [[SUB114_I_I_I:%.*]] = sub nsw <4 x i32> splat (i32 24), [[SHR_I_I_I]] -// CHECK-NEXT: [[SHL_MASK_I_I_I:%.*]] = and <4 x i32> [[SUB47_I_I_I]], splat (i32 31) -// CHECK-NEXT: [[SHL_I_I_I:%.*]] = shl <4 x i32> [[COND102_I_I_I]], [[SHL_MASK_I_I_I]] -// CHECK-NEXT: [[SHR_MASK_I_I_I:%.*]] = and <4 x i32> [[SUB114_I_I_I]], splat (i32 31) -// CHECK-NEXT: [[SHR116_I_I_I:%.*]] = lshr <4 x i32> [[COND104_I_I_I]], [[SHR_MASK_I_I_I]] -// CHECK-NEXT: [[OR117_I_I_I:%.*]] = or <4 x i32> [[SHL_I_I_I]], [[SHR116_I_I_I]] -// CHECK-NEXT: [[SHL120_I_I_I:%.*]] = shl <4 x i32> [[COND104_I_I_I]], [[SHL_MASK_I_I_I]] -// CHECK-NEXT: [[SHR122_I_I_I:%.*]] = lshr <4 x i32> [[COND106_I_I_I]], [[SHR_MASK_I_I_I]] -// CHECK-NEXT: [[OR123_I_I_I:%.*]] = or <4 x i32> [[SHL120_I_I_I]], [[SHR122_I_I_I]] -// CHECK-NEXT: [[SHL126_I_I_I:%.*]] = shl <4 x i32> [[COND106_I_I_I]], [[SHL_MASK_I_I_I]] -// CHECK-NEXT: [[SHR128_I_I_I:%.*]] = lshr <4 x i32> [[COND108_I_I_I]], [[SHR_MASK_I_I_I]] -// CHECK-NEXT: [[OR129_I_I_I:%.*]] = or <4 x i32> [[SHL126_I_I_I]], [[SHR128_I_I_I]] -// CHECK-NEXT: [[COND131_I_I_I:%.*]] = select <4 x i1> [[CMP112_NOT_I_I_I]], <4 x i32> [[COND102_I_I_I]], <4 x i32> [[OR117_I_I_I]] -// CHECK-NEXT: [[COND133_I_I_I:%.*]] = select <4 x i1> [[CMP112_NOT_I_I_I]], <4 x i32> [[COND104_I_I_I]], <4 x i32> [[OR123_I_I_I]] -// CHECK-NEXT: [[COND135_I_I_I:%.*]] = select <4 x i1> [[CMP112_NOT_I_I_I]], <4 x i32> [[COND106_I_I_I]], <4 x i32> [[OR129_I_I_I]] -// CHECK-NEXT: [[SHR136_I_I_I:%.*]] = lshr <4 x i32> [[COND131_I_I_I]], splat (i32 29) -// CHECK-NEXT: [[OR139_I_I_I:%.*]] = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[COND131_I_I_I]], <4 x i32> [[COND133_I_I_I]], <4 x i32> splat (i32 2)) -// CHECK-NEXT: [[OR142_I_I_I:%.*]] = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[COND133_I_I_I]], <4 x i32> [[COND135_I_I_I]], <4 x i32> splat (i32 2)) -// CHECK-NEXT: [[OR145_I_I_I:%.*]] = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[COND135_I_I_I]], <4 x i32> [[COND108_I_I_I]], <4 x i32> splat (i32 2)) -// CHECK-NEXT: [[AND146_I_I_I:%.*]] = and <4 x i32> [[SHR136_I_I_I]], splat (i32 1) -// CHECK-NEXT: [[SEXT148_I_I_I:%.*]] = sub nsw <4 x i32> zeroinitializer, [[AND146_I_I_I]] -// CHECK-NEXT: [[TMP5:%.*]] = and <4 x i32> [[SEXT148_I_I_I]], splat (i32 -2147483648) -// CHECK-NEXT: [[XOR_I_I_I:%.*]] = xor <4 x i32> [[OR139_I_I_I]], [[SEXT148_I_I_I]] -// CHECK-NEXT: [[XOR156_I_I_I:%.*]] = xor <4 x i32> [[OR142_I_I_I]], [[SEXT148_I_I_I]] -// CHECK-NEXT: [[XOR157_I_I_I:%.*]] = xor <4 x i32> [[OR145_I_I_I]], [[SEXT148_I_I_I]] -// CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[XOR_I_I_I]], i64 0 -// CHECK-NEXT: [[TMP7:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP6]], i1 false) -// CHECK-NEXT: [[VECINIT_I1_I_I:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0 -// CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[XOR_I_I_I]], i64 1 -// CHECK-NEXT: [[TMP9:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP8]], i1 false) -// CHECK-NEXT: [[VECINIT2_I2_I_I:%.*]] = insertelement <4 x i32> [[VECINIT_I1_I_I]], i32 [[TMP9]], i64 1 -// CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[XOR_I_I_I]], i64 2 -// CHECK-NEXT: [[TMP11:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP10]], i1 false) -// CHECK-NEXT: [[VECINIT4_I3_I_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I2_I_I]], i32 [[TMP11]], i64 2 -// CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[XOR_I_I_I]], i64 3 -// CHECK-NEXT: [[TMP13:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP12]], i1 false) -// CHECK-NEXT: [[VECINIT6_I4_I_I:%.*]] = insertelement <4 x i32> [[VECINIT4_I3_I_I]], i32 [[TMP13]], i64 3 -// CHECK-NEXT: [[ADD159_I_I_I:%.*]] = add nuw nsw <4 x i32> [[VECINIT6_I4_I_I]], splat (i32 1) -// CHECK-NEXT: [[SHL_MASK162_I_I_I:%.*]] = and <4 x i32> [[ADD159_I_I_I]], splat (i32 31) -// CHECK-NEXT: [[SHL163_I_I_I:%.*]] = shl <4 x i32> [[XOR_I_I_I]], [[SHL_MASK162_I_I_I]] -// CHECK-NEXT: [[TMP14:%.*]] = and <4 x i32> [[VECINIT6_I4_I_I]], splat (i32 31) -// CHECK-NEXT: [[SHR_MASK164_I_I_I:%.*]] = xor <4 x i32> [[TMP14]], splat (i32 31) -// CHECK-NEXT: [[SHR165_I_I_I:%.*]] = lshr <4 x i32> [[XOR156_I_I_I]], [[SHR_MASK164_I_I_I]] -// CHECK-NEXT: [[OR166_I_I_I:%.*]] = or <4 x i32> [[SHL163_I_I_I]], [[SHR165_I_I_I]] -// CHECK-NEXT: [[SHL169_I_I_I:%.*]] = shl <4 x i32> [[XOR156_I_I_I]], [[SHL_MASK162_I_I_I]] -// CHECK-NEXT: [[SHR171_I_I_I:%.*]] = lshr <4 x i32> [[XOR157_I_I_I]], [[SHR_MASK164_I_I_I]] -// CHECK-NEXT: [[OR172_I_I_I:%.*]] = or <4 x i32> [[SHL169_I_I_I]], [[SHR171_I_I_I]] -// CHECK-NEXT: [[SHR176_I_I_I:%.*]] = lshr <4 x i32> [[OR166_I_I_I]], splat (i32 9) -// CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw <4 x i32> [[VECINIT6_I4_I_I]], splat (i32 23) -// CHECK-NEXT: [[REASS_SUB:%.*]] = sub nsw <4 x i32> [[SHR176_I_I_I]], [[TMP15]] -// CHECK-NEXT: [[TMP16:%.*]] = add <4 x i32> [[REASS_SUB]], splat (i32 1056964608) +// CHECK-SAME: ptr addrspace(1) noundef align 16 captures(none) [[F:%.*]]) +// local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] +// !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] +// !kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual +// [[META10:![0-9]+]] { CHECK-NEXT: [[ENTRY:.*:]] CHECK-NEXT: [[TMP0:%.*]] = +// load <4 x float>, ptr addrspace(1) [[F]], align 16, !tbaa [[TBAA11:![0-9]+]] +// CHECK-NEXT: [[ELT_ABS_I_I_I:%.*]] = tail call <4 x float> +// @llvm.fabs.v4f32(<4 x float> [[TMP0]]) CHECK-NEXT: [[CMP_I_I:%.*]] = fcmp +// olt <4 x float> [[ELT_ABS_I_I_I]], splat (float 0x4160000000000000) +// CHECK-NEXT: [[TMP1:%.*]] = tail call noundef <4 x float> +// @llvm.fmuladd.v4f32(<4 x float> [[ELT_ABS_I_I_I]], <4 x float> splat (float +// 0x3FE45F3060000000), <4 x float> splat (float 5.000000e-01)) CHECK-NEXT: +// [[ELT_TRUNC_I_I:%.*]] = tail call noundef <4 x float> @llvm.trunc.v4f32(<4 x +// float> [[TMP1]]) CHECK-NEXT: [[MUL_I30_I_I_I_I:%.*]] = fmul <4 x float> +// [[ELT_TRUNC_I_I]], splat (float 0x3FF921FB40000000) CHECK-NEXT: +// [[FNEG_I31_I_I_I_I:%.*]] = fneg <4 x float> [[MUL_I30_I_I_I_I]] CHECK-NEXT: +// [[TMP2:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> +// [[ELT_TRUNC_I_I]], <4 x float> splat (float 0x3FF921FB40000000), <4 x float> +// [[FNEG_I31_I_I_I_I]]) CHECK-NEXT: [[SUB_I_I_I_I:%.*]] = fsub <4 x float> +// [[ELT_ABS_I_I_I]], [[MUL_I30_I_I_I_I]] CHECK-NEXT: [[SUB2_I_I_I_I:%.*]] = +// fsub <4 x float> [[ELT_ABS_I_I_I]], [[SUB_I_I_I_I]] CHECK-NEXT: +// [[SUB3_I_I_I_I:%.*]] = fsub <4 x float> [[SUB2_I_I_I_I]], [[MUL_I30_I_I_I_I]] +// CHECK-NEXT: [[SUB4_I_I_I_I:%.*]] = fsub <4 x float> [[SUB3_I_I_I_I]], +// [[TMP2]] CHECK-NEXT: [[ADD_I_I_I_I:%.*]] = fadd <4 x float> +// [[SUB_I_I_I_I]], [[SUB4_I_I_I_I]] CHECK-NEXT: [[MUL_I27_I_I_I_I:%.*]] = +// fmul <4 x float> [[ELT_TRUNC_I_I]], splat (float 0x3E74442D00000000) +// CHECK-NEXT: [[FNEG_I28_I_I_I_I:%.*]] = fneg <4 x float> +// [[MUL_I27_I_I_I_I]] CHECK-NEXT: [[TMP3:%.*]] = tail call noundef <4 x +// float> @llvm.fma.v4f32(<4 x float> [[ELT_TRUNC_I_I]], <4 x float> splat +// (float 0x3E74442D00000000), <4 x float> [[FNEG_I28_I_I_I_I]]) CHECK-NEXT: +// [[SUB5_I_I_I_I:%.*]] = fsub <4 x float> [[ADD_I_I_I_I]], [[MUL_I27_I_I_I_I]] +// CHECK-NEXT: [[SUB6_I_I_I_I:%.*]] = fsub <4 x float> [[ADD_I_I_I_I]], +// [[SUB5_I_I_I_I]] CHECK-NEXT: [[SUB7_I_I_I_I:%.*]] = fsub <4 x float> +// [[SUB6_I_I_I_I]], [[MUL_I27_I_I_I_I]] CHECK-NEXT: [[SUB8_I_I_I_I:%.*]] = +// fsub <4 x float> [[SUB7_I_I_I_I]], [[TMP3]] CHECK-NEXT: [[ADD9_I_I_I_I:%.*]] +// = fadd <4 x float> [[SUB5_I_I_I_I]], [[SUB8_I_I_I_I]] CHECK-NEXT: +// [[MUL_I_I_I_I_I:%.*]] = fmul <4 x float> [[ELT_TRUNC_I_I]], splat (float +// 0x3CF8469880000000) CHECK-NEXT: [[FNEG_I_I_I_I_I:%.*]] = fneg <4 x float> +// [[MUL_I_I_I_I_I]] CHECK-NEXT: [[TMP4:%.*]] = tail call noundef <4 x float> +// @llvm.fma.v4f32(<4 x float> [[ELT_TRUNC_I_I]], <4 x float> splat (float +// 0x3CF8469880000000), <4 x float> [[FNEG_I_I_I_I_I]]) CHECK-NEXT: +// [[SUB10_I_I_I_I:%.*]] = fsub <4 x float> [[ADD9_I_I_I_I]], [[MUL_I_I_I_I_I]] +// CHECK-NEXT: [[SUB11_I_I_I_I:%.*]] = fsub <4 x float> [[ADD9_I_I_I_I]], +// [[SUB10_I_I_I_I]] CHECK-NEXT: [[SUB12_I_I_I_I:%.*]] = fsub <4 x float> +// [[SUB11_I_I_I_I]], [[MUL_I_I_I_I_I]] CHECK-NEXT: [[ADD13_I_I_I_I:%.*]] = +// fadd <4 x float> [[SUB10_I_I_I_I]], [[SUB12_I_I_I_I]] CHECK-NEXT: +// [[FNEG_I_I_I_I:%.*]] = fneg <4 x float> [[TMP4]] CHECK-NEXT: +// [[CONV_I_I_I:%.*]] = fptosi <4 x float> [[ELT_TRUNC_I_I]] to <4 x i32> +// CHECK-NEXT: [[ASTYPE_I_I_I:%.*]] = bitcast <4 x float> [[ELT_ABS_I_I_I]] +// to <4 x i32> CHECK-NEXT: [[SHR_I_I_I:%.*]] = lshr <4 x i32> +// [[ASTYPE_I_I_I]], splat (i32 23) CHECK-NEXT: [[AND_I11_I_I:%.*]] = and <4 +// x i32> [[ASTYPE_I_I_I]], splat (i32 8388607) CHECK-NEXT: [[OR_I_I_I:%.*]] +// = or disjoint <4 x i32> [[AND_I11_I_I]], splat (i32 8388608) CHECK-NEXT: +// [[MUL_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -28220501) +// CHECK-NEXT: [[CONV_I1_I27_I_I:%.*]] = zext nneg <4 x i32> [[OR_I_I_I]] to +// <4 x i64> CHECK-NEXT: [[MUL_I28_I_I:%.*]] = mul nuw nsw <4 x i64> +// [[CONV_I1_I27_I_I]], splat (i64 4266746795) CHECK-NEXT: [[SHR_I29_I_I:%.*]] = +// lshr <4 x i64> [[MUL_I28_I_I]], splat (i64 32) CHECK-NEXT: +// [[CONV_I2_I30_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I29_I_I]] to <4 x +// i32> CHECK-NEXT: [[MUL2_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat +// (i32 1011060801) CHECK-NEXT: [[ADD_I_I_I:%.*]] = add <4 x i32> +// [[MUL2_I_I_I]], [[CONV_I2_I30_I_I]] CHECK-NEXT: [[MUL_I24_I_I:%.*]] = mul +// nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 1011060801) CHECK-NEXT: +// [[SHR_I25_I_I:%.*]] = lshr <4 x i64> [[MUL_I24_I_I]], splat (i64 32) +// CHECK-NEXT: [[CONV_I2_I26_I_I:%.*]] = trunc nuw nsw <4 x i64> +// [[SHR_I25_I_I]] to <4 x i32> CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult <4 x +// i32> [[ADD_I_I_I]], [[CONV_I2_I30_I_I]] CHECK-NEXT: [[SEXT_I_I1_I:%.*]] = +// zext <4 x i1> [[CMP_I_I_I]] to <4 x i32> CHECK-NEXT: [[ADD5_I_I_I:%.*]] = +// add nuw nsw <4 x i32> [[SEXT_I_I1_I]], [[CONV_I2_I26_I_I]] CHECK-NEXT: +// [[MUL6_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -614296167) +// CHECK-NEXT: [[ADD7_I_I_I:%.*]] = add <4 x i32> [[ADD5_I_I_I]], +// [[MUL6_I_I_I]] CHECK-NEXT: [[MUL_I20_I_I:%.*]] = mul nuw nsw <4 x i64> +// [[CONV_I1_I27_I_I]], splat (i64 3680671129) CHECK-NEXT: [[SHR_I21_I_I:%.*]] = +// lshr <4 x i64> [[MUL_I20_I_I]], splat (i64 32) CHECK-NEXT: +// [[CONV_I2_I22_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I21_I_I]] to <4 x +// i32> CHECK-NEXT: [[CMP9_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD7_I_I_I]], +// [[ADD5_I_I_I]] CHECK-NEXT: [[SEXT10_I_I_I:%.*]] = zext <4 x i1> +// [[CMP9_I_I_I]] to <4 x i32> CHECK-NEXT: [[ADD13_I_I_I:%.*]] = add nuw nsw +// <4 x i32> [[SEXT10_I_I_I]], [[CONV_I2_I22_I_I]] CHECK-NEXT: +// [[MUL14_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -181084736) +// CHECK-NEXT: [[ADD15_I_I_I:%.*]] = add <4 x i32> [[ADD13_I_I_I]], +// [[MUL14_I_I_I]] CHECK-NEXT: [[MUL_I16_I_I:%.*]] = mul nuw nsw <4 x i64> +// [[CONV_I1_I27_I_I]], splat (i64 4113882560) CHECK-NEXT: [[SHR_I17_I_I:%.*]] = +// lshr <4 x i64> [[MUL_I16_I_I]], splat (i64 32) CHECK-NEXT: +// [[CONV_I2_I18_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I17_I_I]] to <4 x +// i32> CHECK-NEXT: [[CMP17_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD15_I_I_I]], +// [[ADD13_I_I_I]] CHECK-NEXT: [[SEXT18_I_I_I:%.*]] = zext <4 x i1> +// [[CMP17_I_I_I]] to <4 x i32> CHECK-NEXT: [[ADD21_I_I_I:%.*]] = add nuw nsw +// <4 x i32> [[SEXT18_I_I_I]], [[CONV_I2_I18_I_I]] CHECK-NEXT: +// [[MUL22_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -64530479) +// CHECK-NEXT: [[ADD23_I_I_I:%.*]] = add <4 x i32> [[ADD21_I_I_I]], +// [[MUL22_I_I_I]] CHECK-NEXT: [[MUL_I12_I_I:%.*]] = mul nuw nsw <4 x i64> +// [[CONV_I1_I27_I_I]], splat (i64 4230436817) CHECK-NEXT: [[SHR_I13_I_I:%.*]] = +// lshr <4 x i64> [[MUL_I12_I_I]], splat (i64 32) CHECK-NEXT: +// [[CONV_I2_I14_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I13_I_I]] to <4 x +// i32> CHECK-NEXT: [[CMP25_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD23_I_I_I]], +// [[ADD21_I_I_I]] CHECK-NEXT: [[SEXT26_I_I_I:%.*]] = zext <4 x i1> +// [[CMP25_I_I_I]] to <4 x i32> CHECK-NEXT: [[ADD29_I_I_I:%.*]] = add nuw nsw +// <4 x i32> [[SEXT26_I_I_I]], [[CONV_I2_I14_I_I]] CHECK-NEXT: +// [[MUL30_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 1313084713) +// CHECK-NEXT: [[ADD31_I_I_I:%.*]] = add <4 x i32> [[ADD29_I_I_I]], +// [[MUL30_I_I_I]] CHECK-NEXT: [[MUL_I8_I_I:%.*]] = mul nuw nsw <4 x i64> +// [[CONV_I1_I27_I_I]], splat (i64 1313084713) CHECK-NEXT: [[SHR_I9_I_I:%.*]] +// = lshr <4 x i64> [[MUL_I8_I_I]], splat (i64 32) CHECK-NEXT: +// [[CONV_I2_I10_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I9_I_I]] to <4 x i32> +// CHECK-NEXT: [[CMP33_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD31_I_I_I]], +// [[ADD29_I_I_I]] CHECK-NEXT: [[SEXT34_I_I_I:%.*]] = zext <4 x i1> +// [[CMP33_I_I_I]] to <4 x i32> CHECK-NEXT: [[ADD37_I_I_I:%.*]] = add nuw nsw +// <4 x i32> [[SEXT34_I_I_I]], [[CONV_I2_I10_I_I]] CHECK-NEXT: +// [[MUL38_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -1560706194) +// CHECK-NEXT: [[ADD39_I_I_I:%.*]] = add <4 x i32> [[ADD37_I_I_I]], +// [[MUL38_I_I_I]] CHECK-NEXT: [[MUL_I5_I_I:%.*]] = mul nuw nsw <4 x i64> +// [[CONV_I1_I27_I_I]], splat (i64 2734261102) CHECK-NEXT: [[SHR_I6_I_I:%.*]] +// = lshr <4 x i64> [[MUL_I5_I_I]], splat (i64 32) CHECK-NEXT: +// [[CONV_I2_I_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I6_I_I]] to <4 x i32> +// CHECK-NEXT: [[CMP41_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD39_I_I_I]], +// [[ADD37_I_I_I]] CHECK-NEXT: [[SEXT42_I_I_I:%.*]] = zext <4 x i1> +// [[CMP41_I_I_I]] to <4 x i32> CHECK-NEXT: [[ADD45_I_I_I:%.*]] = add nuw nsw +// <4 x i32> [[SEXT42_I_I_I]], [[CONV_I2_I_I_I]] CHECK-NEXT: [[SUB47_I_I_I:%.*]] +// = add nsw <4 x i32> [[SHR_I_I_I]], splat (i32 -120) CHECK-NEXT: +// [[CMP48_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB47_I_I_I]], splat (i32 31) +// CHECK-NEXT: [[COND51_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x +// i32> [[ADD39_I_I_I]], <4 x i32> [[ADD45_I_I_I]] CHECK-NEXT: +// [[COND53_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> +// [[ADD31_I_I_I]], <4 x i32> [[ADD39_I_I_I]] CHECK-NEXT: [[COND55_I_I_I:%.*]] = +// select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD23_I_I_I]], <4 x i32> +// [[ADD31_I_I_I]] CHECK-NEXT: [[COND57_I_I_I:%.*]] = select <4 x i1> +// [[CMP48_I_I_I]], <4 x i32> [[ADD15_I_I_I]], <4 x i32> [[ADD23_I_I_I]] +// CHECK-NEXT: [[COND59_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x +// i32> [[ADD7_I_I_I]], <4 x i32> [[ADD15_I_I_I]] CHECK-NEXT: +// [[COND61_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> +// [[ADD_I_I_I]], <4 x i32> [[ADD7_I_I_I]] CHECK-NEXT: [[COND63_I_I_I:%.*]] = +// select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[MUL_I_I_I]], <4 x i32> +// [[ADD_I_I_I]] CHECK-NEXT: [[DOTNEG_I_I_I:%.*]] = select <4 x i1> +// [[CMP48_I_I_I]], <4 x i32> splat (i32 -32), <4 x i32> zeroinitializer +// CHECK-NEXT: [[SUB66_I_I_I:%.*]] = add nsw <4 x i32> [[DOTNEG_I_I_I]], +// [[SUB47_I_I_I]] CHECK-NEXT: [[CMP67_I_I_I:%.*]] = icmp ugt <4 x i32> +// [[SUB66_I_I_I]], splat (i32 31) CHECK-NEXT: [[COND70_I_I_I:%.*]] = select +// <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND53_I_I_I]], <4 x i32> +// [[COND51_I_I_I]] CHECK-NEXT: [[COND72_I_I_I:%.*]] = select <4 x i1> +// [[CMP67_I_I_I]], <4 x i32> [[COND55_I_I_I]], <4 x i32> [[COND53_I_I_I]] +// CHECK-NEXT: [[COND74_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x +// i32> [[COND57_I_I_I]], <4 x i32> [[COND55_I_I_I]] CHECK-NEXT: +// [[COND76_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> +// [[COND59_I_I_I]], <4 x i32> [[COND57_I_I_I]] CHECK-NEXT: [[COND78_I_I_I:%.*]] +// = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND61_I_I_I]], <4 x i32> +// [[COND59_I_I_I]] CHECK-NEXT: [[COND80_I_I_I:%.*]] = select <4 x i1> +// [[CMP67_I_I_I]], <4 x i32> [[COND63_I_I_I]], <4 x i32> [[COND61_I_I_I]] +// CHECK-NEXT: [[DOTNEG379_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 +// x i32> splat (i32 -32), <4 x i32> zeroinitializer CHECK-NEXT: +// [[SUB83_I_I_I:%.*]] = add nsw <4 x i32> [[DOTNEG379_I_I_I]], [[SUB66_I_I_I]] +// CHECK-NEXT: [[CMP84_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB83_I_I_I]], +// splat (i32 31) CHECK-NEXT: [[COND87_I_I_I:%.*]] = select <4 x i1> +// [[CMP84_I_I_I]], <4 x i32> [[COND72_I_I_I]], <4 x i32> [[COND70_I_I_I]] +// CHECK-NEXT: [[COND89_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x +// i32> [[COND74_I_I_I]], <4 x i32> [[COND72_I_I_I]] CHECK-NEXT: +// [[COND91_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> +// [[COND76_I_I_I]], <4 x i32> [[COND74_I_I_I]] CHECK-NEXT: [[COND93_I_I_I:%.*]] +// = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> [[COND78_I_I_I]], <4 x i32> +// [[COND76_I_I_I]] CHECK-NEXT: [[COND95_I_I_I:%.*]] = select <4 x i1> +// [[CMP84_I_I_I]], <4 x i32> [[COND80_I_I_I]], <4 x i32> [[COND78_I_I_I]] +// CHECK-NEXT: [[DOTNEG380_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 +// x i32> splat (i32 -32), <4 x i32> zeroinitializer CHECK-NEXT: +// [[SUB98_I_I_I:%.*]] = add nsw <4 x i32> [[DOTNEG380_I_I_I]], [[SUB83_I_I_I]] +// CHECK-NEXT: [[CMP99_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB98_I_I_I]], +// splat (i32 31) CHECK-NEXT: [[COND102_I_I_I:%.*]] = select <4 x i1> +// [[CMP99_I_I_I]], <4 x i32> [[COND89_I_I_I]], <4 x i32> [[COND87_I_I_I]] +// CHECK-NEXT: [[COND104_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x +// i32> [[COND91_I_I_I]], <4 x i32> [[COND89_I_I_I]] CHECK-NEXT: +// [[COND106_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32> +// [[COND93_I_I_I]], <4 x i32> [[COND91_I_I_I]] CHECK-NEXT: +// [[COND108_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32> +// [[COND95_I_I_I]], <4 x i32> [[COND93_I_I_I]] CHECK-NEXT: +// [[DOTNEG381_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32> splat +// (i32 -32), <4 x i32> zeroinitializer CHECK-NEXT: [[SUB111_I_I_I:%.*]] = +// sub nsw <4 x i32> zeroinitializer, [[SUB98_I_I_I]] CHECK-NEXT: +// [[CMP112_NOT_I_I_I:%.*]] = icmp eq <4 x i32> [[DOTNEG381_I_I_I]], +// [[SUB111_I_I_I]] CHECK-NEXT: [[SUB114_I_I_I:%.*]] = sub nsw <4 x i32> +// splat (i32 24), [[SHR_I_I_I]] CHECK-NEXT: [[SHL_MASK_I_I_I:%.*]] = and <4 +// x i32> [[SUB47_I_I_I]], splat (i32 31) CHECK-NEXT: [[SHL_I_I_I:%.*]] = shl +// <4 x i32> [[COND102_I_I_I]], [[SHL_MASK_I_I_I]] CHECK-NEXT: +// [[SHR_MASK_I_I_I:%.*]] = and <4 x i32> [[SUB114_I_I_I]], splat (i32 31) +// CHECK-NEXT: [[SHR116_I_I_I:%.*]] = lshr <4 x i32> [[COND104_I_I_I]], +// [[SHR_MASK_I_I_I]] CHECK-NEXT: [[OR117_I_I_I:%.*]] = or <4 x i32> +// [[SHL_I_I_I]], [[SHR116_I_I_I]] CHECK-NEXT: [[SHL120_I_I_I:%.*]] = shl <4 +// x i32> [[COND104_I_I_I]], [[SHL_MASK_I_I_I]] CHECK-NEXT: [[SHR122_I_I_I:%.*]] +// = lshr <4 x i32> [[COND106_I_I_I]], [[SHR_MASK_I_I_I]] CHECK-NEXT: +// [[OR123_I_I_I:%.*]] = or <4 x i32> [[SHL120_I_I_I]], [[SHR122_I_I_I]] +// CHECK-NEXT: [[SHL126_I_I_I:%.*]] = shl <4 x i32> [[COND106_I_I_I]], +// [[SHL_MASK_I_I_I]] CHECK-NEXT: [[SHR128_I_I_I:%.*]] = lshr <4 x i32> +// [[COND108_I_I_I]], [[SHR_MASK_I_I_I]] CHECK-NEXT: [[OR129_I_I_I:%.*]] = or +// <4 x i32> [[SHL126_I_I_I]], [[SHR128_I_I_I]] CHECK-NEXT: +// [[COND131_I_I_I:%.*]] = select <4 x i1> [[CMP112_NOT_I_I_I]], <4 x i32> +// [[COND102_I_I_I]], <4 x i32> [[OR117_I_I_I]] CHECK-NEXT: +// [[COND133_I_I_I:%.*]] = select <4 x i1> [[CMP112_NOT_I_I_I]], <4 x i32> +// [[COND104_I_I_I]], <4 x i32> [[OR123_I_I_I]] CHECK-NEXT: +// [[COND135_I_I_I:%.*]] = select <4 x i1> [[CMP112_NOT_I_I_I]], <4 x i32> +// [[COND106_I_I_I]], <4 x i32> [[OR129_I_I_I]] CHECK-NEXT: [[SHR136_I_I_I:%.*]] +// = lshr <4 x i32> [[COND131_I_I_I]], splat (i32 29) CHECK-NEXT: +// [[OR139_I_I_I:%.*]] = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> +// [[COND131_I_I_I]], <4 x i32> [[COND133_I_I_I]], <4 x i32> splat (i32 2)) +// CHECK-NEXT: [[OR142_I_I_I:%.*]] = tail call <4 x i32> @llvm.fshl.v4i32(<4 +// x i32> [[COND133_I_I_I]], <4 x i32> [[COND135_I_I_I]], <4 x i32> splat (i32 +// 2)) CHECK-NEXT: [[OR145_I_I_I:%.*]] = tail call <4 x i32> +// @llvm.fshl.v4i32(<4 x i32> [[COND135_I_I_I]], <4 x i32> [[COND108_I_I_I]], <4 +// x i32> splat (i32 2)) CHECK-NEXT: [[AND146_I_I_I:%.*]] = and <4 x i32> +// [[SHR136_I_I_I]], splat (i32 1) CHECK-NEXT: [[SEXT148_I_I_I:%.*]] = sub +// nsw <4 x i32> zeroinitializer, [[AND146_I_I_I]] CHECK-NEXT: [[TMP5:%.*]] = +// and <4 x i32> [[SEXT148_I_I_I]], splat (i32 -2147483648) CHECK-NEXT: +// [[XOR_I_I_I:%.*]] = xor <4 x i32> [[OR139_I_I_I]], [[SEXT148_I_I_I]] +// CHECK-NEXT: [[XOR156_I_I_I:%.*]] = xor <4 x i32> [[OR142_I_I_I]], +// [[SEXT148_I_I_I]] CHECK-NEXT: [[XOR157_I_I_I:%.*]] = xor <4 x i32> +// [[OR145_I_I_I]], [[SEXT148_I_I_I]] CHECK-NEXT: [[TMP6:%.*]] = +// extractelement <4 x i32> [[XOR_I_I_I]], i64 0 CHECK-NEXT: [[TMP7:%.*]] = +// tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP6]], i1 false) +// CHECK-NEXT: [[VECINIT_I1_I_I:%.*]] = insertelement <4 x i32> poison, i32 +// [[TMP7]], i64 0 CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> +// [[XOR_I_I_I]], i64 1 CHECK-NEXT: [[TMP9:%.*]] = tail call range(i32 0, 33) +// i32 @llvm.ctlz.i32(i32 [[TMP8]], i1 false) CHECK-NEXT: +// [[VECINIT2_I2_I_I:%.*]] = insertelement <4 x i32> [[VECINIT_I1_I_I]], i32 +// [[TMP9]], i64 1 CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> +// [[XOR_I_I_I]], i64 2 CHECK-NEXT: [[TMP11:%.*]] = tail call range(i32 0, +// 33) i32 @llvm.ctlz.i32(i32 [[TMP10]], i1 false) CHECK-NEXT: +// [[VECINIT4_I3_I_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I2_I_I]], i32 +// [[TMP11]], i64 2 CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> +// [[XOR_I_I_I]], i64 3 CHECK-NEXT: [[TMP13:%.*]] = tail call range(i32 0, +// 33) i32 @llvm.ctlz.i32(i32 [[TMP12]], i1 false) CHECK-NEXT: +// [[VECINIT6_I4_I_I:%.*]] = insertelement <4 x i32> [[VECINIT4_I3_I_I]], i32 +// [[TMP13]], i64 3 CHECK-NEXT: [[ADD159_I_I_I:%.*]] = add nuw nsw <4 x i32> +// [[VECINIT6_I4_I_I]], splat (i32 1) CHECK-NEXT: [[SHL_MASK162_I_I_I:%.*]] = +// and <4 x i32> [[ADD159_I_I_I]], splat (i32 31) CHECK-NEXT: +// [[SHL163_I_I_I:%.*]] = shl <4 x i32> [[XOR_I_I_I]], [[SHL_MASK162_I_I_I]] +// CHECK-NEXT: [[TMP14:%.*]] = and <4 x i32> [[VECINIT6_I4_I_I]], splat (i32 +// 31) CHECK-NEXT: [[SHR_MASK164_I_I_I:%.*]] = xor <4 x i32> [[TMP14]], splat +// (i32 31) CHECK-NEXT: [[SHR165_I_I_I:%.*]] = lshr <4 x i32> +// [[XOR156_I_I_I]], [[SHR_MASK164_I_I_I]] CHECK-NEXT: [[OR166_I_I_I:%.*]] = +// or <4 x i32> [[SHL163_I_I_I]], [[SHR165_I_I_I]] CHECK-NEXT: +// [[SHL169_I_I_I:%.*]] = shl <4 x i32> [[XOR156_I_I_I]], [[SHL_MASK162_I_I_I]] +// CHECK-NEXT: [[SHR171_I_I_I:%.*]] = lshr <4 x i32> [[XOR157_I_I_I]], +// [[SHR_MASK164_I_I_I]] CHECK-NEXT: [[OR172_I_I_I:%.*]] = or <4 x i32> +// [[SHL169_I_I_I]], [[SHR171_I_I_I]] CHECK-NEXT: [[SHR176_I_I_I:%.*]] = lshr +// <4 x i32> [[OR166_I_I_I]], splat (i32 9) CHECK-NEXT: [[TMP15:%.*]] = shl +// nuw nsw <4 x i32> [[VECINIT6_I4_I_I]], splat (i32 23) CHECK-NEXT: +// [[REASS_SUB:%.*]] = sub nsw <4 x i32> [[SHR176_I_I_I]], [[TMP15]] CHECK-NEXT: +// [[TMP16:%.*]] = add <4 x i32> [[REASS_SUB]], splat (i32 1056964608) // CHECK-NEXT: [[OR177_I_I_I:%.*]] = or <4 x i32> [[TMP16]], [[TMP5]] -// CHECK-NEXT: [[ASTYPE178_I_I_I:%.*]] = bitcast <4 x i32> [[OR177_I_I_I]] to <4 x float> -// CHECK-NEXT: [[OR181_I_I_I:%.*]] = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[OR166_I_I_I]], <4 x i32> [[OR172_I_I_I]], <4 x i32> splat (i32 23)) -// CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[OR181_I_I_I]], i64 0 -// CHECK-NEXT: [[TMP18:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP17]], i1 false) -// CHECK-NEXT: [[VECINIT_I_I_I:%.*]] = insertelement <4 x i32> poison, i32 [[TMP18]], i64 0 -// CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i32> [[OR181_I_I_I]], i64 1 -// CHECK-NEXT: [[TMP20:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP19]], i1 false) -// CHECK-NEXT: [[VECINIT2_I_I_I:%.*]] = insertelement <4 x i32> [[VECINIT_I_I_I]], i32 [[TMP20]], i64 1 -// CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[OR181_I_I_I]], i64 2 -// CHECK-NEXT: [[TMP22:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP21]], i1 false) -// CHECK-NEXT: [[VECINIT4_I_I_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I_I_I]], i32 [[TMP22]], i64 2 -// CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[OR181_I_I_I]], i64 3 -// CHECK-NEXT: [[TMP24:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP23]], i1 false) -// CHECK-NEXT: [[VECINIT6_I_I_I:%.*]] = insertelement <4 x i32> [[VECINIT4_I_I_I]], i32 [[TMP24]], i64 3 -// CHECK-NEXT: [[ADD183_I_I_NEG_I:%.*]] = xor <4 x i32> [[VECINIT6_I_I_I]], splat (i32 -1) -// CHECK-NEXT: [[ADD183_I_I_I:%.*]] = add nuw nsw <4 x i32> [[VECINIT6_I_I_I]], splat (i32 1) -// CHECK-NEXT: [[SHL_MASK186_I_I_I:%.*]] = and <4 x i32> [[ADD183_I_I_I]], splat (i32 31) -// CHECK-NEXT: [[SHL187_I_I_I:%.*]] = shl <4 x i32> [[OR181_I_I_I]], [[SHL_MASK186_I_I_I]] -// CHECK-NEXT: [[TMP25:%.*]] = and <4 x i32> [[VECINIT6_I_I_I]], splat (i32 31) -// CHECK-NEXT: [[SHR_MASK189_I_I_I:%.*]] = xor <4 x i32> [[TMP25]], splat (i32 31) -// CHECK-NEXT: [[SHR190_I_I_I:%.*]] = lshr <4 x i32> [[OR172_I_I_I]], [[SHR_MASK189_I_I_I]] -// CHECK-NEXT: [[OR191_I_I_I:%.*]] = or <4 x i32> [[SHL187_I_I_I]], [[SHR190_I_I_I]] -// CHECK-NEXT: [[REASS_SUB10_I:%.*]] = sub nuw nsw <4 x i32> [[ADD183_I_I_NEG_I]], [[VECINIT6_I4_I_I]] -// CHECK-NEXT: [[ADD193_I_I_NEG_I:%.*]] = shl <4 x i32> [[REASS_SUB10_I]], splat (i32 23) -// CHECK-NEXT: [[SHR197_I_I_I:%.*]] = lshr <4 x i32> [[OR191_I_I_I]], splat (i32 9) -// CHECK-NEXT: [[REASS_SUB16_I_I:%.*]] = add <4 x i32> [[ADD193_I_I_NEG_I]], splat (i32 864026624) -// CHECK-NEXT: [[TMP26:%.*]] = or disjoint <4 x i32> [[SHR197_I_I_I]], [[REASS_SUB16_I_I]] -// CHECK-NEXT: [[OR198_I_I_I:%.*]] = or <4 x i32> [[TMP26]], [[TMP5]] -// CHECK-NEXT: [[ASTYPE199_I_I_I:%.*]] = bitcast <4 x i32> [[OR198_I_I_I]] to <4 x float> -// CHECK-NEXT: [[MUL200_I_I_I:%.*]] = fmul <4 x float> [[ASTYPE178_I_I_I]], splat (float 0x3FF921FB40000000) -// CHECK-NEXT: [[FNEG_I_I_I:%.*]] = fneg <4 x float> [[MUL200_I_I_I]] -// CHECK-NEXT: [[TMP27:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[ASTYPE178_I_I_I]], <4 x float> splat (float 0x3FF921FB40000000), <4 x float> [[FNEG_I_I_I]]) -// CHECK-NEXT: [[TMP28:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[ASTYPE178_I_I_I]], <4 x float> splat (float 0x3E74442D00000000), <4 x float> [[TMP27]]) -// CHECK-NEXT: [[TMP29:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[ASTYPE199_I_I_I]], <4 x float> splat (float 0x3FF921FB40000000), <4 x float> [[TMP28]]) -// CHECK-NEXT: [[ADD204_I_I_I:%.*]] = fadd <4 x float> [[MUL200_I_I_I]], [[TMP29]] -// CHECK-NEXT: [[SUB205_I_I_I:%.*]] = fsub <4 x float> [[ADD204_I_I_I]], [[MUL200_I_I_I]] -// CHECK-NEXT: [[SUB206_I_I_I:%.*]] = fsub <4 x float> [[TMP29]], [[SUB205_I_I_I]] -// CHECK-NEXT: [[SHR207_I_I_I:%.*]] = lshr <4 x i32> [[COND131_I_I_I]], splat (i32 30) -// CHECK-NEXT: [[ADD209_I_I_I:%.*]] = add nuw nsw <4 x i32> [[AND146_I_I_I]], [[SHR207_I_I_I]] -// CHECK-NEXT: [[COND_V_I2_I:%.*]] = select <4 x i1> [[CMP_I_I]], <4 x float> [[ADD13_I_I_I_I]], <4 x float> [[ADD204_I_I_I]] -// CHECK-NEXT: [[COND4_V_I_I:%.*]] = select <4 x i1> [[CMP_I_I]], <4 x float> [[FNEG_I_I_I_I]], <4 x float> [[SUB206_I_I_I]] -// CHECK-NEXT: [[COND6_V_I_I:%.*]] = select <4 x i1> [[CMP_I_I]], <4 x i32> [[CONV_I_I_I]], <4 x i32> [[ADD209_I_I_I]] -// CHECK-NEXT: [[COND6_I_I:%.*]] = and <4 x i32> [[COND6_V_I_I]], splat (i32 2) -// CHECK-NEXT: [[MUL_I_I:%.*]] = fmul <4 x float> [[COND_V_I2_I]], [[COND_V_I2_I]] -// CHECK-NEXT: [[MUL1_I_I:%.*]] = fmul <4 x float> [[COND_V_I2_I]], [[MUL_I_I]] -// CHECK-NEXT: [[TMP30:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> splat (float 0x3DE5D93A60000000), <4 x float> splat (float 0xBE5AE5E680000000)) -// CHECK-NEXT: [[TMP31:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP30]], <4 x float> splat (float 0x3EC6DBE4A0000000)) -// CHECK-NEXT: [[TMP32:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP31]], <4 x float> splat (float 0xBF2A013A80000000)) -// CHECK-NEXT: [[TMP33:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP32]], <4 x float> splat (float 0x3F811110E0000000)) -// CHECK-NEXT: [[FNEG_I3_I:%.*]] = fneg <4 x float> [[MUL1_I_I]] -// CHECK-NEXT: [[MUL5_I_I:%.*]] = fmul <4 x float> [[TMP33]], [[FNEG_I3_I]] -// CHECK-NEXT: [[TMP34:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[COND4_V_I_I]], <4 x float> splat (float 5.000000e-01), <4 x float> [[MUL5_I_I]]) -// CHECK-NEXT: [[FNEG7_I_I:%.*]] = fneg <4 x float> [[COND4_V_I_I]] -// CHECK-NEXT: [[TMP35:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP34]], <4 x float> [[FNEG7_I_I]]) -// CHECK-NEXT: [[TMP36:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL1_I_I]], <4 x float> splat (float 0x3FC5555560000000), <4 x float> [[TMP35]]) -// CHECK-NEXT: [[SUB_I_I:%.*]] = fsub <4 x float> [[COND_V_I2_I]], [[TMP36]] -// CHECK-NEXT: [[FNEG_I_I:%.*]] = fneg <4 x float> [[SUB_I_I]] -// CHECK-NEXT: [[TMP37:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> splat (float 0xBDA8FAE9C0000000), <4 x float> splat (float 0x3E21EE9EC0000000)) -// CHECK-NEXT: [[TMP38:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP37]], <4 x float> splat (float 0xBE92524740000000)) -// CHECK-NEXT: [[TMP39:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP38]], <4 x float> splat (float 0x3EFA015C40000000)) -// CHECK-NEXT: [[TMP40:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP39]], <4 x float> splat (float 0xBF56C16C00000000)) -// CHECK-NEXT: [[TMP41:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP40]], <4 x float> splat (float 0x3FA5555560000000)) -// CHECK-NEXT: [[MUL5_I5_I:%.*]] = fmul <4 x float> [[MUL_I_I]], [[TMP41]] -// CHECK-NEXT: [[TMP42:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[COND_V_I2_I]]) -// CHECK-NEXT: [[AND_I_I:%.*]] = bitcast <4 x float> [[TMP42]] to <4 x i32> -// CHECK-NEXT: [[SUB_I6_I:%.*]] = add nsw <4 x i32> [[AND_I_I]], splat (i32 -16777216) -// CHECK-NEXT: [[TMP43:%.*]] = add nsw <4 x i32> [[AND_I_I]], splat (i32 -1050253722) -// CHECK-NEXT: [[AND938_I_I:%.*]] = icmp ult <4 x i32> [[TMP43]], splat (i32 11429479) -// CHECK-NEXT: [[TMP44:%.*]] = select <4 x i1> [[AND938_I_I]], <4 x i32> [[SUB_I6_I]], <4 x i32> zeroinitializer -// CHECK-NEXT: [[CMP11_I_I:%.*]] = icmp samesign ugt <4 x i32> [[AND_I_I]], splat (i32 1061683200) -// CHECK-NEXT: [[COND14_I_I:%.*]] = select <4 x i1> [[CMP11_I_I]], <4 x i32> splat (i32 1049624576), <4 x i32> [[TMP44]] -// CHECK-NEXT: [[TMP45:%.*]] = bitcast <4 x i32> [[COND14_I_I]] to <4 x float> -// CHECK-NEXT: [[FNEG_I7_I:%.*]] = fneg <4 x float> [[TMP45]] -// CHECK-NEXT: [[TMP46:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> splat (float 5.000000e-01), <4 x float> [[FNEG_I7_I]]) -// CHECK-NEXT: [[SUB16_I_I:%.*]] = fsub <4 x float> splat (float 1.000000e+00), [[TMP45]] +// CHECK-NEXT: [[ASTYPE178_I_I_I:%.*]] = bitcast <4 x i32> [[OR177_I_I_I]] to +// <4 x float> CHECK-NEXT: [[OR181_I_I_I:%.*]] = tail call <4 x i32> +// @llvm.fshl.v4i32(<4 x i32> [[OR166_I_I_I]], <4 x i32> [[OR172_I_I_I]], <4 x +// i32> splat (i32 23)) CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> +// [[OR181_I_I_I]], i64 0 CHECK-NEXT: [[TMP18:%.*]] = tail call range(i32 0, +// 33) i32 @llvm.ctlz.i32(i32 [[TMP17]], i1 false) CHECK-NEXT: +// [[VECINIT_I_I_I:%.*]] = insertelement <4 x i32> poison, i32 [[TMP18]], i64 0 +// CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i32> [[OR181_I_I_I]], i64 +// 1 CHECK-NEXT: [[TMP20:%.*]] = tail call range(i32 0, 33) i32 +// @llvm.ctlz.i32(i32 [[TMP19]], i1 false) CHECK-NEXT: [[VECINIT2_I_I_I:%.*]] +// = insertelement <4 x i32> [[VECINIT_I_I_I]], i32 [[TMP20]], i64 1 CHECK-NEXT: +// [[TMP21:%.*]] = extractelement <4 x i32> [[OR181_I_I_I]], i64 2 CHECK-NEXT: +// [[TMP22:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP21]], +// i1 false) CHECK-NEXT: [[VECINIT4_I_I_I:%.*]] = insertelement <4 x i32> +// [[VECINIT2_I_I_I]], i32 [[TMP22]], i64 2 CHECK-NEXT: [[TMP23:%.*]] = +// extractelement <4 x i32> [[OR181_I_I_I]], i64 3 CHECK-NEXT: [[TMP24:%.*]] +// = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP23]], i1 false) +// CHECK-NEXT: [[VECINIT6_I_I_I:%.*]] = insertelement <4 x i32> +// [[VECINIT4_I_I_I]], i32 [[TMP24]], i64 3 CHECK-NEXT: [[ADD183_I_I_NEG_I:%.*]] +// = xor <4 x i32> [[VECINIT6_I_I_I]], splat (i32 -1) CHECK-NEXT: +// [[ADD183_I_I_I:%.*]] = add nuw nsw <4 x i32> [[VECINIT6_I_I_I]], splat (i32 +// 1) CHECK-NEXT: [[SHL_MASK186_I_I_I:%.*]] = and <4 x i32> [[ADD183_I_I_I]], +// splat (i32 31) CHECK-NEXT: [[SHL187_I_I_I:%.*]] = shl <4 x i32> +// [[OR181_I_I_I]], [[SHL_MASK186_I_I_I]] CHECK-NEXT: [[TMP25:%.*]] = and <4 +// x i32> [[VECINIT6_I_I_I]], splat (i32 31) CHECK-NEXT: +// [[SHR_MASK189_I_I_I:%.*]] = xor <4 x i32> [[TMP25]], splat (i32 31) +// CHECK-NEXT: [[SHR190_I_I_I:%.*]] = lshr <4 x i32> [[OR172_I_I_I]], +// [[SHR_MASK189_I_I_I]] CHECK-NEXT: [[OR191_I_I_I:%.*]] = or <4 x i32> +// [[SHL187_I_I_I]], [[SHR190_I_I_I]] CHECK-NEXT: [[REASS_SUB10_I:%.*]] = sub +// nuw nsw <4 x i32> [[ADD183_I_I_NEG_I]], [[VECINIT6_I4_I_I]] CHECK-NEXT: +// [[ADD193_I_I_NEG_I:%.*]] = shl <4 x i32> [[REASS_SUB10_I]], splat (i32 23) +// CHECK-NEXT: [[SHR197_I_I_I:%.*]] = lshr <4 x i32> [[OR191_I_I_I]], splat +// (i32 9) CHECK-NEXT: [[REASS_SUB16_I_I:%.*]] = add <4 x i32> +// [[ADD193_I_I_NEG_I]], splat (i32 864026624) CHECK-NEXT: [[TMP26:%.*]] = or +// disjoint <4 x i32> [[SHR197_I_I_I]], [[REASS_SUB16_I_I]] CHECK-NEXT: +// [[OR198_I_I_I:%.*]] = or <4 x i32> [[TMP26]], [[TMP5]] CHECK-NEXT: +// [[ASTYPE199_I_I_I:%.*]] = bitcast <4 x i32> [[OR198_I_I_I]] to <4 x float> +// CHECK-NEXT: [[MUL200_I_I_I:%.*]] = fmul <4 x float> [[ASTYPE178_I_I_I]], +// splat (float 0x3FF921FB40000000) CHECK-NEXT: [[FNEG_I_I_I:%.*]] = fneg <4 +// x float> [[MUL200_I_I_I]] CHECK-NEXT: [[TMP27:%.*]] = tail call noundef <4 +// x float> @llvm.fma.v4f32(<4 x float> [[ASTYPE178_I_I_I]], <4 x float> splat +// (float 0x3FF921FB40000000), <4 x float> [[FNEG_I_I_I]]) CHECK-NEXT: +// [[TMP28:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> +// [[ASTYPE178_I_I_I]], <4 x float> splat (float 0x3E74442D00000000), <4 x +// float> [[TMP27]]) CHECK-NEXT: [[TMP29:%.*]] = tail call noundef <4 x +// float> @llvm.fma.v4f32(<4 x float> [[ASTYPE199_I_I_I]], <4 x float> splat +// (float 0x3FF921FB40000000), <4 x float> [[TMP28]]) CHECK-NEXT: +// [[ADD204_I_I_I:%.*]] = fadd <4 x float> [[MUL200_I_I_I]], [[TMP29]] +// CHECK-NEXT: [[SUB205_I_I_I:%.*]] = fsub <4 x float> [[ADD204_I_I_I]], +// [[MUL200_I_I_I]] CHECK-NEXT: [[SUB206_I_I_I:%.*]] = fsub <4 x float> +// [[TMP29]], [[SUB205_I_I_I]] CHECK-NEXT: [[SHR207_I_I_I:%.*]] = lshr <4 x +// i32> [[COND131_I_I_I]], splat (i32 30) CHECK-NEXT: [[ADD209_I_I_I:%.*]] = +// add nuw nsw <4 x i32> [[AND146_I_I_I]], [[SHR207_I_I_I]] CHECK-NEXT: +// [[COND_V_I2_I:%.*]] = select <4 x i1> [[CMP_I_I]], <4 x float> +// [[ADD13_I_I_I_I]], <4 x float> [[ADD204_I_I_I]] CHECK-NEXT: +// [[COND4_V_I_I:%.*]] = select <4 x i1> [[CMP_I_I]], <4 x float> +// [[FNEG_I_I_I_I]], <4 x float> [[SUB206_I_I_I]] CHECK-NEXT: +// [[COND6_V_I_I:%.*]] = select <4 x i1> [[CMP_I_I]], <4 x i32> [[CONV_I_I_I]], +// <4 x i32> [[ADD209_I_I_I]] CHECK-NEXT: [[COND6_I_I:%.*]] = and <4 x i32> +// [[COND6_V_I_I]], splat (i32 2) CHECK-NEXT: [[MUL_I_I:%.*]] = fmul <4 x +// float> [[COND_V_I2_I]], [[COND_V_I2_I]] CHECK-NEXT: [[MUL1_I_I:%.*]] = +// fmul <4 x float> [[COND_V_I2_I]], [[MUL_I_I]] CHECK-NEXT: [[TMP30:%.*]] = +// tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 +// x float> splat (float 0x3DE5D93A60000000), <4 x float> splat (float +// 0xBE5AE5E680000000)) CHECK-NEXT: [[TMP31:%.*]] = tail call noundef <4 x +// float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP30]], <4 +// x float> splat (float 0x3EC6DBE4A0000000)) CHECK-NEXT: [[TMP32:%.*]] = +// tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 +// x float> [[TMP31]], <4 x float> splat (float 0xBF2A013A80000000)) CHECK-NEXT: +// [[TMP33:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> +// [[MUL_I_I]], <4 x float> [[TMP32]], <4 x float> splat (float +// 0x3F811110E0000000)) CHECK-NEXT: [[FNEG_I3_I:%.*]] = fneg <4 x float> +// [[MUL1_I_I]] CHECK-NEXT: [[MUL5_I_I:%.*]] = fmul <4 x float> [[TMP33]], +// [[FNEG_I3_I]] CHECK-NEXT: [[TMP34:%.*]] = tail call noundef <4 x float> +// @llvm.fmuladd.v4f32(<4 x float> [[COND4_V_I_I]], <4 x float> splat +// (float 5.000000e-01), <4 x float> [[MUL5_I_I]]) CHECK-NEXT: [[FNEG7_I_I:%.*]] +// = fneg <4 x float> [[COND4_V_I_I]] CHECK-NEXT: [[TMP35:%.*]] = tail call +// noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> +// [[TMP34]], <4 x float> [[FNEG7_I_I]]) CHECK-NEXT: [[TMP36:%.*]] = tail +// call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL1_I_I]], <4 x +// float> splat (float 0x3FC5555560000000), <4 x float> [[TMP35]]) CHECK-NEXT: +// [[SUB_I_I:%.*]] = fsub <4 x float> [[COND_V_I2_I]], [[TMP36]] CHECK-NEXT: +// [[FNEG_I_I:%.*]] = fneg <4 x float> [[SUB_I_I]] CHECK-NEXT: [[TMP37:%.*]] +// = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], +// <4 x float> splat (float 0xBDA8FAE9C0000000), <4 x float> splat (float +// 0x3E21EE9EC0000000)) CHECK-NEXT: [[TMP38:%.*]] = tail call noundef <4 x +// float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP37]], <4 +// x float> splat (float 0xBE92524740000000)) CHECK-NEXT: [[TMP39:%.*]] = +// tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 +// x float> [[TMP38]], <4 x float> splat (float 0x3EFA015C40000000)) CHECK-NEXT: +// [[TMP40:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> +// [[MUL_I_I]], <4 x float> [[TMP39]], <4 x float> splat (float +// 0xBF56C16C00000000)) CHECK-NEXT: [[TMP41:%.*]] = tail call noundef <4 x +// float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP40]], <4 +// x float> splat (float 0x3FA5555560000000)) CHECK-NEXT: [[MUL5_I5_I:%.*]] = +// fmul <4 x float> [[MUL_I_I]], [[TMP41]] CHECK-NEXT: [[TMP42:%.*]] = tail +// call <4 x float> @llvm.fabs.v4f32(<4 x float> [[COND_V_I2_I]]) CHECK-NEXT: +// [[AND_I_I:%.*]] = bitcast <4 x float> [[TMP42]] to <4 x i32> CHECK-NEXT: +// [[SUB_I6_I:%.*]] = add nsw <4 x i32> [[AND_I_I]], splat (i32 -16777216) +// CHECK-NEXT: [[TMP43:%.*]] = add nsw <4 x i32> [[AND_I_I]], splat (i32 +// -1050253722) CHECK-NEXT: [[AND938_I_I:%.*]] = icmp ult <4 x i32> +// [[TMP43]], splat (i32 11429479) CHECK-NEXT: [[TMP44:%.*]] = select <4 x +// i1> [[AND938_I_I]], <4 x i32> [[SUB_I6_I]], <4 x i32> zeroinitializer +// CHECK-NEXT: [[CMP11_I_I:%.*]] = icmp samesign ugt <4 x i32> [[AND_I_I]], +// splat (i32 1061683200) CHECK-NEXT: [[COND14_I_I:%.*]] = select <4 x i1> +// [[CMP11_I_I]], <4 x i32> splat (i32 1049624576), <4 x i32> [[TMP44]] +// CHECK-NEXT: [[TMP45:%.*]] = bitcast <4 x i32> [[COND14_I_I]] to <4 x +// float> CHECK-NEXT: [[FNEG_I7_I:%.*]] = fneg <4 x float> [[TMP45]] +// CHECK-NEXT: [[TMP46:%.*]] = tail call noundef <4 x float> +// @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> splat +// (float 5.000000e-01), <4 x float> [[FNEG_I7_I]]) CHECK-NEXT: +// [[SUB16_I_I:%.*]] = fsub <4 x float> splat (float 1.000000e+00), [[TMP45]] // CHECK-NEXT: [[FNEG17_I_I:%.*]] = fneg <4 x float> [[COND_V_I2_I]] -// CHECK-NEXT: [[MUL18_I_I:%.*]] = fmul <4 x float> [[COND4_V_I_I]], [[FNEG17_I_I]] -// CHECK-NEXT: [[TMP47:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[MUL5_I5_I]], <4 x float> [[MUL18_I_I]]) -// CHECK-NEXT: [[TMP48:%.*]] = fsub <4 x float> [[TMP47]], [[TMP46]] -// CHECK-NEXT: [[SUB21_I_I:%.*]] = fadd <4 x float> [[SUB16_I_I]], [[TMP48]] -// CHECK-NEXT: [[TMP49:%.*]] = and <4 x i32> [[COND6_V_I_I]], splat (i32 1) -// CHECK-NEXT: [[TMP50:%.*]] = icmp eq <4 x i32> [[TMP49]], zeroinitializer -// CHECK-NEXT: [[COND_V_I_I:%.*]] = select <4 x i1> [[TMP50]], <4 x float> [[SUB21_I_I]], <4 x float> [[FNEG_I_I]] -// CHECK-NEXT: [[COND_I_I:%.*]] = bitcast <4 x float> [[COND_V_I_I]] to <4 x i32> -// CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp ne <4 x i32> [[COND6_I_I]], zeroinitializer -// CHECK-NEXT: [[SEXT6_I_I:%.*]] = sext <4 x i1> [[CMP5_I_I]] to <4 x i32> -// CHECK-NEXT: [[SHL_I_I:%.*]] = shl nsw <4 x i32> [[SEXT6_I_I]], splat (i32 31) -// CHECK-NEXT: [[XOR_I_I:%.*]] = xor <4 x i32> [[SHL_I_I]], [[COND_I_I]] -// CHECK-NEXT: [[ASTYPE7_I_I:%.*]] = bitcast <4 x i32> [[XOR_I_I]] to <4 x float> -// CHECK-NEXT: [[TMP51:%.*]] = fcmp ueq <4 x float> [[ELT_ABS_I_I_I]], splat (float 0x7FF0000000000000) -// CHECK-NEXT: [[COND_V_I_I_I:%.*]] = select <4 x i1> [[TMP51]], <4 x float> splat (float 0x7FF8000000000000), <4 x float> [[ASTYPE7_I_I]] -// CHECK-NEXT: store <4 x float> [[COND_V_I_I_I]], ptr addrspace(1) [[F]], align 16, !tbaa [[TBAA11]] +// CHECK-NEXT: [[MUL18_I_I:%.*]] = fmul <4 x float> [[COND4_V_I_I]], +// [[FNEG17_I_I]] CHECK-NEXT: [[TMP47:%.*]] = tail call noundef <4 x float> +// @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[MUL5_I5_I]], <4 x +// float> [[MUL18_I_I]]) CHECK-NEXT: [[TMP48:%.*]] = fsub <4 x float> +// [[TMP47]], [[TMP46]] CHECK-NEXT: [[SUB21_I_I:%.*]] = fadd <4 x float> +// [[SUB16_I_I]], [[TMP48]] CHECK-NEXT: [[TMP49:%.*]] = and <4 x i32> +// [[COND6_V_I_I]], splat (i32 1) CHECK-NEXT: [[TMP50:%.*]] = icmp eq <4 x +// i32> [[TMP49]], zeroinitializer CHECK-NEXT: [[COND_V_I_I:%.*]] = select <4 +// x i1> [[TMP50]], <4 x float> [[SUB21_I_I]], <4 x float> [[FNEG_I_I]] +// CHECK-NEXT: [[COND_I_I:%.*]] = bitcast <4 x float> [[COND_V_I_I]] to <4 x +// i32> CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp ne <4 x i32> [[COND6_I_I]], +// zeroinitializer CHECK-NEXT: [[SEXT6_I_I:%.*]] = sext <4 x i1> [[CMP5_I_I]] +// to <4 x i32> CHECK-NEXT: [[SHL_I_I:%.*]] = shl nsw <4 x i32> +// [[SEXT6_I_I]], splat (i32 31) CHECK-NEXT: [[XOR_I_I:%.*]] = xor <4 x i32> +// [[SHL_I_I]], [[COND_I_I]] CHECK-NEXT: [[ASTYPE7_I_I:%.*]] = bitcast <4 x +// i32> [[XOR_I_I]] to <4 x float> CHECK-NEXT: [[TMP51:%.*]] = fcmp ueq <4 x +// float> [[ELT_ABS_I_I_I]], splat (float 0x7FF0000000000000) CHECK-NEXT: +// [[COND_V_I_I_I:%.*]] = select <4 x i1> [[TMP51]], <4 x float> splat (float +// 0x7FF8000000000000), <4 x float> [[ASTYPE7_I_I]] CHECK-NEXT: store <4 x +// float> [[COND_V_I_I_I]], ptr addrspace(1) [[F]], align 16, !tbaa [[TBAA11]] // CHECK-NEXT: ret void // -__kernel void foo(__global float4 *f) { - *f = cos(*f); -} +__kernel void foo(__global float4 *f) { *f = cos(*f); } //. // CHECK: [[META6]] = !{i32 1} // CHECK: [[META7]] = !{!"none"} diff --git a/libclc/test/math/fabs.cl b/libclc/test/math/fabs.cl index 5935fc9f6..1ee1074b0 100644 --- a/libclc/test/math/fabs.cl +++ b/libclc/test/math/fabs.cl @@ -1,4 +1,5 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// UTC_ARGS: --version 5 //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. @@ -7,19 +8,23 @@ // //===----------------------------------------------------------------------===// -// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s +// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - +// --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | +// FileCheck %s // CHECK-LABEL: define protected amdgpu_kernel void @foo( -// CHECK-SAME: ptr addrspace(1) noundef align 4 captures(none) [[F:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] { +// CHECK-SAME: ptr addrspace(1) noundef align 4 captures(none) [[F:%.*]]) +// local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] +// !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] +// !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(1) [[F]], align 4, !tbaa [[TBAA10:![0-9]+]] -// CHECK-NEXT: [[ELT_ABS_I_I:%.*]] = tail call noundef float @llvm.fabs.f32(float [[TMP0]]) -// CHECK-NEXT: store float [[ELT_ABS_I_I]], ptr addrspace(1) [[F]], align 4, !tbaa [[TBAA10]] +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(1) [[F]], align 4, +// !tbaa [[TBAA10:![0-9]+]] CHECK-NEXT: [[ELT_ABS_I_I:%.*]] = tail call +// noundef float @llvm.fabs.f32(float [[TMP0]]) CHECK-NEXT: store float +// [[ELT_ABS_I_I]], ptr addrspace(1) [[F]], align 4, !tbaa [[TBAA10]] // CHECK-NEXT: ret void // -__kernel void foo(__global float *f) { - *f = fabs(*f); -} +__kernel void foo(__global float *f) { *f = fabs(*f); } //. // CHECK: [[META6]] = !{i32 1} // CHECK: [[META7]] = !{!"none"} diff --git a/libclc/test/math/rsqrt.cl b/libclc/test/math/rsqrt.cl index fa5e13f27..ce5c54485 100644 --- a/libclc/test/math/rsqrt.cl +++ b/libclc/test/math/rsqrt.cl @@ -1,4 +1,5 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// UTC_ARGS: --version 5 //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. @@ -7,25 +8,36 @@ // //===----------------------------------------------------------------------===// -// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s +// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - +// --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | +// FileCheck %s #pragma OPENCL EXTENSION cl_khr_fp64 : enable #if defined(cl_khr_fp64) // CHECK-LABEL: define protected amdgpu_kernel void @foo( -// CHECK-SAME: ptr addrspace(1) noundef align 16 captures(none) initializes((16, 32)) [[X:%.*]], ptr addrspace(1) noundef align 32 captures(none) initializes((32, 64)) [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[X]], align 16, !tbaa [[TBAA11:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]), !fpmath [[META14:![0-9]+]] -// CHECK-NEXT: [[DIV_I_I:%.*]] = fdiv contract <4 x float> splat (float 1.000000e+00), [[TMP1]], !fpmath [[META15:![0-9]+]] -// CHECK-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[X]], i64 16 -// CHECK-NEXT: store <4 x float> [[DIV_I_I]], ptr addrspace(1) [[ARRAYIDX1_I]], align 16, !tbaa [[TBAA11]] -// CHECK-NEXT: [[TMP2:%.*]] = load <4 x double>, ptr addrspace(1) [[Y]], align 32, !tbaa [[TBAA11]] -// CHECK-NEXT: [[TMP3:%.*]] = tail call contract <4 x double> @llvm.sqrt.v4f64(<4 x double> [[TMP2]]) -// CHECK-NEXT: [[DIV_I_I1:%.*]] = fdiv contract <4 x double> splat (double 1.000000e+00), [[TMP3]] -// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[Y]], i64 32 -// CHECK-NEXT: store <4 x double> [[DIV_I_I1]], ptr addrspace(1) [[ARRAYIDX4_I]], align 32, !tbaa [[TBAA11]] +// CHECK-SAME: ptr addrspace(1) noundef align 16 captures(none) initializes((16, +// 32)) [[X:%.*]], ptr addrspace(1) noundef align 32 captures(none) +// initializes((32, 64)) [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] +// !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual +// [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type +// [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] { CHECK-NEXT: +// [[ENTRY:.*:]] CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr +// addrspace(1) [[X]], align 16, !tbaa [[TBAA11:![0-9]+]] CHECK-NEXT: +// [[TMP1:%.*]] = tail call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> +// [[TMP0]]), !fpmath [[META14:![0-9]+]] CHECK-NEXT: [[DIV_I_I:%.*]] = fdiv +// contract <4 x float> splat (float 1.000000e+00), [[TMP1]], !fpmath +// [[META15:![0-9]+]] CHECK-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr +// inbounds nuw i8, ptr addrspace(1) [[X]], i64 16 CHECK-NEXT: store <4 x +// float> [[DIV_I_I]], ptr addrspace(1) [[ARRAYIDX1_I]], align 16, !tbaa +// [[TBAA11]] CHECK-NEXT: [[TMP2:%.*]] = load <4 x double>, ptr addrspace(1) +// [[Y]], align 32, !tbaa [[TBAA11]] CHECK-NEXT: [[TMP3:%.*]] = tail call +// contract <4 x double> @llvm.sqrt.v4f64(<4 x double> [[TMP2]]) CHECK-NEXT: +// [[DIV_I_I1:%.*]] = fdiv contract <4 x double> splat (double 1.000000e+00), +// [[TMP3]] CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds nuw i8, +// ptr addrspace(1) [[Y]], i64 32 CHECK-NEXT: store <4 x double> +// [[DIV_I_I1]], ptr addrspace(1) [[ARRAYIDX4_I]], align 32, !tbaa [[TBAA11]] // CHECK-NEXT: ret void // __kernel void foo(__global float4 *x, __global double4 *y) { @@ -38,11 +50,10 @@ __kernel void foo(__global float4 *x, __global double4 *y) { // CHECK: [[META6]] = !{i32 1, i32 1} // CHECK: [[META7]] = !{!"none", !"none"} // CHECK: [[META8]] = !{!"float4*", !"double4*"} -// CHECK: [[META9]] = !{!"float __attribute__((ext_vector_type(4)))*", !"double __attribute__((ext_vector_type(4)))*"} -// CHECK: [[META10]] = !{!"", !""} -// CHECK: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} -// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0} -// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"} -// CHECK: [[META14]] = !{float 3.000000e+00} -// CHECK: [[META15]] = !{float 2.500000e+00} +// CHECK: [[META9]] = !{!"float __attribute__((ext_vector_type(4)))*", !"double +// __attribute__((ext_vector_type(4)))*"} CHECK: [[META10]] = !{!"", !""} CHECK: +// [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} CHECK: [[META12]] = +// !{!"omnipotent char", [[META13:![0-9]+]], i64 0} CHECK: [[META13]] = +// !{!"Simple C/C++ TBAA"} CHECK: [[META14]] = !{float 3.000000e+00} CHECK: +// [[META15]] = !{float 2.500000e+00} //. diff --git a/libclc/test/misc/as_type.cl b/libclc/test/misc/as_type.cl index a475956e6..fa00577f3 100644 --- a/libclc/test/misc/as_type.cl +++ b/libclc/test/misc/as_type.cl @@ -1,4 +1,5 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// UTC_ARGS: --version 5 //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. @@ -7,25 +8,30 @@ // //===----------------------------------------------------------------------===// -// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s +// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - +// --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | +// FileCheck %s // CHECK-LABEL: define protected amdgpu_kernel void @foo( -// CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[X:%.*]], ptr addrspace(1) noundef readonly align 16 captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr addrspace(1) [[Y]], align 16, !tbaa [[TBAA11:![0-9]+]] -// CHECK-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[X]], align 16, !tbaa [[TBAA11]] -// CHECK-NEXT: ret void +// CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) +// initializes((0, 16)) [[X:%.*]], ptr addrspace(1) noundef readonly align 16 +// captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] +// !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual +// [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type +// [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] { CHECK-NEXT: +// [[ENTRY:.*:]] CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr addrspace(1) +// [[Y]], align 16, !tbaa [[TBAA11:![0-9]+]] CHECK-NEXT: store <4 x i32> +// [[TMP0]], ptr addrspace(1) [[X]], align 16, !tbaa [[TBAA11]] CHECK-NEXT: ret +// void // -__kernel void foo(__global int4 *x, __global float4 *y) { - *x = as_int4(*y); -} +__kernel void foo(__global int4 *x, __global float4 *y) { *x = as_int4(*y); } //. // CHECK: [[META6]] = !{i32 1, i32 1} // CHECK: [[META7]] = !{!"none", !"none"} // CHECK: [[META8]] = !{!"int4*", !"float4*"} -// CHECK: [[META9]] = !{!"int __attribute__((ext_vector_type(4)))*", !"float __attribute__((ext_vector_type(4)))*"} -// CHECK: [[META10]] = !{!"", !""} -// CHECK: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} -// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0} -// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[META9]] = !{!"int __attribute__((ext_vector_type(4)))*", !"float +// __attribute__((ext_vector_type(4)))*"} CHECK: [[META10]] = !{!"", !""} CHECK: +// [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} CHECK: [[META12]] = +// !{!"omnipotent char", [[META13:![0-9]+]], i64 0} CHECK: [[META13]] = +// !{!"Simple C/C++ TBAA"} //. diff --git a/libclc/test/misc/convert.cl b/libclc/test/misc/convert.cl index cd8c41465..2959ab1e8 100644 --- a/libclc/test/misc/convert.cl +++ b/libclc/test/misc/convert.cl @@ -1,4 +1,5 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// UTC_ARGS: --version 5 //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. @@ -7,14 +8,21 @@ // //===----------------------------------------------------------------------===// -// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s +// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - +// --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | +// FileCheck %s // CHECK-LABEL: define protected amdgpu_kernel void @foo( -// CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[X:%.*]], ptr addrspace(1) noundef readonly align 16 captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[Y]], align 16, !tbaa [[TBAA11:![0-9]+]] -// CHECK-NEXT: [[CONV_I_I:%.*]] = fptosi <4 x float> [[TMP0]] to <4 x i32> -// CHECK-NEXT: store <4 x i32> [[CONV_I_I]], ptr addrspace(1) [[X]], align 16, !tbaa [[TBAA11]] +// CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) +// initializes((0, 16)) [[X:%.*]], ptr addrspace(1) noundef readonly align 16 +// captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] +// !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual +// [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type +// [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] { CHECK-NEXT: +// [[ENTRY:.*:]] CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr +// addrspace(1) [[Y]], align 16, !tbaa [[TBAA11:![0-9]+]] CHECK-NEXT: +// [[CONV_I_I:%.*]] = fptosi <4 x float> [[TMP0]] to <4 x i32> CHECK-NEXT: store +// <4 x i32> [[CONV_I_I]], ptr addrspace(1) [[X]], align 16, !tbaa [[TBAA11]] // CHECK-NEXT: ret void // __kernel void foo(__global int4 *x, __global float4 *y) { @@ -24,9 +32,9 @@ __kernel void foo(__global int4 *x, __global float4 *y) { // CHECK: [[META6]] = !{i32 1, i32 1} // CHECK: [[META7]] = !{!"none", !"none"} // CHECK: [[META8]] = !{!"int4*", !"float4*"} -// CHECK: [[META9]] = !{!"int __attribute__((ext_vector_type(4)))*", !"float __attribute__((ext_vector_type(4)))*"} -// CHECK: [[META10]] = !{!"", !""} -// CHECK: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} -// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0} -// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[META9]] = !{!"int __attribute__((ext_vector_type(4)))*", !"float +// __attribute__((ext_vector_type(4)))*"} CHECK: [[META10]] = !{!"", !""} CHECK: +// [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} CHECK: [[META12]] = +// !{!"omnipotent char", [[META13:![0-9]+]], i64 0} CHECK: [[META13]] = +// !{!"Simple C/C++ TBAA"} //. diff --git a/libclc/test/work-item/get_group_id.cl b/libclc/test/work-item/get_group_id.cl index f73f8f76c..6a51a2280 100644 --- a/libclc/test/work-item/get_group_id.cl +++ b/libclc/test/work-item/get_group_id.cl @@ -1,4 +1,5 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// UTC_ARGS: --version 5 //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. @@ -7,20 +8,23 @@ // //===----------------------------------------------------------------------===// -// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s +// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - +// --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | +// FileCheck %s // CHECK-LABEL: define protected amdgpu_kernel void @foo( -// CHECK-SAME: ptr addrspace(1) noundef writeonly align 4 captures(none) [[I:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.workgroup.id.x() -// CHECK-NEXT: [[RETVAL_0_I:%.*]] = zext i32 [[TMP0]] to i64 -// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(1) [[I]], i64 [[RETVAL_0_I]] -// CHECK-NEXT: store i32 1, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA10:![0-9]+]] -// CHECK-NEXT: ret void +// CHECK-SAME: ptr addrspace(1) noundef writeonly align 4 captures(none) +// [[I:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space +// [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type +// [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual +// [[META9:![0-9]+]] { CHECK-NEXT: [[ENTRY:.*:]] CHECK-NEXT: [[TMP0:%.*]] = +// tail call i32 @llvm.amdgcn.workgroup.id.x() CHECK-NEXT: [[RETVAL_0_I:%.*]] +// = zext i32 [[TMP0]] to i64 CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr +// inbounds nuw i32, ptr addrspace(1) [[I]], i64 [[RETVAL_0_I]] CHECK-NEXT: +// store i32 1, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa +// [[TBAA10:![0-9]+]] CHECK-NEXT: ret void // -__kernel void foo(__global int *i) { - i[get_group_id(0)] = 1; -} +__kernel void foo(__global int *i) { i[get_group_id(0)] = 1; } //. // CHECK: [[META6]] = !{i32 1} // CHECK: [[META7]] = !{!"none"} `````````` </details> https://github.com/llvm/llvm-project/pull/87989 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits