[PATCH] D73570: [FPEnv][X86] Platform-specific builtin constrained FP enablement
This revision was not accepted when it landed; it landed in state "Needs Review". This revision was automatically updated to reflect the committed changes. Closed by commit rG208470dd5d0a: [FPEnv][X86] Platform-specific builtin constrained FP enablement (authored by kpn). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D73570/new/ https://reviews.llvm.org/D73570 Files: clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/avx512f-builtins-constrained.c clang/test/CodeGen/fma-builtins-constrained.c clang/test/CodeGen/sse-builtins-constrained.c Index: clang/test/CodeGen/sse-builtins-constrained.c === --- /dev/null +++ clang/test/CodeGen/sse-builtins-constrained.c @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=UNCONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -ffp-exception-behavior=strict -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -S %s -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -ffp-exception-behavior=strict -S %s -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON + + +#include + +__m128 test_mm_sqrt_ps(__m128 x) { + // COMMON-LABEL: test_mm_sqrt_ps + // UNCONSTRAINED: call <4 x float> @llvm.sqrt.v4f32(<4 x float> {{.*}}) + // CONSTRAINED: call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float> {{.*}}, metadata !{{.*}}) + // CHECK-ASM: sqrtps + return _mm_sqrt_ps(x); +} + +__m128 test_sqrt_ss(__m128 x) { + // COMMON-LABEL: test_sqrt_ss + // COMMONIR: extractelement <4 x float> {{.*}}, i64 0 + // 
UNCONSTRAINED: call float @llvm.sqrt.f32(float {{.*}}) + // CONSTRAINED: call float @llvm.experimental.constrained.sqrt.f32(float {{.*}}, metadata !{{.*}}) + // CHECK-ASM: sqrtss + // COMMONIR: insertelement <4 x float> {{.*}}, float {{.*}}, i64 0 + return _mm_sqrt_ss(x); +} + Index: clang/test/CodeGen/fma-builtins-constrained.c === --- /dev/null +++ clang/test/CodeGen/fma-builtins-constrained.c @@ -0,0 +1,352 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -O -emit-llvm -o - | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=UNCONSTRAINED %s +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -ffp-exception-behavior=strict -O -emit-llvm -o - | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -O -S -o - | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -O -ffp-exception-behavior=strict -S -o - | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s + +// FIXME: Several of these tests are broken when constrained. 
+ +#include <immintrin.h> + +__m128 test_mm_fmadd_ps(__m128 a, __m128 b, __m128 c) { + // COMMON-LABEL: test_mm_fmadd_ps + // UNCONSTRAINED: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CONSTRAINED: call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !{{.*}}) + // CHECK-ASM: vfmadd213ps + return _mm_fmadd_ps(a, b, c); +} + +__m128d test_mm_fmadd_pd(__m128d a, __m128d b, __m128d c) { + // COMMON-LABEL: test_mm_fmadd_pd + // UNCONSTRAINED: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CONSTRAINED: call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !{{.*}}) + // CHECK-ASM: vfmadd213pd + return _mm_fmadd_pd(a, b, c); +} + +__m128 test_mm_fmadd_ss(__m128 a, __m128 b, __m128 c) { + // COMMON-LABEL: test_mm_fmadd_ss + // COMMONIR: extractelement <4 x float> %{{.*}}, i64 0 + // COMMONIR: extractelement <4 x float> %{{.*}}, i64 0 + // COMMONIR: extractelement <4 x float> %{{.*}}, i64 0 + // UNCONSTRAINED: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}) + // CONSTRAINED: call float @llvm.experimental.constrained.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}, metadata !{{.*}}) + // CHECK-ASM: vfmadd213ss + // COMMONIR: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0 + return _mm_fmadd_ss(a, b, c); +} + +__m128d test_mm_fmadd_sd(__m128d a, __m128d b, __m128d
[PATCH] D73570: [FPEnv][X86] Platform-specific builtin constrained FP enablement
craig.topper added a comment. LGTM with that FIXME added. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D73570/new/ https://reviews.llvm.org/D73570 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D73570: [FPEnv][X86] Platform-specific builtin constrained FP enablement
kpn updated this revision to Diff 241771. kpn added a comment. Address review comments: FMA tests are now run optimized. This changes where the FIXME lines are located. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D73570/new/ https://reviews.llvm.org/D73570 Files: clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/avx512f-builtins-constrained.c clang/test/CodeGen/fma-builtins-constrained.c clang/test/CodeGen/sse-builtins-constrained.c Index: clang/test/CodeGen/sse-builtins-constrained.c === --- /dev/null +++ clang/test/CodeGen/sse-builtins-constrained.c @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=UNCONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -ffp-exception-behavior=strict -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -S %s -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -ffp-exception-behavior=strict -S %s -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON + + +#include + +__m128 test_mm_sqrt_ps(__m128 x) { + // COMMON-LABEL: test_mm_sqrt_ps + // UNCONSTRAINED: call <4 x float> @llvm.sqrt.v4f32(<4 x float> {{.*}}) + // CONSTRAINED: call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float> {{.*}}, metadata !{{.*}}) + // CHECK-ASM: sqrtps + return _mm_sqrt_ps(x); +} + +__m128 test_sqrt_ss(__m128 x) { + // COMMON-LABEL: test_sqrt_ss + // COMMONIR: extractelement <4 x float> {{.*}}, i64 0 + // UNCONSTRAINED: call float @llvm.sqrt.f32(float {{.*}}) + // CONSTRAINED: call float 
@llvm.experimental.constrained.sqrt.f32(float {{.*}}, metadata !{{.*}}) + // CHECK-ASM: sqrtss + // COMMONIR: insertelement <4 x float> {{.*}}, float {{.*}}, i64 0 + return _mm_sqrt_ss(x); +} + Index: clang/test/CodeGen/fma-builtins-constrained.c === --- /dev/null +++ clang/test/CodeGen/fma-builtins-constrained.c @@ -0,0 +1,352 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -O -emit-llvm -o - | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=UNCONSTRAINED %s +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -ffp-exception-behavior=strict -O -emit-llvm -o - | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -O -S -o - | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -O -ffp-exception-behavior=strict -S -o - | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s + +// FIXME: Several of these tests are broken when constrained. 
+ +#include <immintrin.h> + +__m128 test_mm_fmadd_ps(__m128 a, __m128 b, __m128 c) { + // COMMON-LABEL: test_mm_fmadd_ps + // UNCONSTRAINED: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CONSTRAINED: call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !{{.*}}) + // CHECK-ASM: vfmadd213ps + return _mm_fmadd_ps(a, b, c); +} + +__m128d test_mm_fmadd_pd(__m128d a, __m128d b, __m128d c) { + // COMMON-LABEL: test_mm_fmadd_pd + // UNCONSTRAINED: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CONSTRAINED: call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !{{.*}}) + // CHECK-ASM: vfmadd213pd + return _mm_fmadd_pd(a, b, c); +} + +__m128 test_mm_fmadd_ss(__m128 a, __m128 b, __m128 c) { + // COMMON-LABEL: test_mm_fmadd_ss + // COMMONIR: extractelement <4 x float> %{{.*}}, i64 0 + // COMMONIR: extractelement <4 x float> %{{.*}}, i64 0 + // COMMONIR: extractelement <4 x float> %{{.*}}, i64 0 + // UNCONSTRAINED: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}) + // CONSTRAINED: call float @llvm.experimental.constrained.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}, metadata !{{.*}}) + // CHECK-ASM: vfmadd213ss + // COMMONIR: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0 + return _mm_fmadd_ss(a, b, c); +} + +__m128d test_mm_fmadd_sd(__m128d a, __m128d b, __m128d c) { + // COMMON-LABEL: test_mm_fmadd_sd + // COMMONIR: extractelement <2 x double> %{{.*}}, i64 0 +
[PATCH] D73570: [FPEnv][X86] Platform-specific builtin constrained FP enablement
kpn marked 2 inline comments as done. kpn added inline comments. Comment at: clang/test/CodeGen/fma-builtins-constrained.c:4 +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -S -o - | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -ffp-exception-behavior=strict -S -o - | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s + craig.topper wrote: > Technically the fmsub/fnmsub/fnmadd assembly requires optimizations to be > enabled. If it appears to work without optimizations it's only because > fast-isel fell back to SelectionDAG and picked up optimizations due to that. > Not something that should be relied on. Ok. Well, the eventual goal is to ship a product with optimization turned on. So I think it makes sense for me to give this a spin with optimizations and see what, if anything, needs to be done. Unless that's overkill, I'll start this afternoon. Comment at: clang/test/CodeGen/fma-builtins-constrained.c:6 + +// FIXME: Several of these tests are broken when constrained. + craig.topper wrote: > Is this just referring to the FIXME-CHECK-ASM? Yes. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D73570/new/ https://reviews.llvm.org/D73570 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D73570: [FPEnv][X86] Platform-specific builtin constrained FP enablement
craig.topper added inline comments. Comment at: clang/test/CodeGen/fma-builtins-constrained.c:4 +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -S -o - | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -ffp-exception-behavior=strict -S -o - | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s + Technically the fmsub/fnmsub/fnmadd assembly requires optimizations to be enabled. If it appears to work without optimizations, it's only because fast-isel fell back to SelectionDAG and picked up optimizations due to that. Not something that should be relied on. Comment at: clang/test/CodeGen/fma-builtins-constrained.c:6 + +// FIXME: Several of these tests are broken when constrained. + Is this just referring to the FIXME-CHECK-ASM? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D73570/new/ https://reviews.llvm.org/D73570 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D73570: [FPEnv][X86] Platform-specific builtin constrained FP enablement
kpn created this revision. kpn added reviewers: craig.topper, andrew.w.kaylor. Herald added a project: clang. Herald added a subscriber: cfe-commits. [FPEnv][X86] Platform-specific builtin constrained FP enablement When constrained floating point is enabled the X86-specific builtins don't use constrained intrinsics in some cases. Fix that. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D73570 Files: clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/avx512f-builtins-constrained.c clang/test/CodeGen/fma-builtins-constrained.c clang/test/CodeGen/sse-builtins-constrained.c Index: clang/test/CodeGen/sse-builtins-constrained.c === --- /dev/null +++ clang/test/CodeGen/sse-builtins-constrained.c @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=UNCONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -ffp-exception-behavior=strict -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -S %s -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -ffp-exception-behavior=strict -S %s -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON + + +#include + +__m128 test_mm_sqrt_ps(__m128 x) { + // COMMON-LABEL: test_mm_sqrt_ps + // UNCONSTRAINED: call <4 x float> @llvm.sqrt.v4f32(<4 x float> {{.*}}) + // CONSTRAINED: call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float> {{.*}}, metadata !{{.*}}) + // CHECK-ASM: sqrtps + return _mm_sqrt_ps(x); +} + +__m128 test_sqrt_ss(__m128 x) { + // COMMON-LABEL: test_sqrt_ss + // COMMONIR: extractelement <4 x float> {{.*}}, i64 
0 + // UNCONSTRAINED: call float @llvm.sqrt.f32(float {{.*}}) + // CONSTRAINED: call float @llvm.experimental.constrained.sqrt.f32(float {{.*}}, metadata !{{.*}}) + // CHECK-ASM: sqrtss + // COMMONIR: insertelement <4 x float> {{.*}}, float {{.*}}, i64 0 + return _mm_sqrt_ss(x); +} + Index: clang/test/CodeGen/fma-builtins-constrained.c === --- /dev/null +++ clang/test/CodeGen/fma-builtins-constrained.c @@ -0,0 +1,352 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -emit-llvm -o - | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=UNCONSTRAINED %s +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -ffp-exception-behavior=strict -emit-llvm -o - | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -S -o - | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -ffp-exception-behavior=strict -S -o - | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s + +// FIXME: Several of these tests are broken when constrained. 
+ +#include <immintrin.h> + +__m128 test_mm_fmadd_ps(__m128 a, __m128 b, __m128 c) { + // COMMON-LABEL: test_mm_fmadd_ps + // UNCONSTRAINED: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CONSTRAINED: call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !{{.*}}) + // CHECK-ASM: vfmadd213ps + return _mm_fmadd_ps(a, b, c); +} + +__m128d test_mm_fmadd_pd(__m128d a, __m128d b, __m128d c) { + // COMMON-LABEL: test_mm_fmadd_pd + // UNCONSTRAINED: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CONSTRAINED: call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !{{.*}}) + // CHECK-ASM: vfmadd213pd + return _mm_fmadd_pd(a, b, c); +} + +__m128 test_mm_fmadd_ss(__m128 a, __m128 b, __m128 c) { + // COMMON-LABEL: test_mm_fmadd_ss + // COMMONIR: extractelement <4 x float> %{{.*}}, i64 0 + // COMMONIR: extractelement <4 x float> %{{.*}}, i64 0 + // COMMONIR: extractelement <4 x float> %{{.*}}, i64 0 + // UNCONSTRAINED: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}) + // CONSTRAINED: call float @llvm.experimental.constrained.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}, metadata !{{.*}}) + // CHECK-ASM: vfmadd213ss + // COMMONIR: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0 + return _mm_fmadd_ss(a, b, c); +} + +__m128d test_mm_fmadd_sd(__m128d a, __m128d b, __m128d c) { +