[PATCH] D133191: Driver test: remove `REQUIRES: x86-registered-target` and set `--sysroot=""` to support clang with `DEFAULT_SYSROOT`.

2022-09-02 Thread Warren Ristow via Phabricator via cfe-commits
wristow accepted this revision.
wristow added a comment.
This revision is now accepted and ready to land.

LGTM


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133191/new/

https://reviews.llvm.org/D133191

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124469: [NFC] Cleanup miscellaneous header items

2022-04-26 Thread Warren Ristow via Phabricator via cfe-commits
wristow added a comment.

Thanks for the review @RKSimon !




Comment at: clang/lib/Headers/__wmmintrin_pclmul.h:25
 /// \code
-/// __m128i _mm_clmulepi64_si128(__m128i __X, __m128i __Y, const int __I);
+/// __m128i _mm_clmulepi64_si128(__m128i X, __m128i Y, const int I);
 /// \endcode

RKSimon wrote:
> Its annoying that we've never gotten 
> https://github.com/llvm/llvm-project/issues/35297 fixed to have helped with 
> these param name mismatches
Yes -- would be nice to catch these sorts of things.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124469/new/

https://reviews.llvm.org/D124469

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124469: [NFC] Cleanup miscellaneous header items

2022-04-26 Thread Warren Ristow via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGdf08b3493869: [NFC] Cleanup miscellaneous header items 
(authored by wristow).
Herald added a project: clang.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124469/new/

https://reviews.llvm.org/D124469

Files:
  clang/lib/Headers/__wmmintrin_pclmul.h
  clang/lib/Headers/avxintrin.h
  clang/lib/Headers/bmiintrin.h
  clang/lib/Headers/emmintrin.h
  clang/lib/Headers/smmintrin.h
  clang/lib/Headers/xmmintrin.h

Index: clang/lib/Headers/xmmintrin.h
===
--- clang/lib/Headers/xmmintrin.h
+++ clang/lib/Headers/xmmintrin.h
@@ -2086,7 +2086,7 @@
 /// \headerfile 
 ///
 /// \code
-/// void _mm_prefetch(const void * a, const int sel);
+/// void _mm_prefetch(const void *a, const int sel);
 /// \endcode
 ///
 /// This intrinsic corresponds to the  PREFETCHNTA  instruction.
@@ -2360,7 +2360,10 @@
 ///00: assigned from bits [15:0] of \a a. \n
 ///01: assigned from bits [31:16] of \a a. \n
 ///10: assigned from bits [47:32] of \a a. \n
-///11: assigned from bits [63:48] of \a a.
+///11: assigned from bits [63:48] of \a a. \n
+///Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
+///_MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form
+///[b6, b4, b2, b0].
 /// \returns A 64-bit integer vector containing the shuffled values.
 #define _mm_shuffle_pi16(a, n) \
   ((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)))
@@ -2602,7 +2605,10 @@
 ///00: Bits [31:0] copied from the specified operand. \n
 ///01: Bits [63:32] copied from the specified operand. \n
 ///10: Bits [95:64] copied from the specified operand. \n
-///11: Bits [127:96] copied from the specified operand.
+///11: Bits [127:96] copied from the specified operand. \n
+///Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
+///_MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form
+///[b6, b4, b2, b0].
 /// \returns A 128-bit vector of [4 x float] containing the shuffled values.
 #define _mm_shuffle_ps(a, b, mask) \
   ((__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
Index: clang/lib/Headers/smmintrin.h
===
--- clang/lib/Headers/smmintrin.h
+++ clang/lib/Headers/smmintrin.h
@@ -1213,8 +1213,8 @@
 /// This intrinsic corresponds to the  VPMOVSXBW / PMOVSXBW  instruction.
 ///
 /// \param __V
-///A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are sign-
-///extended to 16-bit values.
+///A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are
+///sign-extended to 16-bit values.
 /// \returns A 128-bit vector of [8 x i16] containing the sign-extended values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi16(__m128i __V) {
   /* This function always performs a signed extension, but __v16qi is a char
Index: clang/lib/Headers/emmintrin.h
===
--- clang/lib/Headers/emmintrin.h
+++ clang/lib/Headers/emmintrin.h
@@ -4126,21 +4126,25 @@
 ///
 /// \headerfile 
 ///
+/// \code
+/// __m128i _mm_extract_epi16(__m256i a, const int imm);
+/// \endcode
+///
 /// This intrinsic corresponds to the  VPEXTRW / PEXTRW  instruction.
 ///
-/// \param __a
+/// \param a
 ///A 128-bit integer vector.
-/// \param __imm
-///An immediate value. Bits [2:0] selects values from \a __a to be assigned
+/// \param imm
+///An immediate value. Bits [2:0] selects values from \a a to be assigned
 ///to bits[15:0] of the result. \n
-///000: assign values from bits [15:0] of \a __a. \n
-///001: assign values from bits [31:16] of \a __a. \n
-///010: assign values from bits [47:32] of \a __a. \n
-///011: assign values from bits [63:48] of \a __a. \n
-///100: assign values from bits [79:64] of \a __a. \n
-///101: assign values from bits [95:80] of \a __a. \n
-///110: assign values from bits [111:96] of \a __a. \n
-///111: assign values from bits [127:112] of \a __a.
+///000: assign values from bits [15:0] of \a a. \n
+///001: assign values from bits [31:16] of \a a. \n
+///010: assign values from bits [47:32] of \a a. \n
+///011: assign values from bits [63:48] of \a a. \n
+///100: assign values from bits [79:64] of \a a. \n
+///101: assign values from bits [95:80] of \a a. \n
+///110: assign values from bits [111:96] of \a a. \n
+///111: assign values from bits [127:112] of \a a.
 /// \returns An integer, whose lower 16 bits are selected from the 128-bit
 ///integer vector parameter and the remaining bits are assigned zeros.
 #define _mm_extract_epi16(a, imm)  \
@@ -4154,18 +4158,22 @@
 ///
 /// \headerfile 
 ///

[PATCH] D124469: [NFC] Cleanup miscellaneous header items

2022-04-26 Thread Warren Ristow via Phabricator via cfe-commits
wristow created this revision.
wristow added reviewers: craig.topper, RKSimon.
Herald added a subscriber: StephenFan.
Herald added a project: All.
wristow requested review of this revision.

- Explain the use of the _MM_SHUFFLE and _MM_SHUFFLE2 macros
- Update some doxygen parameter descriptions to match the implementations
- Add "see also" doxygen tags to some intrinsics
- Minor clang-format changes


https://reviews.llvm.org/D124469

Files:
  clang/lib/Headers/__wmmintrin_pclmul.h
  clang/lib/Headers/avxintrin.h
  clang/lib/Headers/bmiintrin.h
  clang/lib/Headers/emmintrin.h
  clang/lib/Headers/smmintrin.h
  clang/lib/Headers/xmmintrin.h

Index: clang/lib/Headers/xmmintrin.h
===
--- clang/lib/Headers/xmmintrin.h
+++ clang/lib/Headers/xmmintrin.h
@@ -2086,7 +2086,7 @@
 /// \headerfile 
 ///
 /// \code
-/// void _mm_prefetch(const void * a, const int sel);
+/// void _mm_prefetch(const void *a, const int sel);
 /// \endcode
 ///
 /// This intrinsic corresponds to the  PREFETCHNTA  instruction.
@@ -2360,7 +2360,10 @@
 ///00: assigned from bits [15:0] of \a a. \n
 ///01: assigned from bits [31:16] of \a a. \n
 ///10: assigned from bits [47:32] of \a a. \n
-///11: assigned from bits [63:48] of \a a.
+///11: assigned from bits [63:48] of \a a. \n
+///Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
+///_MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form
+///[b6, b4, b2, b0].
 /// \returns A 64-bit integer vector containing the shuffled values.
 #define _mm_shuffle_pi16(a, n) \
   ((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)))
@@ -2602,7 +2605,10 @@
 ///00: Bits [31:0] copied from the specified operand. \n
 ///01: Bits [63:32] copied from the specified operand. \n
 ///10: Bits [95:64] copied from the specified operand. \n
-///11: Bits [127:96] copied from the specified operand.
+///11: Bits [127:96] copied from the specified operand. \n
+///Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
+///_MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form
+///[b6, b4, b2, b0].
 /// \returns A 128-bit vector of [4 x float] containing the shuffled values.
 #define _mm_shuffle_ps(a, b, mask) \
   ((__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
Index: clang/lib/Headers/smmintrin.h
===
--- clang/lib/Headers/smmintrin.h
+++ clang/lib/Headers/smmintrin.h
@@ -1213,8 +1213,8 @@
 /// This intrinsic corresponds to the  VPMOVSXBW / PMOVSXBW  instruction.
 ///
 /// \param __V
-///A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are sign-
-///extended to 16-bit values.
+///A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are
+///sign-extended to 16-bit values.
 /// \returns A 128-bit vector of [8 x i16] containing the sign-extended values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi16(__m128i __V) {
   /* This function always performs a signed extension, but __v16qi is a char
Index: clang/lib/Headers/emmintrin.h
===
--- clang/lib/Headers/emmintrin.h
+++ clang/lib/Headers/emmintrin.h
@@ -4126,21 +4126,25 @@
 ///
 /// \headerfile 
 ///
+/// \code
+/// __m128i _mm_extract_epi16(__m256i a, const int imm);
+/// \endcode
+///
 /// This intrinsic corresponds to the  VPEXTRW / PEXTRW  instruction.
 ///
-/// \param __a
+/// \param a
 ///A 128-bit integer vector.
-/// \param __imm
-///An immediate value. Bits [2:0] selects values from \a __a to be assigned
+/// \param imm
+///An immediate value. Bits [2:0] selects values from \a a to be assigned
 ///to bits[15:0] of the result. \n
-///000: assign values from bits [15:0] of \a __a. \n
-///001: assign values from bits [31:16] of \a __a. \n
-///010: assign values from bits [47:32] of \a __a. \n
-///011: assign values from bits [63:48] of \a __a. \n
-///100: assign values from bits [79:64] of \a __a. \n
-///101: assign values from bits [95:80] of \a __a. \n
-///110: assign values from bits [111:96] of \a __a. \n
-///111: assign values from bits [127:112] of \a __a.
+///000: assign values from bits [15:0] of \a a. \n
+///001: assign values from bits [31:16] of \a a. \n
+///010: assign values from bits [47:32] of \a a. \n
+///011: assign values from bits [63:48] of \a a. \n
+///100: assign values from bits [79:64] of \a a. \n
+///101: assign values from bits [95:80] of \a a. \n
+///110: assign values from bits [111:96] of \a a. \n
+///111: assign values from bits [127:112] of \a a.
 /// \returns An integer, whose lower 16 bits are selected from the 128-bit
 ///integer vector parameter and the remaining bits are assigned zeros.
 #define _mm_extract_epi16(a, imm)  \
@@ -4154,18 

[PATCH] D107994: Making the code compliant to the documentation about Floating Point support default values for C/C++. FPP-MODEL=PRECISE enables FFP-CONTRACT (FMA is enabled).

2021-11-15 Thread Warren Ristow via Phabricator via cfe-commits
wristow added a comment.

Thanks @zahiraam !  The updated ReleaseNote LGTM.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107994/new/

https://reviews.llvm.org/D107994

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107994: Making the code compliant to the documentation about Floating Point support default values for C/C++. FPP-MODEL=PRECISE enables FFP-CONTRACT (FMA is enabled).

2021-11-15 Thread Warren Ristow via Phabricator via cfe-commits
wristow added a comment.

In D107994#3131268 , @zahiraam wrote:

> In D107994#3130494 , @wristow wrote:
>
>> The Release Note change here says:
>>
>>   Floating Point Support in Clang
>>   ---
>>   - The -ffp-model=precise now implies -ffp-contract=on rather than
>> -ffp-contract=fast, and the documentation of these features has been
>> clarified. Previously, the documentation claimed that -ffp-model=precise 
>> was
>> the default, but this was incorrect because the precise model implied
>> -ffp-contract=fast, whereas the default behavior is -ffp-contract=on.
>> -ffp-model=precise is now exactly the default mode of the compiler.
>>
>> Unless I'm missing something, there is a related change here that I think 
>> should be more overtly noted (given the discussions in this review, I 
>> //think// this additional change is understood/expected, but I'm surprised 
>> it's not pointed out explicitly -- so maybe I'm misunderstanding).
>>
>> Specifically, this commit explicitly sets `-ffp-contract=on` in the default 
>> mode (which is what the documentation said, and continues to say).  But 
>> previously, there wasn't //any// explicit setting of `-ffp-contract` by 
>> default (and I think that lack of an explicit setting, was equivalent to 
>> `-ffp-contract=off`).
>> ...
>
> @wristow Are you suggesting a change of wording in the ReleaseNotes?

Yes @zahiraam, either a modification to what was written, or an additional 
separate point.  The critical user-visible change from this commit (AFAICS) is 
that previously the default behavior was `-ffp-contract=off`, whereas now the 
default behavior is `-ffp-contract=on`.  The ReleaseNote as written implies 
that the FMA situation //was// `-ffp-contract=fast`, and it has been changed to 
`-ffp-contract=on`, and either of those modes would result in FMA being done 
for cases like:

  float foo(float a, float b, float c) {
return a * b + c;
  }

In short, the current ReleaseNote implies that FMA would be used by default, 
for cases like the above (because `-ffp-contract=on` was the default) -- but 
this isn't true.  (The current ReleaseNote also clarifies that if 
`-ffp-model=precise` were explicitly specified, then it previously would set 
`-ffp-contract=fast` (which was not what the documentation said) and now it 
sets `-ffp-contract=on` (which //is// what the documentation says) -- this 
aspect is correct.)

I think the ReleaseNote should clarify that the result of this change is that 
previously, the default behavior was equivalent to `-ffp-contract=off` (and so 
no FMA would be done by default), but this commit makes the default behavior 
`-ffp-contract=on` (so FMA is enabled by default, even at `-O0`).  The 
documentation indicates that the default is `-ffp-contract=on`, so this change 
makes the compiler behavior consistent with the documentation, with respect to 
the `-ffp-contact` switch.  It also makes it consistent with the documentation 
for the `-ffp-model` switch.

A possible new wording is below (maybe this is too verbose, so making it more 
concise is fine too, from my POV):

  Floating Point Support in Clang
  ---
  - The default setting of FP contraction (FMA) is now -ffp-contract=on (for
languages other than CUDA/HIP).  This is consistent with the documentation.
Previously, the default behavior was equivalent to -ffp-contract=off, so
the old behavior was that FMA would not be used by default for code like:
float foo(float a, float b, float c) {
  return a * b + c;
}
Whereas now, FMA will be used in the above code, even when optimization
is off.
  
Related to this, the switch -ffp-model=precise now implies -ffp-contract=on
rather than -ffp-contract=fast, and the documentation of these features has
been clarified.  Previously, the documentation claimed that
-ffp-model=precise was the default, but this was incorrect because the
precise model implied -ffp-contract=fast, whereas the (now corrected)
default behavior is -ffp-contract=on.
-ffp-model=precise is now exactly the default mode of the compiler.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107994/new/

https://reviews.llvm.org/D107994

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107994: Making the code compliant to the documentation about Floating Point support default values for C/C++. FPP-MODEL=PRECISE enables FFP-CONTRACT (FMA is enabled).

2021-11-14 Thread Warren Ristow via Phabricator via cfe-commits
wristow added a comment.

The Release Note change here says:

  Floating Point Support in Clang
  ---
  - The -ffp-model=precise now implies -ffp-contract=on rather than
-ffp-contract=fast, and the documentation of these features has been
clarified. Previously, the documentation claimed that -ffp-model=precise was
the default, but this was incorrect because the precise model implied
-ffp-contract=fast, whereas the default behavior is -ffp-contract=on.
-ffp-model=precise is now exactly the default mode of the compiler.

Unless I'm missing something, there is a related change here that I think 
should be more overtly noted (given the discussions in this review, I //think// 
this additional change is understood/expected, but I'm surprised it's not 
pointed out explicitly -- so maybe I'm misunderstanding).

Specifically, this commit explicitly sets `-ffp-contract=on` in the default 
mode (which is what the documentation said, and continues to say).  But 
previously, there wasn't //any// explicit setting of `-ffp-contract` by default 
(and I think that lack of an explicit setting, was equivalent to 
`-ffp-contract=off`).

So with this commit, we now enable FMA by default (even at `-O0`). Noting the 
semantic change that FMA is now being enabled by default seems sensible.

Succinctly, in terms of the Release Note, the documentation claims that 
`-ffp-contract=on` is the default, but in fact the behavior //was// as though 
`-ffp-contract=off` was the default.  The default now really is 
`-ffp-contract=on`.

__

Also, I see a relatively minor point about `-ffp-contract` in the Users Manual. 
 It says that setting `-ffast-math` implies `-ffp-contract=fast`, and it says 
that setting `-ffp-model=fast` "Behaves identically to specifying both 
`-ffast-math` and `ffp-contract=fast`".  But that's redundant, since 
`-ffast-math` already implies  `-ffp-contract=fast`.  That is, `-ffast-math` 
and `-ffp-model=fast` are equivalent.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107994/new/

https://reviews.llvm.org/D107994

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D67429: Improve code generation for thread_local variables:

2020-12-08 Thread Warren Ristow via Phabricator via cfe-commits
wristow added a comment.

This commit appears to be the root cause of a run-time crash related to the 
interaction of global initializers and the wrapper functions to access 
`thread_local` variables -- reported as PR48030 
.


Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67429/new/

https://reviews.llvm.org/D67429

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D86649: Fix for assertion failure on PR46865

2020-09-01 Thread Warren Ristow via Phabricator via cfe-commits
wristow added inline comments.



Comment at: clang/lib/AST/ExprConstant.cpp:3098
 // have been value-dependent too), so diagnose that.
-assert(!VD->mightBeUsableInConstantExpressions(Info.Ctx));
+assert(!VD->isUsableInConstantExpressions(Info.Ctx));
 if (!Info.checkingPotentialConstantExpression()) {

This looks like a more appropriate assertion-check to me.  But it's out of my 
area of experience, so I don't feel qualified to give an authoritative "LGTM".


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D86649/new/

https://reviews.llvm.org/D86649

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D80952: [FPEnv][Clang][Driver] Disable constrained floating point on targets lacking support.

2020-06-22 Thread Warren Ristow via Phabricator via cfe-commits
wristow added inline comments.



Comment at: clang/test/CodeGen/fp-strictfp.cpp:1
+// RUN: %clang_cc1 -triple mips64-linux-gnu -frounding-math 
-ffp-exception-behavior=strict -O2 -verify=rounding,exception -emit-llvm -o - 
%s | tee /tmp/1 | FileCheck %s
+// RUN: %clang_cc1 -triple mips64-linux-gnu -ffp-exception-behavior=strict -O2 
-verify=exception -emit-llvm -o - %s | FileCheck %s

Need to remove the "tee" command in the pipe sequence.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80952/new/

https://reviews.llvm.org/D80952



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72675: [Clang][Driver] Fix -ffast-math/-ffp-contract interaction

2020-03-11 Thread Warren Ristow via Phabricator via cfe-commits
wristow added a comment.

Revisiting this patch.  I think that before fixing the `-ffp-contract=off` 
problem I originally raised here, there are two questions that have come up in 
this discussion that we should first resolve.  Specifically:

(1)  Do we want to enable FMA transformations by default (at appropriate 
optimization levels), like GCC does?  That is, should FMA be done for targets 
that support it with a command as simple as the following?

  clang -O2 -c test.c

FTR, currently we don't do FMA with simply `-O2`.  We do enable FMA if 
`-ffast-math` is added, or (of course) if FMA is explicitly enabled via 
`-ffp-contract=fast`.  Also note that with GCC, FMA is completely orthogonal to 
`-ffast-math`.  For example:

  gcc -O2 -ffp-contract=off -ffast-math test.c  # '-ffast-math' does not turn 
FMA "back on"

(2)  Under what conditions do we want to predefine the `__FAST_MATH__` macro?  
That is, should we continue with our current policy (essentially, define it 
when "all the fast-math-flags are enabled"), or do we want to do precisely what 
GCC does (define it when some particular subset of the fast-math-flags are 
enabled)?

My vote for (1) is yes.  To add to this, I would make the `-ffp-contract` 
setting independent of `-ffast-math`, as GCC does.  But to be explicit, 
enabling FMA at `-O2` is a change in behavior that may be unexpected for some 
users -- so I can imagine some users objecting to this change.

My vote for (2) is to keep our current behavior.  This approach seems more 
sensible to me than the GCC behavior.  And we're not boxing ourselves into 
anything by keeping the current behavior (that is, we can just as easily change 
this in the future if we find the GCC behavior is better for some reason).  
Among other things, this means that whether we define `__FAST_MATH__` will 
continue to not be impacted by the `-ffp-contract` setting -- that makes sense 
to me, especially if we make FMA orthogonal to `-ffast-math`, as discussed in 
(1).

One related point.  Our documentation (UsersManual.rst) for 
`-ffp-model=precise` says:

//Disables optimizations that are not value-safe on floating-point data, 
although FP contraction (FMA) is enabled (``-ffp-contract=fast``).  This is the 
default behavior.//

So this explicitly says that FMA is enabled by default (consistent with GCC in 
this regard).  However, that doesn't match our implementation, as we have FMA 
disabled by default, as noted in this discussion.  Concretely, the following 
two commands are not equivalent:

  clang -c -O2 test.c # FMA is disabled
  clang -c -O2 -ffp-model=precise test.c  # FMA is enabled

If we decide to enable FMA by default (that is, if we agree on "yes" for (1)), 
then the current `-ffp-model=precise` description will become correct.  But if 
we decide to continue to disable FMA by default, then we'll need to change the 
above description (possibly add another value to `-ffp-model=` that will 
//really // be the default).


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72675/new/

https://reviews.llvm.org/D72675



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72675: [Clang][Driver] Fix -ffast-math/-ffp-contract interaction

2020-01-27 Thread Warren Ristow via Phabricator via cfe-commits
wristow marked 4 inline comments as done.
wristow added inline comments.



Comment at: clang/test/Driver/fast-math.c:196
+// RUN: %clang -### -ffast-math -ffp-contract=fast -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-UNSAFE-MATH %s
+//

wristow wrote:
> andrew.w.kaylor wrote:
> > What about "-ffp-contract=off -ffast-math"? The way the code is written 
> > that will override the -ffp-contract option. That's probably what we want, 
> > though it might be nice to have a warning.
> Yes, currently `-fffast-math` will override that earlier `-ffp-contract=off` 
> settings.  It's unclear to me whether we're ultimately intending for that to 
> be the behavior (because GCC doesn't do that, as @uweigand noted).  I guess 
> this is another reason to hold off for a bit, until we figure out the wider 
> spec.
Added more tests that illustrate this behavior of "-ffp-contract=off/on 
-ffast-math" overriding and enabling FMA.  (I haven't added a warning.  Holding 
off until we decide on other questions discussed in this review.)


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72675/new/

https://reviews.llvm.org/D72675



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72675: [Clang][Driver] Fix -ffast-math/-ffp-contract interaction

2020-01-27 Thread Warren Ristow via Phabricator via cfe-commits
wristow updated this revision to Diff 240689.
wristow added a comment.

Used the clearer '!off && !on' (rather than '!(off || on)') in the checks.

Added more tests that note the current situation that `-ffast-math` enables 
FMA. overriding an earlier switch that had disabled it (included a "TODO" 
comment in these tests, since it isn't clear if that behavior is what we will 
ultimately stick with).


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72675/new/

https://reviews.llvm.org/D72675

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/fast-math.c


Index: clang/test/Driver/fast-math.c
===
--- clang/test/Driver/fast-math.c
+++ clang/test/Driver/fast-math.c
@@ -180,6 +180,40 @@
 // CHECK-FAST-MATH: "-ffast-math"
 // CHECK-FAST-MATH: "-ffinite-math-only"
 //
+// -ffp-contract=off and -ffp-contract=on must disable the fast-math umbrella,
+// and the unsafe-fp-math umbrella (-ffp-contract=fast leaves them enabled).
+// Note that there is some uncertainty as to whether we want the fast-math
+// umbrella to enable FMA, but it is clear that irrespective of whether FMA
+// is enabled by fast-math, it should be disabled with a later switch.
+// RUN: %clang -### -ffast-math -ffp-contract=off -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-FAST-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=off -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=on -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-FAST-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=on -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=fast -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-FAST-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=fast -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-UNSAFE-MATH %s
+//
+// The current behavior is that the umbrella aspect of -ffast-math enables FMA.
+// As noted above, there is some uncertainty on whether this is the desired
+// behavior.  For now, we verify that the umbrella aspect does kick in to
+// enable -ffp-contract=fast mode, overriding an earlier setting that had
+// disabled FMA.
+// TODO: Decide on whether -ffast-math should enable FMA.
+// RUN: %clang -### -ffp-contract=off -ffast-math -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-FP-CONTRACT-FAST %s
+// RUN: %clang -### -ffp-contract=off -ffast-math -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-FP-CONTRACT-FAST %s
+// RUN: %clang -### -ffp-contract=on -ffast-math -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-FP-CONTRACT-FAST %s
+// RUN: %clang -### -ffp-contract=on -ffast-math -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-FP-CONTRACT-FAST %s
+// CHECK-FP-CONTRACT-FAST: "-ffp-contract=fast"
+//
 // RUN: %clang -### -ffast-math -fno-fast-math -c %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NO-FAST-MATH %s
 // RUN: %clang -### -ffast-math -fno-finite-math-only -c %s 2>&1 \
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -2785,8 +2785,11 @@
   if (MathErrno)
 CmdArgs.push_back("-fmath-errno");
 
+  // If -ffp-contract=off/on has been specified on the command line, then we
+  // must suppress the emission of -ffast-math and -menable-unsafe-fp-math to
+  // cc1.
   if (!MathErrno && AssociativeMath && ReciprocalMath && !SignedZeros &&
-  !TrappingMath)
+  !TrappingMath && !FPContract.equals("off") && !FPContract.equals("on"))
 CmdArgs.push_back("-menable-unsafe-fp-math");
 
   if (!SignedZeros)
@@ -2831,12 +2834,17 @@
 
   ParseMRecip(D, Args, CmdArgs);
 
-  // -ffast-math enables the __FAST_MATH__ preprocessor macro, but check for 
the
-  // individual features enabled by -ffast-math instead of the option itself as
-  // that's consistent with gcc's behaviour.
+  // -ffast-math acts as an "umbrella", enabling a variety of individual
+  // floating-point transformations.  We check if the appropriate set of those
+  // transformations are enabled, and if they are, we pass that umbrella switch
+  // to cc1.  This also interacts with another "umbrella" switch, -ffp-model.
+  // We handle -ffp-contract somewhat specially here, to produce a warning in
+  // situations where -ffp-model=fast is overridden by the setting of
+  // -ffp-contract.
   if (!HonorINFs && !HonorNaNs && !MathErrno && AssociativeMath &&
   ReciprocalMath && !SignedZeros && !TrappingMath && !RoundingFPMath) {
-CmdArgs.push_back("-ffast-math");
+if (!FPContract.equals("off") && !FPContract.equals("on"))
+  CmdArgs.push_back("-ffast-math");
 if (FPModel.equals("fast")) {
   if (FPContract.equals("fast"))
 // All set, do nothing.


Index: 

[PATCH] D72675: [Clang][Driver] Fix -ffast-math/-ffp-contract interaction

2020-01-26 Thread Warren Ristow via Phabricator via cfe-commits
wristow added a comment.

About:

>> This is a bit of an oddity in our handling.
> 
> Yes it is!
> 
> This is certainly getting more complicated than I had originally expected. I 
> feel there isn't a clear spec on what we want in terms of whether FMA should 
> be enabled "automatically" at (for example) '-O2', and/or whether it should 
> be enabled by -ffast-math. I'm very willing to make whatever change is needed 
> here, to meet whatever spec we all ultimately agree on.
> 
> So I think this patch should be put on hold until we decide on these wider 
> aspects.

Thinking about this a bit more, one thing that I believe there //is// a clear 
spec on, is that when `-ffp-contract=off` is "the last switch related to FMA", 
then FMA ought to be disabled.  So for example, `-ffast-math -ffp-contract=off` 
should result in FMA being disabled, //irrrspective// of how we decide on the 
"wider aspects" of the spec mentioned above.  (FTR, this inability to enable 
FastMath in general, but explicitly disable FMA, was a problem one of our 
customers reported, which is what initially motivated this patch.)

Note that the points about the "oddity" of "-ffp-contract=fast" being implied 
by the two long sets of switches in this comment 
 are unrelated to the change proposed 
here.  That is, that oddity is the current behavior of Clang.

So one approach we could take is to first fix the problem of when 
`-ffp-contract=off` is the "the last switch related to FMA", it isn't disabling 
FMA in some cases.  And then as a followup, address the oddities that have been 
noted here.  If we decide to take this two-step approach, I'm happy to take on 
the followup work (once we decide what the spec should be).


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72675/new/

https://reviews.llvm.org/D72675



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72675: [Clang][Driver] Fix -ffast-math/-ffp-contract interaction

2020-01-24 Thread Warren Ristow via Phabricator via cfe-commits
wristow marked 4 inline comments as done.
wristow added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:2792
   if (!MathErrno && AssociativeMath && ReciprocalMath && !SignedZeros &&
-  !TrappingMath)
+  !TrappingMath && !(FPContract.equals("off") || FPContract.equals("on")))
 CmdArgs.push_back("-menable-unsafe-fp-math");

andrew.w.kaylor wrote:
> I think this would read better as "... && !FPContract.equals("off") && 
> !FPContract.equals("on")" The '||' in the middle of all these '&&'s combined 
> with the extra parens from the function call trips me up.
Sounds good.  Will do.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:2846
   ReciprocalMath && !SignedZeros && !TrappingMath && !RoundingFPMath) {
-CmdArgs.push_back("-ffast-math");
+if (!(FPContract.equals("off") || FPContract.equals("on")))
+  CmdArgs.push_back("-ffast-math");

andrew.w.kaylor wrote:
> As above, I'd prefer "!off && !on".
As above, will do.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:2854
 // Enable -ffp-contract=fast
 CmdArgs.push_back(Args.MakeArgString("-ffp-contract=fast"));
   else

andrew.w.kaylor wrote:
> This is a bit of an oddity in our handling. The FPContract local variable 
> starts out initialized to an empty string. So, what we're saying here is that 
> if you've used all the individual controls to set the rest of the fast math 
> flags, we'll turn on fp-contract=fast also. That seems wrong. If you use 
> "-ffast-math", FPContract will have been set to "fast" so that's not 
> applicable here.
> 
> I believe this means
> 
> ```
> -fno-honor-infinities -fno-honor-nans -fno-math-errno -fassociative-math 
> -freciprocal-math -fno-signed-zeros -fno-trapping-math -fno-rounding-math
> 
> ```
> 
> will imply "-ffp-contract=fast". Even worse:
> 
> ```
> -ffp-contract=off -fno-fast-math -fno-honor-infinities -fno-honor-nans 
> -fno-math-errno -fassociative-math -freciprocal-math -fno-signed-zeros 
> -fno-trapping-math -fno-rounding-math
> 
> ```
> will imply "-ffp-contract=fast" because "-fno-fast-math" resets FPContract to 
> empty.
> 
> I think we should initialize FPContract to whatever we want to be the default 
> (on?) and remove the special case for empty here. Also, -fno-fast-math should 
> either not change FPContract at all or set it to "off". Probably the latter 
> since we're saying -ffast-math includes -ffp-contract=on.
> This is a bit of an oddity in our handling.

Yes it is!

This is certainly getting more complicated than I had originally expected.  I 
feel there isn't a clear spec on what we want in terms of whether FMA should be 
enabled "automatically" at (for example) '-O2', and/or whether it should be 
enabled by `-ffast-math`.  I'm very willing to make whatever change is needed 
here, to meet whatever spec we all ultimately agree on.

So I think this patch should be put on hold until we decide on these wider 
aspects.

One minor note about:
> ... Probably the latter since we're saying -ffast-math includes 
> -ffp-contract=on.

I think that is intended to say "-ffast-math includes -ffp-contract=fast".  The 
semantics of `-ffp-contract=on` are another part that seems unclear (or at 
least, more complicated, since it then gets into the FP_CONTRACT pragma).



Comment at: clang/test/Driver/fast-math.c:196
+// RUN: %clang -### -ffast-math -ffp-contract=fast -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-UNSAFE-MATH %s
+//

andrew.w.kaylor wrote:
> What about "-ffp-contract=off -ffast-math"? The way the code is written that 
> will override the -ffp-contract option. That's probably what we want, though 
> it might be nice to have a warning.
Yes, currently `-fffast-math` will override that earlier `-ffp-contract=off` 
settings.  It's unclear to me whether we're ultimately intending for that to be 
the behavior (because GCC doesn't do that, as @uweigand noted).  I guess this 
is another reason to hold off for a bit, until we figure out the wider spec.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72675/new/

https://reviews.llvm.org/D72675



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72675: [Clang][Driver] Fix -ffast-math/-ffp-contract interaction

2020-01-24 Thread Warren Ristow via Phabricator via cfe-commits
wristow updated this revision to Diff 240305.
wristow added a comment.

Update a comment to remove the discussion about enabling the `__FAST_MATH__` 
preprocessor macro.  The handling of the setting of `__FAST_MATH__` is done in 
"clang/lib/Frontend/InitPreprocessor.cpp", and once we decide on the set of 
conditions that enable that definition, we can add an appropriate comment there.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72675/new/

https://reviews.llvm.org/D72675

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/fast-math.c


Index: clang/test/Driver/fast-math.c
===
--- clang/test/Driver/fast-math.c
+++ clang/test/Driver/fast-math.c
@@ -180,6 +180,21 @@
 // CHECK-FAST-MATH: "-ffast-math"
 // CHECK-FAST-MATH: "-ffinite-math-only"
 //
+// -ffp-contract=off and -ffp-contract=on must disable the fast-math umbrella,
+// and the unsafe-fp-math umbrella (-ffp-contract=fast leaves them enabled).
+// RUN: %clang -### -ffast-math -ffp-contract=off -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-FAST-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=off -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=on -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-FAST-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=on -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=fast -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-FAST-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=fast -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-UNSAFE-MATH %s
+//
 // RUN: %clang -### -ffast-math -fno-fast-math -c %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NO-FAST-MATH %s
 // RUN: %clang -### -ffast-math -fno-finite-math-only -c %s 2>&1 \
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -2785,8 +2785,11 @@
   if (MathErrno)
 CmdArgs.push_back("-fmath-errno");
 
+  // If -ffp-contract=off/on has been specified on the command line, then we
+  // must suppress the emission of -ffast-math and -menable-unsafe-fp-math to
+  // cc1.
   if (!MathErrno && AssociativeMath && ReciprocalMath && !SignedZeros &&
-  !TrappingMath)
+  !TrappingMath && !(FPContract.equals("off") || FPContract.equals("on")))
 CmdArgs.push_back("-menable-unsafe-fp-math");
 
   if (!SignedZeros)
@@ -2831,12 +2834,17 @@
 
   ParseMRecip(D, Args, CmdArgs);
 
-  // -ffast-math enables the __FAST_MATH__ preprocessor macro, but check for 
the
-  // individual features enabled by -ffast-math instead of the option itself as
-  // that's consistent with gcc's behaviour.
+  // -ffast-math acts as an "umbrella", enabling a variety of individual
+  // floating-point transformations.  We check if the appropriate set of those
+  // transformations are enabled, and if they are, we pass that umbrella switch
+  // to cc1.  This also interacts with another "umbrella" switch, -ffp-model.
+  // We handle -ffp-contract somewhat specially here, to produce a warning in
+  // situations where -ffp-model=fast is overridden by the setting of
+  // -ffp-contract.
   if (!HonorINFs && !HonorNaNs && !MathErrno && AssociativeMath &&
   ReciprocalMath && !SignedZeros && !TrappingMath && !RoundingFPMath) {
-CmdArgs.push_back("-ffast-math");
+if (!(FPContract.equals("off") || FPContract.equals("on")))
+  CmdArgs.push_back("-ffast-math");
 if (FPModel.equals("fast")) {
   if (FPContract.equals("fast"))
 // All set, do nothing.


Index: clang/test/Driver/fast-math.c
===
--- clang/test/Driver/fast-math.c
+++ clang/test/Driver/fast-math.c
@@ -180,6 +180,21 @@
 // CHECK-FAST-MATH: "-ffast-math"
 // CHECK-FAST-MATH: "-ffinite-math-only"
 //
+// -ffp-contract=off and -ffp-contract=on must disable the fast-math umbrella,
+// and the unsafe-fp-math umbrella (-ffp-contract=fast leaves them enabled).
+// RUN: %clang -### -ffast-math -ffp-contract=off -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-FAST-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=off -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=on -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-FAST-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=on -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=fast -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-FAST-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=fast -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-UNSAFE-MATH %s
+//
 // RUN: %clang -### 

[PATCH] D72675: [Clang][Driver] Fix -ffast-math/-ffp-contract interaction

2020-01-24 Thread Warren Ristow via Phabricator via cfe-commits
wristow added a comment.

In D72675#1839662 , @andrew.w.kaylor 
wrote:

> In D72675#1839492 , @wristow wrote:
>
> > 1. Should we enable FMA "by default" at (for example) '-O2'?
>
>
> We recently introduced a new option "-ffp-model=[precise|fast|strict], which 
> is supposed to serve as an umbrella option for the most common combination of 
> options. I think our default behavior should be equivalent to 
> -ffp-model=precise, which enables contraction. It currently seems to enable 
> -ffp-contract=fast in the precise model (https://godbolt.org/z/c6w8mJ). I'm 
> not sure that's what it should be doing, but whatever the precise model does 
> should be our default.


That makes sense to me.  So `-ffp-model`/`-ffp-contract` interaction should be 
examined, and possibly adjusted.  If an adjustment is needed, I think it makes 
sense to handle that interaction separately.

I'm going to update this patch to adjust the comment that @spatel and I 
discussed earlier , so it no longer 
implies that `__FAST_MATH__` is defined only when "all of" `-ffast-math` is 
enabled.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72675/new/

https://reviews.llvm.org/D72675



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72675: [Clang][Driver] Fix -ffast-math/-ffp-contract interaction

2020-01-24 Thread Warren Ristow via Phabricator via cfe-commits
wristow added a comment.

> A separate question is the interaction of `-ffast-math` with 
> `-ffp-contract=`.  Currently, there is no such interaction whatsoever in GCC: 
> `-ffast-math` does not imply any particular `-ffp-contract=` setting, and 
> vice versa the `-ffp-contract=` setting is not considered at all when 
> defining `__FAST_MATH__`. This seems at least internally consistent.

That's interesting, and as you said, internally consistent behavior in GCC.  I 
think it would be a fine idea for us to do the same thing.

Looking into this point, I see that (ignoring fastmath for the moment) our 
default `-ffp-contract` behavior is different than GCC's.  GCC enables FMA by 
default when optimization is high enough ('-O2') without any special switch 
needed.  For example, taking an architecture that supports FMA (Haswell), GCC 
has the following behavior:

  float test(float a, float b, float c)
  {
// FMA is enabled by default for GCC (on Haswell), so done at -O2:
//gcc -S -O2 -march=haswell test.c  # FMA happens
//$ gcc -S -march=haswell test.c ; egrep 'mul|add' test.s
//vmulss  -12(%rbp), %xmm0, %xmm0
//vaddss  -4(%rbp), %xmm0, %xmm0
//$ gcc -S -O2 -march=haswell test.c ; egrep 'mul|add' test.s
//vfmadd231ss %xmm2, %xmm1, %xmm0
//$
return a + (b * c);
  }

As we'd expect, GCC does disable FMA with `-ffp-contract=off` (this is 
irrespective of whether `-ffast-math` was specified).  Loosely, GCC's behavior 
can be summarized very simply on this point as:
//Suppress FMA when `-ffp-contract=off`.//
(As an aside, GCC's behavior with `-ffp-contract=on` is non-intuitive to me.  
It relates to the FP_CONTRACT pragma, which as far as I can see is ignored by 
GCC.)

In contrast, we do //not// enable FMA by default (via general optimization, 
such as '-O2').  For example:

  $ clang -S -O2 -march=haswell test.c ; egrep 'mul|add' test.s
  vmulss  %xmm2, %xmm1, %xmm1
  vaddss  %xmm0, %xmm1, %xmm0
  .addrsig
  $

I think that whether we want to continue doing that (or instead, enable it at 
'-O2', like GCC does), is a separate issue.  I can see arguments either way.

We do enable FMA with `-ffp-contract=fast`, as desired (and also with 
`-ffp-contract=on`).  And we do "leave it disabled" with `-ffp-contract=off` 
(as expected).

Now, bringing fastmath back into the discussion, we //do// enable FMA with 
`-fffast-math`.  If we decide to continue leaving it disabled by default, then 
enabling it with `-ffast-math` seems perfectly sensible.  (If we decide to 
enable it by default when optimization is high enough, like GCC, then turning 
on `-ffast-math` should not disable it of course.)

The problem I want to address here is that if the compiler is a mode where FMA 
is enabled (whether that's at '-O2' "by default", or whether it's because the 
user turned on `-ffast-math`), then appending `-ffp-contract=off` //should// 
disable FMA.  I think this patch (along with the small change to 
"DAGCombiner.cpp", in an earlier version of this patch) is a reasonable 
approach to solving that.  I'd say this patch/review/discussion has raised two 
additional questions:

1. Under what conditions should `__FAST_MATH__` be defined?
2. Should we enable FMA "by default" at (for example) '-O2'?

I think these additional questions are best addressed separately.  My 2 cents 
are that for (1), mimicking GCC's behavior seems reasonable (although that's 
assuming we don't find out that GCC's `__FAST_MATH__` behavior is a bug).  And 
for (2), I don't have a strong preference.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72675/new/

https://reviews.llvm.org/D72675



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72675: [Clang][Driver] Fix -ffast-math/-ffp-contract interaction

2020-01-17 Thread Warren Ristow via Phabricator via cfe-commits
wristow updated this revision to Diff 238933.
wristow added a comment.

Updated patch to correct a comment and fix a typo.

Regarding the point from @spatel :

> This follows the reasoning from the earlier discussion, but after re-reading 
> the gcc comment in particular, I'm wondering whether this is what we really 
> want to do...
>  If `__FAST_MATH__` is purely here for compatibility with gcc, then should we 
> mimic gcc behavior in setting that macro even if we think it's buggy?

Thinking about it, yes, I think we should mimic GCC.  FTR, I've tried some 
older GCCs via Compiler Explorer, and found them to be consistent in that 
removing //some// aspects of `-ffast-math` disables the generation of 
`__FAST_MATH__` and removing other aspects of it does not.  So my previous 
recollection ("I could have sworn that in the past that for GCC I had seen this 
sort of disabling of part of the "fast-math set" result in the disabling of the 
def of `__FAST_MATH__`.") is wrong.  Bottom line, maybe it's not a bug in GCC, 
and instead it's the desired GCC behavior (just that I cannot find specific 
documentation of precisely which settings disable the generation of it).

However, I'd call that a bigger question than this `-ffp-contract` aspect of 
`__FAST_MATH__`, and a separate bug, that can be fixed separately.

> Ie, when we translate these settings to LLVM's FMF, we can still override the 
> -ffast-math flag by checking the -ffp-contract flag (if I'm seeing it 
> correctly, the existing code will pass that alongside -ffast-math when 
> contract is set to on/off).

How to handle this seems like an implementation question.  The code here is 
just deciding whether or not we intend to pass "-ffast-math" to cc1 (it isn't 
directly defining `__FAST_MATH__`).  If we do pass it to cc1, then 
"clang/lib/Frontend/InitPreprocessor.cpp" will pre-define `__FAST_MATH__`:

  if (LangOpts.FastMath)
Builder.defineMacro("__FAST_MATH__");

It seems to me that a straightforward way to deal with this question is to make 
the above test more elaborate (that is, if `LangOpts.FastMath` is set, OR 
whatever the appropriate subset of fast-math switches are set).  If we do that, 
we don't need to deal with the "umbrella" aspect if "-fast-math", which gets 
messy.

Which is to say the approach here can stay the same as it currently is 
(`-ffp-contract=off/on` suppressing the passing of "-ffast-math" to cc1).  
Although the comment about `__FAST_MATH__` here in "Clang.cpp" should be 
changed, if we take this approach.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72675/new/

https://reviews.llvm.org/D72675

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/fast-math.c


Index: clang/test/Driver/fast-math.c
===
--- clang/test/Driver/fast-math.c
+++ clang/test/Driver/fast-math.c
@@ -180,6 +180,21 @@
 // CHECK-FAST-MATH: "-ffast-math"
 // CHECK-FAST-MATH: "-ffinite-math-only"
 //
+// -ffp-contract=off and -ffp-contract=on must disable the fast-math umbrella,
+// and the unsafe-fp-math umbrella (-ffp-contract=fast leaves them enabled).
+// RUN: %clang -### -ffast-math -ffp-contract=off -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-FAST-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=off -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=on -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-FAST-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=on -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=fast -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-FAST-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=fast -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-UNSAFE-MATH %s
+//
 // RUN: %clang -### -ffast-math -fno-fast-math -c %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NO-FAST-MATH %s
 // RUN: %clang -### -ffast-math -fno-finite-math-only -c %s 2>&1 \
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -2760,8 +2760,11 @@
   if (MathErrno)
 CmdArgs.push_back("-fmath-errno");
 
+  // If -ffp-contract=off/on has been specified on the command line, then we
+  // must suppress the emission of -ffast-math and -menable-unsafe-fp-math to
+  // cc1.
   if (!MathErrno && AssociativeMath && ReciprocalMath && !SignedZeros &&
-  !TrappingMath)
+  !TrappingMath && !(FPContract.equals("off") || FPContract.equals("on")))
 CmdArgs.push_back("-menable-unsafe-fp-math");
 
   if (!SignedZeros)
@@ -2804,7 +2807,8 @@
   // that's consistent with gcc's behaviour.
   if (!HonorINFs && !HonorNaNs && !MathErrno && AssociativeMath &&
   ReciprocalMath && !SignedZeros && !TrappingMath && !RoundingFPMath) {
-  

[PATCH] D72675: [Clang][Driver] Fix -ffast-math/-ffp-contract interaction

2020-01-16 Thread Warren Ristow via Phabricator via cfe-commits
wristow updated this revision to Diff 238595.
wristow retitled this revision from "Fix -ffast-math/-ffp-contract interaction" 
to "[Clang][Driver] Fix -ffast-math/-ffp-contract interaction".
wristow added a comment.

Changing this to address only the Clang driver aspect, as discussed in the 
comments.  Will spin off a separate patch for the LLVM side.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72675/new/

https://reviews.llvm.org/D72675

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/fast-math.c


Index: clang/test/Driver/fast-math.c
===
--- clang/test/Driver/fast-math.c
+++ clang/test/Driver/fast-math.c
@@ -180,6 +180,21 @@
 // CHECK-FAST-MATH: "-ffast-math"
 // CHECK-FAST-MATH: "-ffinite-math-only"
 //
+// -ffp-contract=off and -ffp-contract=on must disable the fast-math umbrella,
+// and the unsafe-fp-math umbrella (-ffp-conteact=fast leaves them enabled).
+// RUN: %clang -### -ffast-math -ffp-contract=off -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-FAST-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=off -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=on -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-FAST-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=on -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=fast -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-FAST-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=fast -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-UNSAFE-MATH %s
+//
 // RUN: %clang -### -ffast-math -fno-fast-math -c %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NO-FAST-MATH %s
 // RUN: %clang -### -ffast-math -fno-finite-math-only -c %s 2>&1 \
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -2760,8 +2760,10 @@
   if (MathErrno)
 CmdArgs.push_back("-fmath-errno");
 
+  // If -ffp-contract=off has been specified on the command line, then we must
+  // suppress the emission of -ffast-math and -menable-unsafe-fp-math to cc1.
   if (!MathErrno && AssociativeMath && ReciprocalMath && !SignedZeros &&
-  !TrappingMath)
+  !TrappingMath && !(FPContract.equals("off") || FPContract.equals("on")))
 CmdArgs.push_back("-menable-unsafe-fp-math");
 
   if (!SignedZeros)
@@ -2804,7 +2806,8 @@
   // that's consistent with gcc's behaviour.
   if (!HonorINFs && !HonorNaNs && !MathErrno && AssociativeMath &&
   ReciprocalMath && !SignedZeros && !TrappingMath && !RoundingFPMath) {
-CmdArgs.push_back("-ffast-math");
+if (!(FPContract.equals("off") || FPContract.equals("on")))
+  CmdArgs.push_back("-ffast-math");
 if (FPModel.equals("fast")) {
   if (FPContract.equals("fast"))
 // All set, do nothing.


Index: clang/test/Driver/fast-math.c
===
--- clang/test/Driver/fast-math.c
+++ clang/test/Driver/fast-math.c
@@ -180,6 +180,21 @@
 // CHECK-FAST-MATH: "-ffast-math"
 // CHECK-FAST-MATH: "-ffinite-math-only"
 //
+// -ffp-contract=off and -ffp-contract=on must disable the fast-math umbrella,
+// and the unsafe-fp-math umbrella (-ffp-conteact=fast leaves them enabled).
+// RUN: %clang -### -ffast-math -ffp-contract=off -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-FAST-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=off -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=on -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-FAST-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=on -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=fast -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-FAST-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=fast -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-UNSAFE-MATH %s
+//
 // RUN: %clang -### -ffast-math -fno-fast-math -c %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NO-FAST-MATH %s
 // RUN: %clang -### -ffast-math -fno-finite-math-only -c %s 2>&1 \
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -2760,8 +2760,10 @@
   if (MathErrno)
 CmdArgs.push_back("-fmath-errno");
 
+  // If -ffp-contract=off has been specified on the command line, then we must
+  // suppress the emission of -ffast-math and -menable-unsafe-fp-math to cc1.
   if (!MathErrno && AssociativeMath && ReciprocalMath && !SignedZeros &&
-  !TrappingMath)
+  !TrappingMath && 

[PATCH] D72675: Fix -ffast-math/-ffp-contract interaction

2020-01-16 Thread Warren Ristow via Phabricator via cfe-commits
wristow added a comment.

> One commit for the clang changes should be ok; it's a very small diff. But 
> I'm still not sure if the driver change induces frontend diffs that we should 
> make visible via tests.

The only thing I can think of is that it changes whether/when `__FAST_MATH__` 
is defined.  But that'll be indirectly tested, via the updated tests in 
"Driver/fast-math.c", which will verify that "-ffast-math" is passed 
appropriately (and the `__FAST_MATH__` dependency on "-ffast-math" is already 
tested in "Preprocessor/predefined-macros.c").

In addition to adding the second pair of driver tests you suggested for 
`-ffp-contract=on`, I'll also add another pair for `-ffp-contract=fast` to 
verify that "-ffast-math" is passed in that case (no change in behavior for 
that pair, but just confirming it continues to work correctly).


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72675/new/

https://reviews.llvm.org/D72675



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72675: Fix -ffast-math/-ffp-contract interaction

2020-01-16 Thread Warren Ristow via Phabricator via cfe-commits
wristow added a comment.

This all sounds good to me.

So to make sure we're all on the same page, my understanding is that the plan 
forward is:

1. Make the Clang change first (including adding another pair of tests for 
`-ffp-contract=on`).
2. Update the LLVM tests illustrating the current baseline state.
3. Make the LLVM change shown here, and update the tests with the fixes.
4. Bring in the DAG combiner work that Michael has done internally at Apple.

Points 1, 2, and 3 are essentially the points in the patch posted here, so I'll 
do that.  And of course Michael will then take on point 4.

Is that the plan?  If yes, I'll transition this item to just be the Clang 
pieces, and I'll spin off a new one to do the LLVM portion of what is posted 
here.

Sanjay, regarding:

> But it would be better to have all of the baseline tests in place, so we know 
> where we stand currently.

I'm interpreting that as applying to the LLVM tests.  That is, the Clang 
change, along with the updated tests, can be in one commit, rather than first 
updating the Clang driver tests with the current state.  If you'd prefer two 
commits on the Clang side as well, let me know.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72675/new/

https://reviews.llvm.org/D72675



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72675: Fix -ffast-math/-ffp-contract interaction

2020-01-15 Thread Warren Ristow via Phabricator via cfe-commits
wristow marked an inline comment as done.
wristow added inline comments.



Comment at: llvm/test/CodeGen/PowerPC/fmf-propagation.ll:201-203
 ; fma(X, 7.0, X * 42.0) --> X * 49.0
-; This is the minimum FMF needed for this transform - the FMA allows 
reassociation.
+; This is the minimum FMF needed for this transform - the 'contract' allows 
the needed reassociation.
+

spatel wrote:
> I was ok with the reasoning up to here, but this example lost me.
> Why does 'contract' alone allow splitting an fma?
> Is 'contract' relevant on anything besides fadd (with an fmul operand)?
I have to admit I'm handwaving here.  I don't really understand why 'contract' 
alone allows this simpliciation:
`fma(X, 7.0, X * 42.0) --> X * 49.0`

to happen.  But either that's correct, or it's a separate bug (and I waved my 
hands about explaining more detail).

The short story is that even without my proposed change, having //only// 
'contract' on the FMA intrinsic results in this being simplified to `X * 49` 
(and also having //only// 'reassoc' did).  The original comment:

`; This is the minimum FMF needed for this transform - the FMA allows 
reassociation.`

is incomplete/misleading.  A more complete comment (relevant before my change) 
would have been:

`; This is the minimum FMF needed for this transform - either 'reassoc' or 
'contract' applied to the FMA intrinsic allows reassociation.`

And with my change, the result is that 'reassoc' is no longer relevant.  But 
since I don't really understand why it should be simplified with only 
`contract`, I should put a TODO comment in.  That is, to me it seems like both 
'contract' //and// 'reassoc' should be needed for this simplification to happen 
(whereas previously it worked with //either// of them individually).  So maybe 
change this comment to:

; This is the minimum FMF needed for this transform - applying 'contract' 
to the FMA intrinsic allows reassociation.
; TODO: It seems 'contract' and 'reassoc' combined should be needed for 
this transform.  Why does it work with only 'contract'?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72675/new/

https://reviews.llvm.org/D72675



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72675: Fix -ffast-math/-ffp-contract interaction

2020-01-14 Thread Warren Ristow via Phabricator via cfe-commits
wristow updated this revision to Diff 238143.
wristow retitled this revision from "ix -ffast-math/-ffp-contract interaction" 
to "Fix -ffast-math/-ffp-contract interaction".
wristow added a comment.

Addressed comments from @hfinkel .


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72675/new/

https://reviews.llvm.org/D72675

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/fast-math.c
  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
  llvm/test/CodeGen/PowerPC/fmf-propagation.ll
  llvm/test/CodeGen/X86/fp-contract.ll

Index: llvm/test/CodeGen/X86/fp-contract.ll
===
--- /dev/null
+++ llvm/test/CodeGen/X86/fp-contract.ll
@@ -0,0 +1,204 @@
+; Tests for -ffp-contract/-ffast-math interaction.
+; Specifically, -ffp-contract=off must suppress the use of FMA.
+
+; RUN: llc < %s -mcpu=haswell | FileCheck %s --check-prefix=FMA
+
+; Scalar versions:
+
+define float @MulAddPlain(float %a, float %b, float %c) {
+; FMA-LABEL: MulAddPlain:
+; FMA:   vmulss
+; FMA-NEXT:  vaddss
+; FMA-NEXT:  ret
+  %mul = fmul float %a, %b
+  %add = fadd float %mul, %c
+  ret float %add
+}
+
+define float @MulAddFast(float %a, float %b, float %c) {
+; FMA-LABEL: MulAddFast:
+; FMA:   vfmadd213ss
+; FMA-NEXT:  ret
+  %mul = fmul fast float %a, %b
+  %add = fadd fast float %mul, %c
+  ret float %add
+}
+
+define float @MulAddContract(float %a, float %b, float %c) {
+; FMA-LABEL: MulAddContract:
+; FMA:   vfmadd213ss
+; FMA-NEXT:  ret
+  %mul = fmul contract float %a, %b
+  %add = fadd contract float %mul, %c
+  ret float %add
+}
+
+; Enabling all the fast-math-flags except 'contract' does not enable fused operations.
+define float @MulAddFastNoContract(float %a, float %b, float %c) {
+; FMA-LABEL: MulAddFastNoContract:
+; FMA:   vmulss
+; FMA-NEXT:  vaddss
+; FMA-NEXT:  ret
+  %mul = fmul nnan ninf nsz arcp afn reassoc float %a, %b
+  %add = fadd nnan ninf nsz arcp afn reassoc float %mul, %c
+  ret float %add
+}
+
+define float @MulAddReassoc(float %a, float %b, float %c) {
+; FMA-LABEL: MulAddReassoc:
+; FMA:   vmulss
+; FMA-NEXT:  vaddss
+; FMA-NEXT:  ret
+  %mul = fmul reassoc float %a, %b
+  %add = fadd reassoc float %mul, %c
+  ret float %add
+}
+
+define float @MulSubPlain(float %a, float %b, float %c) {
+; FMA-LABEL: MulSubPlain:
+; FMA:   vmulss
+; FMA-NEXT:  vsubss
+; FMA-NEXT:  ret
+  %mul = fmul float %a, %b
+  %sub = fsub float %mul, %c
+  ret float %sub
+}
+
+define float @MulSubFast(float %a, float %b, float %c) {
+; FMA-LABEL: MulSubFast:
+; FMA:   vfmsub213ss
+; FMA-NEXT:  ret
+  %mul = fmul fast float %a, %b
+  %sub = fsub fast float %mul, %c
+  ret float %sub
+}
+
+define float @MulSubContract(float %a, float %b, float %c) {
+; FMA-LABEL: MulSubContract:
+; FMA:   vfmsub213ss
+; FMA-NEXT:  ret
+  %mul = fmul contract float %a, %b
+  %sub = fsub contract float %mul, %c
+  ret float %sub
+}
+
+; Enabling all the fast-math-flags except 'contract' does not enable fused operations.
+define float @MulSubFastNoContract(float %a, float %b, float %c) {
+; FMA-LABEL: MulSubFastNoContract:
+; FMA:   vmulss
+; FMA-NEXT:  vsubss
+; FMA-NEXT:  ret
+  %mul = fmul nnan ninf nsz arcp afn reassoc float %a, %b
+  %sub = fsub nnan ninf nsz arcp afn reassoc float %mul, %c
+  ret float %sub
+}
+
+define float @MulSubReassoc(float %a, float %b, float %c) {
+; FMA-LABEL: MulSubReassoc:
+; FMA:   vmulss
+; FMA-NEXT:  vsubss
+; FMA-NEXT:  ret
+  %mul = fmul reassoc float %a, %b
+  %sub = fsub reassoc float %mul, %c
+  ret float %sub
+}
+
+; Vector versions:
+
+define <4 x float> @VecMulAddPlain(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; FMA-LABEL: VecMulAddPlain:
+; FMA:   vmulps
+; FMA-NEXT:  vaddps
+; FMA-NEXT:  ret
+  %mul = fmul <4 x float> %a, %b
+  %add = fadd <4 x float> %mul, %c
+  ret <4 x float> %add
+}
+
+define <4 x float> @VecMulAddFast(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; FMA-LABEL: VecMulAddFast:
+; FMA:   vfmadd213ps
+; FMA-NEXT:  ret
+  %mul = fmul fast <4 x float> %a, %b
+  %add = fadd fast <4 x float> %mul, %c
+  ret <4 x float> %add
+}
+
+define <4 x float> @VecMulAddContract(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; FMA-LABEL: VecMulAddContract:
+; FMA:   vfmadd213ps
+; FMA-NEXT:  ret
+  %mul = fmul contract <4 x float> %a, %b
+  %add = fadd contract <4 x float> %mul, %c
+  ret <4 x float> %add
+}
+
+; Enabling all the fast-math-flags except 'contract' does not enable fused operations.
+define <4 x float> @VecMulAddFastNoContract(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; FMA-LABEL: VecMulAddFastNoContract:
+; FMA:   vmulps
+; FMA-NEXT:  vaddps
+; FMA-NEXT:  ret
+  %mul = fmul nnan ninf nsz arcp afn reassoc <4 x float> %a, %b
+  %add = fadd nnan ninf nsz arcp afn reassoc <4 x float> %mul, %c
+  ret <4 x float> %add
+}
+
+define <4 x float> @VecMulAddReassoc(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; FMA-LABEL: VecMulAddReassoc:

[PATCH] D72675: ix -ffast-math/-ffp-contract interaction

2020-01-14 Thread Warren Ristow via Phabricator via cfe-commits
wristow marked 3 inline comments as done.
wristow added a comment.

Thanks for the quick feedback @hfinkel




Comment at: clang/lib/Driver/ToolChains/Clang.cpp:2721
   if (!MathErrno && AssociativeMath && ReciprocalMath && !SignedZeros &&
-  !TrappingMath)
+  !TrappingMath && !FPContractDisabled)
 CmdArgs.push_back("-menable-unsafe-fp-math");

hfinkel wrote:
> I think that you just need
> 
>   && !FPContract.equals("off")
> 
> or
> 
>   && !(FPContract.equals("off") || FPContract.equals("on"))
> 
> of which I think the latter. fp-contract=no is also not a 
> fast-math-compatible setting in that regard, right?
Eliminating the `FPContractDisabled` variable, and instead checking that the 
value isn't "off" (or maybe isn't ("off" or "on")), sounds good.  I'm not 100% 
sure of whether just "off" or both "off" and "on" ought to be checked.  More on 
that in the discussion about `__FAST_MATH__` in one of your other comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:2745
   if (!FPContract.empty())
 CmdArgs.push_back(Args.MakeArgString("-ffp-contract=" + FPContract));
 

hfinkel wrote:
> So now we pass it twice, because we also pass it here?
Whoops.  This one was here originally, so I'll leave it here, and delete the 
one I needlessly added, above.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:2763
   if (!HonorINFs && !HonorNaNs && !MathErrno && AssociativeMath &&
   ReciprocalMath && !SignedZeros && !TrappingMath && !RoundingFPMath) {
+if (!FPContractDisabled)

hfinkel wrote:
> You want the check here, I think, and not below so that if parts of fast-math 
> are disabled `__FAST_MATH__` is not set. That seems to be what the 
> comment/code currently do, although what does GCC do in this regard?
I had originally included the check with the set of conditions of line 2763, 
but then the `warn_drv_overriding_flag_option` warning (below) was missed in 
the case of `-ffp-model=fast -ffp-contract=off`.  So I think the check does 
need to remain inside the above conditional.

Regarding the `__FAST_MATH__` aspect and whether `ffp-contract=on` also ought 
to suppress the `-ffast-math` emission (and what GCC does), this is somewhat 
fuzzy for me.  After getting your comments, I'm leaning toward including both 
the "off" and "on" in the check (rather than only disabling it in the case of 
"off", as the current patch does).  But I want to at least note my observations 
here, to make the current situation with both Clang and GCC clear, in case 
others want to chime in.

Somewhat surprisingly to me, GCC's behavior wrt `__FAST_MATH__` seems buggy in 
general, so it's not a good base to compare against.  For example, the 
following set of switches result in the `__FAST_MATH__` macro being defined 
(although I expected they would have disabled it):
-ffast-math -fno-associative-math -fno-reciprocal-math -frounding-math

(I've just tested GCC 7.4.0.)

I could have sworn that in the past that for GCC I had seen this sort of 
disabling of part of the "fast-math set" result in the disabling of the def of 
`__FAST_MATH__`.  As you'd expect with Clang (from the comment in our code 
code), the above switches //do //suppress the definition of `__FAST_MATH__`.

That said, GCC does suppress the `__FAST_MATH__` def in //some //situations.  
E.g., both GCC and Clang suppress it with the following switches:
-ffast-math -fmath-errno
-ffast-math -ftrapping-math
-ffast-math -fsigned-zeros

In any case, for the `-ffp-contract` switch, none of the settings suppress the 
def of `__FAST_MATH__` with GCC.  That is, all of the following result in 
`__FAST_MATH__` being defined:
-ffast-math -ffp-contract=fast
-ffast-math -ffp-contract=on
-ffast-math -ffp-contract=off

I think that's buggy (but hard to know if it's `-ffp-contract` specific, or 
part of the general buggyness of GCC and `__FAST_MATH__`).  Clang (prior to the 
change described here) also defines `__FAST_MATH__` in all of these 
`-ffp-contract` situations.  But I think that's just part of the bug I'm trying 
to address here.

Bottom line: The intended semantics are somewhat fuzzy to me.  I'd say:
-ffast-math -ffp-contract=fast  // should leave __FAST_MATH__ defined
-ffast-math -ffp-contract=off   // should not define __FAST_MATH__
-ffast-math -ffp-contract=on// unsure 

For that last one ("unsure ???"), in the current patch I was leaving 
`__FAST_MATH__` defined.  But I'm now leaning toward also suppressing the 
`__FAST_MATH__` def in that case.  Unless anyone disagrees, I'll include that 
change when I update the patch.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72675/new/

https://reviews.llvm.org/D72675



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72675: ix -ffast-math/-ffp-contract interaction

2020-01-13 Thread Warren Ristow via Phabricator via cfe-commits
wristow created this revision.
wristow added reviewers: spatel, mcberg2017.
Herald added subscribers: jsji, hiraditya, nemanjai.
Herald added a project: LLVM.

Fused Multiply Add (FMA) was not always being disabled when the switch 
`-ffp-contract=off` was used.  More specifically, FMA is enabled when 
`-ffp-contract=fast` is used, and it also is enabled implicitly with 
`-ffast-math`.  The combination:

  -ffast-math -ffp-contract=off

is intended to leave most of fast-math enabled (for example, leave 
reassociation, reciprocal transformations, etc.) enabled, but disable the use 
of FMA.  However, FMA was incorrectly left enabled with the above switch 
combination.  This commit fixes this, allowing users to enable most of the 
fast-math optimizations, while disabling the FMA feature.


https://reviews.llvm.org/D72675

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/fast-math.c
  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
  llvm/test/CodeGen/PowerPC/fmf-propagation.ll
  llvm/test/CodeGen/X86/fp-contract.ll

Index: llvm/test/CodeGen/X86/fp-contract.ll
===
--- /dev/null
+++ llvm/test/CodeGen/X86/fp-contract.ll
@@ -0,0 +1,204 @@
+; Tests for -ffp-contract/-ffast-math interaction.
+; Specifically, -ffp-contract=off must suppress the use of FMA.
+
+; RUN: llc < %s -mcpu=haswell | FileCheck %s --check-prefix=FMA
+
+; Scalar versions:
+
+define float @MulAddPlain(float %a, float %b, float %c) {
+; FMA-LABEL: MulAddPlain:
+; FMA:   vmulss
+; FMA-NEXT:  vaddss
+; FMA-NEXT:  ret
+  %mul = fmul float %a, %b
+  %add = fadd float %mul, %c
+  ret float %add
+}
+
+define float @MulAddFast(float %a, float %b, float %c) {
+; FMA-LABEL: MulAddFast:
+; FMA:   vfmadd213ss
+; FMA-NEXT:  ret
+  %mul = fmul fast float %a, %b
+  %add = fadd fast float %mul, %c
+  ret float %add
+}
+
+define float @MulAddContract(float %a, float %b, float %c) {
+; FMA-LABEL: MulAddContract:
+; FMA:   vfmadd213ss
+; FMA-NEXT:  ret
+  %mul = fmul contract float %a, %b
+  %add = fadd contract float %mul, %c
+  ret float %add
+}
+
+; Enabling all the fast-math-flags except 'contract' does not enable fused operations.
+define float @MulAddFastNoContract(float %a, float %b, float %c) {
+; FMA-LABEL: MulAddFastNoContract:
+; FMA:   vmulss
+; FMA-NEXT:  vaddss
+; FMA-NEXT:  ret
+  %mul = fmul nnan ninf nsz arcp afn reassoc float %a, %b
+  %add = fadd nnan ninf nsz arcp afn reassoc float %mul, %c
+  ret float %add
+}
+
+define float @MulAddReassoc(float %a, float %b, float %c) {
+; FMA-LABEL: MulAddReassoc:
+; FMA:   vmulss
+; FMA-NEXT:  vaddss
+; FMA-NEXT:  ret
+  %mul = fmul reassoc float %a, %b
+  %add = fadd reassoc float %mul, %c
+  ret float %add
+}
+
+define float @MulSubPlain(float %a, float %b, float %c) {
+; FMA-LABEL: MulSubPlain:
+; FMA:   vmulss
+; FMA-NEXT:  vsubss
+; FMA-NEXT:  ret
+  %mul = fmul float %a, %b
+  %sub = fsub float %mul, %c
+  ret float %sub
+}
+
+define float @MulSubFast(float %a, float %b, float %c) {
+; FMA-LABEL: MulSubFast:
+; FMA:   vfmsub213ss
+; FMA-NEXT:  ret
+  %mul = fmul fast float %a, %b
+  %sub = fsub fast float %mul, %c
+  ret float %sub
+}
+
+define float @MulSubContract(float %a, float %b, float %c) {
+; FMA-LABEL: MulSubContract:
+; FMA:   vfmsub213ss
+; FMA-NEXT:  ret
+  %mul = fmul contract float %a, %b
+  %sub = fsub contract float %mul, %c
+  ret float %sub
+}
+
+; Enabling all the fast-math-flags except 'contract' does not enable fused operations.
+define float @MulSubFastNoContract(float %a, float %b, float %c) {
+; FMA-LABEL: MulSubFastNoContract:
+; FMA:   vmulss
+; FMA-NEXT:  vsubss
+; FMA-NEXT:  ret
+  %mul = fmul nnan ninf nsz arcp afn reassoc float %a, %b
+  %sub = fsub nnan ninf nsz arcp afn reassoc float %mul, %c
+  ret float %sub
+}
+
+define float @MulSubReassoc(float %a, float %b, float %c) {
+; FMA-LABEL: MulSubReassoc:
+; FMA:   vmulss
+; FMA-NEXT:  vsubss
+; FMA-NEXT:  ret
+  %mul = fmul reassoc float %a, %b
+  %sub = fsub reassoc float %mul, %c
+  ret float %sub
+}
+
+; Vector versions:
+
+define <4 x float> @VecMulAddPlain(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; FMA-LABEL: VecMulAddPlain:
+; FMA:   vmulps
+; FMA-NEXT:  vaddps
+; FMA-NEXT:  ret
+  %mul = fmul <4 x float> %a, %b
+  %add = fadd <4 x float> %mul, %c
+  ret <4 x float> %add
+}
+
+define <4 x float> @VecMulAddFast(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; FMA-LABEL: VecMulAddFast:
+; FMA:   vfmadd213ps
+; FMA-NEXT:  ret
+  %mul = fmul fast <4 x float> %a, %b
+  %add = fadd fast <4 x float> %mul, %c
+  ret <4 x float> %add
+}
+
+define <4 x float> @VecMulAddContract(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; FMA-LABEL: VecMulAddContract:
+; FMA:   vfmadd213ps
+; FMA-NEXT:  ret
+  %mul = fmul contract <4 x float> %a, %b
+  %add = fadd contract <4 x float> %mul, %c
+  ret <4 x float> %add
+}
+
+; Enabling all the fast-math-flags except 'contract' does not 

[PATCH] D71718: [X86] Mark various pointer arguments in builtins as const

2019-12-19 Thread Warren Ristow via Phabricator via cfe-commits
wristow added a comment.

Thanks for the quick review.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71718/new/

https://reviews.llvm.org/D71718



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D71718: [X86] Mark various pointer arguments in builtins as const

2019-12-19 Thread Warren Ristow via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG7fcd9e3f7083: [X86] Mark various pointer arguments in 
builtins as const (authored by wristow).
Herald added a project: clang.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71718/new/

https://reviews.llvm.org/D71718

Files:
  clang/include/clang/Basic/BuiltinsX86.def
  clang/lib/Headers/avx512bwintrin.h
  clang/lib/Headers/avx512fintrin.h
  clang/lib/Headers/avx512vlbwintrin.h
  clang/lib/Headers/avx512vlintrin.h
  clang/lib/Headers/avxintrin.h
  clang/lib/Headers/emmintrin.h
  clang/lib/Headers/immintrin.h
  clang/lib/Headers/mwaitxintrin.h
  clang/lib/Headers/pmmintrin.h
  clang/lib/Headers/xmmintrin.h
  clang/test/Headers/x86-intrinsics-headers-clean.cpp
  clang/test/Headers/x86intrin-2.c

Index: clang/test/Headers/x86intrin-2.c
===
--- clang/test/Headers/x86intrin-2.c
+++ clang/test/Headers/x86intrin-2.c
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -fsyntax-only -ffreestanding %s -verify
-// RUN: %clang_cc1 -fsyntax-only -ffreestanding -flax-vector-conversions=none %s -verify
-// RUN: %clang_cc1 -fsyntax-only -ffreestanding -x c++ %s -verify
+// RUN: %clang_cc1 -fsyntax-only -ffreestanding -Wcast-qual %s -verify
+// RUN: %clang_cc1 -fsyntax-only -ffreestanding -flax-vector-conversions=none -Wcast-qual %s -verify
+// RUN: %clang_cc1 -fsyntax-only -ffreestanding -Wcast-qual -x c++ %s -verify
 // expected-no-diagnostics
 
 #if defined(i386) || defined(__x86_64__)
@@ -16,6 +16,10 @@
   return _mm_add_ss(a, b);
 }
 
+void __attribute__((__target__("sse"))) mm_prefetch_wrap(const void *p) {
+  _mm_prefetch(p, 0x3);
+}
+
 __m128d __attribute__((__target__("sse2"))) mm_sqrt_sd_wrap(__m128d a, __m128d b) {
   return _mm_sqrt_sd(a, b);
 }
Index: clang/test/Headers/x86-intrinsics-headers-clean.cpp
===
--- clang/test/Headers/x86-intrinsics-headers-clean.cpp
+++ clang/test/Headers/x86-intrinsics-headers-clean.cpp
@@ -1,7 +1,7 @@
 // Make sure the intrinsic headers compile cleanly with no warnings or errors.
 
 // RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-unknown -Wsystem-headers \
-// RUN:   -fsyntax-only -flax-vector-conversions=none -x c++ -verify %s
+// RUN:   -Wcast-qual -fsyntax-only -flax-vector-conversions=none -x c++ -verify %s
 
 // expected-no-diagnostics
 
Index: clang/lib/Headers/xmmintrin.h
===
--- clang/lib/Headers/xmmintrin.h
+++ clang/lib/Headers/xmmintrin.h
@@ -1627,7 +1627,7 @@
   struct __mm_loadh_pi_struct {
 __mm_loadh_pi_v2f32 __u;
   } __attribute__((__packed__, __may_alias__));
-  __mm_loadh_pi_v2f32 __b = ((struct __mm_loadh_pi_struct*)__p)->__u;
+  __mm_loadh_pi_v2f32 __b = ((const struct __mm_loadh_pi_struct*)__p)->__u;
   __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1);
   return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5);
 }
@@ -1654,7 +1654,7 @@
   struct __mm_loadl_pi_struct {
 __mm_loadl_pi_v2f32 __u;
   } __attribute__((__packed__, __may_alias__));
-  __mm_loadl_pi_v2f32 __b = ((struct __mm_loadl_pi_struct*)__p)->__u;
+  __mm_loadl_pi_v2f32 __b = ((const struct __mm_loadl_pi_struct*)__p)->__u;
   __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1);
   return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3);
 }
@@ -1680,7 +1680,7 @@
   struct __mm_load_ss_struct {
 float __u;
   } __attribute__((__packed__, __may_alias__));
-  float __u = ((struct __mm_load_ss_struct*)__p)->__u;
+  float __u = ((const struct __mm_load_ss_struct*)__p)->__u;
   return __extension__ (__m128){ __u, 0, 0, 0 };
 }
 
@@ -1702,7 +1702,7 @@
   struct __mm_load1_ps_struct {
 float __u;
   } __attribute__((__packed__, __may_alias__));
-  float __u = ((struct __mm_load1_ps_struct*)__p)->__u;
+  float __u = ((const struct __mm_load1_ps_struct*)__p)->__u;
   return __extension__ (__m128){ __u, __u, __u, __u };
 }
 
@@ -1722,7 +1722,7 @@
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_load_ps(const float *__p)
 {
-  return *(__m128*)__p;
+  return *(const __m128*)__p;
 }
 
 /// Loads a 128-bit floating-point vector of [4 x float] from an
@@ -1742,7 +1742,7 @@
   struct __loadu_ps {
 __m128_u __v;
   } __attribute__((__packed__, __may_alias__));
-  return ((struct __loadu_ps*)__p)->__v;
+  return ((const struct __loadu_ps*)__p)->__v;
 }
 
 /// Loads four packed float values, in reverse order, from an aligned
@@ -2100,7 +2100,7 @@
 ///be generated. \n
 ///_MM_HINT_T2: Move data using the T2 hint. The PREFETCHT2 instruction will
 ///be generated.
-#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), \
+#define _mm_prefetch(a, sel) (__builtin_prefetch((const void *)(a), \
  ((sel) >> 2) & 1, (sel) & 0x3))
 #endif
 
Index: clang/lib/Headers/pmmintrin.h

[PATCH] D71718: [X86] Mark various pointer arguments in builtins as const

2019-12-19 Thread Warren Ristow via Phabricator via cfe-commits
wristow created this revision.
wristow added a reviewer: craig.topper.

Enabling `-Wcast-qual` identified many casts in various system headers
that were dropping the `const` qualifier.  Fixing those missing
qualifiers pointed out that a few of the definitions of the builtins
did not properly identify their arguments as 'const' pointers.  This
commit fixes those builtin definitions, and the system header files
so that they no longer drop the qualifier.


https://reviews.llvm.org/D71718

Files:
  clang/include/clang/Basic/BuiltinsX86.def
  clang/lib/Headers/avx512bwintrin.h
  clang/lib/Headers/avx512fintrin.h
  clang/lib/Headers/avx512vlbwintrin.h
  clang/lib/Headers/avx512vlintrin.h
  clang/lib/Headers/avxintrin.h
  clang/lib/Headers/emmintrin.h
  clang/lib/Headers/immintrin.h
  clang/lib/Headers/mwaitxintrin.h
  clang/lib/Headers/pmmintrin.h
  clang/lib/Headers/xmmintrin.h
  clang/test/Headers/x86-intrinsics-headers-clean.cpp
  clang/test/Headers/x86intrin-2.c

Index: clang/test/Headers/x86intrin-2.c
===
--- clang/test/Headers/x86intrin-2.c
+++ clang/test/Headers/x86intrin-2.c
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -fsyntax-only -ffreestanding %s -verify
-// RUN: %clang_cc1 -fsyntax-only -ffreestanding -flax-vector-conversions=none %s -verify
-// RUN: %clang_cc1 -fsyntax-only -ffreestanding -x c++ %s -verify
+// RUN: %clang_cc1 -fsyntax-only -ffreestanding -Wcast-qual %s -verify
+// RUN: %clang_cc1 -fsyntax-only -ffreestanding -flax-vector-conversions=none -Wcast-qual %s -verify
+// RUN: %clang_cc1 -fsyntax-only -ffreestanding -Wcast-qual -x c++ %s -verify
 // expected-no-diagnostics
 
 #if defined(i386) || defined(__x86_64__)
@@ -16,6 +16,10 @@
   return _mm_add_ss(a, b);
 }
 
+void __attribute__((__target__("sse"))) mm_prefetch_wrap(const void *p) {
+  _mm_prefetch(p, 0x3);
+}
+
 __m128d __attribute__((__target__("sse2"))) mm_sqrt_sd_wrap(__m128d a, __m128d b) {
   return _mm_sqrt_sd(a, b);
 }
Index: clang/test/Headers/x86-intrinsics-headers-clean.cpp
===
--- clang/test/Headers/x86-intrinsics-headers-clean.cpp
+++ clang/test/Headers/x86-intrinsics-headers-clean.cpp
@@ -1,7 +1,7 @@
 // Make sure the intrinsic headers compile cleanly with no warnings or errors.
 
 // RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-unknown -Wsystem-headers \
-// RUN:   -fsyntax-only -flax-vector-conversions=none -x c++ -verify %s
+// RUN:   -Wcast-qual -fsyntax-only -flax-vector-conversions=none -x c++ -verify %s
 
 // expected-no-diagnostics
 
Index: clang/lib/Headers/xmmintrin.h
===
--- clang/lib/Headers/xmmintrin.h
+++ clang/lib/Headers/xmmintrin.h
@@ -1627,7 +1627,7 @@
   struct __mm_loadh_pi_struct {
 __mm_loadh_pi_v2f32 __u;
   } __attribute__((__packed__, __may_alias__));
-  __mm_loadh_pi_v2f32 __b = ((struct __mm_loadh_pi_struct*)__p)->__u;
+  __mm_loadh_pi_v2f32 __b = ((const struct __mm_loadh_pi_struct*)__p)->__u;
   __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1);
   return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5);
 }
@@ -1654,7 +1654,7 @@
   struct __mm_loadl_pi_struct {
 __mm_loadl_pi_v2f32 __u;
   } __attribute__((__packed__, __may_alias__));
-  __mm_loadl_pi_v2f32 __b = ((struct __mm_loadl_pi_struct*)__p)->__u;
+  __mm_loadl_pi_v2f32 __b = ((const struct __mm_loadl_pi_struct*)__p)->__u;
   __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1);
   return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3);
 }
@@ -1680,7 +1680,7 @@
   struct __mm_load_ss_struct {
 float __u;
   } __attribute__((__packed__, __may_alias__));
-  float __u = ((struct __mm_load_ss_struct*)__p)->__u;
+  float __u = ((const struct __mm_load_ss_struct*)__p)->__u;
   return __extension__ (__m128){ __u, 0, 0, 0 };
 }
 
@@ -1702,7 +1702,7 @@
   struct __mm_load1_ps_struct {
 float __u;
   } __attribute__((__packed__, __may_alias__));
-  float __u = ((struct __mm_load1_ps_struct*)__p)->__u;
+  float __u = ((const struct __mm_load1_ps_struct*)__p)->__u;
   return __extension__ (__m128){ __u, __u, __u, __u };
 }
 
@@ -1722,7 +1722,7 @@
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_load_ps(const float *__p)
 {
-  return *(__m128*)__p;
+  return *(const __m128*)__p;
 }
 
 /// Loads a 128-bit floating-point vector of [4 x float] from an
@@ -1742,7 +1742,7 @@
   struct __loadu_ps {
 __m128_u __v;
   } __attribute__((__packed__, __may_alias__));
-  return ((struct __loadu_ps*)__p)->__v;
+  return ((const struct __loadu_ps*)__p)->__v;
 }
 
 /// Loads four packed float values, in reverse order, from an aligned
@@ -2100,7 +2100,7 @@
 ///be generated. \n
 ///_MM_HINT_T2: Move data using the T2 hint. The PREFETCHT2 instruction will
 ///be generated.
-#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), \
+#define _mm_prefetch(a, sel) (__builtin_prefetch((const void *)(a), \
   

[PATCH] D69732: [WIP][LTO] Apply SamplePGO pipeline tunes for ThinLTO pre-link to full LTO

2019-12-13 Thread Warren Ristow via Phabricator via cfe-commits
wristow added a comment.

>> Given the performance improvements here, I'd like to develop this patch 
>> further.
> 
> In D69732#1784290 , @ormris wrote:
>  //Ping// @tejohnson

@ormris, I think that since @tejohnson originally suggested that someone with 
more interest in full LTO performance pick this up (and she specifically 
suggested you or me), then you can feel free to take this over.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69732/new/

https://reviews.llvm.org/D69732



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D69732: [WIP][LTO] Apply SamplePGO pipeline tunes for ThinLTO pre-link to full LTO

2019-11-04 Thread Warren Ristow via Phabricator via cfe-commits
wristow added inline comments.



Comment at: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp:614
   // during ThinLTO and perform the rest of the optimizations afterward.
   if (PrepareForThinLTO) {
 // Ensure we perform any last passes, but do so before renaming anonymous

wenlei wrote:
> this also need to be `PrepareForThinLTO || PrepareForLTO` for oldPM?
I agree this is another instance where a balancing act question applies.  In 
this case, assuming the comment about the concern of code bloat is accurate, 
it's not so much about compile-time resources in the full LTO back-end, but 
rather about minimizing the ThinLTO bitcode write/read time.  So if as this WIP 
evolves, it ultimately is a win for SamplePGO to suppress some loop 
optimizations (unrolling/vectorization) here, then that will probably also be a 
//small // win in full LTO compile time.  That said, in addition to these 
loop-related optimizations, there are other transformations here that are done 
in the full LTO pipeline (but not in the ThinLTO pipeline).  So I suspect if 
some change to check for `PrepareForThinLTO || PrepareForLTO` (rather than only 
`PrepareForThinLTO`) makes sense here from a Sample PGO perspective, then the 
change will be more complicated than simply adding the small set of passes here 
followed by the early return (that is, I think there are probably things after 
the `return` on line 621 that still ought to be enabled for full LTO -- 
essentially continuing to do them in the pre-link stage for full LTO, to try to 
avoid needing to do too much work in the full LTO backend stage, since it's 
more of a problem for the full backend to absorb that compile time cost).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69732/new/

https://reviews.llvm.org/D69732



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D69732: [WIP][LTO] Apply SamplePGO pipeline tunes for ThinLTO pre-link to full LTO

2019-11-04 Thread Warren Ristow via Phabricator via cfe-commits
wristow added a comment.

> This probably needs to be taken over by someone who cares about full LTO 
> performance

We at PlayStation are definitely interested in full LTO performance, so we're 
looking into this.  We certainly agree with the rationale that if suppressing 
some optimizations is useful to allow better SamplePGO matching, then we'd 
expect that would apply equally to both ThinLTO and full LTO.

I guess much of this comes down to a balancing act between:

1. The amount of the runtime benefit with Sample PGO if these loop 
optimizations are deferred to the full LTO back-end (like they are for ThinLTO).
2. The cost in compile-time resources in the full LTO back-end to do these loop 
optimizations at that later stage.

From the discussion here, the Sample PGO runtime win (point 1) seems more or 
less to be a given.  If we find the compile-time cost in the full LTO back-end 
(point 2) is not significant, then the decision should be easy.  So after 
seeing this patch, we're doing some experiments to at least try to get a handle 
on this.  (I'm a bit concerned we won't be able to draw any hard conclusions 
from the results of our experiments, but at least we'll be able to make a 
better informed assessment.)

FTR, for PlayStation, we're using the old PM.  But we'll do some experiments 
for both the old and new PM, to get a sense of the answers to the (old PM) 
`LoopUnrollAndJam` point, and the (new PM) FIXME comment.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69732/new/

https://reviews.llvm.org/D69732



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D64780: Disallow most calling convention attributes on PS4.

2019-07-19 Thread Warren Ristow via Phabricator via cfe-commits
wristow accepted this revision.
wristow added a comment.

In D64780#1593624 , @aaron.ballman 
wrote:

> LGTM aside from a nit. You should give other reviewers a chance to sign off 
> in case they have additional comments.


LGTM too, once the line-length is fixed (and IMO, no need to update the review 
for just that line-length change).


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64780/new/

https://reviews.llvm.org/D64780



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D64780: Disallow most calling convention attributes on PS4.

2019-07-17 Thread Warren Ristow via Phabricator via cfe-commits
wristow added reviewers: rnk, rjmccall.
wristow added inline comments.



Comment at: include/clang/Basic/TargetInfo.h:1271
 CCCR_Ignore,
+CCCR_Error,
   };

rnk wrote:
> I feel like perhaps a cleaner way of implementing this would be to have the 
> driver pass down -Werror=unknown-calling-conv (sp) on PS4. That would also 
> allow users to tell clang to ignore their CC annotations by disabling the 
> warning, which could be useful when porting a codebase widely annotated with 
> __stdcall, for example.
I can see the value of allowing users to ignore the CC annotations to ease a 
porting task, but we on PS4 prefer not to allow that.  We're probably more 
cautious than most about ABI incompatibilities, so if others decide to restrict 
use of unsupported CCs, they may very well want to go this route of the driver 
disabling the CCs.  But we'll still make them hard errors on PS4.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64780/new/

https://reviews.llvm.org/D64780



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D64672: [X86] Prevent passing vectors of __int128 as in llvm IR

2019-07-15 Thread Warren Ristow via Phabricator via cfe-commits
wristow added a comment.

> Do we need to keep the old behavior on platforms where clang is the de facto 
> compiler?

I know we (PlayStation) will want to keep the old behavior.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64672/new/

https://reviews.llvm.org/D64672



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D47291: Proposal to make rtti errors more generic.

2018-06-05 Thread Warren Ristow via Phabricator via cfe-commits
wristow added inline comments.



Comment at: include/clang/Basic/DiagnosticSemaKinds.td:6729
 def err_no_dynamic_cast_with_fno_rtti : Error<
-  "cannot use dynamic_cast with -fno-rtti">;
+  "use of dynamic_cast requires enabling RTTI">;
 

probinson wrote:
> wristow wrote:
> > Sunil_Srivastava wrote:
> > > probinson wrote:
> > > > filcab wrote:
> > > > > I'd prefer to have the way to enable RTTI mentioned in the message. 
> > > > > Could we have something like `ToolChain::getRTTIMode()` and have a 
> > > > > "RTTI was on/of" or "RTTI defaulted to on/off"? That way we'd be able 
> > > > > to have a message similar to the current one (mentioning "you passed 
> > > > > -fno-rtti") on platforms that default to RTTI=on, and have your 
> > > > > updated message (possibly with a mention of "use -frtti") on 
> > > > > platforms that default to RTTI=off.
> > > > > 
> > > > > (This is a minor usability comment about this patch, I don't consider 
> > > > > it a blocker or anything)
> > > > If the options are spelled differently for clang-cl and we had a way to 
> > > > retrieve the appropriate spellings, providing the option to use in the 
> > > > diagnostic does seem like a nice touch.
> > > The idea of suggestion as to how-to-turn-on-rtti is appealing, but it is 
> > > not trivial.
> > > 
> > > First, clang-cl does not give this warning at all, so the issue is moot 
> > > for clang-cl.
> > > 
> > > For unix-line command-line, if the default RTTI mode in ENABLED (the 
> > > unknown-linux)
> > > then this warning appears only if the user gives -fno-rtti options, so 
> > > again we do
> > > not need to say anything more.
> > > 
> > > The only cases left are compilers a where the default RTTI mode is 
> > > DISABLED. 
> > > Here an addendum like '[-frtti]' may make sense. AFAIK, PS4 is the only 
> > > compiler
> > > with this default, but there may be other such private compilers.
> > > 
> > > So should we append '[-frtti]' if Target.getTriple().isPS4() is true?
> > > So should we append '[-frtti]' if Target.getTriple().isPS4() is true?
> > 
> > Personally, I'd be OK with producing a suggestion of how to enable RTTI 
> > based on the PS4 triple.  But I'd also be OK with not enhancing this 
> > diagnostic to suggest how to turn on RTTI (that is, going with the patch as 
> > originally proposed here).
> > 
> > If clang-cl produced a warning when a dynamic_cast or typeid construct was 
> > encountered in `/GR-` mode, then I'd feel it's worth complicating the code 
> > to provide a target-sensitive way for advising the user how to turn RTTI 
> > on.  But clang-cl doesn't produce a warning in that case, so the effort to 
> > add the framework for producing a target-sensitive warning doesn't seem 
> > worth it to me.
> > 
> > Improving clang-cl to produce a diagnostic in this `/GR-` situation seems 
> > like a good idea, but separate from this proposed patch.  If that work gets 
> > done at some point, then it would be natural to revisit this diagnostic at 
> > that time.
> If clang-cl is not a consideration, then I think the easiest and clearest way 
> to do this is simply to say `requires -frtti` without hair-splitting which 
> targets default which way.
Saying `requires -frtti` makes good sense to me.


Repository:
  rC Clang

https://reviews.llvm.org/D47291



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D47291: Proposal to make rtti errors more generic.

2018-06-04 Thread Warren Ristow via Phabricator via cfe-commits
wristow added inline comments.



Comment at: include/clang/Basic/DiagnosticSemaKinds.td:6729
 def err_no_dynamic_cast_with_fno_rtti : Error<
-  "cannot use dynamic_cast with -fno-rtti">;
+  "use of dynamic_cast requires enabling RTTI">;
 

Sunil_Srivastava wrote:
> probinson wrote:
> > filcab wrote:
> > > I'd prefer to have the way to enable RTTI mentioned in the message. Could 
> > > we have something like `ToolChain::getRTTIMode()` and have a "RTTI was 
> > > on/of" or "RTTI defaulted to on/off"? That way we'd be able to have a 
> > > message similar to the current one (mentioning "you passed -fno-rtti") on 
> > > platforms that default to RTTI=on, and have your updated message 
> > > (possibly with a mention of "use -frtti") on platforms that default to 
> > > RTTI=off.
> > > 
> > > (This is a minor usability comment about this patch, I don't consider it 
> > > a blocker or anything)
> > If the options are spelled differently for clang-cl and we had a way to 
> > retrieve the appropriate spellings, providing the option to use in the 
> > diagnostic does seem like a nice touch.
> The idea of suggestion as to how-to-turn-on-rtti is appealing, but it is not 
> trivial.
> 
> First, clang-cl does not give this warning at all, so the issue is moot for 
> clang-cl.
> 
> For unix-line command-line, if the default RTTI mode in ENABLED (the 
> unknown-linux)
> then this warning appears only if the user gives -fno-rtti options, so again 
> we do
> not need to say anything more.
> 
> The only cases left are compilers a where the default RTTI mode is DISABLED. 
> Here an addendum like '[-frtti]' may make sense. AFAIK, PS4 is the only 
> compiler
> with this default, but there may be other such private compilers.
> 
> So should we append '[-frtti]' if Target.getTriple().isPS4() is true?
> So should we append '[-frtti]' if Target.getTriple().isPS4() is true?

Personally, I'd be OK with producing a suggestion of how to enable RTTI based 
on the PS4 triple.  But I'd also be OK with not enhancing this diagnostic to 
suggest how to turn on RTTI (that is, going with the patch as originally 
proposed here).

If clang-cl produced a warning when a dynamic_cast or typeid construct was 
encountered in `/GR-` mode, then I'd feel it's worth complicating the code to 
provide a target-sensitive way for advising the user how to turn RTTI on.  But 
clang-cl doesn't produce a warning in that case, so the effort to add the 
framework for producing a target-sensitive warning doesn't seem worth it to me.

Improving clang-cl to produce a diagnostic in this `/GR-` situation seems like 
a good idea, but separate from this proposed patch.  If that work gets done at 
some point, then it would be natural to revisit this diagnostic at that time.


Repository:
  rC Clang

https://reviews.llvm.org/D47291



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39812: [Driver, CodeGen] pass through and apply -fassociative-math

2017-11-08 Thread Warren Ristow via Phabricator via cfe-commits
wristow added a comment.

In https://reviews.llvm.org/D39812#919687, @spatel wrote:

> I just reviewed the gcc docs:
>  https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html
>
> "[-fassociative-math] requires that both -fno-signed-zeros and 
> -fno-trapping-math be in effect."
>
> If we want to match that behavior, I need to change this patch to light up 
> 'nsz' and the no-trapping-math function attribute.


Yes, I think we want to match that behavior.  But by "light up" 'nsz' and the 
no-trapping-math attribute, do you mean automatically turn them on when 
'-fassociative-math' is specified?  I'd think it should be the other way 
around: Suppress the effect of '-fassociative-math' unless both 
'-fno-signed-zeros' and '-fno-trapping-math' are also specified.


https://reviews.llvm.org/D39812



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D29208: Prevent ICE in dllexport class with _Atomic() data member

2017-02-02 Thread Warren Ristow via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
wristow marked an inline comment as done.
Closed by commit rL293911: Prevent ICE in dllexport class with _Atomic data 
member (authored by wristow).

Changed prior to commit:
  https://reviews.llvm.org/D29208?vs=86767=86848#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D29208

Files:
  cfe/trunk/lib/CodeGen/CGClass.cpp
  cfe/trunk/test/CodeGenCXX/atomic-dllexport.cpp


Index: cfe/trunk/test/CodeGenCXX/atomic-dllexport.cpp
===
--- cfe/trunk/test/CodeGenCXX/atomic-dllexport.cpp
+++ cfe/trunk/test/CodeGenCXX/atomic-dllexport.cpp
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -triple i686-windows-msvc -emit-llvm -std=c++11 
-fms-extensions -O0 -o - %s | FileCheck --check-prefix=M32 %s
+// RUN: %clang_cc1 -triple x86_64-windows-msvc -emit-llvm -std=c++11 
-fms-extensions -O0 -o - %s | FileCheck --check-prefix=M64 %s
+
+struct __declspec(dllexport) SomeStruct {
+  // Copy assignment operator should be produced, and exported:
+  // M32: define weak_odr dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) 
%struct.SomeStruct* @"\01??4SomeStruct@@QAEAAU0@ABU0@@Z"
+  // M64: define weak_odr dllexportdereferenceable({{[0-9]+}}) 
%struct.SomeStruct* @"\01??4SomeStruct@@QEAAAEAU0@AEBU0@@Z"
+  _Atomic(int) mData;
+};
Index: cfe/trunk/lib/CodeGen/CGClass.cpp
===
--- cfe/trunk/lib/CodeGen/CGClass.cpp
+++ cfe/trunk/lib/CodeGen/CGClass.cpp
@@ -1131,10 +1131,11 @@
   RHS = EC->getSubExpr();
 if (!RHS)
   return nullptr;
-MemberExpr *ME2 = dyn_cast(RHS);
-if (dyn_cast(ME2->getMemberDecl()) != Field)
-  return nullptr;
-return Field;
+if (MemberExpr *ME2 = dyn_cast(RHS)) {
+  if (ME2->getMemberDecl() == Field)
+return Field;
+}
+return nullptr;
   } else if (CXXMemberCallExpr *MCE = dyn_cast(S)) {
 CXXMethodDecl *MD = dyn_cast(MCE->getCalleeDecl());
 if (!(MD && isMemcpyEquivalentSpecialMember(MD)))


Index: cfe/trunk/test/CodeGenCXX/atomic-dllexport.cpp
===
--- cfe/trunk/test/CodeGenCXX/atomic-dllexport.cpp
+++ cfe/trunk/test/CodeGenCXX/atomic-dllexport.cpp
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -triple i686-windows-msvc -emit-llvm -std=c++11 -fms-extensions -O0 -o - %s | FileCheck --check-prefix=M32 %s
+// RUN: %clang_cc1 -triple x86_64-windows-msvc -emit-llvm -std=c++11 -fms-extensions -O0 -o - %s | FileCheck --check-prefix=M64 %s
+
+struct __declspec(dllexport) SomeStruct {
+  // Copy assignment operator should be produced, and exported:
+  // M32: define weak_odr dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.SomeStruct* @"\01??4SomeStruct@@QAEAAU0@ABU0@@Z"
+  // M64: define weak_odr dllexportdereferenceable({{[0-9]+}}) %struct.SomeStruct* @"\01??4SomeStruct@@QEAAAEAU0@AEBU0@@Z"
+  _Atomic(int) mData;
+};
Index: cfe/trunk/lib/CodeGen/CGClass.cpp
===
--- cfe/trunk/lib/CodeGen/CGClass.cpp
+++ cfe/trunk/lib/CodeGen/CGClass.cpp
@@ -1131,10 +1131,11 @@
   RHS = EC->getSubExpr();
 if (!RHS)
   return nullptr;
-MemberExpr *ME2 = dyn_cast(RHS);
-if (dyn_cast(ME2->getMemberDecl()) != Field)
-  return nullptr;
-return Field;
+if (MemberExpr *ME2 = dyn_cast(RHS)) {
+  if (ME2->getMemberDecl() == Field)
+return Field;
+}
+return nullptr;
   } else if (CXXMemberCallExpr *MCE = dyn_cast(S)) {
 CXXMethodDecl *MD = dyn_cast(MCE->getCalleeDecl());
 if (!(MD && isMemcpyEquivalentSpecialMember(MD)))
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D29208: Prevent ICE in dllexport class with _Atomic() data member

2017-02-01 Thread Warren Ristow via Phabricator via cfe-commits
wristow marked 2 inline comments as done.
wristow added inline comments.



Comment at: lib/CodeGen/CGClass.cpp:1135
 MemberExpr *ME2 = dyn_cast(RHS);
-if (dyn_cast(ME2->getMemberDecl()) != Field)
+if (!ME2 || dyn_cast(ME2->getMemberDecl()) != Field)
   return nullptr;

rjmccall wrote:
> wristow wrote:
> > rjmccall wrote:
> > > I would prefer:
> > > 
> > >   if (MemberExpr *ME2 = dyn_cast(RHS)) {
> > > if (ME2->getMemberDecl() == Field)
> > >   return Field;
> > >   }
> > >   return nullptr;
> > I see that change removes the `dyn_cast`.  Was that intended, or 
> > an oversight?
> > 
> > In terms of changing the code-structure, in code on it's own, I do like the 
> > approach you described.  But in this case, there is a sequence of `if 
> > () return nullptr; ... if (conditionN) return nullptr; return 
> > Field;`.  Then after the block containing that set of guarded `nullptr` 
> > returns with a final `return Field;`, there is a similar block.  And then 
> > there is a third block with a similar set.  So changing the structure in 
> > that way breaks that pattern.  With that in mind, do you still want that 
> > change done?
> The dyn_cast has no effect.  There is no situation in which the declarations 
> would compare equal without it where they would not with it, because Field is 
> already known to be a FieldDecl.
> 
> The structure of the existing code is unlikely to stay the same.  Actually, 
> that code is quite worrying — it's making a lot of assumptions about how Sema 
> synthesizes defaulted assignment operator bodies.  But I didn't want to ask 
> you to fix it when it's not the subject of your bug.
Got it.  Posted updated patch.


https://reviews.llvm.org/D29208



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D29208: Prevent ICE in dllexport class with _Atomic() data member

2017-02-01 Thread Warren Ristow via Phabricator via cfe-commits
wristow updated this revision to Diff 86767.
wristow added a comment.

Code restructured.


https://reviews.llvm.org/D29208

Files:
  lib/CodeGen/CGClass.cpp
  test/CodeGenCXX/atomic-dllexport.cpp


Index: test/CodeGenCXX/atomic-dllexport.cpp
===
--- test/CodeGenCXX/atomic-dllexport.cpp
+++ test/CodeGenCXX/atomic-dllexport.cpp
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -triple i686-windows-msvc -emit-llvm -std=c++11 
-fms-extensions -O0 -o - %s | FileCheck --check-prefix=M32 %s
+// RUN: %clang_cc1 -triple x86_64-windows-msvc -emit-llvm -std=c++11 
-fms-extensions -O0 -o - %s | FileCheck --check-prefix=M64 %s
+
+struct __declspec(dllexport) SomeStruct {
+  // Copy assignment operator should be produced, and exported:
+  // M32: define weak_odr dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) 
%struct.SomeStruct* @"\01??4SomeStruct@@QAEAAU0@ABU0@@Z"
+  // M64: define weak_odr dllexportdereferenceable({{[0-9]+}}) 
%struct.SomeStruct* @"\01??4SomeStruct@@QEAAAEAU0@AEBU0@@Z"
+  _Atomic(int) mData;
+};
Index: lib/CodeGen/CGClass.cpp
===
--- lib/CodeGen/CGClass.cpp
+++ lib/CodeGen/CGClass.cpp
@@ -1131,10 +1131,11 @@
   RHS = EC->getSubExpr();
 if (!RHS)
   return nullptr;
-MemberExpr *ME2 = dyn_cast(RHS);
-if (dyn_cast(ME2->getMemberDecl()) != Field)
-  return nullptr;
-return Field;
+if (MemberExpr *ME2 = dyn_cast(RHS)) {
+  if (ME2->getMemberDecl() == Field)
+return Field;
+}
+return nullptr;
   } else if (CXXMemberCallExpr *MCE = dyn_cast(S)) {
 CXXMethodDecl *MD = dyn_cast(MCE->getCalleeDecl());
 if (!(MD && isMemcpyEquivalentSpecialMember(MD)))


Index: test/CodeGenCXX/atomic-dllexport.cpp
===
--- test/CodeGenCXX/atomic-dllexport.cpp
+++ test/CodeGenCXX/atomic-dllexport.cpp
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -triple i686-windows-msvc -emit-llvm -std=c++11 -fms-extensions -O0 -o - %s | FileCheck --check-prefix=M32 %s
+// RUN: %clang_cc1 -triple x86_64-windows-msvc -emit-llvm -std=c++11 -fms-extensions -O0 -o - %s | FileCheck --check-prefix=M64 %s
+
+struct __declspec(dllexport) SomeStruct {
+  // Copy assignment operator should be produced, and exported:
+  // M32: define weak_odr dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.SomeStruct* @"\01??4SomeStruct@@QAEAAU0@ABU0@@Z"
+  // M64: define weak_odr dllexportdereferenceable({{[0-9]+}}) %struct.SomeStruct* @"\01??4SomeStruct@@QEAAAEAU0@AEBU0@@Z"
+  _Atomic(int) mData;
+};
Index: lib/CodeGen/CGClass.cpp
===
--- lib/CodeGen/CGClass.cpp
+++ lib/CodeGen/CGClass.cpp
@@ -1131,10 +1131,11 @@
   RHS = EC->getSubExpr();
 if (!RHS)
   return nullptr;
-MemberExpr *ME2 = dyn_cast(RHS);
-if (dyn_cast(ME2->getMemberDecl()) != Field)
-  return nullptr;
-return Field;
+if (MemberExpr *ME2 = dyn_cast(RHS)) {
+  if (ME2->getMemberDecl() == Field)
+return Field;
+}
+return nullptr;
   } else if (CXXMemberCallExpr *MCE = dyn_cast(S)) {
 CXXMethodDecl *MD = dyn_cast(MCE->getCalleeDecl());
 if (!(MD && isMemcpyEquivalentSpecialMember(MD)))
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D29208: Prevent ICE in dllexport class with _Atomic() data member

2017-02-01 Thread Warren Ristow via Phabricator via cfe-commits
wristow added inline comments.



Comment at: lib/CodeGen/CGClass.cpp:1135
 MemberExpr *ME2 = dyn_cast(RHS);
-if (dyn_cast(ME2->getMemberDecl()) != Field)
+if (!ME2 || dyn_cast(ME2->getMemberDecl()) != Field)
   return nullptr;

rjmccall wrote:
> I would prefer:
> 
>   if (MemberExpr *ME2 = dyn_cast(RHS)) {
> if (ME2->getMemberDecl() == Field)
>   return Field;
>   }
>   return nullptr;
I see that change removes the `dyn_cast`.  Was that intended, or an 
oversight?

In terms of changing the code-structure, in code on it's own, I do like the 
approach you described.  But in this case, there is a sequence of `if 
() return nullptr; ... if (conditionN) return nullptr; return 
Field;`.  Then after the block containing that set of guarded `nullptr` returns 
with a final `return Field;`, there is a similar block.  And then there is a 
third block with a similar set.  So changing the structure in that way breaks 
that pattern.  With that in mind, do you still want that change done?


https://reviews.llvm.org/D29208



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D29208: Prevent ICE in dllexport class with _Atomic() data member

2017-01-26 Thread Warren Ristow via Phabricator via cfe-commits
wristow added a comment.

When a class that has been tagged as dllexport (for an MSVC target) contains an 
atomic data member via the C11 '_Atomic' approach, the front end crashes with a 
null pointer dereference.
This patch fixes it by guarding the null dereference with the approach used by 
similar code in the same method.


https://reviews.llvm.org/D29208



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D29208: Prevent ICE in dllexport class with _Atomic() data member

2017-01-26 Thread Warren Ristow via Phabricator via cfe-commits
wristow created this revision.

Guard against a null pointer dereference that caused Clang to crash
when processing a class containing an _Atomic() data member,
and that is tagged with 'dllexport'.


https://reviews.llvm.org/D29208

Files:
  lib/CodeGen/CGClass.cpp
  test/CodeGenCXX/atomic-dllexport.cpp


Index: test/CodeGenCXX/atomic-dllexport.cpp
===
--- test/CodeGenCXX/atomic-dllexport.cpp
+++ test/CodeGenCXX/atomic-dllexport.cpp
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -triple i686-windows-msvc -emit-llvm -std=c++11 
-fms-extensions -O0 -o - %s | FileCheck --check-prefix=M32 %s
+// RUN: %clang_cc1 -triple x86_64-windows-msvc -emit-llvm -std=c++11 
-fms-extensions -O0 -o - %s | FileCheck --check-prefix=M64 %s
+
+struct __declspec(dllexport) SomeStruct {
+  // Copy assignment operator should be produced, and exported:
+  // M32: define weak_odr dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) 
%struct.SomeStruct* @"\01??4SomeStruct@@QAEAAU0@ABU0@@Z"
+  // M64: define weak_odr dllexportdereferenceable({{[0-9]+}}) 
%struct.SomeStruct* @"\01??4SomeStruct@@QEAAAEAU0@AEBU0@@Z"
+  _Atomic(int) mData;
+};
Index: lib/CodeGen/CGClass.cpp
===
--- lib/CodeGen/CGClass.cpp
+++ lib/CodeGen/CGClass.cpp
@@ -1132,7 +1132,7 @@
 if (!RHS)
   return nullptr;
 MemberExpr *ME2 = dyn_cast(RHS);
-if (dyn_cast(ME2->getMemberDecl()) != Field)
+if (!ME2 || dyn_cast(ME2->getMemberDecl()) != Field)
   return nullptr;
 return Field;
   } else if (CXXMemberCallExpr *MCE = dyn_cast(S)) {


Index: test/CodeGenCXX/atomic-dllexport.cpp
===
--- test/CodeGenCXX/atomic-dllexport.cpp
+++ test/CodeGenCXX/atomic-dllexport.cpp
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -triple i686-windows-msvc -emit-llvm -std=c++11 -fms-extensions -O0 -o - %s | FileCheck --check-prefix=M32 %s
+// RUN: %clang_cc1 -triple x86_64-windows-msvc -emit-llvm -std=c++11 -fms-extensions -O0 -o - %s | FileCheck --check-prefix=M64 %s
+
+struct __declspec(dllexport) SomeStruct {
+  // Copy assignment operator should be produced, and exported:
+  // M32: define weak_odr dllexport x86_thiscallcc dereferenceable({{[0-9]+}}) %struct.SomeStruct* @"\01??4SomeStruct@@QAEAAU0@ABU0@@Z"
+  // M64: define weak_odr dllexportdereferenceable({{[0-9]+}}) %struct.SomeStruct* @"\01??4SomeStruct@@QEAAAEAU0@AEBU0@@Z"
+  _Atomic(int) mData;
+};
Index: lib/CodeGen/CGClass.cpp
===
--- lib/CodeGen/CGClass.cpp
+++ lib/CodeGen/CGClass.cpp
@@ -1132,7 +1132,7 @@
 if (!RHS)
   return nullptr;
 MemberExpr *ME2 = dyn_cast(RHS);
-if (dyn_cast(ME2->getMemberDecl()) != Field)
+if (!ME2 || dyn_cast(ME2->getMemberDecl()) != Field)
   return nullptr;
 return Field;
   } else if (CXXMemberCallExpr *MCE = dyn_cast(S)) {
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits