[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-10-03 Thread Takuya Shimizu via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG2176c5e510e3: [Clang][Sema] Fix display of characters on 
static assertion failure (authored by hazohelet).

Changed prior to commit:
  https://reviews.llvm.org/D155610?vs=557524=557581#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

Files:
  clang/docs/ReleaseNotes.rst
  clang/include/clang/Basic/Diagnostic.h
  clang/lib/Basic/Diagnostic.cpp
  clang/lib/Sema/SemaDeclCXX.cpp
  clang/test/Lexer/cxx1z-trigraphs.cpp
  clang/test/SemaCXX/static-assert-cxx26.cpp
  clang/test/SemaCXX/static-assert.cpp

Index: clang/test/SemaCXX/static-assert.cpp
===
--- clang/test/SemaCXX/static-assert.cpp
+++ clang/test/SemaCXX/static-assert.cpp
@@ -268,7 +268,31 @@
 return 'c';
   }
   static_assert(getChar() == 'a', ""); // expected-error {{failed}} \
-   // expected-note {{evaluates to ''c' == 'a''}}
+   // expected-note {{evaluates to ''c' (0x63, 99) == 'a' (0x61, 97)'}}
+  static_assert((char)9 == '\x61', ""); // expected-error {{failed}} \
+// expected-note {{evaluates to ''\t' (0x09, 9) == 'a' (0x61, 97)'}}
+  static_assert((char)10 == '\0', ""); // expected-error {{failed}} \
+   // expected-note {{n' (0x0A, 10) == '' (0x00, 0)'}}
+  // The note above is intended to match "evaluates to '\n' (0x0A, 10) == '' (0x00, 0)'", but if we write it as it is,
+  // the "\n" cannot be consumed by the diagnostic consumer.
+  static_assert((signed char)10 == (char)-123, ""); // expected-error {{failed}} \
+// expected-note {{evaluates to '10 == '<85>' (0x85, -123)'}}
+  static_assert((char)-4 == (unsigned char)-8, ""); // expected-error {{failed}} \
+// expected-note {{evaluates to ''' (0xFC, -4) == 248'}}
+  static_assert((char)-128 == (char)-123, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''<80>' (0x80, -128) == '<85>' (0x85, -123)'}}
+  static_assert('\xA0' == (char)'\x20', ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to ''' (0xA0, -96) == ' ' (0x20, 32)'}}
+  static_assert((char16_t)L'ゆ' == L"C̵̭̯̠̎͌ͅť̺"[1], ""); // expected-error {{failed}} \
+  // expected-note {{evaluates to 'u'ゆ' (0x3086, 12422) == L'̵' (0x335, 821)'}}
+  static_assert(L"\/"[1] == u'\xFFFD', ""); // expected-error {{failed}} \
+  // expected-note {{evaluates to 'L'/' (0xFF0F, 65295) == u'�' (0xFFFD, 65533)'}}
+  static_assert(L"⚾"[0] == U'🌍', ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to 'L'⚾' (0x26BE, 9918) == U'🌍' (0x1F30D, 127757)'}}
+  static_assert(U"\a"[0] == (wchar_t)9, ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to 'U'\a' (0x07, 7) == L'\t' (0x09, 9)'}}
+  static_assert(L"§"[0] == U'Ö', ""); // expected-error {{failed}} \
+  // expected-note {{evaluates to 'L'§' (0xA7, 167) == U'Ö' (0xD6, 214)'}}
 
   /// Bools are printed as bools.
   constexpr bool invert(bool b) {
Index: clang/test/SemaCXX/static-assert-cxx26.cpp
===
--- clang/test/SemaCXX/static-assert-cxx26.cpp
+++ clang/test/SemaCXX/static-assert-cxx26.cpp
@@ -298,3 +298,12 @@
 Bad b; // expected-note {{in instantiation}}
 
 }
+
+namespace EscapeInDiagnostic {
+static_assert('\u{9}' == (char)1, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''\t' (0x09, 9) == '' (0x01, 1)'}}
+static_assert((char8_t)-128 == (char8_t)-123, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to 'u8'<80>' (0x80, 128) == u8'<85>' (0x85, 133)'}}
+static_assert((char16_t)0xFEFF == (char16_t)0xDB93, ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to 'u'' (0xFEFF, 65279) == u'\xDB93' (0xDB93, 56211)'}}
+}
Index: clang/test/Lexer/cxx1z-trigraphs.cpp
===
--- clang/test/Lexer/cxx1z-trigraphs.cpp
+++ clang/test/Lexer/cxx1z-trigraphs.cpp
@@ -21,7 +21,7 @@
 
 #if !ENABLED_TRIGRAPHS
 // expected-error@11 {{}} expected-warning@11 {{trigraph ignored}}
-// expected-error@13 {{failed}} expected-warning@13 {{trigraph ignored}} expected-note@13 {{evaluates to ''?' == '#''}}
+// 

[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-10-02 Thread Corentin Jabot via Phabricator via cfe-commits
cor3ntin accepted this revision.
cor3ntin added a comment.
This revision is now accepted and ready to land.

LGTM, thanks!


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-10-02 Thread Takuya Shimizu via Phabricator via cfe-commits
hazohelet updated this revision to Diff 557524.
hazohelet added a comment.

Address comments from Corentin


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

Files:
  clang/docs/ReleaseNotes.rst
  clang/include/clang/Basic/Diagnostic.h
  clang/lib/Basic/Diagnostic.cpp
  clang/lib/Sema/SemaDeclCXX.cpp
  clang/test/Lexer/cxx1z-trigraphs.cpp
  clang/test/SemaCXX/static-assert-cxx26.cpp
  clang/test/SemaCXX/static-assert.cpp

Index: clang/test/SemaCXX/static-assert.cpp
===
--- clang/test/SemaCXX/static-assert.cpp
+++ clang/test/SemaCXX/static-assert.cpp
@@ -268,7 +268,31 @@
 return 'c';
   }
   static_assert(getChar() == 'a', ""); // expected-error {{failed}} \
-   // expected-note {{evaluates to ''c' == 'a''}}
+   // expected-note {{evaluates to ''c' (0x63, 99) == 'a' (0x61, 97)'}}
+  static_assert((char)9 == '\x61', ""); // expected-error {{failed}} \
+// expected-note {{evaluates to ''\t' (0x09, 9) == 'a' (0x61, 97)'}}
+  static_assert((char)10 == '\0', ""); // expected-error {{failed}} \
+   // expected-note {{n' (0x0A, 10) == '' (0x00, 0)'}}
+  // The note above is intended to match "evaluates to '\n' (0x0A, 10) == '' (0x00, 0)'", but if we write it as it is,
+  // the "\n" cannot be consumed by the diagnostic consumer.
+  static_assert((signed char)10 == (char)-123, ""); // expected-error {{failed}} \
+// expected-note {{evaluates to '10 == '<85>' (0x85, -123)'}}
+  static_assert((char)-4 == (unsigned char)-8, ""); // expected-error {{failed}} \
+// expected-note {{evaluates to ''' (0xFC, -4) == 248'}}
+  static_assert((char)-128 == (char)-123, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''<80>' (0x80, -128) == '<85>' (0x85, -123)'}}
+  static_assert('\xA0' == (char)'\x20', ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to ''' (0xA0, -96) == ' ' (0x20, 32)'}}
+  static_assert((char16_t)L'ゆ' == L"C̵̭̯̠̎͌ͅť̺"[1], ""); // expected-error {{failed}} \
+  // expected-note {{evaluates to 'u'ゆ' (0x3086, 12422) == L'̵' (0x335, 821)'}}
+  static_assert(L"\/"[1] == u'\xFFFD', ""); // expected-error {{failed}} \
+  // expected-note {{evaluates to 'L'/' (0xFF0F, 65295) == u'�' (0xFFFD, 65533)'}}
+  static_assert(L"⚾"[0] == U'🌍', ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to 'L'⚾' (0x26BE, 9918) == U'🌍' (0x1F30D, 127757)'}}
+  static_assert(U"\a"[0] == (wchar_t)9, ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to 'U'\a' (0x07, 7) == L'\t' (0x09, 9)'}}
+  static_assert(L"§"[0] == U'Ö', ""); // expected-error {{failed}} \
+  // expected-note {{evaluates to 'L'§' (0xA7, 167) == U'Ö' (0xD6, 214)'}}
 
   /// Bools are printed as bools.
   constexpr bool invert(bool b) {
Index: clang/test/SemaCXX/static-assert-cxx26.cpp
===
--- clang/test/SemaCXX/static-assert-cxx26.cpp
+++ clang/test/SemaCXX/static-assert-cxx26.cpp
@@ -298,3 +298,12 @@
 Bad b; // expected-note {{in instantiation}}
 
 }
+
+namespace EscapeInDiagnostic {
+static_assert('\u{9}' == (char)1, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''\t' (0x09, 9) == '' (0x01, 1)'}}
+static_assert((char8_t)-128 == (char8_t)-123, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to 'u8'<80>' (0x80, 128) == u8'<85>' (0x85, 133)'}}
+static_assert((char16_t)0xFEFF == (char16_t)0xDB93, ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to 'u'' (0xFEFF, 65279) == u'\xDB93' (0xDB93, 56211)'}}
+}
Index: clang/test/Lexer/cxx1z-trigraphs.cpp
===
--- clang/test/Lexer/cxx1z-trigraphs.cpp
+++ clang/test/Lexer/cxx1z-trigraphs.cpp
@@ -21,7 +21,7 @@
 
 #if !ENABLED_TRIGRAPHS
 // expected-error@11 {{}} expected-warning@11 {{trigraph ignored}}
-// expected-error@13 {{failed}} expected-warning@13 {{trigraph ignored}} expected-note@13 {{evaluates to ''?' == '#''}}
+// expected-error@13 {{failed}} expected-warning@13 {{trigraph ignored}} expected-note@13 {{evaluates to ''?' (0x3F, 63) == '#' (0x23, 35)'}}
 // expected-error@16 {{}}
 // expected-error@20 {{}}
 #else
Index: 

[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-10-02 Thread Takuya Shimizu via Phabricator via cfe-commits
hazohelet added a comment.

In D155610#4652198 , @cor3ntin wrote:

> @hazohelet Please keep this patch on Phab. It's not going to be shutdown. The 
> current consensus is that we will reconsider shutting down phab on November 15
> https://discourse.llvm.org/t/update-on-github-pull-requests/71540/125

Oh I failed to notice that post, thanks for letting me know.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-10-01 Thread Corentin Jabot via Phabricator via cfe-commits
cor3ntin added a comment.

@hazohelet Please keep this patch on Phab. It's not going to be shutdown. The 
current consensus is that we will reconsider shutting down phab on November 15
https://discourse.llvm.org/t/update-on-github-pull-requests/71540/125


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-09-30 Thread Takuya Shimizu via Phabricator via cfe-commits
hazohelet abandoned this revision.
hazohelet added a comment.

Thank you so much to everyone for guiding this patch onto the right track! I'll 
submit GitHub PR after making suggested changes.
Since Phabricator is going to be shutdown, I mark this differential as 
abandoned.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-09-27 Thread Corentin Jabot via Phabricator via cfe-commits
cor3ntin added a comment.

I had a chat with @hubert.reinterpretcast and @tahonermann.
We reached consensus on wanting to make sure the codepoint value is formatted 
in a future-proof way so that if we ever implement escaping of lone combining 
codepoint, this case would be handled as well.

To do that, we can:

- * Expose the escaping mechanism) (done by `pushEscapedString` in 
Diagnostic.cpp) as a new `EscapeStringForDiagnostic` function. Use that to 
escape the code point (line 16921)

I think that should be the last change we need here. Thanks for being patient 
with us!


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-09-18 Thread Corentin Jabot via Phabricator via cfe-commits
cor3ntin added inline comments.



Comment at: clang/test/SemaCXX/static-assert.cpp:287
+  static_assert((char16_t)L'ゆ' == L"C̵̭̯̠̎͌ͅť̺"[1], ""); // expected-error 
{{failed}} \
+  // expected-note {{evaluates 
to ''ゆ' (0x3086) == '̵' (0x335)'}}
+  static_assert(L"\/"[1] == u'\xFFFD', ""); // expected-error {{failed}} \

hubert.reinterpretcast wrote:
> cor3ntin wrote:
> > hubert.reinterpretcast wrote:
> > > tahonermann wrote:
> > > > cor3ntin wrote:
> > > > > hubert.reinterpretcast wrote:
> > > > > > hubert.reinterpretcast wrote:
> > > > > > > cor3ntin wrote:
> > > > > > > > hubert.reinterpretcast wrote:
> > > > > > > > > The C++23 escaped string formatting facility would not 
> > > > > > > > > generate a trailing combining character like this. I 
> > > > > > > > > recommend following suit.
> > > > > > > > > 
> > > > > > > > > Info on U+0335: 
> > > > > > > > > https://util.unicode.org/UnicodeJsps/character.jsp?a=0335
> > > > > > > > > 
> > > > > > > > This is way outside the scope of the patch. The diagnostic 
> > > > > > > > output facility has no understanding of combining characters or 
> > > > > > > > graphemes and do not attempt to match std::print. It probably 
> > > > > > > > would be an improvement but this patch is not trying to modify 
> > > > > > > > how all diagnostics are printed. (all of that logic is in 
> > > > > > > > Diagnostic.cpp)
> > > > > > > This patch is pushing the envelope of what appears in 
> > > > > > > diagnostics. One can also argue that someone writing
> > > > > > > ```
> > > > > > > static_assert(false, "\u0301");
> > > > > > > ```
> > > > > > > gets what they deserve, but that case does not have a big problem 
> > > > > > > anyway (because the provided message text appears after `: `).
> > > > > > > 
> > > > > > > This patch increases the exposure of the diagnostic output 
> > > > > > > facility to input that it does not handle well. I disagree that 
> > > > > > > it is outside the scope of this patch to insist that it does not 
> > > > > > > generate such inputs to the diagnostic output facility (even if a 
> > > > > > > possible solution is to modify the diagnostic output facility 
> > > > > > > first).
> > > > > > @cor3ntin, do you have status quo examples for how 
> > > > > > grapheme-extending characters that are not already "problematic" in 
> > > > > > their original context are emitted in diagnostics in contexts where 
> > > > > > they are?
> > > > > Are you looking for that sort of examples? 
> > > > > https://godbolt.org/z/c79xWr7Me
> > > > > That shows that clang has no understanding of graphemes
> > > > gcc and MSVC get that case "right" (probably by accident). 
> > > > https://godbolt.org/z/Tjd6xnEon
> > > I was more looking for cases where the output grapheme includes elements 
> > > that were part of the fixed message text (gobbling quotes, etc.). Also, 
> > > this patch is in the wrong shape for handling this concern in the 
> > > diagnostic printer because the delimiting of the replacement text happens 
> > > in this patch.
> > Could you craft a message that becomes a graphene after substitution by the 
> > engine? Maybe? You would have to try very hard and `static_assert` 
> > diagnostics are not of the right shape.
> > 
> > > This patch increases the exposure of the diagnostic output facility to 
> > > input that it does not handle well. I disagree that it is outside the 
> > > scope of this patch to insist that it does not generate such inputs to 
> > > the diagnostic output facility (even if a possible solution is to modify 
> > > the diagnostic output facility first).
> > 
> > I still don't see it. None of the output produce in this patch are even 
> > close to what i could be problematic. ie this patch is only ever producing 
> > ASCII or single codepoints that gets escaped when they are not printable
> > Could you craft a message that becomes a graphene after substitution by the 
> > engine? Maybe? You would have to try very hard and `static_assert` 
> > diagnostics are not of the right shape.
> 
> That is what I meant by this patch introducing new situations.
> 
> > > This patch increases the exposure of the diagnostic output facility to 
> > > input that it does not handle well. I disagree that it is outside the 
> > > scope of this patch to insist that it does not generate such inputs to 
> > > the diagnostic output facility (even if a possible solution is to modify 
> > > the diagnostic output facility first).
> > 
> > I still don't see it. None of the output produce in this patch are even 
> > close to what i could be problematic. ie this patch is only ever producing 
> > ASCII or single codepoints that gets escaped when they are not printable
> 
> This patch produces single codepoints that are not escaped even when they may 
> combine with a `'` delimiter. This patch also (currently) forms the string 
> with the `'` and the potentially-combining character 

[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-09-18 Thread Hubert Tong via Phabricator via cfe-commits
hubert.reinterpretcast added inline comments.



Comment at: clang/test/SemaCXX/static-assert.cpp:287
+  static_assert((char16_t)L'ゆ' == L"C̵̭̯̠̎͌ͅť̺"[1], ""); // expected-error 
{{failed}} \
+  // expected-note {{evaluates 
to ''ゆ' (0x3086) == '̵' (0x335)'}}
+  static_assert(L"\/"[1] == u'\xFFFD', ""); // expected-error {{failed}} \

cor3ntin wrote:
> hubert.reinterpretcast wrote:
> > tahonermann wrote:
> > > cor3ntin wrote:
> > > > hubert.reinterpretcast wrote:
> > > > > hubert.reinterpretcast wrote:
> > > > > > cor3ntin wrote:
> > > > > > > hubert.reinterpretcast wrote:
> > > > > > > > The C++23 escaped string formatting facility would not generate 
> > > > > > > > a trailing combining character like this. I recommend following 
> > > > > > > > suit.
> > > > > > > > 
> > > > > > > > Info on U+0335: 
> > > > > > > > https://util.unicode.org/UnicodeJsps/character.jsp?a=0335
> > > > > > > > 
> > > > > > > This is way outside the scope of the patch. The diagnostic output 
> > > > > > > facility has no understanding of combining characters or 
> > > > > > > graphemes and do not attempt to match std::print. It probably 
> > > > > > > would be an improvement but this patch is not trying to modify 
> > > > > > > how all diagnostics are printed. (all of that logic is in 
> > > > > > > Diagnostic.cpp)
> > > > > > This patch is pushing the envelope of what appears in diagnostics. 
> > > > > > One can also argue that someone writing
> > > > > > ```
> > > > > > static_assert(false, "\u0301");
> > > > > > ```
> > > > > > gets what they deserve, but that case does not have a big problem 
> > > > > > anyway (because the provided message text appears after `: `).
> > > > > > 
> > > > > > This patch increases the exposure of the diagnostic output facility 
> > > > > > to input that it does not handle well. I disagree that it is 
> > > > > > outside the scope of this patch to insist that it does not generate 
> > > > > > such inputs to the diagnostic output facility (even if a possible 
> > > > > > solution is to modify the diagnostic output facility first).
> > > > > @cor3ntin, do you have status quo examples for how grapheme-extending 
> > > > > characters that are not already "problematic" in their original 
> > > > > context are emitted in diagnostics in contexts where they are?
> > > > Are you looking for that sort of examples? 
> > > > https://godbolt.org/z/c79xWr7Me
> > > > That shows that clang has no understanding of graphemes
> > > gcc and MSVC get that case "right" (probably by accident). 
> > > https://godbolt.org/z/Tjd6xnEon
> > I was more looking for cases where the output grapheme includes elements 
> > that were part of the fixed message text (gobbling quotes, etc.). Also, 
> > this patch is in the wrong shape for handling this concern in the 
> > diagnostic printer because the delimiting of the replacement text happens 
> > in this patch.
> Could you craft a message that becomes a graphene after substitution by the 
> engine? Maybe? You would have to try very hard and `static_assert` 
> diagnostics are not of the right shape.
> 
> > This patch increases the exposure of the diagnostic output facility to 
> > input that it does not handle well. I disagree that it is outside the scope 
> > of this patch to insist that it does not generate such inputs to the 
> > diagnostic output facility (even if a possible solution is to modify the 
> > diagnostic output facility first).
> 
> I still don't see it. None of the output produce in this patch are even close 
> to what i could be problematic. ie this patch is only ever producing ASCII or 
> single codepoints that gets escaped when they are not printable
> Could you craft a message that becomes a graphene after substitution by the 
> engine? Maybe? You would have to try very hard and `static_assert` 
> diagnostics are not of the right shape.

That is what I meant by this patch introducing new situations.

> > This patch increases the exposure of the diagnostic output facility to 
> > input that it does not handle well. I disagree that it is outside the scope 
> > of this patch to insist that it does not generate such inputs to the 
> > diagnostic output facility (even if a possible solution is to modify the 
> > diagnostic output facility first).
> 
> I still don't see it. None of the output produce in this patch are even close 
> to what i could be problematic. ie this patch is only ever producing ASCII or 
> single codepoints that gets escaped when they are not printable

This patch produces single codepoints that are not escaped even when they may 
combine with a `'` delimiter. This patch also (currently) forms the string with 
the `'` and the potentially-combining character directly adjacent to each 
other. The least this patch should do is to emit the potentially-combining 
character to the diagnostic facility as a separate substitution (that is, if we 
can agree that the 

[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-09-16 Thread Corentin Jabot via Phabricator via cfe-commits
cor3ntin added inline comments.



Comment at: clang/test/SemaCXX/static-assert-cxx26.cpp:304
+static_assert('\u{9}' == (char)1, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''\t' 
(0x09, 9) == '' (0x01, 1)'}}
+static_assert((char8_t)-128 == (char8_t)-123, ""); // expected-error 
{{failed}} \

tahonermann wrote:
> cor3ntin wrote:
> > tahonermann wrote:
> > > cor3ntin wrote:
> > > > tahonermann wrote:
> > > > > Is the expected note up to date? I don't see code that would generate 
> > > > > the `` output. Am I just missing it? Since U+0001 is a valid, 
> > > > > though non-printable, character, I would expect more `'\u0001'`.
> > > > See elsewhere in the discussion. this formating is pre existing and 
> > > > managed at the DiagnosticEngine level (pushEscapedString). the reason 
> > > > it's not `\u0001` is 1/ to avoid  reusing c++ syntactic elements for 
> > > > something that comes from diagnostics and is not represented as an 
> > > > escaped sequence in source 2/ `\u00011` is unreadable, and 
> > > > `\U1` is also not helpful :)
> > > > 
> > > Thanks for the explanation. I'm not sure that I agree with the rationale 
> > > for (1) though. We're already putting the value in single quotes and 
> > > representing some values with escapes in many of these cases when the 
> > > value isn't produced by an escape sequence (or even a character/string 
> > > literal); why exclude `\u`? I agree with the rationale for (2); we 
> > > could use `'\u{1}'` in that case.
> > FYI afaik the notation in clang predates the existence of \u{} by a few 
> > years, and follow Unicode notation 
> > (https://unicode.org/mail-arch/unicode-ml/y2005-m11/0060.html).
> > Oldest instance seems to be 
> > https://github.com/llvm/llvm-project/commit/77091b167fd959e1ee0c4dad4ec44de43b6c95db
> >  - i followed suite when reworking the generic escaping mechanism all 
> > string fed to diagnostics go through.
> > 
> > I don't care about changing the syntax, but i do hope we are consistent. 
> > Ultimately what we are trying to do is to designate a unicode codepoint and 
> > whether we do it through C++ syntax or not probably does not matter much as 
> > long as it's clear, delimited and consistent!
> I think the substitution of `` by the diagnostic engine itself is 
> perfectly fine and good; particularly when it has no context to suggest a 
> different presentation. In this particular case, where the character is being 
> presented using C++ syntax as a character literal, I would prefer that C++ 
> syntax be used consistently.
> 
> From an implementation standpoint, I'm suggesting that 
> `WriteCharValueForDiagnostic()` be modified such that, if 
> `escapeCStyle()` returns an empty string, that the 
> character be presented in `'\u{}'` form if the character is one that 
> would otherwise be substituted by the diagnostic engine (e.g., if 
> `isPrintable()` is false). Note that this would be restricted to `char` 
> values <= 0x7F; larger values could still be passed through as invalid code 
> units that the diagnostic engine would then render as, e.g., `''`.
We should take a decision before forcing the author to do further change as to 
avoid going in circle.
As a user `\u{}` vs `` makes no difference in terms of the amount 
of information i receive.

I'm really not fan of duplicating code, spreading the logic in multiple places 
and having multiple ways to render a an invalid `char`. But I'm also concerned 
about spending too much time on `char` literals in `static_assert`. It might 
not be a common enough use case to warrant that much scrutiny :)

So I would be happy to go with _any_ direction



Comment at: clang/test/SemaCXX/static-assert.cpp:287
+  static_assert((char16_t)L'ゆ' == L"C̵̭̯̠̎͌ͅť̺"[1], ""); // expected-error 
{{failed}} \
+  // expected-note {{evaluates 
to ''ゆ' (0x3086) == '̵' (0x335)'}}
+  static_assert(L"\/"[1] == u'\xFFFD', ""); // expected-error {{failed}} \

hubert.reinterpretcast wrote:
> tahonermann wrote:
> > cor3ntin wrote:
> > > hubert.reinterpretcast wrote:
> > > > hubert.reinterpretcast wrote:
> > > > > cor3ntin wrote:
> > > > > > hubert.reinterpretcast wrote:
> > > > > > > The C++23 escaped string formatting facility would not generate a 
> > > > > > > trailing combining character like this. I recommend following 
> > > > > > > suit.
> > > > > > > 
> > > > > > > Info on U+0335: 
> > > > > > > https://util.unicode.org/UnicodeJsps/character.jsp?a=0335
> > > > > > > 
> > > > > > This is way outside the scope of the patch. The diagnostic output 
> > > > > > facility has no understanding of combining characters or graphemes 
> > > > > > and do not attempt to match std::print. It probably would be an 
> > > > > > improvement but this patch is not trying to modify how all 
> > > > > > diagnostics are printed. (all of that logic 

[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-09-13 Thread Hubert Tong via Phabricator via cfe-commits
hubert.reinterpretcast added inline comments.



Comment at: clang/lib/Sema/SemaDeclCXX.cpp:16970
+  OS << '\'';
+  WriteCharValueForDiagnostic(CodeUnit, BTy, TyWidth, OS);
+  OS << "' (0x"

To have the diagnostic printer handle separating any potential grapheme (if it 
is capable of doing so--potentially in the future), we need to isolate the 
result of `WriteCharValueForDiagnostic` in a separate message substitution.



Comment at: clang/test/SemaCXX/static-assert.cpp:287
+  static_assert((char16_t)L'ゆ' == L"C̵̭̯̠̎͌ͅť̺"[1], ""); // expected-error 
{{failed}} \
+  // expected-note {{evaluates 
to ''ゆ' (0x3086) == '̵' (0x335)'}}
+  static_assert(L"\/"[1] == u'\xFFFD', ""); // expected-error {{failed}} \

tahonermann wrote:
> cor3ntin wrote:
> > hubert.reinterpretcast wrote:
> > > hubert.reinterpretcast wrote:
> > > > cor3ntin wrote:
> > > > > hubert.reinterpretcast wrote:
> > > > > > The C++23 escaped string formatting facility would not generate a 
> > > > > > trailing combining character like this. I recommend following suit.
> > > > > > 
> > > > > > Info on U+0335: 
> > > > > > https://util.unicode.org/UnicodeJsps/character.jsp?a=0335
> > > > > > 
> > > > > This is way outside the scope of the patch. The diagnostic output 
> > > > > facility has no understanding of combining characters or graphemes 
> > > > > and do not attempt to match std::print. It probably would be an 
> > > > > improvement but this patch is not trying to modify how all 
> > > > > diagnostics are printed. (all of that logic is in Diagnostic.cpp)
> > > > This patch is pushing the envelope of what appears in diagnostics. One 
> > > > can also argue that someone writing
> > > > ```
> > > > static_assert(false, "\u0301");
> > > > ```
> > > > gets what they deserve, but that case does not have a big problem 
> > > > anyway (because the provided message text appears after `: `).
> > > > 
> > > > This patch increases the exposure of the diagnostic output facility to 
> > > > input that it does not handle well. I disagree that it is outside the 
> > > > scope of this patch to insist that it does not generate such inputs to 
> > > > the diagnostic output facility (even if a possible solution is to 
> > > > modify the diagnostic output facility first).
> > > @cor3ntin, do you have status quo examples for how grapheme-extending 
> > > characters that are not already "problematic" in their original context 
> > > are emitted in diagnostics in contexts where they are?
> > Are you looking for that sort of examples? https://godbolt.org/z/c79xWr7Me
> > That shows that clang has no understanding of graphemes
> gcc and MSVC get that case "right" (probably by accident). 
> https://godbolt.org/z/Tjd6xnEon
I was more looking for cases where the output grapheme includes elements that 
were part of the fixed message text (gobbling quotes, etc.). Also, this patch 
is in the wrong shape for handling this concern in the diagnostic printer 
because the delimiting of the replacement text happens in this patch.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-09-12 Thread Tom Honermann via Phabricator via cfe-commits
tahonermann added inline comments.



Comment at: clang/test/SemaCXX/static-assert-cxx26.cpp:304
+static_assert('\u{9}' == (char)1, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''\t' 
(0x09, 9) == '' (0x01, 1)'}}
+static_assert((char8_t)-128 == (char8_t)-123, ""); // expected-error 
{{failed}} \

cor3ntin wrote:
> tahonermann wrote:
> > cor3ntin wrote:
> > > tahonermann wrote:
> > > > Is the expected note up to date? I don't see code that would generate 
> > > > the `` output. Am I just missing it? Since U+0001 is a valid, 
> > > > though non-printable, character, I would expect more `'\u0001'`.
> > > See elsewhere in the discussion. this formating is pre existing and 
> > > managed at the DiagnosticEngine level (pushEscapedString). the reason 
> > > it's not `\u0001` is 1/ to avoid  reusing c++ syntactic elements for 
> > > something that comes from diagnostics and is not represented as an 
> > > escaped sequence in source 2/ `\u00011` is unreadable, and `\U1` 
> > > is also not helpful :)
> > > 
> > Thanks for the explanation. I'm not sure that I agree with the rationale 
> > for (1) though. We're already putting the value in single quotes and 
> > representing some values with escapes in many of these cases when the value 
> > isn't produced by an escape sequence (or even a character/string literal); 
> > why exclude `\u`? I agree with the rationale for (2); we could use 
> > `'\u{1}'` in that case.
> FYI afaik the notation in clang predates the existence of \u{} by a few 
> years, and follow Unicode notation 
> (https://unicode.org/mail-arch/unicode-ml/y2005-m11/0060.html).
> Oldest instance seems to be 
> https://github.com/llvm/llvm-project/commit/77091b167fd959e1ee0c4dad4ec44de43b6c95db
>  - i followed suite when reworking the generic escaping mechanism all string 
> fed to diagnostics go through.
> 
> I don't care about changing the syntax, but i do hope we are consistent. 
> Ultimately what we are trying to do is to designate a unicode codepoint and 
> whether we do it through C++ syntax or not probably does not matter much as 
> long as it's clear, delimited and consistent!
I think the substitution of `` by the diagnostic engine itself is 
perfectly fine and good; particularly when it has no context to suggest a 
different presentation. In this particular case, where the character is being 
presented using C++ syntax as a character literal, I would prefer that C++ 
syntax be used consistently.

From an implementation standpoint, I'm suggesting that 
`WriteCharValueForDiagnostic()` be modified such that, if 
`escapeCStyle()` returns an empty string, that the 
character be presented in `'\u{}'` form if the character is one that would 
otherwise be substituted by the diagnostic engine (e.g., if `isPrintable()` is 
false). Note that this would be restricted to `char` values <= 0x7F; larger 
values could still be passed through as invalid code units that the diagnostic 
engine would then render as, e.g., `''`.



Comment at: clang/test/SemaCXX/static-assert.cpp:287
+  static_assert((char16_t)L'ゆ' == L"C̵̭̯̠̎͌ͅť̺"[1], ""); // expected-error 
{{failed}} \
+  // expected-note {{evaluates 
to ''ゆ' (0x3086) == '̵' (0x335)'}}
+  static_assert(L"\/"[1] == u'\xFFFD', ""); // expected-error {{failed}} \

cor3ntin wrote:
> hubert.reinterpretcast wrote:
> > hubert.reinterpretcast wrote:
> > > cor3ntin wrote:
> > > > hubert.reinterpretcast wrote:
> > > > > The C++23 escaped string formatting facility would not generate a 
> > > > > trailing combining character like this. I recommend following suit.
> > > > > 
> > > > > Info on U+0335: 
> > > > > https://util.unicode.org/UnicodeJsps/character.jsp?a=0335
> > > > > 
> > > > This is way outside the scope of the patch. The diagnostic output 
> > > > facility has no understanding of combining characters or graphemes and 
> > > > do not attempt to match std::print. It probably would be an improvement 
> > > > but this patch is not trying to modify how all diagnostics are printed. 
> > > > (all of that logic is in Diagnostic.cpp)
> > > This patch is pushing the envelope of what appears in diagnostics. One 
> > > can also argue that someone writing
> > > ```
> > > static_assert(false, "\u0301");
> > > ```
> > > gets what they deserve, but that case does not have a big problem anyway 
> > > (because the provided message text appears after `: `).
> > > 
> > > This patch increases the exposure of the diagnostic output facility to 
> > > input that it does not handle well. I disagree that it is outside the 
> > > scope of this patch to insist that it does not generate such inputs to 
> > > the diagnostic output facility (even if a possible solution is to modify 
> > > the diagnostic output facility first).
> > @cor3ntin, do you have status quo examples for how grapheme-extending 
> > 

[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-09-06 Thread Corentin Jabot via Phabricator via cfe-commits
cor3ntin added inline comments.



Comment at: clang/test/SemaCXX/static-assert-cxx26.cpp:304
+static_assert('\u{9}' == (char)1, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''\t' 
(0x09, 9) == '' (0x01, 1)'}}
+static_assert((char8_t)-128 == (char8_t)-123, ""); // expected-error 
{{failed}} \

tahonermann wrote:
> cor3ntin wrote:
> > tahonermann wrote:
> > > Is the expected note up to date? I don't see code that would generate the 
> > > `` output. Am I just missing it? Since U+0001 is a valid, though 
> > > non-printable, character, I would expect more `'\u0001'`.
> > See elsewhere in the discussion. this formating is pre existing and managed 
> > at the DiagnosticEngine level (pushEscapedString). the reason it's not 
> > `\u0001` is 1/ to avoid  reusing c++ syntactic elements for something that 
> > comes from diagnostics and is not represented as an escaped sequence in 
> > source 2/ `\u00011` is unreadable, and `\U1` is also not helpful :)
> > 
> Thanks for the explanation. I'm not sure that I agree with the rationale for 
> (1) though. We're already putting the value in single quotes and representing 
> some values with escapes in many of these cases when the value isn't produced 
> by an escape sequence (or even a character/string literal); why exclude 
> `\u`? I agree with the rationale for (2); we could use `'\u{1}'` in that 
> case.
FYI afaik the notation in clang predates the existence of \u{} by a few years, 
and follow Unicode notation 
(https://unicode.org/mail-arch/unicode-ml/y2005-m11/0060.html).
Oldest instance seems to be 
https://github.com/llvm/llvm-project/commit/77091b167fd959e1ee0c4dad4ec44de43b6c95db
 - i followed suite when reworking the generic escaping mechanism all string 
fed to diagnostics go through.

I don't care about changing the syntax, but i do hope we are consistent. 
Ultimately what we are trying to do is to designate a unicode codepoint and 
whether we do it through C++ syntax or not probably does not matter much as 
long as it's clear, delimited and consistent!


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-09-06 Thread Tom Honermann via Phabricator via cfe-commits
tahonermann added inline comments.



Comment at: clang/test/SemaCXX/static-assert-cxx26.cpp:304
+static_assert('\u{9}' == (char)1, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''\t' 
(0x09, 9) == '' (0x01, 1)'}}
+static_assert((char8_t)-128 == (char8_t)-123, ""); // expected-error 
{{failed}} \

cor3ntin wrote:
> tahonermann wrote:
> > Is the expected note up to date? I don't see code that would generate the 
> > `` output. Am I just missing it? Since U+0001 is a valid, though 
> > non-printable, character, I would expect more `'\u0001'`.
> See elsewhere in the discussion. this formating is pre existing and managed 
> at the DiagnosticEngine level (pushEscapedString). the reason it's not 
> `\u0001` is 1/ to avoid  reusing c++ syntactic elements for something that 
> comes from diagnostics and is not represented as an escaped sequence in 
> source 2/ `\u00011` is unreadable, and `\U1` is also not helpful :)
> 
Thanks for the explanation. I'm not sure that I agree with the rationale for 
(1) though. We're already putting the value in single quotes and representing 
some values with escapes in many of these cases when the value isn't produced 
by an escape sequence (or even a character/string literal); why exclude 
`\u`? I agree with the rationale for (2); we could use `'\u{1}'` in that 
case.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-09-06 Thread Corentin Jabot via Phabricator via cfe-commits
cor3ntin added a comment.

@hazohelet I'm happy with the patch, I just need to make sure Hubert and I 
agree!


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-09-06 Thread Corentin Jabot via Phabricator via cfe-commits
cor3ntin added inline comments.



Comment at: clang/test/SemaCXX/static-assert.cpp:287
+  static_assert((char16_t)L'ゆ' == L"C̵̭̯̠̎͌ͅť̺"[1], ""); // expected-error 
{{failed}} \
+  // expected-note {{evaluates 
to ''ゆ' (0x3086) == '̵' (0x335)'}}
+  static_assert(L"\/"[1] == u'\xFFFD', ""); // expected-error {{failed}} \

hubert.reinterpretcast wrote:
> hubert.reinterpretcast wrote:
> > cor3ntin wrote:
> > > hubert.reinterpretcast wrote:
> > > > The C++23 escaped string formatting facility would not generate a 
> > > > trailing combining character like this. I recommend following suit.
> > > > 
> > > > Info on U+0335: 
> > > > https://util.unicode.org/UnicodeJsps/character.jsp?a=0335
> > > > 
> > > This is way outside the scope of the patch. The diagnostic output 
> > > facility has no understanding of combining characters or graphemes and do 
> > > not attempt to match std::print. It probably would be an improvement but 
> > > this patch is not trying to modify how all diagnostics are printed. (all 
> > > of that logic is in Diagnostic.cpp)
> > This patch is pushing the envelope of what appears in diagnostics. One can 
> > also argue that someone writing
> > ```
> > static_assert(false, "\u0301");
> > ```
> > gets what they deserve, but that case does not have a big problem anyway 
> > (because the provided message text appears after `: `).
> > 
> > This patch increases the exposure of the diagnostic output facility to 
> > input that it does not handle well. I disagree that it is outside the scope 
> > of this patch to insist that it does not generate such inputs to the 
> > diagnostic output facility (even if a possible solution is to modify the 
> > diagnostic output facility first).
> @cor3ntin, do you have status quo examples for how grapheme-extending 
> characters that are not already "problematic" in their original context are 
> emitted in diagnostics in contexts where they are?
Are you looking for that sort of examples? https://godbolt.org/z/c79xWr7Me
That shows that clang has no understanding of graphemes


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-09-06 Thread Corentin Jabot via Phabricator via cfe-commits
cor3ntin added inline comments.



Comment at: clang/test/SemaCXX/static-assert-cxx26.cpp:304
+static_assert('\u{9}' == (char)1, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''\t' 
(0x09, 9) == '' (0x01, 1)'}}
+static_assert((char8_t)-128 == (char8_t)-123, ""); // expected-error 
{{failed}} \

tahonermann wrote:
> Is the expected note up to date? I don't see code that would generate the 
> `` output. Am I just missing it? Since U+0001 is a valid, though 
> non-printable, character, I would expect more `'\u0001'`.
See elsewhere in the discussion. this formating is pre existing and managed at 
the DiagnosticEngine level (pushEscapedString). the reason it's not `\u0001` is 
1/ to avoid  reusing c++ syntactic elements for something that comes from 
diagnostics and is not represented as an escaped sequence in source 2/ 
`\u00011` is unreadable, and `\U1` is also not helpful :)



CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-09-06 Thread Tom Honermann via Phabricator via cfe-commits
tahonermann added inline comments.



Comment at: clang/test/SemaCXX/static-assert-cxx26.cpp:304
+static_assert('\u{9}' == (char)1, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''\t' 
(0x09, 9) == '' (0x01, 1)'}}
+static_assert((char8_t)-128 == (char8_t)-123, ""); // expected-error 
{{failed}} \

Is the expected note up to date? I don't see code that would generate the 
`` output. Am I just missing it? Since U+0001 is a valid, though 
non-printable, character, I would expect more `'\u0001'`.



Comment at: clang/test/SemaCXX/static-assert.cpp:274-277
+  static_assert((char)10 == '\0', ""); // expected-error {{failed}} \
+   // expected-note {{n' (0x0A, 10) == 
'' (0x00, 0)'}}
+  // The note above is intended to match "evaluates to '\n' (0x0A, 10) == 
'' (0x00, 0)'", but if we write it as it is,
+  // the "\n" cannot be consumed by the diagnostic consumer.

Here too, I find the `''` presentation surprising; either of `'\0'` or 
`'\u'` would be preferred.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-09-06 Thread Takuya Shimizu via Phabricator via cfe-commits
hazohelet added a comment.

@cor3ntin Gentle ping


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-28 Thread Hubert Tong via Phabricator via cfe-commits
hubert.reinterpretcast added inline comments.



Comment at: clang/test/SemaCXX/static-assert.cpp:287
+  static_assert((char16_t)L'ゆ' == L"C̵̭̯̠̎͌ͅť̺"[1], ""); // expected-error 
{{failed}} \
+  // expected-note {{evaluates 
to ''ゆ' (0x3086) == '̵' (0x335)'}}
+  static_assert(L"\/"[1] == u'\xFFFD', ""); // expected-error {{failed}} \

hubert.reinterpretcast wrote:
> cor3ntin wrote:
> > hubert.reinterpretcast wrote:
> > > The C++23 escaped string formatting facility would not generate a 
> > > trailing combining character like this. I recommend following suit.
> > > 
> > > Info on U+0335: https://util.unicode.org/UnicodeJsps/character.jsp?a=0335
> > > 
> > This is way outside the scope of the patch. The diagnostic output facility 
> > has no understanding of combining characters or graphemes and do not 
> > attempt to match std::print. It probably would be an improvement but this 
> > patch is not trying to modify how all diagnostics are printed. (all of that 
> > logic is in Diagnostic.cpp)
> This patch is pushing the envelope of what appears in diagnostics. One can 
> also argue that someone writing
> ```
> static_assert(false, "\u0301");
> ```
> gets what they deserve, but that case does not have a big problem anyway 
> (because the provided message text appears after `: `).
> 
> This patch increases the exposure of the diagnostic output facility to input 
> that it does not handle well. I disagree that it is outside the scope of this 
> patch to insist that it does not generate such inputs to the diagnostic 
> output facility (even if a possible solution is to modify the diagnostic 
> output facility first).
@cor3ntin, do you have status quo examples for how grapheme-extending 
characters that are not already "problematic" in their original context are 
emitted in diagnostics in contexts where they are?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-21 Thread Takuya Shimizu via Phabricator via cfe-commits
hazohelet updated this revision to Diff 552167.
hazohelet marked an inline comment as done.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

Files:
  clang/docs/ReleaseNotes.rst
  clang/lib/Sema/SemaDeclCXX.cpp
  clang/test/Lexer/cxx1z-trigraphs.cpp
  clang/test/SemaCXX/static-assert-cxx26.cpp
  clang/test/SemaCXX/static-assert.cpp

Index: clang/test/SemaCXX/static-assert.cpp
===
--- clang/test/SemaCXX/static-assert.cpp
+++ clang/test/SemaCXX/static-assert.cpp
@@ -268,7 +268,31 @@
 return 'c';
   }
   static_assert(getChar() == 'a', ""); // expected-error {{failed}} \
-   // expected-note {{evaluates to ''c' == 'a''}}
+   // expected-note {{evaluates to ''c' (0x63, 99) == 'a' (0x61, 97)'}}
+  static_assert((char)9 == '\x61', ""); // expected-error {{failed}} \
+// expected-note {{evaluates to ''\t' (0x09, 9) == 'a' (0x61, 97)'}}
+  static_assert((char)10 == '\0', ""); // expected-error {{failed}} \
+   // expected-note {{n' (0x0A, 10) == '' (0x00, 0)'}}
+  // The note above is intended to match "evaluates to '\n' (0x0A, 10) == '' (0x00, 0)'", but if we write it as it is,
+  // the "\n" cannot be consumed by the diagnostic consumer.
+  static_assert((signed char)10 == (char)-123, ""); // expected-error {{failed}} \
+// expected-note {{evaluates to '10 == '<85>' (0x85, -123)'}}
+  static_assert((char)-4 == (unsigned char)-8, ""); // expected-error {{failed}} \
+// expected-note {{evaluates to ''' (0xFC, -4) == 248'}}
+  static_assert((char)-128 == (char)-123, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''<80>' (0x80, -128) == '<85>' (0x85, -123)'}}
+  static_assert('\xA0' == (char)'\x20', ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to ''' (0xA0, -96) == ' ' (0x20, 32)'}}
+  static_assert((char16_t)L'ゆ' == L"C̵̭̯̠̎͌ͅť̺"[1], ""); // expected-error {{failed}} \
+  // expected-note {{evaluates to 'u'ゆ' (0x3086, 12422) == L'̵' (0x335, 821)'}}
+  static_assert(L"\/"[1] == u'\xFFFD', ""); // expected-error {{failed}} \
+  // expected-note {{evaluates to 'L'/' (0xFF0F, 65295) == u'�' (0xFFFD, 65533)'}}
+  static_assert(L"⚾"[0] == U'🌍', ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to 'L'⚾' (0x26BE, 9918) == U'🌍' (0x1F30D, 127757)'}}
+  static_assert(U"\a"[0] == (wchar_t)9, ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to 'U'\a' (0x07, 7) == L'\t' (0x09, 9)'}}
+  static_assert(L"§"[0] == U'Ö', ""); // expected-error {{failed}} \
+  // expected-note {{evaluates to 'L'§' (0xA7, 167) == U'Ö' (0xD6, 214)'}}
 
   /// Bools are printed as bools.
   constexpr bool invert(bool b) {
Index: clang/test/SemaCXX/static-assert-cxx26.cpp
===
--- clang/test/SemaCXX/static-assert-cxx26.cpp
+++ clang/test/SemaCXX/static-assert-cxx26.cpp
@@ -298,3 +298,12 @@
 Bad b; // expected-note {{in instantiation}}
 
 }
+
+namespace EscapeInDiagnostic {
+static_assert('\u{9}' == (char)1, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''\t' (0x09, 9) == '' (0x01, 1)'}}
+static_assert((char8_t)-128 == (char8_t)-123, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to 'u8'<80>' (0x80, 128) == u8'<85>' (0x85, 133)'}}
+static_assert((char16_t)0xFEFF == (char16_t)0xDB93, ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to 'u'' (0xFEFF, 65279) == u'\xDB93' (0xDB93, 56211)'}}
+}
Index: clang/test/Lexer/cxx1z-trigraphs.cpp
===
--- clang/test/Lexer/cxx1z-trigraphs.cpp
+++ clang/test/Lexer/cxx1z-trigraphs.cpp
@@ -21,7 +21,7 @@
 
 #if !ENABLED_TRIGRAPHS
 // expected-error@11 {{}} expected-warning@11 {{trigraph ignored}}
-// expected-error@13 {{failed}} expected-warning@13 {{trigraph ignored}} expected-note@13 {{evaluates to ''?' == '#''}}
+// expected-error@13 {{failed}} expected-warning@13 {{trigraph ignored}} expected-note@13 {{evaluates to ''?' (0x3F, 63) == '#' (0x23, 35)'}}
 // expected-error@16 {{}}
 // expected-error@20 {{}}
 #else
Index: clang/lib/Sema/SemaDeclCXX.cpp
===

[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-16 Thread Hubert Tong via Phabricator via cfe-commits
hubert.reinterpretcast added inline comments.



Comment at: clang/lib/Sema/SemaDeclCXX.cpp:16920-16926
+if (llvm::ConvertCodePointToUTF8(Value, Ptr)) {
+  OS << StringRef(Arr, Ptr - Arr);
+} else {
+  OS << "\\x"
+ << llvm::format_hex_no_prefix(Value, TyWidth / 4,
+   /*Upper=*/true);
+}

Minor nit: Braces no longer needed.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-16 Thread Takuya Shimizu via Phabricator via cfe-commits
hazohelet updated this revision to Diff 550707.
hazohelet marked 3 inline comments as done.
hazohelet added a comment.

Address comments from Hubert

- Bring back type prefix
- NFC stylistic changes


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

Files:
  clang/docs/ReleaseNotes.rst
  clang/lib/Sema/SemaDeclCXX.cpp
  clang/test/Lexer/cxx1z-trigraphs.cpp
  clang/test/SemaCXX/static-assert-cxx26.cpp
  clang/test/SemaCXX/static-assert.cpp

Index: clang/test/SemaCXX/static-assert.cpp
===
--- clang/test/SemaCXX/static-assert.cpp
+++ clang/test/SemaCXX/static-assert.cpp
@@ -268,7 +268,31 @@
 return 'c';
   }
   static_assert(getChar() == 'a', ""); // expected-error {{failed}} \
-   // expected-note {{evaluates to ''c' == 'a''}}
+   // expected-note {{evaluates to ''c' (0x63, 99) == 'a' (0x61, 97)'}}
+  static_assert((char)9 == '\x61', ""); // expected-error {{failed}} \
+// expected-note {{evaluates to ''\t' (0x09, 9) == 'a' (0x61, 97)'}}
+  static_assert((char)10 == '\0', ""); // expected-error {{failed}} \
+   // expected-note {{n' (0x0A, 10) == '' (0x00, 0)'}}
+  // The note above is intended to match "evaluates to '\n' (0x0A, 10) == '' (0x00, 0)'", but if we write it as it is,
+  // the "\n" cannot be consumed by the diagnostic consumer.
+  static_assert((signed char)10 == (char)-123, ""); // expected-error {{failed}} \
+// expected-note {{evaluates to '10 == '<85>' (0x85, -123)'}}
+  static_assert((char)-4 == (unsigned char)-8, ""); // expected-error {{failed}} \
+// expected-note {{evaluates to ''' (0xFC, -4) == 248'}}
+  static_assert((char)-128 == (char)-123, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''<80>' (0x80, -128) == '<85>' (0x85, -123)'}}
+  static_assert('\xA0' == (char)'\x20', ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to ''' (0xA0, -96) == ' ' (0x20, 32)'}}
+  static_assert((char16_t)L'ゆ' == L"C̵̭̯̠̎͌ͅť̺"[1], ""); // expected-error {{failed}} \
+  // expected-note {{evaluates to 'u'ゆ' (0x3086, 12422) == L'̵' (0x335, 821)'}}
+  static_assert(L"\/"[1] == u'\xFFFD', ""); // expected-error {{failed}} \
+  // expected-note {{evaluates to 'L'/' (0xFF0F, 65295) == u'�' (0xFFFD, 65533)'}}
+  static_assert(L"⚾"[0] == U'🌍', ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to 'L'⚾' (0x26BE, 9918) == U'🌍' (0x1F30D, 127757)'}}
+  static_assert(U"\a"[0] == (wchar_t)9, ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to 'U'\a' (0x07, 7) == L'\t' (0x09, 9)'}}
+  static_assert(L"§"[0] == U'Ö', ""); // expected-error {{failed}} \
+  // expected-note {{evaluates to 'L'§' (0xA7, 167) == U'Ö' (0xD6, 214)'}}
 
   /// Bools are printed as bools.
   constexpr bool invert(bool b) {
Index: clang/test/SemaCXX/static-assert-cxx26.cpp
===
--- clang/test/SemaCXX/static-assert-cxx26.cpp
+++ clang/test/SemaCXX/static-assert-cxx26.cpp
@@ -298,3 +298,12 @@
 Bad b; // expected-note {{in instantiation}}
 
 }
+
+namespace EscapeInDiagnostic {
+static_assert('\u{9}' == (char)1, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''\t' (0x09, 9) == '' (0x01, 1)'}}
+static_assert((char8_t)-128 == (char8_t)-123, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to 'u8'<80>' (0x80, 128) == u8'<85>' (0x85, 133)'}}
+static_assert((char16_t)0xFEFF == (char16_t)0xDB93, ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to 'u'' (0xFEFF, 65279) == u'\xDB93' (0xDB93, 56211)'}}
+}
Index: clang/test/Lexer/cxx1z-trigraphs.cpp
===
--- clang/test/Lexer/cxx1z-trigraphs.cpp
+++ clang/test/Lexer/cxx1z-trigraphs.cpp
@@ -21,7 +21,7 @@
 
 #if !ENABLED_TRIGRAPHS
 // expected-error@11 {{}} expected-warning@11 {{trigraph ignored}}
-// expected-error@13 {{failed}} expected-warning@13 {{trigraph ignored}} expected-note@13 {{evaluates to ''?' == '#''}}
+// expected-error@13 {{failed}} expected-warning@13 {{trigraph ignored}} expected-note@13 {{evaluates to ''?' (0x3F, 63) == '#' (0x23, 35)'}}
 // expected-error@16 {{}}
 // expected-error@20 {{}}
 #else

[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-15 Thread Hubert Tong via Phabricator via cfe-commits
hubert.reinterpretcast added inline comments.



Comment at: clang/lib/Sema/SemaDeclCXX.cpp:16867-16868
 
+/// Convert character's code unit value to a string.
+/// The code point needs to be zero-extended to 32-bits.
+static void WriteCharValueForDiagnostic(uint32_t Value, const BuiltinType *BTy,

Suggest wording tweaks.



Comment at: clang/lib/Sema/SemaDeclCXX.cpp:16894-16895
+if (llvm::ConvertCodePointToUTF8(Value, Ptr)) {
+  for (char *I = Arr; I != Ptr; ++I)
+OS << *I;
+} else {

Try using `StringRef`.



Comment at: clang/lib/Sema/SemaDeclCXX.cpp:16897
+} else {
+  // FIXME: This assumes Unicode literal encodings
+  OS << "\\x"

Since the function interface has been clarified, this part actually doesn't 
need a FIXME. The FIXME should instead be added to the comment above the 
function declaration.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-15 Thread Takuya Shimizu via Phabricator via cfe-commits
hazohelet updated this revision to Diff 550403.
hazohelet marked 16 inline comments as done.
hazohelet added a comment.

Address some review comments

- Renamed `ConvertCharToString` to `WriteCharValueForDiagnostic`
- Made the function static
- Fixed the printing for unicode 0x80 ~ 0xFF
- Added decimal value next to the hex code


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

Files:
  clang/docs/ReleaseNotes.rst
  clang/lib/Sema/SemaDeclCXX.cpp
  clang/test/Lexer/cxx1z-trigraphs.cpp
  clang/test/SemaCXX/static-assert-cxx26.cpp
  clang/test/SemaCXX/static-assert.cpp

Index: clang/test/SemaCXX/static-assert.cpp
===
--- clang/test/SemaCXX/static-assert.cpp
+++ clang/test/SemaCXX/static-assert.cpp
@@ -268,7 +268,31 @@
 return 'c';
   }
   static_assert(getChar() == 'a', ""); // expected-error {{failed}} \
-   // expected-note {{evaluates to ''c' == 'a''}}
+   // expected-note {{evaluates to ''c' (0x63, 99) == 'a' (0x61, 97)'}}
+  static_assert((char)9 == '\x61', ""); // expected-error {{failed}} \
+// expected-note {{evaluates to ''\t' (0x09, 9) == 'a' (0x61, 97)'}}
+  static_assert((char)10 == '\0', ""); // expected-error {{failed}} \
+   // expected-note {{n' (0x0A, 10) == '' (0x00, 0)'}}
+  // The note above is intended to match "evaluates to '\n' (0x0A, 10) == '' (0x00, 0)'", but if we write it as it is,
+  // the "\n" cannot be consumed by the diagnostic consumer.
+  static_assert((signed char)10 == (char)-123, ""); // expected-error {{failed}} \
+// expected-note {{evaluates to '10 == '<85>' (0x85, -123)'}}
+  static_assert((char)-4 == (unsigned char)-8, ""); // expected-error {{failed}} \
+// expected-note {{evaluates to ''' (0xFC, -4) == 248'}}
+  static_assert((char)-128 == (char)-123, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''<80>' (0x80, -128) == '<85>' (0x85, -123)'}}
+  static_assert('\xA0' == (char)'\x20', ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to ''' (0xA0, -96) == ' ' (0x20, 32)'}}
+  static_assert((char16_t)L'ゆ' == L"C̵̭̯̠̎͌ͅť̺"[1], ""); // expected-error {{failed}} \
+  // expected-note {{evaluates to ''ゆ' (0x3086, 12422) == '̵' (0x335, 821)'}}
+  static_assert(L"\/"[1] == u'\xFFFD', ""); // expected-error {{failed}} \
+  // expected-note {{evaluates to ''/' (0xFF0F, 65295) == '�' (0xFFFD, 65533)'}}
+  static_assert(L"⚾"[0] == U'🌍', ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to ''⚾' (0x26BE, 9918) == '🌍' (0x1F30D, 127757)'}}
+  static_assert(U"\a"[0] == (wchar_t)9, ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to ''\a' (0x07, 7) == '\t' (0x09, 9)'}}
+  static_assert(L"§"[0] == U'Ö', ""); // expected-error {{failed}} \
+  // expected-note {{evaluates to ''§' (0xA7, 167) == 'Ö' (0xD6, 214)'}}
 
   /// Bools are printed as bools.
   constexpr bool invert(bool b) {
Index: clang/test/SemaCXX/static-assert-cxx26.cpp
===
--- clang/test/SemaCXX/static-assert-cxx26.cpp
+++ clang/test/SemaCXX/static-assert-cxx26.cpp
@@ -298,3 +298,12 @@
 Bad b; // expected-note {{in instantiation}}
 
 }
+
+namespace EscapeInDiagnostic {
+static_assert('\u{9}' == (char)1, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''\t' (0x09, 9) == '' (0x01, 1)'}}
+static_assert((char8_t)-128 == (char8_t)-123, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''<80>' (0x80, 128) == '<85>' (0x85, 133)'}}
+static_assert((char16_t)0xFEFF == (char16_t)0xDB93, ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to ''' (0xFEFF, 65279) == '\xDB93' (0xDB93, 56211)'}}
+}
Index: clang/test/Lexer/cxx1z-trigraphs.cpp
===
--- clang/test/Lexer/cxx1z-trigraphs.cpp
+++ clang/test/Lexer/cxx1z-trigraphs.cpp
@@ -21,7 +21,7 @@
 
 #if !ENABLED_TRIGRAPHS
 // expected-error@11 {{}} expected-warning@11 {{trigraph ignored}}
-// expected-error@13 {{failed}} expected-warning@13 {{trigraph ignored}} expected-note@13 {{evaluates to ''?' == '#''}}
+// expected-error@13 {{failed}} expected-warning@13 {{trigraph ignored}} 

[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-14 Thread Hubert Tong via Phabricator via cfe-commits
hubert.reinterpretcast added inline comments.



Comment at: clang/lib/Sema/SemaDeclCXX.cpp:16876
+  // other types.
+  if (CodePoint <= UCHAR_MAX) {
+StringRef Escaped = escapeCStyle(CodePoint);

cor3ntin wrote:
> hubert.reinterpretcast wrote:
> > For types other than `Char_S`, `Char_U`, and `Char8`, this fails to treat 
> > the C1 Controls and Latin-1 Supplement characters as Unicode code points. 
> > It looks like test coverage for these cases are missing.
> `escapeCStyle` is one of the things that assume ASCII / UTF, but yes, we 
> might as well reduce to 0x7F just to avoid unnecessary work
> `escapeCStyle` is one of the things that assume ASCII / UTF, but yes, we 
> might as well reduce to 0x7F just to avoid unnecessary work

I meant (with a `signed char` type to trigger the assertion):
```
:1:28: note: expression evaluates to ''' (0xA2) == '' (0xA2)'
1 | static_assert(u"\u00a2"[0] == '');
  |   ~^
```

should be:
```
:1:28: note: expression evaluates to ''¢' (0xA2) == '' (0xA2)'
1 | static_assert(u"\u00a2"[0] == '');
  |   ~^
```



CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-14 Thread Hubert Tong via Phabricator via cfe-commits
hubert.reinterpretcast added a comment.

In D155610#4586213 , 
@abhina.sreeskantharajan wrote:

> I've discussed offline with @hubert.reinterpretcast and agree with him that 
> with the addition of fexec-charset support, the set of characters deemed 
> printable will not be accurate when other encodings are used. This will be 
> similar to the printf/scanf format string validation issue I mentioned in my 
> RFC and would require us to reverse the conversion or keep the original 
> string around to check if the character is printable. I don't think we have 
> finalized a solution on how to handle these issues yet.

Furthermore, it is reasonable for the scope of the current patch to focus on 
producing UTF-8 (or UTF-16 for Windows) output to the "terminal".


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-14 Thread Abhina Sree via Phabricator via cfe-commits
abhina.sreeskantharajan added a comment.

I've discussed offline with @hubert.reinterpretcast and agree with him that 
with the addition of fexec-charset support, the set of characters deemed 
printable will not be accurate when other encodings are used. This will be 
similar to the printf/scanf format string validation issue I mentioned in my 
RFC and would require us to reverse the conversion or keep the original string 
around to check if the character is printable. I don't think we have finalized 
a solution on how to handle these issues yet.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-14 Thread Corentin Jabot via Phabricator via cfe-commits
cor3ntin added inline comments.



Comment at: clang/lib/Sema/SemaDeclCXX.cpp:16869
+/// The code point needs to be zero-extended to 32-bits.
+void ConvertCharToString(uint32_t CodePoint, const BuiltinType *BTy,
+ unsigned TyWidth, llvm::raw_ostream ) {

hubert.reinterpretcast wrote:
> It does not seem that the first parameter expects a `CodePoint` argument in 
> all cases. For `Char_S`, `Char_U`, and `Char8`, it seems the function wants 
> to treat the input as a UTF-8 code unit.
> 
> I suggest changing the argument to be clearly a code unit (and potentially 
> treat it as a code point value as appropriate later in the function).
> 
> Also: The function should probably be declared as having static linkage.
> Additionally: The function does not "convert" in the language semantic sense. 
> `WriteCharacterValueDescriptionForDisplay` might be a better name.
Agreed, `CodeUnit` or `Value` would be more correct (mostly because of numeric 
escape sequences).
But if we are going to change that then `WriteCharValueForDiagnostic` would be 
better, `Character` implies too much



Comment at: clang/lib/Sema/SemaDeclCXX.cpp:16876
+  // other types.
+  if (CodePoint <= UCHAR_MAX) {
+StringRef Escaped = escapeCStyle(CodePoint);

hubert.reinterpretcast wrote:
> For types other than `Char_S`, `Char_U`, and `Char8`, this fails to treat the 
> C1 Controls and Latin-1 Supplement characters as Unicode code points. It 
> looks like test coverage for these cases are missing.
`escapeCStyle` is one of the things that assume ASCII / UTF, but yes, we might 
as well reduce to 0x7F just to avoid unnecessary work


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-12 Thread Hubert Tong via Phabricator via cfe-commits
hubert.reinterpretcast added a comment.

In D155610#4575579 , @cor3ntin wrote:

> @hubert.reinterpretcast It does not, Unicode characters are only escaped in 
> Diagnostics.cpp, and I think this is what we want.

Thanks @cor3ntin for the insight. I agree that this is a separate concern that 
also applies to static assert messages.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-12 Thread Hubert Tong via Phabricator via cfe-commits
hubert.reinterpretcast added inline comments.



Comment at: clang/lib/Sema/SemaDeclCXX.cpp:16899
+  uint32_t CodePoint = 
static_cast(V.getInt().getZExtValue());
+  PrintCharLiteralPrefix(BTy->getKind(), OS);
+  OS << '\'';

cor3ntin wrote:
> hubert.reinterpretcast wrote:
> > hubert.reinterpretcast wrote:
> > > cor3ntin wrote:
> > > > Looking at the diagnostics, I don't think it makes sense to print a 
> > > > prefix here. You could just leave that part out.
> > > Why is removing the prefix better? The types can matter (characters 
> > > outside the basic character set are allowed to have negative `char` 
> > > values). Also, moving forward, the value of a character need not be the 
> > > same in the various encodings.
> > Some fun with signedness (imagine a more realistic example with 
> > `ISO-8859-1` ordinary character encoding with a signed `char` type):
> > ```
> > $ clang -Xclang -fwchar-type=short -xc++ -<<<$'static_assert(L"\\uFF10"[0] 
> > == U\'\\uFF10\');'
> > :1:15: error: static assertion failed due to requirement 
> > 'L"\xFF10"[0] == U'\uff10''
> > 1 | static_assert(L"\uFF10"[0] == U'\uFF10');
> >   |   ^
> > :1:28: note: expression evaluates to ''0' (0xFF10) == '0' (0xFF10)'
> > 1 | static_assert(L"\uFF10"[0] == U'\uFF10');
> >   |   ~^~~~
> > 1 error generated.
> > Return:  0x01:1   Fri Aug 11 23:49:02 2023 EDT
> > ```
> Either we care about the actual character - ie `'a'`, or it's value (ie 
> `42`). The motivation for the current patch is to add the value to the 
> diagnostic message.
> I'm also concerned about mixing things that are are are not lexical elements 
> in the diagnostics
Maybe the //motivation// for the current patch is to add the value, but what it 
does (for wide characters as defined in C) is to add the character (and 
obfuscate the value).

Observe the status quo (https://godbolt.org/z/Wc6nKvTMn):
```
note: expression evaluates to '-240 == 65296'
```

From the output higher up (with this patch), we see two "identical" characters 
and values (due to lack of decimal value output). With decimal value output 
added, it will still be potentially confusing why the two identical characters 
have different values (without some sort of type annotation).

I admit that the confusion arises in the status quo treatment of `signed char` 
and `unsigned char`. I hope I am using the word correctly when I say that it is 
ironic that the patch breaks in one context what it seeks to fix in another.




Comment at: clang/lib/Sema/SemaDeclCXX.cpp:16869
+/// The code point needs to be zero-extended to 32-bits.
+void ConvertCharToString(uint32_t CodePoint, const BuiltinType *BTy,
+ unsigned TyWidth, llvm::raw_ostream ) {

It does not seem that the first parameter expects a `CodePoint` argument in all 
cases. For `Char_S`, `Char_U`, and `Char8`, it seems the function wants to 
treat the input as a UTF-8 code unit.

I suggest changing the argument to be clearly a code unit (and potentially 
treat it as a code point value as appropriate later in the function).

Also: The function should probably be declared as having static linkage.
Additionally: The function does not "convert" in the language semantic sense. 
`WriteCharacterValueDescriptionForDisplay` might be a better name.



Comment at: clang/lib/Sema/SemaDeclCXX.cpp:16876
+  // other types.
+  if (CodePoint <= UCHAR_MAX) {
+StringRef Escaped = escapeCStyle(CodePoint);

For types other than `Char_S`, `Char_U`, and `Char8`, this fails to treat the 
C1 Controls and Latin-1 Supplement characters as Unicode code points. It looks 
like test coverage for these cases are missing.



Comment at: clang/lib/Sema/SemaDeclCXX.cpp:16930-16936
+case BuiltinType::Char_S:
+case BuiltinType::Char_U:
+case BuiltinType::Char8:
+case BuiltinType::Char16:
+case BuiltinType::Char32:
+case BuiltinType::WChar_S:
+case BuiltinType::WChar_U: {

cor3ntin wrote:
> hubert.reinterpretcast wrote:
> > Add FIXME for `char` and `wchar_t` cases that this assumes Unicode literal 
> > encodings.
> If we wanted such fixme, it should be L1689.
The `ConvertCharToString` has a first parameter called `CodePoint`. With that 
interface[^1], it is sensible to insert conversion from the applicable literal 
encoding to a Unicode code point value here (thus my request for a FIXME here).

You are probably right that the FIXME belongs elsewhere. If you were thinking 
what I am thinking, then I am guessing you meant L16894? That is where the 
`ConvertCharToString` function seems to assume that a `wchar_t` value is 
directly a "code point value". To generate hex escapes, the function needs to 
be passed the original value (including for `char`s, e.g., to handle stray code 

[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-12 Thread Corentin Jabot via Phabricator via cfe-commits
cor3ntin added inline comments.



Comment at: clang/lib/Sema/SemaDeclCXX.cpp:16899
+  uint32_t CodePoint = 
static_cast(V.getInt().getZExtValue());
+  PrintCharLiteralPrefix(BTy->getKind(), OS);
+  OS << '\'';

hubert.reinterpretcast wrote:
> hubert.reinterpretcast wrote:
> > cor3ntin wrote:
> > > Looking at the diagnostics, I don't think it makes sense to print a 
> > > prefix here. You could just leave that part out.
> > Why is removing the prefix better? The types can matter (characters outside 
> > the basic character set are allowed to have negative `char` values). Also, 
> > moving forward, the value of a character need not be the same in the 
> > various encodings.
> Some fun with signedness (imagine a more realistic example with `ISO-8859-1` 
> ordinary character encoding with a signed `char` type):
> ```
> $ clang -Xclang -fwchar-type=short -xc++ -<<<$'static_assert(L"\\uFF10"[0] == 
> U\'\\uFF10\');'
> :1:15: error: static assertion failed due to requirement 'L"\xFF10"[0] 
> == U'\uff10''
> 1 | static_assert(L"\uFF10"[0] == U'\uFF10');
>   |   ^
> :1:28: note: expression evaluates to ''0' (0xFF10) == '0' (0xFF10)'
> 1 | static_assert(L"\uFF10"[0] == U'\uFF10');
>   |   ~^~~~
> 1 error generated.
> Return:  0x01:1   Fri Aug 11 23:49:02 2023 EDT
> ```
Either we care about the actual character - ie `'a'`, or it's value (ie `42`). 
The motivation for the current patch is to add the value to the diagnostic 
message.
I'm also concerned about mixing things that are are are not lexical elements in 
the diagnostics



Comment at: clang/lib/Sema/SemaDeclCXX.cpp:16930-16936
+case BuiltinType::Char_S:
+case BuiltinType::Char_U:
+case BuiltinType::Char8:
+case BuiltinType::Char16:
+case BuiltinType::Char32:
+case BuiltinType::WChar_S:
+case BuiltinType::WChar_U: {

hubert.reinterpretcast wrote:
> Add FIXME for `char` and `wchar_t` cases that this assumes Unicode literal 
> encodings.
If we wanted such fixme, it should be L1689.



Comment at: clang/test/SemaCXX/static-assert.cpp:287
+  static_assert((char16_t)L'ゆ' == L"C̵̭̯̠̎͌ͅť̺"[1], ""); // expected-error 
{{failed}} \
+  // expected-note {{evaluates 
to ''ゆ' (0x3086) == '̵' (0x335)'}}
+  static_assert(L"\/"[1] == u'\xFFFD', ""); // expected-error {{failed}} \

hubert.reinterpretcast wrote:
> The C++23 escaped string formatting facility would not generate a trailing 
> combining character like this. I recommend following suit.
> 
> Info on U+0335: https://util.unicode.org/UnicodeJsps/character.jsp?a=0335
> 
This is way outside the scope of the patch. The diagnostic output facility has 
no understanding of combining characters or graphemes and do not attempt to 
match std::print. It probably would be an improvement but this patch is not 
trying to modify how all diagnostics are printed. (all of that logic is in 
Diagnostic.cpp)


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-11 Thread Hubert Tong via Phabricator via cfe-commits
hubert.reinterpretcast added inline comments.



Comment at: clang/lib/Sema/SemaDeclCXX.cpp:16899
+  uint32_t CodePoint = 
static_cast(V.getInt().getZExtValue());
+  PrintCharLiteralPrefix(BTy->getKind(), OS);
+  OS << '\'';

hubert.reinterpretcast wrote:
> cor3ntin wrote:
> > Looking at the diagnostics, I don't think it makes sense to print a prefix 
> > here. You could just leave that part out.
> Why is removing the prefix better? The types can matter (characters outside 
> the basic character set are allowed to have negative `char` values). Also, 
> moving forward, the value of a character need not be the same in the various 
> encodings.
Some fun with signedness (imagine a more realistic example with `ISO-8859-1` 
ordinary character encoding with a signed `char` type):
```
$ clang -Xclang -fwchar-type=short -xc++ -<<<$'static_assert(L"\\uFF10"[0] == 
U\'\\uFF10\');'
:1:15: error: static assertion failed due to requirement 'L"\xFF10"[0] 
== U'\uff10''
1 | static_assert(L"\uFF10"[0] == U'\uFF10');
  |   ^
:1:28: note: expression evaluates to ''0' (0xFF10) == '0' (0xFF10)'
1 | static_assert(L"\uFF10"[0] == U'\uFF10');
  |   ~^~~~
1 error generated.
Return:  0x01:1   Fri Aug 11 23:49:02 2023 EDT
```


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-11 Thread Hubert Tong via Phabricator via cfe-commits
hubert.reinterpretcast added inline comments.



Comment at: clang/lib/Sema/SemaDeclCXX.cpp:16930-16936
+case BuiltinType::Char_S:
+case BuiltinType::Char_U:
+case BuiltinType::Char8:
+case BuiltinType::Char16:
+case BuiltinType::Char32:
+case BuiltinType::WChar_S:
+case BuiltinType::WChar_U: {

Add FIXME for `char` and `wchar_t` cases that this assumes Unicode literal 
encodings.



Comment at: clang/lib/Sema/SemaDeclCXX.cpp:16942-16945
+  OS << "' (0x"
+ << llvm::format_hex_no_prefix(CodePoint, /*Width=*/2,
+   /*Upper=*/true)
+ << ')';

@aaron.ballman, hex output hides signedness. I think we want hex //and// 
decimal.



Comment at: clang/test/SemaCXX/static-assert.cpp:287
+  static_assert((char16_t)L'ゆ' == L"C̵̭̯̠̎͌ͅť̺"[1], ""); // expected-error 
{{failed}} \
+  // expected-note {{evaluates 
to ''ゆ' (0x3086) == '̵' (0x335)'}}
+  static_assert(L"\/"[1] == u'\xFFFD', ""); // expected-error {{failed}} \

The C++23 escaped string formatting facility would not generate a trailing 
combining character like this. I recommend following suit.

Info on U+0335: https://util.unicode.org/UnicodeJsps/character.jsp?a=0335



CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-11 Thread Hubert Tong via Phabricator via cfe-commits
hubert.reinterpretcast added inline comments.



Comment at: clang/lib/Sema/SemaDeclCXX.cpp:16899
+  uint32_t CodePoint = 
static_cast(V.getInt().getZExtValue());
+  PrintCharLiteralPrefix(BTy->getKind(), OS);
+  OS << '\'';

cor3ntin wrote:
> Looking at the diagnostics, I don't think it makes sense to print a prefix 
> here. You could just leave that part out.
Why is removing the prefix better? The types can matter (characters outside the 
basic character set are allowed to have negative `char` values). Also, moving 
forward, the value of a character need not be the same in the various encodings.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-10 Thread Hubert Tong via Phabricator via cfe-commits
hubert.reinterpretcast added a comment.

In D155610#4575579 , @cor3ntin wrote:

> @hubert.reinterpretcast It does not, Unicode characters are only escaped in 
> Diagnostics.cpp, and I think this is what we want.
> Currently, llvm assume UTF-8 terminal, except on Windows where we convert to 
> UTF-16 and use the wide windows APIs (`raw_fd_ostream::write_impl`).

I am skeptical of the extent to which that assumption is exercised in a 
problematic manner today. The characters being emitted (aside from the [U+0020, 
U+007E] fixed message text itself) generally come from the text of the source 
file, which is generally written using characters that the user can display 
(even if they are not "basic Latin" characters).


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-10 Thread Takuya Shimizu via Phabricator via cfe-commits
hazohelet updated this revision to Diff 548948.
hazohelet marked 2 inline comments as done.
hazohelet added a comment.

Address comments from Aaron

- Use hex code for integer representation of textual types
- NFC stylistic changes


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

Files:
  clang/docs/ReleaseNotes.rst
  clang/lib/Sema/SemaDeclCXX.cpp
  clang/test/Lexer/cxx1z-trigraphs.cpp
  clang/test/SemaCXX/static-assert-cxx26.cpp
  clang/test/SemaCXX/static-assert.cpp

Index: clang/test/SemaCXX/static-assert.cpp
===
--- clang/test/SemaCXX/static-assert.cpp
+++ clang/test/SemaCXX/static-assert.cpp
@@ -268,7 +268,29 @@
 return 'c';
   }
   static_assert(getChar() == 'a', ""); // expected-error {{failed}} \
-   // expected-note {{evaluates to ''c' == 'a''}}
+   // expected-note {{evaluates to ''c' (0x63) == 'a' (0x61)'}}
+  static_assert((char)9 == '\x61', ""); // expected-error {{failed}} \
+// expected-note {{evaluates to ''\t' (0x09) == 'a' (0x61)'}}
+  static_assert((char)10 == '\0', ""); // expected-error {{failed}} \
+   // expected-note {{n' (0x0A) == '' (0x00)'}}
+  // The note above is intended to match "evaluates to '\n' (0x0A) == '' (0x00)'", but if we write it as it is,
+  // the "\n" cannot be consumed by the diagnostic consumer.
+  static_assert((signed char)10 == (char)-123, ""); // expected-error {{failed}} \
+// expected-note {{evaluates to '10 == '<85>' (0x85)'}}
+  static_assert((char)-4 == (unsigned char)-8, ""); // expected-error {{failed}} \
+// expected-note {{evaluates to ''' (0xFC) == 248'}}
+  static_assert((char)-128 == (char)-123, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''<80>' (0x80) == '<85>' (0x85)'}}
+  static_assert('\xA0' == (char)'\x20', ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to ''' (0xA0) == ' ' (0x20)'}}
+  static_assert((char16_t)L'ゆ' == L"C̵̭̯̠̎͌ͅť̺"[1], ""); // expected-error {{failed}} \
+  // expected-note {{evaluates to ''ゆ' (0x3086) == '̵' (0x335)'}}
+  static_assert(L"\/"[1] == u'\xFFFD', ""); // expected-error {{failed}} \
+  // expected-note {{evaluates to ''/' (0xFF0F) == '�' (0xFFFD)'}}
+  static_assert(L"⚾"[0] == U'🌍', ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to ''⚾' (0x26BE) == '🌍' (0x1F30D)'}}
+  static_assert(U"\a"[0] == (wchar_t)9, ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to ''\a' (0x07) == '\t' (0x09)'}}
 
   /// Bools are printed as bools.
   constexpr bool invert(bool b) {
Index: clang/test/SemaCXX/static-assert-cxx26.cpp
===
--- clang/test/SemaCXX/static-assert-cxx26.cpp
+++ clang/test/SemaCXX/static-assert-cxx26.cpp
@@ -298,3 +298,12 @@
 Bad b; // expected-note {{in instantiation}}
 
 }
+
+namespace EscapeInDiagnostic {
+static_assert('\u{9}' == (char)1, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''\t' (0x09) == '' (0x01)'}}
+static_assert((char8_t)-128 == (char8_t)-123, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''<80>' (0x80) == '<85>' (0x85)'}}
+static_assert((char16_t)0xFEFF == (char16_t)0xDB93, ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to ''' (0xFEFF) == '\xDB93' (0xDB93)'}}
+}
Index: clang/test/Lexer/cxx1z-trigraphs.cpp
===
--- clang/test/Lexer/cxx1z-trigraphs.cpp
+++ clang/test/Lexer/cxx1z-trigraphs.cpp
@@ -21,7 +21,7 @@
 
 #if !ENABLED_TRIGRAPHS
 // expected-error@11 {{}} expected-warning@11 {{trigraph ignored}}
-// expected-error@13 {{failed}} expected-warning@13 {{trigraph ignored}} expected-note@13 {{evaluates to ''?' == '#''}}
+// expected-error@13 {{failed}} expected-warning@13 {{trigraph ignored}} expected-note@13 {{evaluates to ''?' (0x3F) == '#' (0x23)'}}
 // expected-error@16 {{}}
 // expected-error@20 {{}}
 #else
Index: clang/lib/Sema/SemaDeclCXX.cpp
===
--- clang/lib/Sema/SemaDeclCXX.cpp
+++ clang/lib/Sema/SemaDeclCXX.cpp
@@ -46,6 +46,7 @@
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
+#include 

[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-10 Thread Corentin Jabot via Phabricator via cfe-commits
cor3ntin added a comment.

@hubert.reinterpretcast It does not, Unicode characters are only escaped in 
Diagnostics.cpp, and I think this is what we want.
Currently, llvm assume UTF-8 terminal, except on Windows where we convert to 
UTF-16 and use the wide windows APIs (`raw_fd_ostream::write_impl`).

If we want to extend that - IE support EBCDIC, I assume this is your question - 
 we probably would want to modify `pushEscapedString` (Diagnostics.cpp), to 
consider a restricted set of characters as printable.
There are some questions in how we should do that, it could be a compile time 
configuration, or we need a way to 1/ detect the encoding of the environment, 
in a way similar to P1885   2/ construct the 
set of printable characters on that platforms.  
Trying to encode all characters < U+00FF might be a reasonable way to build 
such table.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-09 Thread Hubert Tong via Phabricator via cfe-commits
hubert.reinterpretcast added subscribers: abhina.sreeskantharajan, 
hubert.reinterpretcast.
hubert.reinterpretcast added a comment.

@abhina.sreeskantharajan, does this patch assume too much about the characters 
displayable for diagnostic output?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-04 Thread Aaron Ballman via Phabricator via cfe-commits
aaron.ballman added inline comments.



Comment at: clang/docs/ReleaseNotes.rst:103-137
+- When describing the failure of static assertion of `==` expression, clang 
prints the integer
+  representation of the value as well as its character representation when
+  the user-provided expression is of character type. If the character is
+  non-printable, clang now shows the escpaed character.
+  Clang also prints multi-byte characters if the user-provided expression
+  is of multi-byte character type.
+

hazohelet wrote:
> aaron.ballman wrote:
> > cor3ntin wrote:
> > > aaron.ballman wrote:
> > > > cor3ntin wrote:
> > > > > @aaron.ballman One one hand this is nice, on the other hand maybe too 
> > > > > detailed. What do you think?
> > > > I'm happy with it -- better too much detail than too little, but this 
> > > > really helps users see what's been improved and why it matters.
> > > > 
> > > > That said, I think `0x0A` and `0x1F30D` would arguably be better than 
> > > > printing the values in decimal. For `\n`, perhaps folks remember that 
> > > > it's decimal value 10, but nobody is going to know what `127757` means 
> > > > compared to the hex representation (esp because the value is specified 
> > > > in hex with the prefix printed in the error message). WDYT?
> > > For `wchar_t`, `charN_t` I think that makes sense.
> > > for `char`... hard to know, I think this is mostly useful for people who 
> > > treat char as some kind of integer. I could go either way. using hex 
> > > consistently seems reasonable
> > I don't insist on using hex, but I have a slight preference for using it 
> > consistently everywhere. CC @cjdb for more opinions since this relates to 
> > user experience of diagnostics.
> I generally agree that hex code would be better for characters.
> I think we still have some arguable points.
> 1. Should we print the unsigned code point or the (possibly signed) integer? 
> (e.g. `0xFF` vs `-0x01` for `(char)-1`, on targets where `char` is signed)
> 2. Should we print the hex code when the other subexpression of the `==` 
> expression is not a textual type? (e.g. `0x11` vs `17` for LHS of `(char)17 
> == 11`)
> 
> For 1, I think we should always print unsigned code point for all textual 
> types for consistency. Also we don't want to print `-0x3` for `L'\xFFFD'` on 
> targets where `wchar_t` is signed and 16-bit width (I haven't checked whether 
> that target exists, though).
> For 2, I want to see decimal (possibly signed) integer if the other side of 
> the expression is not textual type.
> Displaying `expression evaluates to ''' (0xFF) == 255'` for the following 
> code would be highly confusing.
> ```
> static_assert((char)-1 == (unsigned char)-1);
> ```
> WDYT?
> Should we print the unsigned code point or the (possibly signed) integer? 
> (e.g. 0xFF vs -0x01 for (char)-1, on targets where char is signed)

Personally, I find -0x01 to be kind of weird and I slightly prefer 0xFF.

> Should we print the hex code when the other subexpression of the == 
> expression is not a textual type? (e.g. 0x11 vs 17 for LHS of (char)17 == 11)

I don't have a strong opinion on this because I think we can come up with 
arguments for either approach. My intuition is that we should just use hex 
values everywhere, but others may have a different opinion.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-04 Thread Takuya Shimizu via Phabricator via cfe-commits
hazohelet added inline comments.



Comment at: clang/docs/ReleaseNotes.rst:103-137
+- When describing the failure of static assertion of `==` expression, clang 
prints the integer
+  representation of the value as well as its character representation when
+  the user-provided expression is of character type. If the character is
+  non-printable, clang now shows the escpaed character.
+  Clang also prints multi-byte characters if the user-provided expression
+  is of multi-byte character type.
+

aaron.ballman wrote:
> cor3ntin wrote:
> > aaron.ballman wrote:
> > > cor3ntin wrote:
> > > > @aaron.ballman One one hand this is nice, on the other hand maybe too 
> > > > detailed. What do you think?
> > > I'm happy with it -- better too much detail than too little, but this 
> > > really helps users see what's been improved and why it matters.
> > > 
> > > That said, I think `0x0A` and `0x1F30D` would arguably be better than 
> > > printing the values in decimal. For `\n`, perhaps folks remember that 
> > > it's decimal value 10, but nobody is going to know what `127757` means 
> > > compared to the hex representation (esp because the value is specified in 
> > > hex with the prefix printed in the error message). WDYT?
> > For `wchar_t`, `charN_t` I think that makes sense.
> > for `char`... hard to know, I think this is mostly useful for people who 
> > treat char as some kind of integer. I could go either way. using hex 
> > consistently seems reasonable
> I don't insist on using hex, but I have a slight preference for using it 
> consistently everywhere. CC @cjdb for more opinions since this relates to 
> user experience of diagnostics.
I generally agree that hex code would be better for characters.
I think we still have some arguable points.
1. Should we print the unsigned code point or the (possibly signed) integer? 
(e.g. `0xFF` vs `-0x01` for `(char)-1`, on targets where `char` is signed)
2. Should we print the hex code when the other subexpression of the `==` 
expression is not a textual type? (e.g. `0x11` vs `17` for LHS of `(char)17 == 
11`)

For 1, I think we should always print unsigned code point for all textual types 
for consistency. Also we don't want to print `-0x3` for `L'\xFFFD'` on targets 
where `wchar_t` is signed and 16-bit width (I haven't checked whether that 
target exists, though).
For 2, I want to see decimal (possibly signed) integer if the other side of the 
expression is not textual type.
Displaying `expression evaluates to ''' (0xFF) == 255'` for the following 
code would be highly confusing.
```
static_assert((char)-1 == (unsigned char)-1);
```
WDYT?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-03 Thread Aaron Ballman via Phabricator via cfe-commits
aaron.ballman added inline comments.



Comment at: clang/docs/ReleaseNotes.rst:103-137
+- When describing the failure of static assertion of `==` expression, clang 
prints the integer
+  representation of the value as well as its character representation when
+  the user-provided expression is of character type. If the character is
+  non-printable, clang now shows the escpaed character.
+  Clang also prints multi-byte characters if the user-provided expression
+  is of multi-byte character type.
+

cor3ntin wrote:
> aaron.ballman wrote:
> > cor3ntin wrote:
> > > @aaron.ballman One one hand this is nice, on the other hand maybe too 
> > > detailed. What do you think?
> > I'm happy with it -- better too much detail than too little, but this 
> > really helps users see what's been improved and why it matters.
> > 
> > That said, I think `0x0A` and `0x1F30D` would arguably be better than 
> > printing the values in decimal. For `\n`, perhaps folks remember that it's 
> > decimal value 10, but nobody is going to know what `127757` means compared 
> > to the hex representation (esp because the value is specified in hex with 
> > the prefix printed in the error message). WDYT?
> For `wchar_t`, `charN_t` I think that makes sense.
> for `char`... hard to know, I think this is mostly useful for people who 
> treat char as some kind of integer. I could go either way. using hex 
> consistently seems reasonable
I don't insist on using hex, but I have a slight preference for using it 
consistently everywhere. CC @cjdb for more opinions since this relates to user 
experience of diagnostics.



Comment at: clang/test/SemaCXX/static-assert.cpp:280-287
+static_assert((char16_t)L'ゆ' == L"C̵̭̯̠̎͌ͅť̺"[1], ""); // expected-error 
{{failed}} \
+// expected-note {{evaluates 
to ''ゆ' (12422) == '̵' (821)'}}
+static_assert(L"\/"[1] == u'\xFFFD', ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to 
''/' (65295) == '�' (65533)'}}
+static_assert(L"⚾"[0] == U'', ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''⚾' 
(9918) == '' (127757)'}}
+static_assert(U"\a"[0] == (wchar_t)9, ""); // expected-error {{failed}} \




CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-03 Thread Corentin Jabot via Phabricator via cfe-commits
cor3ntin added inline comments.



Comment at: clang/docs/ReleaseNotes.rst:103-137
+- When describing the failure of static assertion of `==` expression, clang 
prints the integer
+  representation of the value as well as its character representation when
+  the user-provided expression is of character type. If the character is
+  non-printable, clang now shows the escpaed character.
+  Clang also prints multi-byte characters if the user-provided expression
+  is of multi-byte character type.
+

aaron.ballman wrote:
> cor3ntin wrote:
> > @aaron.ballman One one hand this is nice, on the other hand maybe too 
> > detailed. What do you think?
> I'm happy with it -- better too much detail than too little, but this really 
> helps users see what's been improved and why it matters.
> 
> That said, I think `0x0A` and `0x1F30D` would arguably be better than 
> printing the values in decimal. For `\n`, perhaps folks remember that it's 
> decimal value 10, but nobody is going to know what `127757` means compared to 
> the hex representation (esp because the value is specified in hex with the 
> prefix printed in the error message). WDYT?
For `wchar_t`, `charN_t` I think that makes sense.
for `char`... hard to know, I think this is mostly useful for people who treat 
char as some kind of integer. I could go either way. using hex consistently 
seems reasonable


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-03 Thread Aaron Ballman via Phabricator via cfe-commits
aaron.ballman added inline comments.



Comment at: clang/docs/ReleaseNotes.rst:103-137
+- When describing the failure of static assertion of `==` expression, clang 
prints the integer
+  representation of the value as well as its character representation when
+  the user-provided expression is of character type. If the character is
+  non-printable, clang now shows the escpaed character.
+  Clang also prints multi-byte characters if the user-provided expression
+  is of multi-byte character type.
+

cor3ntin wrote:
> @aaron.ballman One one hand this is nice, on the other hand maybe too 
> detailed. What do you think?
I'm happy with it -- better too much detail than too little, but this really 
helps users see what's been improved and why it matters.

That said, I think `0x0A` and `0x1F30D` would arguably be better than printing 
the values in decimal. For `\n`, perhaps folks remember that it's decimal value 
10, but nobody is going to know what `127757` means compared to the hex 
representation (esp because the value is specified in hex with the prefix 
printed in the error message). WDYT?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-03 Thread Corentin Jabot via Phabricator via cfe-commits
cor3ntin added inline comments.



Comment at: clang/docs/ReleaseNotes.rst:103-137
+- When describing the failure of static assertion of `==` expression, clang 
prints the integer
+  representation of the value as well as its character representation when
+  the user-provided expression is of character type. If the character is
+  non-printable, clang now shows the escpaed character.
+  Clang also prints multi-byte characters if the user-provided expression
+  is of multi-byte character type.
+

@aaron.ballman One one hand this is nice, on the other hand maybe too detailed. 
What do you think?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-03 Thread Corentin Jabot via Phabricator via cfe-commits
cor3ntin added a comment.

In D155610#4557930 , @hazohelet wrote:

> One concern from my side is that some unicode characters like `U+FEFF` (I 
> added in test) are invisible, but it may not be a big concern because we also 
> display integer representation in parens.

This is not an easy problem to solve. `pushEscapedString` does not escape 
formatting code points, such as `U+FEFF` because doing so break a bunch of 
scripts/emojis.
There are cases where it should probably be escaped but that fully depend on 
context, and it would require grapheme clusterization, which is a lot of work 
for limited value.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-03 Thread Takuya Shimizu via Phabricator via cfe-commits
hazohelet added a comment.

One concern from my side is that some unicode characters like `U+FEFF` (I added 
in test) are invisible, but it may not be a big concern because we also display 
integer representation in parens.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155610: [Clang][Sema] Fix display of characters on static assertion failure

2023-08-03 Thread Takuya Shimizu via Phabricator via cfe-commits
hazohelet updated this revision to Diff 546832.
hazohelet marked 2 inline comments as done.
hazohelet retitled this revision from "[Clang][ExprConstant] Print integer 
instead of character on static assertion failure" to "[Clang][Sema] Fix display 
of characters on static assertion failure".
hazohelet edited the summary of this revision.
hazohelet added a comment.

Address comments from Corentin

- Remove printing of character type prefix
- Added code example in release note
- Removed unnecessary static_cast


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155610/new/

https://reviews.llvm.org/D155610

Files:
  clang/docs/ReleaseNotes.rst
  clang/lib/Sema/SemaDeclCXX.cpp
  clang/test/Lexer/cxx1z-trigraphs.cpp
  clang/test/SemaCXX/static-assert-cxx26.cpp
  clang/test/SemaCXX/static-assert.cpp

Index: clang/test/SemaCXX/static-assert.cpp
===
--- clang/test/SemaCXX/static-assert.cpp
+++ clang/test/SemaCXX/static-assert.cpp
@@ -262,7 +262,29 @@
 return 'c';
   }
   static_assert(getChar() == 'a', ""); // expected-error {{failed}} \
-   // expected-note {{evaluates to ''c' == 'a''}}
+   // expected-note {{evaluates to ''c' (99) == 'a' (97)'}}
+  static_assert((char)9 == '\x61', ""); // expected-error {{failed}} \
+// expected-note {{evaluates to ''\t' (9) == 'a' (97)'}}
+  static_assert((char)10 == '\0', ""); // expected-error {{failed}} \
+   // expected-note {{n' (10) == '' (0)'}}
+  // The note above is intended to match "evaluates to '\n' (10) == '' (0)'", but if we write it as it is,
+  // the "\n" cannot be consumed by the diagnostic consumer.
+  static_assert((signed char)10 == (char)-123, ""); // expected-error {{failed}} \
+// expected-note {{evaluates to '10 == '<85>' (-123)'}}
+  static_assert((char)-4 == (unsigned char)-8, ""); // expected-error {{failed}} \
+// expected-note {{evaluates to ''' (-4) == 248'}}
+  static_assert((char)-128 == (char)-123, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''<80>' (-128) == '<85>' (-123)'}}
+  static_assert('\xA0' == (char)'\x20', ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to ''' (-96) == ' ' (32)'}}
+static_assert((char16_t)L'ゆ' == L"C̵̭̯̠̎͌ͅť̺"[1], ""); // expected-error {{failed}} \
+// expected-note {{evaluates to ''ゆ' (12422) == '̵' (821)'}}
+static_assert(L"\/"[1] == u'\xFFFD', ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''/' (65295) == '�' (65533)'}}
+static_assert(L"⚾"[0] == U'🌍', ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''⚾' (9918) == '🌍' (127757)'}}
+static_assert(U"\a"[0] == (wchar_t)9, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''\a' (7) == '\t' (9)'}}
 
   /// Bools are printed as bools.
   constexpr bool invert(bool b) {
Index: clang/test/SemaCXX/static-assert-cxx26.cpp
===
--- clang/test/SemaCXX/static-assert-cxx26.cpp
+++ clang/test/SemaCXX/static-assert-cxx26.cpp
@@ -298,3 +298,12 @@
 Bad b; // expected-note {{in instantiation}}
 
 }
+
+namespace EscapeInDiagnostic {
+static_assert('\u{9}' == (char)1, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''\t' (9) == '' (1)'}}
+static_assert((char8_t)-128 == (char8_t)-123, ""); // expected-error {{failed}} \
+   // expected-note {{evaluates to ''<80>' (128) == '<85>' (133)'}}
+static_assert((char16_t)0xFEFF == (char16_t)0xDB93, ""); // expected-error {{failed}} \
+ // expected-note {{evaluates to ''' (65279) == '\xDB93' (56211)'}}
+}
Index: clang/test/Lexer/cxx1z-trigraphs.cpp
===
--- clang/test/Lexer/cxx1z-trigraphs.cpp
+++ clang/test/Lexer/cxx1z-trigraphs.cpp
@@ -21,7 +21,7 @@
 
 #if !ENABLED_TRIGRAPHS
 // expected-error@11 {{}} expected-warning@11 {{trigraph ignored}}
-// expected-error@13 {{failed}} expected-warning@13 {{trigraph ignored}} expected-note@13 {{evaluates to ''?' == '#''}}
+// expected-error@13 {{failed}} expected-warning@13 {{trigraph ignored}} expected-note@13 {{evaluates to ''?' (63) == '#' (35)'}}
 // expected-error@16 {{}}
 // expected-error@20 {{}}
 #else
Index: clang/lib/Sema/SemaDeclCXX.cpp