lichray updated this revision to Diff 391833.
lichray marked an inline comment as done.
lichray added a comment.

- Switch to `llvm::SmallString`
- Refactor code that prints C-style builtin escape sequences
- Stop printing strings with embedded NULs in NTTP types
- Add an `EntireContentsOfLargeArray` pretty-print policy


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D115031/new/

https://reviews.llvm.org/D115031

Files:
  clang/include/clang/AST/PrettyPrinter.h
  clang/include/clang/Basic/CharInfo.h
  clang/lib/AST/APValue.cpp
  clang/lib/AST/Expr.cpp
  clang/test/SemaTemplate/temp_arg_string_printing.cpp

Index: clang/test/SemaTemplate/temp_arg_string_printing.cpp
===================================================================
--- /dev/null
+++ clang/test/SemaTemplate/temp_arg_string_printing.cpp
@@ -0,0 +1,141 @@
+// RUN: %clang_cc1 -std=c++20 -fsyntax-only -ast-print %s | FileCheck %s
+
+using size_t = __SIZE_TYPE__;
+static_assert(__has_builtin(__make_integer_seq));
+
+template <class T, T... I> class idx_seq {};
+template <size_t N> using make_idx_seq = __make_integer_seq<idx_seq, size_t, N>;
+
+template <class CharT, size_t N>
+struct Str {
+  constexpr Str(CharT const (&s)[N]) : Str(s, make_idx_seq<N>()) {}
+  CharT value[N];
+
+private:
+  template <size_t... I>
+  constexpr Str(CharT const (&s)[N], idx_seq<size_t, I...>) : value{s[I]...} {}
+};
+
+template <Str> class ASCII {};
+
+void not_string() {
+  // CHECK{LITERAL}: ASCII<{{9, -1, 42}}>
+  new ASCII<(int[]){9, -1, 42}>;
+  // CHECK{LITERAL}: ASCII<{{3.140000e+00, 0.000000e+00, 4.200000e+01}}>
+  new ASCII<(double[]){3.14, 0., 42.}>;
+}
+
+void narrow() {
+  // CHECK{LITERAL}: ASCII<{""}>
+  new ASCII<"">;
+  // CHECK{LITERAL}: ASCII<{"the quick brown fox jumps"}>
+  new ASCII<"the quick brown fox jumps">;
+  // CHECK{LITERAL}: ASCII<{"OVER THE LAZY DOG 0123456789"}>
+  new ASCII<"OVER THE LAZY DOG 0123456789">;
+  // CHECK{LITERAL}: ASCII<{"\\`~!@#$%^&*()_+-={}[]|\'\";:,.<>?/"}>
+  new ASCII<R"(\`~!@#$%^&*()_+-={}[]|'";:,.<>?/)">;
+  // CHECK{LITERAL}: ASCII<{{101, 115, 99, 97, 112, 101, 0, 0}}>
+  new ASCII<"escape\0">;
+  // CHECK{LITERAL}: ASCII<{"escape\r\n"}>
+  new ASCII<"escape\r\n">;
+  // CHECK{LITERAL}: ASCII<{"escape\\\t\f\v"}>
+  new ASCII<"escape\\\t\f\v">;
+  // CHECK{LITERAL}: ASCII<{"escape\a\bc"}>
+  new ASCII<"escape\a\b\c">;
+  // CHECK{LITERAL}: ASCII<{{110, 111, 116, 17, 0}}>
+  new ASCII<"not\x11">;
+  // CHECK{LITERAL}: ASCII<{{18, 20, 127, 16, 1, 32, 97, 98, 99, 0}}>
+  new ASCII<"\x12\x14\x7f\x10\x01 abc">;
+  // CHECK{LITERAL}: ASCII<{{18, 20, 127, 16, 1, 32, 97, 98, 99, 100, ...}}>
+  new ASCII<"\x12\x14\x7f\x10\x01 abcd">;
+  // CHECK{LITERAL}: ASCII<{"print more characters as string"}>
+  new ASCII<"print more characters as string">;
+  // CHECK{LITERAL}: ASCII<{"print even more characters as string"}>
+  new ASCII<"print even more characters as string">;
+  // CHECK{LITERAL}: ASCII<{"print many characters no more than[...]"}>
+  new ASCII<"print many characters no more than a limit">;
+  // CHECK{LITERAL}: ASCII<{"\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r"}>
+  new ASCII<"\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r">;
+  // CHECK{LITERAL}: ASCII<{"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n[...]"}>
+  new ASCII<"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n">;
+}
+
+void wide() {
+  // CHECK{LITERAL}: ASCII<{L""}>
+  new ASCII<L"">;
+  // CHECK{LITERAL}: ASCII<{L"the quick brown fox jumps"}>
+  new ASCII<L"the quick brown fox jumps">;
+  // CHECK{LITERAL}: ASCII<{L"OVER THE LAZY DOG 0123456789"}>
+  new ASCII<L"OVER THE LAZY DOG 0123456789">;
+  // CHECK{LITERAL}: ASCII<{L"\\`~!@#$%^&*()_+-={}[]|\'\";:,.<>?/"}>
+  new ASCII<LR"(\`~!@#$%^&*()_+-={}[]|'";:,.<>?/)">;
+  // CHECK{LITERAL}: ASCII<{{101, 115, 99, 97, 112, 101, 0, 0}}>
+  new ASCII<L"escape\0">;
+  // CHECK{LITERAL}: ASCII<{L"escape\r\n"}>
+  new ASCII<L"escape\r\n">;
+  // CHECK{LITERAL}: ASCII<{L"escape\\\t\f\v"}>
+  new ASCII<L"escape\\\t\f\v">;
+  // CHECK{LITERAL}: ASCII<{L"escape\a\bc"}>
+  new ASCII<L"escape\a\b\c">;
+  // CHECK{LITERAL}: ASCII<{{110, 111, 116, 17, 0}}>
+  new ASCII<L"not\x11">;
+  // CHECK{LITERAL}: ASCII<{{18, 20, 255, 22909, 136, 32, 97, 98, 99, 0}}>
+  new ASCII<L"\x12\x14\xff\x597d\x88 abc">;
+  // CHECK{LITERAL}: ASCII<{{18, 20, 255, 22909, 136, 32, 97, 98, 99, 100, ...}}>
+  new ASCII<L"\x12\x14\xff\x597d\x88 abcd">;
+  // CHECK{LITERAL}: ASCII<{L"print more characters as string"}>
+  new ASCII<L"print more characters as string">;
+  // CHECK{LITERAL}: ASCII<{L"print even more characters as string"}>
+  new ASCII<L"print even more characters as string">;
+  // CHECK{LITERAL}: ASCII<{L"print many characters no more than[...]"}>
+  new ASCII<L"print many characters no more than a limit">;
+  // CHECK{LITERAL}: ASCII<{L"\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r"}>
+  new ASCII<L"\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r">;
+  // CHECK{LITERAL}: ASCII<{L"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n[...]"}>
+  new ASCII<L"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n">;
+}
+
+void utf8() {
+  // CHECK{LITERAL}: ASCII<{u8""}>
+  new ASCII<u8"">;
+  // CHECK{LITERAL}: ASCII<{u8"\\`~!@#$%^&*()_+-={}[]|\'\";:,.<>?/"}>
+  new ASCII<u8R"(\`~!@#$%^&*()_+-={}[]|'";:,.<>?/)">;
+  // CHECK{LITERAL}: ASCII<{{101, 115, 99, 97, 112, 101, 0, 0}}>
+  new ASCII<u8"escape\0">;
+  // CHECK{LITERAL}: ASCII<{u8"escape\r\n"}>
+  new ASCII<u8"escape\r\n">;
+  // CHECK{LITERAL}: ASCII<{{229, 165, 189, 239, 191, 189, 0}}>
+  new ASCII<u8"\u597d\ufffd">;
+  // CHECK{LITERAL}: ASCII<{u8"print many characters no more than[...]"}>
+  new ASCII<u8"print many characters no more than a limit">;
+}
+
+void utf16() {
+  // CHECK{LITERAL}: ASCII<{u""}>
+  new ASCII<u"">;
+  // CHECK{LITERAL}: ASCII<{u"\\`~!@#$%^&*()_+-={}[]|\'\";:,.<>?/"}>
+  new ASCII<uR"(\`~!@#$%^&*()_+-={}[]|'";:,.<>?/)">;
+  // CHECK{LITERAL}: ASCII<{{101, 115, 99, 97, 112, 101, 0, 0}}>
+  new ASCII<u"escape\0">;
+  // CHECK{LITERAL}: ASCII<{u"escape\r\n"}>
+  new ASCII<u"escape\r\n">;
+  // CHECK{LITERAL}: ASCII<{{22909, 65533, 0}}>
+  new ASCII<u"\u597d\ufffd">;
+  // CHECK{LITERAL}: ASCII<{u"print many characters no more than[...]"}>
+  new ASCII<u"print many characters no more than a limit">;
+}
+
+void utf32() {
+  // CHECK{LITERAL}: ASCII<{U""}>
+  new ASCII<U"">;
+  // CHECK{LITERAL}: ASCII<{U"\\`~!@#$%^&*()_+-={}[]|\'\";:,.<>?/"}>
+  new ASCII<UR"(\`~!@#$%^&*()_+-={}[]|'";:,.<>?/)">;
+  // CHECK{LITERAL}: ASCII<{{101, 115, 99, 97, 112, 101, 0, 0}}>
+  new ASCII<U"escape\0">;
+  // CHECK{LITERAL}: ASCII<{U"escape\r\n"}>
+  new ASCII<U"escape\r\n">;
+  // CHECK{LITERAL}: ASCII<{{22909, 131358, 0}}>
+  new ASCII<U"\u597d\U0002011E">;
+  // CHECK{LITERAL}: ASCII<{U"print many characters no more than[...]"}>
+  new ASCII<U"print many characters no more than a limit">;
+}
Index: clang/lib/AST/Expr.cpp
===================================================================
--- clang/lib/AST/Expr.cpp
+++ clang/lib/AST/Expr.cpp
@@ -943,40 +943,10 @@
     break;
   }
 
-  switch (Val) {
-  case '\\':
-    OS << "'\\\\'";
-    break;
-  case '\'':
-    OS << "'\\''";
-    break;
-  case '\a':
-    // TODO: K&R: the meaning of '\\a' is different in traditional C
-    OS << "'\\a'";
-    break;
-  case '\b':
-    OS << "'\\b'";
-    break;
-  // Nonstandard escape sequence.
-  /*case '\e':
-    OS << "'\\e'";
-    break;*/
-  case '\f':
-    OS << "'\\f'";
-    break;
-  case '\n':
-    OS << "'\\n'";
-    break;
-  case '\r':
-    OS << "'\\r'";
-    break;
-  case '\t':
-    OS << "'\\t'";
-    break;
-  case '\v':
-    OS << "'\\v'";
-    break;
-  default:
+  auto Escaped = escapeCStyle<EscapeChar::Single>(Val);
+  if (!Escaped.empty()) {
+    OS << "'" << Escaped << "'";
+  } else {
     // A character literal might be sign-extended, which
     // would result in an invalid \U escape sequence.
     // FIXME: multicharacter literals such as '\xFF\xFF\xFF\xFF'
@@ -1146,8 +1116,9 @@
 
   unsigned LastSlashX = getLength();
   for (unsigned I = 0, N = getLength(); I != N; ++I) {
-    switch (uint32_t Char = getCodeUnit(I)) {
-    default:
+    uint32_t Char = getCodeUnit(I);
+    auto Escaped = escapeCStyle<EscapeChar::Double>(Char);
+    if (Escaped.empty()) {
       // FIXME: Convert UTF-8 back to codepoints before rendering.
 
       // Convert UTF-16 surrogate pairs back to codepoints before rendering.
@@ -1175,7 +1146,7 @@
           for (/**/; Shift >= 0; Shift -= 4)
             OS << Hex[(Char >> Shift) & 15];
           LastSlashX = I;
-          break;
+          continue;
         }
 
         if (Char > 0xffff)
@@ -1188,7 +1159,7 @@
            << Hex[(Char >>  8) & 15]
            << Hex[(Char >>  4) & 15]
            << Hex[(Char >>  0) & 15];
-        break;
+        continue;
       }
 
       // If we used \x... for the previous character, and this character is a
@@ -1213,17 +1184,9 @@
            << (char)('0' + ((Char >> 6) & 7))
            << (char)('0' + ((Char >> 3) & 7))
            << (char)('0' + ((Char >> 0) & 7));
-      break;
-    // Handle some common non-printable cases to make dumps prettier.
-    case '\\': OS << "\\\\"; break;
-    case '"': OS << "\\\""; break;
-    case '\a': OS << "\\a"; break;
-    case '\b': OS << "\\b"; break;
-    case '\f': OS << "\\f"; break;
-    case '\n': OS << "\\n"; break;
-    case '\r': OS << "\\r"; break;
-    case '\t': OS << "\\t"; break;
-    case '\v': OS << "\\v"; break;
+    } else {
+      // Handle some common non-printable cases to make dumps prettier.
+      OS << Escaped;
     }
   }
   OS << '"';
Index: clang/lib/AST/APValue.cpp
===================================================================
--- clang/lib/AST/APValue.cpp
+++ clang/lib/AST/APValue.cpp
@@ -625,6 +625,64 @@
   return V.convertToDouble();
 }
 
+static bool TryPrintAsStringLiteral(raw_ostream &Out,
+                                    const PrintingPolicy &Policy,
+                                    const ArrayType *ATy, const APValue *Data,
+                                    size_t Size) {
+  if (Size == 0)
+    return false;
+
+  QualType Ty = ATy->getElementType();
+  if (!Ty->isAnyCharacterType())
+    return false;
+
+  // Only a NUL-terminated sequence can be shown; the NUL itself is dropped.
+  if (!Data[--Size].getInt().isZero())
+    return false;
+
+  llvm::SmallString<40> Buf;
+  Buf.push_back('"');
+
+  // Cap the length; better than printing a two-digit sequence of 10 integers.
+  constexpr size_t MaxN = 36;
+  StringRef Ellipsis;
+  if (Size > MaxN && !Policy.EntireContentsOfLargeArray) {
+    Ellipsis = "[...]";
+    Size = std::min(MaxN - Ellipsis.size() / 2, Size);
+  }
+
+  for (auto &Val : ArrayRef<const APValue>(Data, Size)) {
+    auto Char64 = Val.getInt().getExtValue();
+    if (!isASCII(Char64))
+      return false; // Non-ASCII: give up so the caller prints integers.
+    auto Ch = static_cast<unsigned char>(Char64);
+    // Escape both ' and " since the diagnostic message is itself 'quoted'.
+    auto Escaped = escapeCStyle<EscapeChar::SingleAndDouble>(Ch);
+    if (Escaped.empty()) {
+      if (!isPrintable(Ch))
+        return false; // Unprintable and no C escape: give up as well.
+      Buf.emplace_back(Ch);
+    } else {
+      Buf.append(Escaped);
+    }
+  }
+
+  Buf.append(Ellipsis);
+  Buf.push_back('"');
+
+  if (Ty->isWideCharType())
+    Out << 'L';
+  else if (Ty->isChar8Type())
+    Out << "u8";
+  else if (Ty->isChar16Type())
+    Out << 'u';
+  else if (Ty->isChar32Type())
+    Out << 'U';
+
+  Out << Buf;
+  return true;
+}
+
 void APValue::printPretty(raw_ostream &Out, const ASTContext &Ctx,
                           QualType Ty) const {
   printPretty(Out, Ctx.getPrintingPolicy(), Ty, &Ctx);
@@ -795,17 +853,23 @@
   }
   case APValue::Array: {
     const ArrayType *AT = Ty->castAsArrayTypeUnsafe();
+    unsigned N = getArrayInitializedElts();
+    if (N != 0 &&
+        TryPrintAsStringLiteral(Out, Policy, AT, &getArrayInitializedElt(0), N))
+      return;
     QualType ElemTy = AT->getElementType();
     Out << '{';
-    if (unsigned N = getArrayInitializedElts()) {
-      getArrayInitializedElt(0).printPretty(Out, Policy, ElemTy, Ctx);
-      for (unsigned I = 1; I != N; ++I) {
+    unsigned I = 0;
+    switch (N) {
+    case 0:
+      for (; I != N; ++I) {
         Out << ", ";
-        if (I == 10) {
-          // Avoid printing out the entire contents of large arrays.
-          Out << "...";
-          break;
+        if (I == 10 && !Policy.EntireContentsOfLargeArray) {
+          Out << "...}";
+          return;
         }
+        LLVM_FALLTHROUGH;
+      default:
         getArrayInitializedElt(I).printPretty(Out, Policy, ElemTy, Ctx);
       }
     }
Index: clang/include/clang/Basic/CharInfo.h
===================================================================
--- clang/include/clang/Basic/CharInfo.h
+++ clang/include/clang/Basic/CharInfo.h
@@ -38,15 +38,16 @@
   };
 } // end namespace charinfo
 
-/// Returns true if this is an ASCII character.
+/// Returns true if a byte is an ASCII character.
 LLVM_READNONE inline bool isASCII(char c) {
   return static_cast<unsigned char>(c) <= 127;
 }
 
 LLVM_READNONE inline bool isASCII(unsigned char c) { return c <= 127; }
 
-/// Returns true if this is an ASCII character.
+/// Returns true if a codepoint is an ASCII character, i.e. within [0, 127].
 LLVM_READNONE inline bool isASCII(uint32_t c) { return c <= 127; }
+LLVM_READNONE inline bool isASCII(int64_t c) { return 0 <= c && c <= 127; }
 
 /// Returns true if this is a valid first character of a C identifier,
 /// which is [a-zA-Z_].
@@ -162,6 +163,44 @@
                           CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL)) != 0;
 }
 
+enum class EscapeChar {
+  Single = 1,
+  Double = 2,
+  SingleAndDouble = static_cast<int>(Single) | static_cast<int>(Double),
+};
+
+/// Return the C-style escape sequence for \p Ch (quotes only when selected by
+/// \p Opt), or an empty string if there is no such mapping.
+template <EscapeChar Opt, class CharT>
+LLVM_READONLY inline auto escapeCStyle(CharT Ch) -> StringRef {
+  switch (Ch) {
+  case '\\':
+    return "\\\\";
+  case '\'':
+    if ((static_cast<int>(Opt) & static_cast<int>(EscapeChar::Single)) == 0)
+      break; // Single quote not selected by Opt: report no mapping.
+    return "\\'";
+  case '"':
+    if ((static_cast<int>(Opt) & static_cast<int>(EscapeChar::Double)) == 0)
+      break; // Double quote not selected by Opt: report no mapping.
+    return "\\\"";
+  case '\a':
+    return "\\a";
+  case '\b':
+    return "\\b";
+  case '\f':
+    return "\\f";
+  case '\n':
+    return "\\n";
+  case '\r':
+    return "\\r";
+  case '\t':
+    return "\\t";
+  case '\v':
+    return "\\v";
+  }
+  return {};
+}
 
 /// Converts the given ASCII character to its lowercase equivalent.
 ///
Index: clang/include/clang/AST/PrettyPrinter.h
===================================================================
--- clang/include/clang/AST/PrettyPrinter.h
+++ clang/include/clang/AST/PrettyPrinter.h
@@ -75,7 +75,8 @@
         MSVCFormatting(false), ConstantsAsWritten(false),
         SuppressImplicitBase(false), FullyQualifiedName(false),
         PrintCanonicalTypes(false), PrintInjectedClassNameWithArguments(true),
-        UsePreferredNames(true), AlwaysIncludeTypeForTemplateArgument(false) {}
+        UsePreferredNames(true), AlwaysIncludeTypeForTemplateArgument(false),
+        EntireContentsOfLargeArray(false) {}
 
   /// Adjust this printing policy for cases where it's known that we're
   /// printing C++ code (for instance, if AST dumping reaches a C++-only
@@ -282,6 +283,10 @@
   /// parameters.
   unsigned AlwaysIncludeTypeForTemplateArgument : 1;
 
+  /// Whether to print the entire array initializers, especially on non-type
+  /// template parameters, no matter how many elements there are.
+  unsigned EntireContentsOfLargeArray : 1;
+
   /// Callbacks to use to allow the behavior of printing to be customized.
   const PrintingCallbacks *Callbacks = nullptr;
 };
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to