[clang] [clang] Make -dump-tokens option align tokens (PR #164894)

via cfe-commits Sun, 15 Mar 2026 14:38:16 -0700

https://github.com/alexpaniman updated https://github.com/llvm/llvm-project/pull/164894


>From 177eda24280d066b211045b83cb52a7204796c11 Mon Sep 17 00:00:00 2001
From: alexpaniman <[email protected]>
Date: Thu, 23 Oct 2025 23:25:45 +0300
Subject: [PATCH 1/4] [clang] Make -dump-tokens option align tokens

---
 clang/lib/Lex/Preprocessor.cpp          | 19 +++++++++++--------
 clang/test/Preprocessor/dump-tokens.cpp | 16 ++++++++++++++++
 2 files changed, 27 insertions(+), 8 deletions(-)
 create mode 100644 clang/test/Preprocessor/dump-tokens.cpp

diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index a531f51408dae..115edf84b488f 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -61,6 +61,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Capacity.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/MemoryBufferRef.h"
 #include "llvm/Support/SaveAndRestore.h"
@@ -240,14 +241,20 @@ void Preprocessor::FinalizeForModelFile() {
 }
 
 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
-  llvm::errs() << tok::getTokenName(Tok.getKind());
+  llvm::errs() << llvm::formatv("{0,-16} ", tok::getTokenName(Tok.getKind()));
 
-  if (!Tok.isAnnotation())
-    llvm::errs() << " '" << getSpelling(Tok) << "'";
+  std::string Spelling;
+  if (!Tok.isAnnotation()) {
+    Spelling = llvm::formatv("{0,-32} ", "'" + getSpelling(Tok) + "'");
+  }
+  llvm::errs() << Spelling;
 
   if (!DumpFlags) return;
 
-  llvm::errs() << "\t";
+  llvm::errs() << "Loc=<";
+  DumpLocation(Tok.getLocation());
+  llvm::errs() << ">";
+
   if (Tok.isAtStartOfLine())
     llvm::errs() << " [StartOfLine]";
   if (Tok.hasLeadingSpace())
@@ -259,10 +266,6 @@ void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
     llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
                  << "']";
   }
-
-  llvm::errs() << "\tLoc=<";
-  DumpLocation(Tok.getLocation());
-  llvm::errs() << ">";
 }
 
 void Preprocessor::DumpLocation(SourceLocation Loc) const {
diff --git a/clang/test/Preprocessor/dump-tokens.cpp b/clang/test/Preprocessor/dump-tokens.cpp
new file mode 100644
index 0000000000000..3774894943b87
--- /dev/null
+++ b/clang/test/Preprocessor/dump-tokens.cpp
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -dump-tokens %s 2>&1 | FileCheck %s
+
+->                           // CHECK: arrow            '->'
+5                            // CHECK: numeric_constant '5'
+id                           // CHECK: identifier       'id'
+&                            // CHECK: amp              '&'
+)                            // CHECK: r_paren          ')'
+unsigned                     // CHECK: unsigned         'unsigned'
+~                            // CHECK: tilde            '~'
+long_variable_name_very_long // CHECK: identifier       'long_variable_name_very_long'
+union                        // CHECK: union            'union'
+42                           // CHECK: numeric_constant '42'
+j                            // CHECK: identifier       'j'
+&=                           // CHECK: ampequal         '&='
+15                           // CHECK: numeric_constant '15'
+

>From fd1edc5312f9da6a3a2ed018d7f759a5293e2108 Mon Sep 17 00:00:00 2001
From: alexpaniman <[email protected]>
Date: Fri, 24 Oct 2025 19:52:33 +0300
Subject: [PATCH 2/4] [clang] Remove unnecessary variable from
 Preprocessor::DumpToken

---
 clang/lib/Lex/Preprocessor.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 115edf84b488f..4b88fbada5add 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -243,11 +243,9 @@ void Preprocessor::FinalizeForModelFile() {
 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
   llvm::errs() << llvm::formatv("{0,-16} ", tok::getTokenName(Tok.getKind()));
 
-  std::string Spelling;
   if (!Tok.isAnnotation()) {
-    Spelling = llvm::formatv("{0,-32} ", "'" + getSpelling(Tok) + "'");
+    llvm::errs() << llvm::formatv("{0,-32} ", "'" + getSpelling(Tok) + "'");
   }
-  llvm::errs() << Spelling;
 
   if (!DumpFlags) return;
 

>From f0f266aa535c5c571f6533ba1c1d433ca06f91a7 Mon Sep 17 00:00:00 2001
From: alexpaniman <[email protected]>
Date: Fri, 24 Oct 2025 20:10:44 +0300
Subject: [PATCH 3/4] [clang] Ensure consistent spacing for annotations too in
 Preprocessor::DumpToken

---
 clang/lib/Lex/Preprocessor.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 4b88fbada5add..c744a6ef54765 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -243,10 +243,13 @@ void Preprocessor::FinalizeForModelFile() {
 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
   llvm::errs() << llvm::formatv("{0,-16} ", tok::getTokenName(Tok.getKind()));
 
+  std::string Spelling;
   if (!Tok.isAnnotation()) {
-    llvm::errs() << llvm::formatv("{0,-32} ", "'" + getSpelling(Tok) + "'");
+    Spelling = "'" + getSpelling(Tok) + "'";
   }
 
+  llvm::errs() << llvm::formatv("{0,-32} ", Spelling);
+
   if (!DumpFlags) return;
 
   llvm::errs() << "Loc=<";

>From 28d0870612ec50ec1c65a92f64406c95dfaf5539 Mon Sep 17 00:00:00 2001
From: alexpaniman <[email protected]>
Date: Sun, 15 Mar 2026 20:47:10 +0300
Subject: [PATCH 4/4] [clang] Escape multiline tokens and align trailing
 markers in DumpToken

---
 clang/lib/Lex/Preprocessor.cpp          | 51 +++++++++++++++++++++----
 clang/test/Preprocessor/dump-tokens.cpp | 46 +++++++++++++++-------
 2 files changed, 76 insertions(+), 21 deletions(-)

diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index c744a6ef54765..2eb688feb29dd 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -241,21 +241,59 @@ void Preprocessor::FinalizeForModelFile() {
 }
 
 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
-  llvm::errs() << llvm::formatv("{0,-16} ", tok::getTokenName(Tok.getKind()));
+  std::string TokenStr;
+  llvm::raw_string_ostream OS(TokenStr);
 
-  std::string Spelling;
+  // The alignment of 16 is chosen to comfortably fit most identifiers.
+  OS << llvm::formatv("{0,-16} ", tok::getTokenName(Tok.getKind()));
+
+  // Annotation tokens are just markers that don't have a spelling -- they
+  // indicate where something expanded.
   if (!Tok.isAnnotation()) {
-    Spelling = "'" + getSpelling(Tok) + "'";
+    OS << "'";
+    // Escape string to prevent token spelling from spanning multiple lines.
+    OS.write_escaped(getSpelling(Tok));
+    OS << "'";
   }
 
-  llvm::errs() << llvm::formatv("{0,-32} ", Spelling);
+  // The alignment of 48 (32 characters for the spelling + the 16 for
+  // the identifier name) fits most variable names, keywords and annotations.
+  llvm::errs() << llvm::formatv("{0,-48} ", OS.str());
 
   if (!DumpFlags) return;
 
+  auto Loc = Tok.getLocation();
   llvm::errs() << "Loc=<";
-  DumpLocation(Tok.getLocation());
+  DumpLocation(Loc);
   llvm::errs() << ">";
 
+  // If the token points directly to a file location (i.e. not a macro
+  // expansion), then add additional padding so that trailing markers
+  // align, provided the line/column numbers are reasonably sized.
+  //
+  // Otherwise, if it's a macro expansion, don't bother with alignment,
+  // as the line will include multiple locations and be very long.
+  //
+  // NOTE: To keep this stateless, it doesn't account for filename
+  // length, so when a header starts markers will be temporarily misaligned.
+  if (Loc.isFileID()) {
+    PresumedLoc PLoc = SourceMgr.getPresumedLoc(Loc);
+
+    if (!PLoc.isInvalid()) {
+      int LineWidth = llvm::utostr(PLoc.getLine()).size();
+      int ColumnWidth = llvm::utostr(PLoc.getColumn()).size();
+
+      // Reserve space for lines up to 9999 and columns up to 99,
+      // which is 4 + 2 = 6 characters in total.
+      const int ReservedSpace = 6;
+
+      int LeftSpace = ReservedSpace - LineWidth - ColumnWidth;
+      int Padding = std::max<int>(0, LeftSpace);
+
+      llvm::errs().indent(Padding);
+    }
+  }
+
   if (Tok.isAtStartOfLine())
     llvm::errs() << " [StartOfLine]";
   if (Tok.hasLeadingSpace())
@@ -264,8 +302,7 @@ void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
     llvm::errs() << " [ExpandDisabled]";
   if (Tok.needsCleaning()) {
     const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
-    llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
-                 << "']";
+    llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) << "']";
   }
 }
 
diff --git a/clang/test/Preprocessor/dump-tokens.cpp b/clang/test/Preprocessor/dump-tokens.cpp
index 3774894943b87..0a9d459688922 100644
--- a/clang/test/Preprocessor/dump-tokens.cpp
+++ b/clang/test/Preprocessor/dump-tokens.cpp
@@ -1,16 +1,34 @@
-// RUN: %clang_cc1 -dump-tokens %s 2>&1 | FileCheck %s
+// RUN: %clang_cc1 -dump-tokens %s 2>&1 | FileCheck %s --strict-whitespace
 
-->                           // CHECK: arrow            '->'
-5                            // CHECK: numeric_constant '5'
-id                           // CHECK: identifier       'id'
-&                            // CHECK: amp              '&'
-)                            // CHECK: r_paren          ')'
-unsigned                     // CHECK: unsigned         'unsigned'
-~                            // CHECK: tilde            '~'
-long_variable_name_very_long // CHECK: identifier       'long_variable_name_very_long'
-union                        // CHECK: union            'union'
-42                           // CHECK: numeric_constant '42'
-j                            // CHECK: identifier       'j'
-&=                           // CHECK: ampequal         '&='
-15                           // CHECK: numeric_constant '15'
+// Different kinds of identifiers with different spelling lengths
+->                                  // CHECK:      arrow            '->'       
                     Loc=<{{.*}}:4:1>     [StartOfLine]
+5                                   // CHECK-NEXT: numeric_constant '5'        
                     Loc=<{{.*}}:5:1>     [StartOfLine]
+id                                  // CHECK-NEXT: identifier       'id'       
                     Loc=<{{.*}}:6:1>     [StartOfLine]
+&                                   // CHECK-NEXT: amp              '&'        
                     Loc=<{{.*}}:7:1>     [StartOfLine]
+)                                   // CHECK-NEXT: r_paren          ')'        
                     Loc=<{{.*}}:8:1>     [StartOfLine]
+unsigned                            // CHECK-NEXT: unsigned         'unsigned' 
                     Loc=<{{.*}}:9:1>     [StartOfLine]
+~                                   // CHECK-NEXT: tilde            '~'        
                     Loc=<{{.*}}:10:1>    [StartOfLine]
+long_variable_name_very_long        // CHECK-NEXT: identifier       
'long_variable_name_very_long'  Loc=<{{.*}}:11:1>    [StartOfLine]
+union                               // CHECK-NEXT: union            'union'    
                     Loc=<{{.*}}:12:1>    [StartOfLine]
+42                                  // CHECK-NEXT: numeric_constant '42'       
                     Loc=<{{.*}}:13:1>    [StartOfLine]
+j                                   // CHECK-NEXT: identifier       'j'        
                     Loc=<{{.*}}:14:1>    [StartOfLine]
+&=                                  // CHECK-NEXT: ampequal         '&='       
                     Loc=<{{.*}}:15:1>    [StartOfLine]
+15                                  // CHECK-NEXT: numeric_constant '15'       
                     Loc=<{{.*}}:16:1>    [StartOfLine]
+
+// Different locations in line and trailing markers
+ at different locations= in line    // CHECK-NEXT: identifier       'at'       
                     Loc=<{{.*}}:19:2>    [StartOfLine] [LeadingSpace]
+                                    // CHECK-NEXT: identifier       
'different'                     Loc=<{{.*}}:19:5>    [LeadingSpace]
+                                    // CHECK-NEXT: identifier       
'locations'                     Loc=<{{.*}}:19:15>   [LeadingSpace]
+                                    // CHECK-NEXT: equal            '='        
                     Loc=<{{.*}}:19:24>
+                                    // CHECK-NEXT: identifier       'in'       
                     Loc=<{{.*}}:19:26>   [LeadingSpace]
+                                    // CHECK-NEXT: identifier       'line'     
                     Loc=<{{.*}}:19:29>   [LeadingSpace]
+
+// Tokens that require escaping & annotations
+#pragma clang __debug parser_crash  // CHECK-NEXT: annot_pragma_parser_crash   
                     Loc=<{{.*}}:27:23>
+                                    // CHECK-NEXT: eod              '\n'       
                     Loc=<{{.*}}:27:119>  [LeadingSpace]
+#pragma clang __debug captured      // CHECK-NEXT: annot_pragma_captured       
                     Loc=<{{.*}}:29:120>
+#pragma clang __debug dump X        // CHECK-NEXT: annot_pragma_dump           
                     Loc=<{{.*}}:30:23>
+                                    // CHECK-NEXT: identifier       'X'        
                     Loc=<{{.*}}:30:28>   [LeadingSpace]
+                                    // CHECK-NEXT: eod              '\n'       
                     Loc=<{{.*}}:30:119>  [LeadingSpace]
+                                    // CHECK-NEXT: eof              ''         
                     Loc=<{{.*}}:34:1>
 

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] Make -dump-tokens option align tokens (PR #164894)

Reply via email to