https://github.com/alexpaniman updated https://github.com/llvm/llvm-project/pull/164894
>From 177eda24280d066b211045b83cb52a7204796c11 Mon Sep 17 00:00:00 2001 From: alexpaniman <[email protected]> Date: Thu, 23 Oct 2025 23:25:45 +0300 Subject: [PATCH 1/4] [clang] Make -dump-tokens option align tokens --- clang/lib/Lex/Preprocessor.cpp | 19 +++++++++++-------- clang/test/Preprocessor/dump-tokens.cpp | 16 ++++++++++++++++ 2 files changed, 27 insertions(+), 8 deletions(-) create mode 100644 clang/test/Preprocessor/dump-tokens.cpp diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index a531f51408dae..115edf84b488f 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -61,6 +61,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/Capacity.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/MemoryBufferRef.h" #include "llvm/Support/SaveAndRestore.h" @@ -240,14 +241,20 @@ void Preprocessor::FinalizeForModelFile() { } void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { - llvm::errs() << tok::getTokenName(Tok.getKind()); + llvm::errs() << llvm::formatv("{0,-16} ", tok::getTokenName(Tok.getKind())); - if (!Tok.isAnnotation()) - llvm::errs() << " '" << getSpelling(Tok) << "'"; + std::string Spelling; + if (!Tok.isAnnotation()) { + Spelling = llvm::formatv("{0,-32} ", "'" + getSpelling(Tok) + "'"); + } + llvm::errs() << Spelling; if (!DumpFlags) return; - llvm::errs() << "\t"; + llvm::errs() << "Loc=<"; + DumpLocation(Tok.getLocation()); + llvm::errs() << ">"; + if (Tok.isAtStartOfLine()) llvm::errs() << " [StartOfLine]"; if (Tok.hasLeadingSpace()) @@ -259,10 +266,6 @@ void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) << "']"; } - - llvm::errs() << "\tLoc=<"; - DumpLocation(Tok.getLocation()); - llvm::errs() << ">"; } void Preprocessor::DumpLocation(SourceLocation Loc) const { diff --git a/clang/test/Preprocessor/dump-tokens.cpp b/clang/test/Preprocessor/dump-tokens.cpp new file mode 100644 index 0000000000000..3774894943b87 --- /dev/null +++ b/clang/test/Preprocessor/dump-tokens.cpp @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -dump-tokens %s 2>&1 | FileCheck %s + +-> // CHECK: arrow '->' +5 // CHECK: numeric_constant '5' +id // CHECK: identifier 'id' +& // CHECK: amp '&' +) // CHECK: r_paren ')' +unsigned // CHECK: unsigned 'unsigned' +~ // CHECK: tilde '~' +long_variable_name_very_long // CHECK: identifier 'long_variable_name_very_long' +union // CHECK: union 'union' +42 // CHECK: numeric_constant '42' +j // CHECK: identifier 'j' +&= // CHECK: ampequal '&=' +15 // CHECK: numeric_constant '15' + >From fd1edc5312f9da6a3a2ed018d7f759a5293e2108 Mon Sep 17 00:00:00 2001 From: alexpaniman <[email protected]> Date: Fri, 24 Oct 2025 19:52:33 +0300 Subject: [PATCH 2/4] [clang] Remove unnecessary variable from Preprocessor::DumpToken --- clang/lib/Lex/Preprocessor.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 115edf84b488f..4b88fbada5add 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -243,11 +243,9 @@ void Preprocessor::FinalizeForModelFile() { void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { llvm::errs() << llvm::formatv("{0,-16} ", tok::getTokenName(Tok.getKind())); - std::string Spelling; if (!Tok.isAnnotation()) { - Spelling = llvm::formatv("{0,-32} ", "'" + getSpelling(Tok) + "'"); + llvm::errs() << llvm::formatv("{0,-32} ", "'" + getSpelling(Tok) + "'"); } - llvm::errs() << Spelling; if (!DumpFlags) return; >From f0f266aa535c5c571f6533ba1c1d433ca06f91a7 Mon Sep 17 00:00:00 2001 From: alexpaniman <[email protected]> Date: Fri, 24 Oct 2025 20:10:44 +0300 Subject: [PATCH 3/4] [clang] Ensure consistent spacing for annotations too in Preprocessor::DumpToken --- clang/lib/Lex/Preprocessor.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 4b88fbada5add..c744a6ef54765 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -243,10 +243,13 @@ void Preprocessor::FinalizeForModelFile() { void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { llvm::errs() << llvm::formatv("{0,-16} ", tok::getTokenName(Tok.getKind())); + std::string Spelling; if (!Tok.isAnnotation()) { - llvm::errs() << llvm::formatv("{0,-32} ", "'" + getSpelling(Tok) + "'"); + Spelling = "'" + getSpelling(Tok) + "'"; } + llvm::errs() << llvm::formatv("{0,-32} ", Spelling); + if (!DumpFlags) return; llvm::errs() << "Loc=<"; >From 28d0870612ec50ec1c65a92f64406c95dfaf5539 Mon Sep 17 00:00:00 2001 From: alexpaniman <[email protected]> Date: Sun, 15 Mar 2026 20:47:10 +0300 Subject: [PATCH 4/4] [clang] Escape multiline tokens and align trailing markers in DumpToken --- clang/lib/Lex/Preprocessor.cpp | 51 +++++++++++++++++++++---- clang/test/Preprocessor/dump-tokens.cpp | 46 +++++++++++++++------- 2 files changed, 76 insertions(+), 21 deletions(-) diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index c744a6ef54765..2eb688feb29dd 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -241,21 +241,59 @@ void Preprocessor::FinalizeForModelFile() { } void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { - llvm::errs() << llvm::formatv("{0,-16} ", tok::getTokenName(Tok.getKind())); + std::string TokenStr; + llvm::raw_string_ostream OS(TokenStr); - std::string Spelling; + // The alignment of 16 is chosen to comfortably fit most identifiers. + OS << llvm::formatv("{0,-16} ", tok::getTokenName(Tok.getKind())); + + // Annotation tokens are just markers that don't have a spelling -- they + // indicate where something expanded. if (!Tok.isAnnotation()) { - Spelling = "'" + getSpelling(Tok) + "'"; + OS << "'"; + // Escape string to prevent token spelling from spanning multiple lines. + OS.write_escaped(getSpelling(Tok)); + OS << "'"; } - llvm::errs() << llvm::formatv("{0,-32} ", Spelling); + // The alignment of 48 (32 characters for the spelling + the 16 for + // the identifier name) fits most variable names, keywords and annotations. + llvm::errs() << llvm::formatv("{0,-48} ", OS.str()); if (!DumpFlags) return; + auto Loc = Tok.getLocation(); llvm::errs() << "Loc=<"; - DumpLocation(Tok.getLocation()); + DumpLocation(Loc); llvm::errs() << ">"; + // If the token points directly to a file location (i.e. not a macro + // expansion), then add additional padding so that trailing markers + // align, provided the line/column numbers are reasonably sized. + // + // Otherwise, if it's a macro expansion, don't bother with alignment, + // as the line will include multiple locations and be very long. + // + // NOTE: To keep this stateless, it doesn't account for filename + // length, so when a header starts markers will be temporarily misaligned. + if (Loc.isFileID()) { + PresumedLoc PLoc = SourceMgr.getPresumedLoc(Loc); + + if (!PLoc.isInvalid()) { + int LineWidth = llvm::utostr(PLoc.getLine()).size(); + int ColumnWidth = llvm::utostr(PLoc.getColumn()).size(); + + // Reserve space for lines up to 9999 and columns up to 99, + // which is 4 + 2 = 6 characters in total. + const int ReservedSpace = 6; + + int LeftSpace = ReservedSpace - LineWidth - ColumnWidth; + int Padding = std::max<int>(0, LeftSpace); + + llvm::errs().indent(Padding); + } + } + if (Tok.isAtStartOfLine()) llvm::errs() << " [StartOfLine]"; if (Tok.hasLeadingSpace()) @@ -264,8 +302,7 @@ void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { llvm::errs() << " [ExpandDisabled]"; if (Tok.needsCleaning()) { const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); - llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) - << "']"; + llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) << "']"; } } diff --git a/clang/test/Preprocessor/dump-tokens.cpp b/clang/test/Preprocessor/dump-tokens.cpp index 3774894943b87..0a9d459688922 100644 --- a/clang/test/Preprocessor/dump-tokens.cpp +++ b/clang/test/Preprocessor/dump-tokens.cpp @@ -1,16 +1,34 @@ -// RUN: %clang_cc1 -dump-tokens %s 2>&1 | FileCheck %s +// RUN: %clang_cc1 -dump-tokens %s 2>&1 | FileCheck %s --strict-whitespace --> // CHECK: arrow '->' -5 // CHECK: numeric_constant '5' -id // CHECK: identifier 'id' -& // CHECK: amp '&' -) // CHECK: r_paren ')' -unsigned // CHECK: unsigned 'unsigned' -~ // CHECK: tilde '~' -long_variable_name_very_long // CHECK: identifier 'long_variable_name_very_long' -union // CHECK: union 'union' -42 // CHECK: numeric_constant '42' -j // CHECK: identifier 'j' -&= // CHECK: ampequal '&=' -15 // CHECK: numeric_constant '15' +// Different kinds of identifiers with different spelling lengths +-> // CHECK: arrow '->' Loc=<{{.*}}:4:1> [StartOfLine] +5 // CHECK-NEXT: numeric_constant '5' Loc=<{{.*}}:5:1> [StartOfLine] +id // CHECK-NEXT: identifier 'id' Loc=<{{.*}}:6:1> [StartOfLine] +& // CHECK-NEXT: amp '&' Loc=<{{.*}}:7:1> [StartOfLine] +) // CHECK-NEXT: r_paren ')' Loc=<{{.*}}:8:1> [StartOfLine] +unsigned // CHECK-NEXT: unsigned 'unsigned' Loc=<{{.*}}:9:1> [StartOfLine] +~ // CHECK-NEXT: tilde '~' Loc=<{{.*}}:10:1> [StartOfLine] +long_variable_name_very_long // CHECK-NEXT: identifier 'long_variable_name_very_long' Loc=<{{.*}}:11:1> [StartOfLine] +union // CHECK-NEXT: union 'union' Loc=<{{.*}}:12:1> [StartOfLine] +42 // CHECK-NEXT: numeric_constant '42' Loc=<{{.*}}:13:1> [StartOfLine] +j // CHECK-NEXT: identifier 'j' Loc=<{{.*}}:14:1> [StartOfLine] +&= // CHECK-NEXT: ampequal '&=' Loc=<{{.*}}:15:1> [StartOfLine] +15 // CHECK-NEXT: numeric_constant '15' Loc=<{{.*}}:16:1> [StartOfLine] + +// Different locations in line and trailing markers + at different locations= in line // CHECK-NEXT: identifier 'at' Loc=<{{.*}}:19:2> [StartOfLine] [LeadingSpace] + // CHECK-NEXT: identifier 'different' Loc=<{{.*}}:19:5> [LeadingSpace] + // CHECK-NEXT: identifier 'locations' Loc=<{{.*}}:19:15> [LeadingSpace] + // CHECK-NEXT: equal '=' Loc=<{{.*}}:19:24> + // CHECK-NEXT: identifier 'in' Loc=<{{.*}}:19:26> [LeadingSpace] + // CHECK-NEXT: identifier 'line' Loc=<{{.*}}:19:29> [LeadingSpace] + +// Tokens that require escaping & annotations +#pragma clang __debug parser_crash // CHECK-NEXT: annot_pragma_parser_crash Loc=<{{.*}}:27:23> + // CHECK-NEXT: eod '\n' Loc=<{{.*}}:27:119> [LeadingSpace] +#pragma clang __debug captured // CHECK-NEXT: annot_pragma_captured Loc=<{{.*}}:29:120> +#pragma clang __debug dump X // CHECK-NEXT: annot_pragma_dump Loc=<{{.*}}:30:23> + // CHECK-NEXT: identifier 'X' Loc=<{{.*}}:30:28> [LeadingSpace] + // CHECK-NEXT: eod '\n' Loc=<{{.*}}:30:119> [LeadingSpace] + // CHECK-NEXT: eof '' Loc=<{{.*}}:34:1> _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
