llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-tools-extra Author: Jeremy Rifkin (jeremy-rifkin) <details> <summary>Changes</summary> This PR adds a `--print-terminal-tokens` option to clang-pseudo. When the parse forest is printed (e.g. with `--print-forest`), each terminal node now shows the token itself in addition to its token index. Without the option: ``` › bin/clang-pseudo --source test.cpp --print-forest [ 0, end) translation-unit~simple-declaration := decl-specifier-seq init-declarator-list ; [ 0, 1) ├─decl-specifier-seq~simple-type-specifier := <ambiguous> [ 0, 1) │ ├─simple-type-specifier~IDENTIFIER := tok[0] [ 0, 1) │ └─simple-type-specifier~IDENTIFIER := tok[0] [ 1, 3) ├─init-declarator-list~ptr-declarator := ptr-operator ptr-declarator [ 1, 2) │ ├─ptr-operator~* := tok[1] [ 2, 3) │ └─ptr-declarator~IDENTIFIER := tok[2] [ 3, end) └─; := tok[3] ``` With the option: ``` › bin/clang-pseudo --source test.cpp --print-forest --print-terminal-tokens [ 0, end) translation-unit~simple-declaration := decl-specifier-seq init-declarator-list ; [ 0, 1) ├─decl-specifier-seq~simple-type-specifier := <ambiguous> [ 0, 1) │ ├─simple-type-specifier~IDENTIFIER := tok[0] (identifier 1:0 "T" flags=1) [ 0, 1) │ └─simple-type-specifier~IDENTIFIER := tok[0] (identifier 1:0 "T" flags=1) [ 1, 3) ├─init-declarator-list~ptr-declarator := ptr-operator ptr-declarator [ 1, 2) │ ├─ptr-operator~* := tok[1] (star 1:0 "*") [ 2, 3) │ └─ptr-declarator~IDENTIFIER := tok[2] (identifier 1:0 "y") [ 3, end) └─; := tok[3] (semi 1:0 ";") ``` --- Full diff: https://github.com/llvm/llvm-project/pull/87898.diff 4 Files Affected: - (modified) clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp (+1-1) - (modified) clang-tools-extra/pseudo/include/clang-pseudo/Forest.h (+9-2) - (modified) clang-tools-extra/pseudo/lib/Forest.cpp (+18-8) - (modified) clang-tools-extra/pseudo/tool/ClangPseudo.cpp (+10-2) ``````````diff diff --git a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp index 87b9d15480cc35..33b3da1ed6ea9f 100644 --- 
a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp +++ b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp @@ -46,7 +46,7 @@ class Fuzzer { glrParse(clang::pseudo::ParseParams{ParseableStream, Arena, GSS}, *Lang.G.findNonterminal("translation-unit"), Lang); if (Print) - llvm::outs() << Root.dumpRecursive(Lang.G); + llvm::outs() << Root.dumpRecursive(Lang.G, std::nullopt); } }; diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h b/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h index e9edb40e02b64e..642c489b3fba41 100644 --- a/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h @@ -26,6 +26,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Allocator.h" #include <cstdint> +#include <functional> +#include <optional> namespace clang { namespace pseudo { @@ -112,8 +114,13 @@ class alignas(class ForestNode *) ForestNode { // Iteration over all nodes in the forest, including this. llvm::iterator_range<RecursiveIterator> descendants() const; - std::string dump(const Grammar &) const; - std::string dumpRecursive(const Grammar &, bool Abbreviated = false) const; + std::string + dump(const Grammar &, + std::optional<std::reference_wrapper<const TokenStream>>) const; + std::string + dumpRecursive(const Grammar &, + std::optional<std::reference_wrapper<const TokenStream>>, + bool Abbreviated = false) const; private: friend class ForestArena; diff --git a/clang-tools-extra/pseudo/lib/Forest.cpp b/clang-tools-extra/pseudo/lib/Forest.cpp index e8e60e5ec475a4..adce731d6c1e1c 100644 --- a/clang-tools-extra/pseudo/lib/Forest.cpp +++ b/clang-tools-extra/pseudo/lib/Forest.cpp @@ -45,13 +45,21 @@ ForestNode::descendants() const { return {RecursiveIterator(this), RecursiveIterator()}; } -std::string ForestNode::dump(const Grammar &G) const { +std::string ForestNode::dump( + const Grammar &G, + std::optional<std::reference_wrapper<const TokenStream>> Code) const { switch (kind()) { case Ambiguous: return 
llvm::formatv("{0} := <ambiguous>", G.symbolName(symbol())); case Terminal: - return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()), - startTokenIndex()); + if (Code) { + return llvm::formatv("{0} := tok[{1}] ({2})", G.symbolName(symbol()), + startTokenIndex(), + Code->get().tokens()[startTokenIndex()]); + } else { + return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()), + startTokenIndex()); + } case Sequence: return G.dumpRule(rule()); case Opaque: @@ -60,8 +68,10 @@ std::string ForestNode::dump(const Grammar &G) const { llvm_unreachable("Unhandled node kind!"); } -std::string ForestNode::dumpRecursive(const Grammar &G, - bool Abbreviated) const { +std::string ForestNode::dumpRecursive( + const Grammar &G, + std::optional<std::reference_wrapper<const TokenStream>> Code, + bool Abbreviated) const { using llvm::formatv; Token::Index MaxToken = 0; // Count visits of nodes so we can mark those seen multiple times. @@ -95,7 +105,7 @@ std::string ForestNode::dumpRecursive(const Grammar &G, std::string Result; constexpr Token::Index KEnd = std::numeric_limits<Token::Index>::max(); std::function<void(const ForestNode *, Token::Index, std::optional<SymbolID>, - LineDecoration &LineDec)> + LineDecoration LineDec)> Dump = [&](const ForestNode *P, Token::Index End, std::optional<SymbolID> ElidedParent, LineDecoration LineDec) { bool SharedNode = VisitCounts.find(P)->getSecond() > 1; @@ -145,13 +155,13 @@ std::string ForestNode::dumpRecursive(const Grammar &G, // The first time, print as #1. Later, =#1. if (First) { - Result += formatv("{0} #{1}", P->dump(G), ID); + Result += formatv("{0} #{1}", P->dump(G, Code), ID); } else { Result += formatv("{0} =#{1}", G.symbolName(P->symbol()), ID); Children = {}; // Don't walk the children again. 
} } else { - Result.append(P->dump(G)); + Result.append(P->dump(G, Code)); } Result.push_back('\n'); diff --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp index 6a64760749cefe..4797dc01cdc13b 100644 --- a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp +++ b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp @@ -51,6 +51,9 @@ static opt<bool> Disambiguate("disambiguate", desc("Choose best tree from parse forest")); static opt<bool> PrintStatistics("print-statistics", desc("Print GLR parser statistics")); static opt<bool> PrintForest("print-forest", desc("Print parse forest")); +static opt<bool> + PrintTerminalTokens("print-terminal-tokens", + desc("Print terminal tokens in parse forest")); static opt<bool> ForestAbbrev("forest-abbrev", desc("Abbreviate parse forest"), init(true)); static opt<std::string> HTMLForest("html-forest", @@ -161,9 +164,14 @@ int main(int argc, char *argv[]) { auto &Root = glrParse(clang::pseudo::ParseParams{*ParseableStream, Arena, GSS}, *StartSymID, Lang); + std::optional<std::reference_wrapper<const TokenStream>> Code; + if (PrintTerminalTokens) { + Code = *ParseableStream; + } // If we're disambiguating, we'll print at the end instead. 
if (PrintForest && !Disambiguate) - llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/ForestAbbrev); + llvm::outs() << Root.dumpRecursive(Lang.G, Code, + /*Abbreviated=*/ForestAbbrev); clang::pseudo::Disambiguation Disambig; if (Disambiguate) Disambig = clang::pseudo::disambiguate(&Root, {}); @@ -234,7 +242,7 @@ int main(int argc, char *argv[]) { ForestNode *DisambigRoot = &Root; removeAmbiguities(DisambigRoot, Disambig); llvm::outs() << "Disambiguated tree:\n"; - llvm::outs() << DisambigRoot->dumpRecursive(Lang.G, + llvm::outs() << DisambigRoot->dumpRecursive(Lang.G, Code, /*Abbreviated=*/ForestAbbrev); } } `````````` </details> https://github.com/llvm/llvm-project/pull/87898 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits