https://github.com/kparzysz created https://github.com/llvm/llvm-project/pull/141763
The code in DirectiveEmitter that generates clause parsers sorted clause names to ensure that longer names were tried before shorter ones, in cases where a shorter name may be a prefix of a longer one. This matters in the strict Fortran source format, since whitespace is ignored there. This sorting did not take into account clause aliases, which are just alternative names. These extra names were not protected in the same way, and were just appended immediately after the primary name. This patch generates a list of pairs Record+Name, where a given record can appear multiple times with different names. Sort that list and use it to generate parsers for each record. What used to be ``` ("fred" || "f") >> construct<SomeClause>{} || "foo" << construct<OtherClause>{} ``` is now ``` "fred" >> construct<SomeClause>{} || "foo" >> construct<OtherClause>{} || "f" >> construct<SomeClause>{} ``` >From e7d2e0b40eae0bf37f76d0aa8a59520b529c760c Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek <krzysztof.parzys...@amd.com> Date: Wed, 21 May 2025 14:23:38 -0500 Subject: [PATCH] [utils][TableGen] Treat clause aliases equally with names The code in DirectiveEmitter that generates clause parsers sorted clause names to ensure that longer names were tried before shorter ones, in cases where a shorter name may be a prefix of a longer one. This matters in the strict Fortran source format, since whitespace is ignored there. This sorting did not take into account clause aliases, which are just alternative names. These extra names were not protected in the same way, and were just appended immediately after the primary name. This patch generates a list of pairs Record+Name, where a given record can appear multiple times with different names. Sort that list and use it to generate parsers for each record. What used to be ``` ("fred" || "f") >> construct<SomeClause>{} || "foo" << construct<OtherClause>{} ``` is now ``` "fred" >> construct<SomeClause>{} || "foo" >> construct<OtherClause>{} || "f" >> construct<SomeClause>{} ``` --- llvm/test/TableGen/directive1.td | 4 +- .../utils/TableGen/Basic/DirectiveEmitter.cpp | 75 ++++++++++--------- 2 files changed, 42 insertions(+), 37 deletions(-) diff --git a/llvm/test/TableGen/directive1.td b/llvm/test/TableGen/directive1.td index 74091edfa2a66..f756f54c03bfb 100644 --- a/llvm/test/TableGen/directive1.td +++ b/llvm/test/TableGen/directive1.td @@ -34,6 +34,7 @@ def TDLC_ClauseB : Clause<"clauseb"> { } def TDLC_ClauseC : Clause<"clausec"> { + let aliases = ["ccccccc"]; let flangClass = "IntExpr"; let isValueList = 1; } @@ -260,7 +261,8 @@ def TDL_DirA : Directive<"dira"> { // IMPL-NEXT: TYPE_PARSER( // IMPL-NEXT: "clausec" >> construct<TdlClause>(construct<TdlClause::Clausec>(parenthesized(nonemptyList(Parser<IntExpr>{})))) || // IMPL-NEXT: "clauseb" >> construct<TdlClause>(construct<TdlClause::Clauseb>(maybe(parenthesized(Parser<IntExpr>{})))) || -// IMPL-NEXT: "clausea" >> construct<TdlClause>(construct<TdlClause::Clausea>()) +// IMPL-NEXT: "clausea" >> construct<TdlClause>(construct<TdlClause::Clausea>()) || +// IMPL-NEXT: "ccccccc" >> construct<TdlClause>(construct<TdlClause::Clausec>(parenthesized(nonemptyList(Parser<IntExpr>{})))) // IMPL-NEXT: ) // IMPL-EMPTY: // IMPL-NEXT: #endif // GEN_FLANG_CLAUSES_PARSER diff --git a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp index 9e79a83ed6e18..bd6c543e1741a 100644 --- a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp +++ b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp @@ -608,7 +608,7 @@ static void emitLeafTable(const DirectiveLanguage &DirLang, raw_ostream &OS, std::vector<int> Ordering(Directives.size()); std::iota(Ordering.begin(), Ordering.end(), 0); - sort(Ordering, [&](int A, int B) { + llvm::sort(Ordering, [&](int A, int B) { auto &LeavesA = LeafTable[A]; auto &LeavesB = LeafTable[B]; int DirA = LeavesA[0], DirB = LeavesB[0]; @@ -1113,59 +1113,63 @@ static void generateFlangClauseParserKindMap(const DirectiveLanguage &DirLang, << " Parser clause\");\n"; } -static bool compareClauseName(const Record *R1, const Record *R2) { - Clause C1(R1); - Clause C2(R2); - return (C1.getName() > C2.getName()); +using RecordWithText = std::pair<const Record *, StringRef>; + +static bool compareRecordText(const RecordWithText &A, + const RecordWithText &B) { + return A.second > B.second; +} + +static std::vector<RecordWithText> +getSpellingTexts(ArrayRef<const Record *> Records) { + std::vector<RecordWithText> List; + for (const Record *R : Records) { + Clause C(R); + List.push_back(std::make_pair(R, C.getName())); + llvm::transform(C.getAliases(), std::back_inserter(List), + [R](StringRef S) { return std::make_pair(R, S); }); + } + return List; } // Generate the parser for the clauses. static void generateFlangClausesParser(const DirectiveLanguage &DirLang, raw_ostream &OS) { std::vector<const Record *> Clauses = DirLang.getClauses(); - // Sort clauses in reverse alphabetical order so with clauses with same - // beginning, the longer option is tried before. - sort(Clauses, compareClauseName); + // Sort clauses in the reverse alphabetical order with respect to their + // names and aliases, so that longer names are tried before shorter ones. + std::vector<std::pair<const Record *, StringRef>> Names = + getSpellingTexts(Clauses); + llvm::sort(Names, compareRecordText); IfDefScope Scope("GEN_FLANG_CLAUSES_PARSER", OS); StringRef Base = DirLang.getFlangClauseBaseClass(); + unsigned LastIndex = Names.size() - 1; OS << "\n"; - unsigned Index = 0; - unsigned LastClauseIndex = Clauses.size() - 1; OS << "TYPE_PARSER(\n"; - for (const Clause Clause : Clauses) { - const std::vector<StringRef> &Aliases = Clause.getAliases(); - if (Aliases.empty()) { - OS << " \"" << Clause.getName() << "\""; - } else { - OS << " (" - << "\"" << Clause.getName() << "\"_tok"; - for (StringRef Alias : Aliases) { - OS << " || \"" << Alias << "\"_tok"; - } - OS << ")"; - } + for (auto [Index, RecTxt] : llvm::enumerate(Names)) { + auto [R, N] = RecTxt; + Clause C(R); - StringRef FlangClass = Clause.getFlangClass(); - OS << " >> construct<" << Base << ">(construct<" << Base - << "::" << Clause.getFormattedParserClassName() << ">("; + StringRef FlangClass = C.getFlangClass(); + OS << " \"" << N << "\" >> construct<" << Base << ">(construct<" << Base + << "::" << C.getFormattedParserClassName() << ">("; if (FlangClass.empty()) { OS << "))"; - if (Index != LastClauseIndex) + if (Index != LastIndex) OS << " ||"; OS << "\n"; - ++Index; continue; } - if (Clause.isValueOptional()) + if (C.isValueOptional()) OS << "maybe("; OS << "parenthesized("; - if (Clause.isValueList()) + if (C.isValueList()) OS << "nonemptyList("; - if (!Clause.getPrefix().empty()) - OS << "\"" << Clause.getPrefix() << ":\" >> "; + if (!C.getPrefix().empty()) + OS << "\"" << C.getPrefix() << ":\" >> "; // The common Flang parser are used directly. Their name is identical to // the Flang class with first letter as lowercase. If the Flang class is @@ -1181,19 +1185,18 @@ static void generateFlangClausesParser(const DirectiveLanguage &DirLang, .Case("ScalarLogicalExpr", "scalarLogicalExpr") .Default(("Parser<" + FlangClass + ">{}").toStringRef(Scratch)); OS << Parser; - if (!Clause.getPrefix().empty() && Clause.isPrefixOptional()) + if (!C.getPrefix().empty() && C.isPrefixOptional()) OS << " || " << Parser; - if (Clause.isValueList()) // close nonemptyList(. + if (C.isValueList()) // close nonemptyList(. OS << ")"; OS << ")"; // close parenthesized(. - if (Clause.isValueOptional()) // close maybe(. + if (C.isValueOptional()) // close maybe(. OS << ")"; OS << "))"; - if (Index != LastClauseIndex) + if (Index != LastIndex) OS << " ||"; OS << "\n"; - ++Index; } OS << ")\n"; } _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits