llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-tablegen

Author: Krzysztof Parzyszek (kparzysz)

<details>
<summary>Changes</summary>

The code in DirectiveEmitter that generates clause parsers sorted clause names 
to ensure that longer names were tried before shorter ones, in cases where a 
shorter name may be a prefix of a longer one. This matters in the strict 
Fortran source format, since whitespace is ignored there.

This sorting did not take into account clause aliases, which are just 
alternative names. These extra names were not protected in the same way, and 
were just appended immediately after the primary name.

This patch generates a list of pairs Record+Name, where a given record can 
appear multiple times with different names. Sort that list and use it to 
generate parsers for each record. What used to be
```
  ("fred" || "f") &gt;&gt; construct&lt;SomeClause&gt;{} ||
  "foo" &lt;&lt; construct&lt;OtherClause&gt;{}
```
is now
```
  "fred" &gt;&gt; construct&lt;SomeClause&gt;{} ||
  "foo" &gt;&gt; construct&lt;OtherClause&gt;{} ||
  "f" &gt;&gt; construct&lt;SomeClause&gt;{}
```

---
Full diff: https://github.com/llvm/llvm-project/pull/141763.diff


2 Files Affected:

- (modified) llvm/test/TableGen/directive1.td (+3-1) 
- (modified) llvm/utils/TableGen/Basic/DirectiveEmitter.cpp (+39-36) 


``````````diff
diff --git a/llvm/test/TableGen/directive1.td b/llvm/test/TableGen/directive1.td
index 74091edfa2a66..f756f54c03bfb 100644
--- a/llvm/test/TableGen/directive1.td
+++ b/llvm/test/TableGen/directive1.td
@@ -34,6 +34,7 @@ def TDLC_ClauseB : Clause<"clauseb"> {
 }
 
 def TDLC_ClauseC : Clause<"clausec"> {
+  let aliases = ["ccccccc"];
   let flangClass = "IntExpr";
   let isValueList = 1;
 }
@@ -260,7 +261,8 @@ def TDL_DirA : Directive<"dira"> {
 // IMPL-NEXT:  TYPE_PARSER(
 // IMPL-NEXT:    "clausec" >> 
construct<TdlClause>(construct<TdlClause::Clausec>(parenthesized(nonemptyList(Parser<IntExpr>{}))))
 || 
 // IMPL-NEXT:    "clauseb" >> 
construct<TdlClause>(construct<TdlClause::Clauseb>(maybe(parenthesized(Parser<IntExpr>{}))))
 ||
-// IMPL-NEXT:    "clausea" >> 
construct<TdlClause>(construct<TdlClause::Clausea>())
+// IMPL-NEXT:    "clausea" >> 
construct<TdlClause>(construct<TdlClause::Clausea>()) ||
+// IMPL-NEXT:    "ccccccc" >> 
construct<TdlClause>(construct<TdlClause::Clausec>(parenthesized(nonemptyList(Parser<IntExpr>{}))))
 // IMPL-NEXT:  )
 // IMPL-EMPTY:
 // IMPL-NEXT:  #endif // GEN_FLANG_CLAUSES_PARSER
diff --git a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp 
b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp
index 9e79a83ed6e18..bd6c543e1741a 100644
--- a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp
@@ -608,7 +608,7 @@ static void emitLeafTable(const DirectiveLanguage &DirLang, 
raw_ostream &OS,
   std::vector<int> Ordering(Directives.size());
   std::iota(Ordering.begin(), Ordering.end(), 0);
 
-  sort(Ordering, [&](int A, int B) {
+  llvm::sort(Ordering, [&](int A, int B) {
     auto &LeavesA = LeafTable[A];
     auto &LeavesB = LeafTable[B];
     int DirA = LeavesA[0], DirB = LeavesB[0];
@@ -1113,59 +1113,63 @@ static void generateFlangClauseParserKindMap(const 
DirectiveLanguage &DirLang,
      << " Parser clause\");\n";
 }
 
-static bool compareClauseName(const Record *R1, const Record *R2) {
-  Clause C1(R1);
-  Clause C2(R2);
-  return (C1.getName() > C2.getName());
+using RecordWithText = std::pair<const Record *, StringRef>;
+
+static bool compareRecordText(const RecordWithText &A,
+                              const RecordWithText &B) {
+  return A.second > B.second;
+}
+
+static std::vector<RecordWithText>
+getSpellingTexts(ArrayRef<const Record *> Records) {
+  std::vector<RecordWithText> List;
+  for (const Record *R : Records) {
+    Clause C(R);
+    List.push_back(std::make_pair(R, C.getName()));
+    llvm::transform(C.getAliases(), std::back_inserter(List),
+                    [R](StringRef S) { return std::make_pair(R, S); });
+  }
+  return List;
 }
 
 // Generate the parser for the clauses.
 static void generateFlangClausesParser(const DirectiveLanguage &DirLang,
                                        raw_ostream &OS) {
   std::vector<const Record *> Clauses = DirLang.getClauses();
-  // Sort clauses in reverse alphabetical order so with clauses with same
-  // beginning, the longer option is tried before.
-  sort(Clauses, compareClauseName);
+  // Sort clauses in the reverse alphabetical order with respect to their
+  // names and aliases, so that longer names are tried before shorter ones.
+  std::vector<std::pair<const Record *, StringRef>> Names =
+      getSpellingTexts(Clauses);
+  llvm::sort(Names, compareRecordText);
   IfDefScope Scope("GEN_FLANG_CLAUSES_PARSER", OS);
   StringRef Base = DirLang.getFlangClauseBaseClass();
 
+  unsigned LastIndex = Names.size() - 1;
   OS << "\n";
-  unsigned Index = 0;
-  unsigned LastClauseIndex = Clauses.size() - 1;
   OS << "TYPE_PARSER(\n";
-  for (const Clause Clause : Clauses) {
-    const std::vector<StringRef> &Aliases = Clause.getAliases();
-    if (Aliases.empty()) {
-      OS << "  \"" << Clause.getName() << "\"";
-    } else {
-      OS << "  ("
-         << "\"" << Clause.getName() << "\"_tok";
-      for (StringRef Alias : Aliases) {
-        OS << " || \"" << Alias << "\"_tok";
-      }
-      OS << ")";
-    }
+  for (auto [Index, RecTxt] : llvm::enumerate(Names)) {
+    auto [R, N] = RecTxt;
+    Clause C(R);
 
-    StringRef FlangClass = Clause.getFlangClass();
-    OS << " >> construct<" << Base << ">(construct<" << Base
-       << "::" << Clause.getFormattedParserClassName() << ">(";
+    StringRef FlangClass = C.getFlangClass();
+    OS << "  \"" << N << "\" >> construct<" << Base << ">(construct<" << Base
+       << "::" << C.getFormattedParserClassName() << ">(";
     if (FlangClass.empty()) {
       OS << "))";
-      if (Index != LastClauseIndex)
+      if (Index != LastIndex)
         OS << " ||";
       OS << "\n";
-      ++Index;
       continue;
     }
 
-    if (Clause.isValueOptional())
+    if (C.isValueOptional())
       OS << "maybe(";
     OS << "parenthesized(";
-    if (Clause.isValueList())
+    if (C.isValueList())
       OS << "nonemptyList(";
 
-    if (!Clause.getPrefix().empty())
-      OS << "\"" << Clause.getPrefix() << ":\" >> ";
+    if (!C.getPrefix().empty())
+      OS << "\"" << C.getPrefix() << ":\" >> ";
 
     // The common Flang parser are used directly. Their name is identical to
     // the Flang class with first letter as lowercase. If the Flang class is
@@ -1181,19 +1185,18 @@ static void generateFlangClausesParser(const 
DirectiveLanguage &DirLang,
             .Case("ScalarLogicalExpr", "scalarLogicalExpr")
             .Default(("Parser<" + FlangClass + ">{}").toStringRef(Scratch));
     OS << Parser;
-    if (!Clause.getPrefix().empty() && Clause.isPrefixOptional())
+    if (!C.getPrefix().empty() && C.isPrefixOptional())
       OS << " || " << Parser;
-    if (Clause.isValueList()) // close nonemptyList(.
+    if (C.isValueList()) // close nonemptyList(.
       OS << ")";
     OS << ")"; // close parenthesized(.
 
-    if (Clause.isValueOptional()) // close maybe(.
+    if (C.isValueOptional()) // close maybe(.
       OS << ")";
     OS << "))";
-    if (Index != LastClauseIndex)
+    if (Index != LastIndex)
       OS << " ||";
     OS << "\n";
-    ++Index;
   }
   OS << ")\n";
 }

``````````

</details>


https://github.com/llvm/llvm-project/pull/141763
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
  • [... Krzysztof Parzyszek via llvm-branch-commits
    • ... via llvm-branch-commits
    • ... Krzysztof Parzyszek via llvm-branch-commits
    • ... Valentin Clement バレンタイン クレメン via llvm-branch-commits
    • ... Tom Eccles via llvm-branch-commits

Reply via email to