[llvm-branch-commits] [llvm] [utils][TableGen] Treat clause aliases equally with names (PR #141763)

Krzysztof Parzyszek via llvm-branch-commits Wed, 28 May 2025 06:31:13 -0700

https://github.com/kparzysz created 
https://github.com/llvm/llvm-project/pull/141763


The code in DirectiveEmitter that generates clause parsers sorted clause names 
to ensure that longer names were tried before shorter ones, in cases where a 
shorter name may be a prefix of a longer one. This matters in the strict 
Fortran source format, since whitespace is ignored there.

This sorting did not take into account clause aliases, which are just 
alternative names. These extra names were not protected in the same way, and 
were just appended immediately after the primary name.

This patch generates a list of pairs Record+Name, where a given record can 
appear multiple times with different names. Sort that list and use it to 
generate parsers for each record. What used to be
```
  ("fred" || "f") >> construct<SomeClause>{} ||
  "foo" << construct<OtherClause>{}
```
is now
```
  "fred" >> construct<SomeClause>{} ||
  "foo" >> construct<OtherClause>{} ||
  "f" >> construct<SomeClause>{}
```

>From e7d2e0b40eae0bf37f76d0aa8a59520b529c760c Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <krzysztof.parzys...@amd.com>
Date: Wed, 21 May 2025 14:23:38 -0500
Subject: [PATCH] [utils][TableGen] Treat clause aliases equally with names

The code in DirectiveEmitter that generates clause parsers sorted
clause names to ensure that longer names were tried before shorter
ones, in cases where a shorter name may be a prefix of a longer one.
This matters in the strict Fortran source format, since whitespace
is ignored there.

This sorting did not take into account clause aliases, which are
just alternative names. These extra names were not protected in the
same way, and were just appended immediately after the primary name.

This patch generates a list of pairs Record+Name, where a given
record can appear multiple times with different names. Sort that
list and use it to generate parsers for each record.
What used to be
```
  ("fred" || "f") >> construct<SomeClause>{} ||
  "foo" << construct<OtherClause>{}
```
is now
```
  "fred" >> construct<SomeClause>{} ||
  "foo" >> construct<OtherClause>{} ||
  "f" >> construct<SomeClause>{}
```
---
 llvm/test/TableGen/directive1.td              |  4 +-
 .../utils/TableGen/Basic/DirectiveEmitter.cpp | 75 ++++++++++---------
 2 files changed, 42 insertions(+), 37 deletions(-)

diff --git a/llvm/test/TableGen/directive1.td b/llvm/test/TableGen/directive1.td
index 74091edfa2a66..f756f54c03bfb 100644
--- a/llvm/test/TableGen/directive1.td
+++ b/llvm/test/TableGen/directive1.td
@@ -34,6 +34,7 @@ def TDLC_ClauseB : Clause<"clauseb"> {
 }
 
 def TDLC_ClauseC : Clause<"clausec"> {
+  let aliases = ["ccccccc"];
   let flangClass = "IntExpr";
   let isValueList = 1;
 }
@@ -260,7 +261,8 @@ def TDL_DirA : Directive<"dira"> {
 // IMPL-NEXT:  TYPE_PARSER(
 // IMPL-NEXT:    "clausec" >> 
construct<TdlClause>(construct<TdlClause::Clausec>(parenthesized(nonemptyList(Parser<IntExpr>{}))))
 || 
 // IMPL-NEXT:    "clauseb" >> 
construct<TdlClause>(construct<TdlClause::Clauseb>(maybe(parenthesized(Parser<IntExpr>{}))))
 ||
-// IMPL-NEXT:    "clausea" >> 
construct<TdlClause>(construct<TdlClause::Clausea>())
+// IMPL-NEXT:    "clausea" >> 
construct<TdlClause>(construct<TdlClause::Clausea>()) ||
+// IMPL-NEXT:    "ccccccc" >> 
construct<TdlClause>(construct<TdlClause::Clausec>(parenthesized(nonemptyList(Parser<IntExpr>{}))))
 // IMPL-NEXT:  )
 // IMPL-EMPTY:
 // IMPL-NEXT:  #endif // GEN_FLANG_CLAUSES_PARSER
diff --git a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp 
b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp
index 9e79a83ed6e18..bd6c543e1741a 100644
--- a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp
@@ -608,7 +608,7 @@ static void emitLeafTable(const DirectiveLanguage &DirLang, 
raw_ostream &OS,
   std::vector<int> Ordering(Directives.size());
   std::iota(Ordering.begin(), Ordering.end(), 0);
 
-  sort(Ordering, [&](int A, int B) {
+  llvm::sort(Ordering, [&](int A, int B) {
     auto &LeavesA = LeafTable[A];
     auto &LeavesB = LeafTable[B];
     int DirA = LeavesA[0], DirB = LeavesB[0];
@@ -1113,59 +1113,63 @@ static void generateFlangClauseParserKindMap(const 
DirectiveLanguage &DirLang,
      << " Parser clause\");\n";
 }
 
-static bool compareClauseName(const Record *R1, const Record *R2) {
-  Clause C1(R1);
-  Clause C2(R2);
-  return (C1.getName() > C2.getName());
+using RecordWithText = std::pair<const Record *, StringRef>;
+
+static bool compareRecordText(const RecordWithText &A,
+                              const RecordWithText &B) {
+  return A.second > B.second;
+}
+
+static std::vector<RecordWithText>
+getSpellingTexts(ArrayRef<const Record *> Records) {
+  std::vector<RecordWithText> List;
+  for (const Record *R : Records) {
+    Clause C(R);
+    List.push_back(std::make_pair(R, C.getName()));
+    llvm::transform(C.getAliases(), std::back_inserter(List),
+                    [R](StringRef S) { return std::make_pair(R, S); });
+  }
+  return List;
 }
 
 // Generate the parser for the clauses.
 static void generateFlangClausesParser(const DirectiveLanguage &DirLang,
                                        raw_ostream &OS) {
   std::vector<const Record *> Clauses = DirLang.getClauses();
-  // Sort clauses in reverse alphabetical order so with clauses with same
-  // beginning, the longer option is tried before.
-  sort(Clauses, compareClauseName);
+  // Sort clauses in the reverse alphabetical order with respect to their
+  // names and aliases, so that longer names are tried before shorter ones.
+  std::vector<std::pair<const Record *, StringRef>> Names =
+      getSpellingTexts(Clauses);
+  llvm::sort(Names, compareRecordText);
   IfDefScope Scope("GEN_FLANG_CLAUSES_PARSER", OS);
   StringRef Base = DirLang.getFlangClauseBaseClass();
 
+  unsigned LastIndex = Names.size() - 1;
   OS << "\n";
-  unsigned Index = 0;
-  unsigned LastClauseIndex = Clauses.size() - 1;
   OS << "TYPE_PARSER(\n";
-  for (const Clause Clause : Clauses) {
-    const std::vector<StringRef> &Aliases = Clause.getAliases();
-    if (Aliases.empty()) {
-      OS << "  \"" << Clause.getName() << "\"";
-    } else {
-      OS << "  ("
-         << "\"" << Clause.getName() << "\"_tok";
-      for (StringRef Alias : Aliases) {
-        OS << " || \"" << Alias << "\"_tok";
-      }
-      OS << ")";
-    }
+  for (auto [Index, RecTxt] : llvm::enumerate(Names)) {
+    auto [R, N] = RecTxt;
+    Clause C(R);
 
-    StringRef FlangClass = Clause.getFlangClass();
-    OS << " >> construct<" << Base << ">(construct<" << Base
-       << "::" << Clause.getFormattedParserClassName() << ">(";
+    StringRef FlangClass = C.getFlangClass();
+    OS << "  \"" << N << "\" >> construct<" << Base << ">(construct<" << Base
+       << "::" << C.getFormattedParserClassName() << ">(";
     if (FlangClass.empty()) {
       OS << "))";
-      if (Index != LastClauseIndex)
+      if (Index != LastIndex)
         OS << " ||";
       OS << "\n";
-      ++Index;
       continue;
     }
 
-    if (Clause.isValueOptional())
+    if (C.isValueOptional())
       OS << "maybe(";
     OS << "parenthesized(";
-    if (Clause.isValueList())
+    if (C.isValueList())
       OS << "nonemptyList(";
 
-    if (!Clause.getPrefix().empty())
-      OS << "\"" << Clause.getPrefix() << ":\" >> ";
+    if (!C.getPrefix().empty())
+      OS << "\"" << C.getPrefix() << ":\" >> ";
 
     // The common Flang parser are used directly. Their name is identical to
     // the Flang class with first letter as lowercase. If the Flang class is
@@ -1181,19 +1185,18 @@ static void generateFlangClausesParser(const 
DirectiveLanguage &DirLang,
             .Case("ScalarLogicalExpr", "scalarLogicalExpr")
             .Default(("Parser<" + FlangClass + ">{}").toStringRef(Scratch));
     OS << Parser;
-    if (!Clause.getPrefix().empty() && Clause.isPrefixOptional())
+    if (!C.getPrefix().empty() && C.isPrefixOptional())
       OS << " || " << Parser;
-    if (Clause.isValueList()) // close nonemptyList(.
+    if (C.isValueList()) // close nonemptyList(.
       OS << ")";
     OS << ")"; // close parenthesized(.
 
-    if (Clause.isValueOptional()) // close maybe(.
+    if (C.isValueOptional()) // close maybe(.
       OS << ")";
     OS << "))";
-    if (Index != LastClauseIndex)
+    if (Index != LastIndex)
       OS << " ||";
     OS << "\n";
-    ++Index;
   }
   OS << ")\n";
 }

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [utils][TableGen] Treat clause aliases equally with names (PR #141763)

Reply via email to