Author: sstwcw
Date: 2022-06-26T02:02:29Z
New Revision: 2e32ff106e740c76601004493816d0ed7c483056

URL: 
https://github.com/llvm/llvm-project/commit/2e32ff106e740c76601004493816d0ed7c483056
DIFF: 
https://github.com/llvm/llvm-project/commit/2e32ff106e740c76601004493816d0ed7c483056.diff

LOG: [clang-format] Handle Verilog preprocessor directives

Verilog uses the backtick instead of the hash.  In this revision
backticks are lexed manually and then get labeled as hashes so the logic
for handling C preprocessor stuff doesn't have to change.  Hashes get
labeled as identifiers for Verilog-specific stuff like delays.

Reviewed By: HazardyKnusperkeks

Differential Revision: https://reviews.llvm.org/D124749

Added: 
    

Modified: 
    clang/lib/Format/FormatToken.h
    clang/lib/Format/FormatTokenLexer.cpp
    clang/lib/Format/FormatTokenLexer.h
    clang/lib/Format/TokenAnnotator.cpp
    clang/lib/Format/UnwrappedLineParser.cpp
    clang/unittests/Format/FormatTestVerilog.cpp

Removed: 
    


################################################################################
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index 92fa0798f699..b6cc021affae 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -988,6 +988,7 @@ struct AdditionalKeywords {
     kw_automatic = &IdentTable.get("automatic");
     kw_before = &IdentTable.get("before");
     kw_begin = &IdentTable.get("begin");
+    kw_begin_keywords = &IdentTable.get("begin_keywords");
     kw_bins = &IdentTable.get("bins");
     kw_binsof = &IdentTable.get("binsof");
     kw_casex = &IdentTable.get("casex");
@@ -999,10 +1000,20 @@ struct AdditionalKeywords {
     kw_cover = &IdentTable.get("cover");
     kw_covergroup = &IdentTable.get("covergroup");
     kw_coverpoint = &IdentTable.get("coverpoint");
+    kw_default_decay_time = &IdentTable.get("default_decay_time");
+    kw_default_nettype = &IdentTable.get("default_nettype");
+    kw_default_trireg_strength = &IdentTable.get("default_trireg_strength");
+    kw_delay_mode_distributed = &IdentTable.get("delay_mode_distributed");
+    kw_delay_mode_path = &IdentTable.get("delay_mode_path");
+    kw_delay_mode_unit = &IdentTable.get("delay_mode_unit");
+    kw_delay_mode_zero = &IdentTable.get("delay_mode_zero");
     kw_disable = &IdentTable.get("disable");
     kw_dist = &IdentTable.get("dist");
+    kw_elsif = &IdentTable.get("elsif");
     kw_end = &IdentTable.get("end");
+    kw_end_keywords = &IdentTable.get("end_keywords");
     kw_endcase = &IdentTable.get("endcase");
+    kw_endcelldefine = &IdentTable.get("endcelldefine");
     kw_endchecker = &IdentTable.get("endchecker");
     kw_endclass = &IdentTable.get("endclass");
     kw_endclocking = &IdentTable.get("endclocking");
@@ -1043,6 +1054,7 @@ struct AdditionalKeywords {
     kw_macromodule = &IdentTable.get("macromodule");
     kw_matches = &IdentTable.get("matches");
     kw_medium = &IdentTable.get("medium");
+    kw_nounconnected_drive = &IdentTable.get("nounconnected_drive");
     kw_output = &IdentTable.get("output");
     kw_packed = &IdentTable.get("packed");
     kw_parameter = &IdentTable.get("parameter");
@@ -1058,6 +1070,7 @@ struct AdditionalKeywords {
     kw_randcase = &IdentTable.get("randcase");
     kw_randsequence = &IdentTable.get("randsequence");
     kw_repeat = &IdentTable.get("repeat");
+    kw_resetall = &IdentTable.get("resetall");
     kw_sample = &IdentTable.get("sample");
     kw_scalared = &IdentTable.get("scalared");
     kw_sequence = &IdentTable.get("sequence");
@@ -1073,12 +1086,15 @@ struct AdditionalKeywords {
     kw_table = &IdentTable.get("table");
     kw_tagged = &IdentTable.get("tagged");
     kw_task = &IdentTable.get("task");
+    kw_timescale = &IdentTable.get("timescale");
     kw_tri = &IdentTable.get("tri");
     kw_tri0 = &IdentTable.get("tri0");
     kw_tri1 = &IdentTable.get("tri1");
     kw_triand = &IdentTable.get("triand");
     kw_trior = &IdentTable.get("trior");
     kw_trireg = &IdentTable.get("trireg");
+    kw_unconnected_drive = &IdentTable.get("unconnected_drive");
+    kw_undefineall = &IdentTable.get("undefineall");
     kw_unique = &IdentTable.get("unique");
     kw_unique0 = &IdentTable.get("unique0");
     kw_uwire = &IdentTable.get("uwire");
@@ -1091,6 +1107,10 @@ struct AdditionalKeywords {
     kw_with = &IdentTable.get("with");
     kw_wor = &IdentTable.get("wor");
 
+    // Symbols that are treated as keywords.
+    kw_verilogHash = &IdentTable.get("#");
+    kw_verilogHashHash = &IdentTable.get("##");
+
     // Keep this at the end of the constructor to make sure everything here
     // is
     // already initialized.
@@ -1118,38 +1138,132 @@ struct AdditionalKeywords {
     // Some keywords are not included here because they don't need special
     // treatment like `showcancelled` or they should be treated as identifiers
     // like `int` and `logic`.
-    VerilogExtraKeywords = std::unordered_set<IdentifierInfo *>(
-        {kw_always,       kw_always_comb,  kw_always_ff,    kw_always_latch,
-         kw_assert,       kw_assign,       kw_assume,       kw_automatic,
-         kw_before,       kw_begin,        kw_bins,         kw_binsof,
-         kw_casex,        kw_casez,        kw_celldefine,   kw_checker,
-         kw_clocking,     kw_constraint,   kw_cover,        kw_covergroup,
-         kw_coverpoint,   kw_disable,      kw_dist,         kw_end,
-         kw_endcase,      kw_endchecker,   kw_endclass,     kw_endclocking,
-         kw_endfunction,  kw_endgenerate,  kw_endgroup,     kw_endinterface,
-         kw_endmodule,    kw_endpackage,   kw_endprimitive, kw_endprogram,
-         kw_endproperty,  kw_endsequence,  kw_endspecify,   kw_endtable,
-         kw_endtask,      kw_extends,      kw_final,        kw_foreach,
-         kw_forever,      kw_fork,         kw_function,     kw_generate,
-         kw_highz0,       kw_highz1,       kw_iff,          kw_ifnone,
-         kw_ignore_bins,  kw_illegal_bins, kw_implements,   kw_import,
-         kw_initial,      kw_inout,        kw_input,        kw_inside,
-         kw_interconnect, kw_interface,    kw_intersect,    kw_join,
-         kw_join_any,     kw_join_none,    kw_large,        kw_let,
-         kw_local,        kw_localparam,   kw_macromodule,  kw_matches,
-         kw_medium,       kw_output,       kw_package,      kw_packed,
-         kw_parameter,    kw_primitive,    kw_priority,     kw_program,
-         kw_property,     kw_pull0,        kw_pull1,        kw_pure,
-         kw_rand,         kw_randc,        kw_randcase,     kw_randsequence,
-         kw_ref,          kw_repeat,       kw_sample,       kw_scalared,
-         kw_sequence,     kw_small,        kw_soft,         kw_solve,
-         kw_specify,      kw_specparam,    kw_strong0,      kw_strong1,
-         kw_supply0,      kw_supply1,      kw_table,        kw_tagged,
-         kw_task,         kw_tri,          kw_tri0,         kw_tri1,
-         kw_triand,       kw_trior,        kw_trireg,       kw_unique,
-         kw_unique0,      kw_uwire,        kw_var,          kw_vectored,
-         kw_wand,         kw_weak0,        kw_weak1,        kw_wildcard,
-         kw_wire,         kw_with,         kw_wor});
+    VerilogExtraKeywords =
+        std::unordered_set<IdentifierInfo *>({kw_always,
+                                              kw_always_comb,
+                                              kw_always_ff,
+                                              kw_always_latch,
+                                              kw_assert,
+                                              kw_assign,
+                                              kw_assume,
+                                              kw_automatic,
+                                              kw_before,
+                                              kw_begin,
+                                              kw_bins,
+                                              kw_binsof,
+                                              kw_casex,
+                                              kw_casez,
+                                              kw_celldefine,
+                                              kw_checker,
+                                              kw_clocking,
+                                              kw_constraint,
+                                              kw_cover,
+                                              kw_covergroup,
+                                              kw_coverpoint,
+                                              kw_disable,
+                                              kw_dist,
+                                              kw_end,
+                                              kw_endcase,
+                                              kw_endchecker,
+                                              kw_endclass,
+                                              kw_endclocking,
+                                              kw_endfunction,
+                                              kw_endgenerate,
+                                              kw_endgroup,
+                                              kw_endinterface,
+                                              kw_endmodule,
+                                              kw_endpackage,
+                                              kw_endprimitive,
+                                              kw_endprogram,
+                                              kw_endproperty,
+                                              kw_endsequence,
+                                              kw_endspecify,
+                                              kw_endtable,
+                                              kw_endtask,
+                                              kw_extends,
+                                              kw_final,
+                                              kw_foreach,
+                                              kw_forever,
+                                              kw_fork,
+                                              kw_function,
+                                              kw_generate,
+                                              kw_highz0,
+                                              kw_highz1,
+                                              kw_iff,
+                                              kw_ifnone,
+                                              kw_ignore_bins,
+                                              kw_illegal_bins,
+                                              kw_implements,
+                                              kw_import,
+                                              kw_initial,
+                                              kw_inout,
+                                              kw_input,
+                                              kw_inside,
+                                              kw_interconnect,
+                                              kw_interface,
+                                              kw_intersect,
+                                              kw_join,
+                                              kw_join_any,
+                                              kw_join_none,
+                                              kw_large,
+                                              kw_let,
+                                              kw_local,
+                                              kw_localparam,
+                                              kw_macromodule,
+                                              kw_matches,
+                                              kw_medium,
+                                              kw_output,
+                                              kw_package,
+                                              kw_packed,
+                                              kw_parameter,
+                                              kw_primitive,
+                                              kw_priority,
+                                              kw_program,
+                                              kw_property,
+                                              kw_pull0,
+                                              kw_pull1,
+                                              kw_pure,
+                                              kw_rand,
+                                              kw_randc,
+                                              kw_randcase,
+                                              kw_randsequence,
+                                              kw_ref,
+                                              kw_repeat,
+                                              kw_sample,
+                                              kw_scalared,
+                                              kw_sequence,
+                                              kw_small,
+                                              kw_soft,
+                                              kw_solve,
+                                              kw_specify,
+                                              kw_specparam,
+                                              kw_strong0,
+                                              kw_strong1,
+                                              kw_supply0,
+                                              kw_supply1,
+                                              kw_table,
+                                              kw_tagged,
+                                              kw_task,
+                                              kw_tri,
+                                              kw_tri0,
+                                              kw_tri1,
+                                              kw_triand,
+                                              kw_trior,
+                                              kw_trireg,
+                                              kw_unique,
+                                              kw_unique0,
+                                              kw_uwire,
+                                              kw_var,
+                                              kw_vectored,
+                                              kw_wand,
+                                              kw_weak0,
+                                              kw_weak1,
+                                              kw_wildcard,
+                                              kw_wire,
+                                              kw_with,
+                                              kw_wor,
+                                              kw_verilogHash,
+                                              kw_verilogHashHash});
   }
 
   // Context sensitive keywords.
@@ -1265,6 +1379,7 @@ struct AdditionalKeywords {
   IdentifierInfo *kw_automatic;
   IdentifierInfo *kw_before;
   IdentifierInfo *kw_begin;
+  IdentifierInfo *kw_begin_keywords;
   IdentifierInfo *kw_bins;
   IdentifierInfo *kw_binsof;
   IdentifierInfo *kw_casex;
@@ -1276,10 +1391,20 @@ struct AdditionalKeywords {
   IdentifierInfo *kw_cover;
   IdentifierInfo *kw_covergroup;
   IdentifierInfo *kw_coverpoint;
+  IdentifierInfo *kw_default_decay_time;
+  IdentifierInfo *kw_default_nettype;
+  IdentifierInfo *kw_default_trireg_strength;
+  IdentifierInfo *kw_delay_mode_distributed;
+  IdentifierInfo *kw_delay_mode_path;
+  IdentifierInfo *kw_delay_mode_unit;
+  IdentifierInfo *kw_delay_mode_zero;
   IdentifierInfo *kw_disable;
   IdentifierInfo *kw_dist;
+  IdentifierInfo *kw_elsif;
   IdentifierInfo *kw_end;
+  IdentifierInfo *kw_end_keywords;
   IdentifierInfo *kw_endcase;
+  IdentifierInfo *kw_endcelldefine;
   IdentifierInfo *kw_endchecker;
   IdentifierInfo *kw_endclass;
   IdentifierInfo *kw_endclocking;
@@ -1320,6 +1445,7 @@ struct AdditionalKeywords {
   IdentifierInfo *kw_macromodule;
   IdentifierInfo *kw_matches;
   IdentifierInfo *kw_medium;
+  IdentifierInfo *kw_nounconnected_drive;
   IdentifierInfo *kw_output;
   IdentifierInfo *kw_packed;
   IdentifierInfo *kw_parameter;
@@ -1335,6 +1461,7 @@ struct AdditionalKeywords {
   IdentifierInfo *kw_randcase;
   IdentifierInfo *kw_randsequence;
   IdentifierInfo *kw_repeat;
+  IdentifierInfo *kw_resetall;
   IdentifierInfo *kw_sample;
   IdentifierInfo *kw_scalared;
   IdentifierInfo *kw_sequence;
@@ -1350,12 +1477,15 @@ struct AdditionalKeywords {
   IdentifierInfo *kw_table;
   IdentifierInfo *kw_tagged;
   IdentifierInfo *kw_task;
-  IdentifierInfo *kw_tri;
+  IdentifierInfo *kw_timescale;
   IdentifierInfo *kw_tri0;
   IdentifierInfo *kw_tri1;
+  IdentifierInfo *kw_tri;
   IdentifierInfo *kw_triand;
   IdentifierInfo *kw_trior;
   IdentifierInfo *kw_trireg;
+  IdentifierInfo *kw_unconnected_drive;
+  IdentifierInfo *kw_undefineall;
   IdentifierInfo *kw_unique;
   IdentifierInfo *kw_unique0;
   IdentifierInfo *kw_uwire;
@@ -1368,6 +1498,17 @@ struct AdditionalKeywords {
   IdentifierInfo *kw_with;
   IdentifierInfo *kw_wor;
 
+  // Workaround for hashes and backticks in Verilog.
+  IdentifierInfo *kw_verilogHash;
+  IdentifierInfo *kw_verilogHashHash;
+
+  /// Returns \c true if \p Tok is a keyword or an identifier.
+  bool isWordLike(const FormatToken &Tok) const {
+    // getIdentifierInfo returns non-null for keywords as well as identifiers.
+    return Tok.Tok.getIdentifierInfo() != nullptr &&
+           !Tok.isOneOf(kw_verilogHash, kw_verilogHashHash);
+  }
+
   /// Returns \c true if \p Tok is a true JavaScript identifier, returns
   /// \c false if it is a keyword or a pseudo keyword.
   /// If \c AcceptIdentifierName is true, returns true not only for keywords,
@@ -1526,6 +1667,35 @@ struct AdditionalKeywords {
     }
   }
 
+  /// Returns whether \p Tok is a Verilog preprocessor directive.  This is
+  /// needed because macro expansions start with a backtick as well and they
+  /// need to be treated differently.
+  bool isVerilogPPDirective(const FormatToken &Tok) const {
+    auto Info = Tok.Tok.getIdentifierInfo();
+    if (!Info)
+      return false;
+    switch (Info->getPPKeywordID()) {
+    case tok::pp_define:
+    case tok::pp_else:
+    case tok::pp_endif:
+    case tok::pp_ifdef:
+    case tok::pp_ifndef:
+    case tok::pp_include:
+    case tok::pp_line:
+    case tok::pp_pragma:
+    case tok::pp_undef:
+      return true;
+    default:
+      return Tok.isOneOf(kw_begin_keywords, kw_celldefine,
+                         kw_default_decay_time, kw_default_nettype,
+                         kw_default_trireg_strength, kw_delay_mode_distributed,
+                         kw_delay_mode_path, kw_delay_mode_unit,
+                         kw_delay_mode_zero, kw_elsif, kw_end_keywords,
+                         kw_endcelldefine, kw_nounconnected_drive, kw_resetall,
+                         kw_timescale, kw_unconnected_drive, kw_undefineall);
+    }
+  }
+
   /// Returns whether \p Tok is a Verilog keyword that opens a block.
   bool isVerilogBegin(const FormatToken &Tok) const {
     // `table` is not included since it needs to be treated specially.
@@ -1541,8 +1711,8 @@ struct AdditionalKeywords {
                        kw_endgenerate, kw_endgroup, kw_endinterface,
                        kw_endmodule, kw_endpackage, kw_endprimitive,
                        kw_endprogram, kw_endproperty, kw_endsequence,
-                       kw_endspecify, kw_endtable, kw_endtask, kw_join_any,
-                       kw_join_none);
+                       kw_endspecify, kw_endtable, kw_endtask, kw_join,
+                       kw_join_any, kw_join_none);
   }
 
   /// Whether the token begins a block.

diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 8aee794b3f4f..e3af9548b015 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -992,6 +992,25 @@ FormatToken *FormatTokenLexer::getNextToken() {
     }
   }
 
+  if (Style.isVerilog()) {
+    // Verilog uses the backtick instead of the hash for preprocessor stuff.
+    // And it uses the hash for delays and parameter lists. In order to continue
+    // using `tok::hash` in other places, the backtick gets marked as the hash
+    // here.  And in order to tell the backtick and hash apart for
+    // Verilog-specific stuff, the hash becomes an identifier.
+    if (FormatTok->isOneOf(tok::hash, tok::hashhash)) {
+      FormatTok->Tok.setKind(tok::raw_identifier);
+    } else if (FormatTok->is(tok::raw_identifier)) {
+      if (FormatTok->TokenText == "`") {
+        FormatTok->Tok.setIdentifierInfo(nullptr);
+        FormatTok->Tok.setKind(tok::hash);
+      } else if (FormatTok->TokenText == "``") {
+        FormatTok->Tok.setIdentifierInfo(nullptr);
+        FormatTok->Tok.setKind(tok::hashhash);
+      }
+    }
+  }
+
   FormatTok->WhitespaceRange = SourceRange(
       WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
 
@@ -1079,8 +1098,51 @@ FormatToken *FormatTokenLexer::getNextToken() {
   return FormatTok;
 }
 
+bool FormatTokenLexer::readRawTokenVerilogSpecific(Token &Tok) {
+  // In Verilog the quote is not a character literal.
+  //
+  // Make the backtick and double backtick identifiers to match against them
+  // more easily.
+  //
+  // In Verilog an escaped identifier starts with backslash and ends with
+  // whitespace. Unless that whitespace is an escaped newline. A backslash can
+  // also begin an escaped newline outside of an escaped identifier. We check
+  // for that outside of the Regex since we can't use negative lookahead
+  // assertions. Simply changing the '*' to '+' breaks stuff as the escaped
+  // identifier may have a length of 0 according to Section A.9.3.
+  // FIXME: If there is an escaped newline in the middle of an escaped
+  // identifier, allow for pasting the two lines together. But escaped
+  // identifiers usually occur only in generated code anyway.
+  static const llvm::Regex VerilogToken(R"re(^('|``?|\\(\\)re"
+                                        "(\r?\n|\r)|[^[:space:]])*)");
+
+  SmallVector<StringRef, 4> Matches;
+  const char *Start = Lex->getBufferLocation();
+  if (!VerilogToken.match(StringRef(Start, Lex->getBuffer().end() - Start),
+                          &Matches)) {
+    return false;
+  }
+  // There is a null byte at the end of the buffer, so we don't have to check
+  // Start[1] is within the buffer.
+  if (Start[0] == '\\' && (Start[1] == '\r' || Start[1] == '\n'))
+    return false;
+  size_t Len = Matches[0].size();
+
+  Tok.setLength(Len);
+  Tok.setLocation(Lex->getSourceLocation(Start, Len));
+  // The kind has to be an identifier so we can match it against those defined
+  // in Keywords.
+  Tok.setKind(tok::raw_identifier);
+  Tok.setRawIdentifierData(Start);
+  Lex->seek(Lex->getCurrentBufferOffset() + Len, /*IsAtStartofline=*/false);
+  return true;
+}
+
 void FormatTokenLexer::readRawToken(FormatToken &Tok) {
-  Lex->LexFromRawLexer(Tok.Tok);
+  // For Verilog, first see if there is a special token, and fall back to the
+  // normal lexer if there isn't one.
+  if (!Style.isVerilog() || !readRawTokenVerilogSpecific(Tok.Tok))
+    Lex->LexFromRawLexer(Tok.Tok);
   Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
                             Tok.Tok.getLength());
   // For formatting, treat unterminated string literals like normal string

diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h
index 4856d29a0f9c..bff2c181d81e 100644
--- a/clang/lib/Format/FormatTokenLexer.h
+++ b/clang/lib/Format/FormatTokenLexer.h
@@ -126,6 +126,9 @@ class FormatTokenLexer {
   // Targets that may appear inside a C# attribute.
   static const llvm::StringSet<> CSharpAttributeTargets;
 
+  /// Handle Verilog-specific tokens.
+  bool readRawTokenVerilogSpecific(Token &Tok);
+
   void readRawToken(FormatToken &Tok);
 
   void resetLexer(unsigned Offset);

diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index dbd503d25862..1ee95f26d1fc 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1343,6 +1343,10 @@ class AnnotatingParser {
     // sequence.
     if (!CurrentToken->Tok.getIdentifierInfo())
       return Type;
+    // In Verilog macro expansions start with a backtick just like preprocessor
+    // directives. Thus we stop if the word is not a preprocessor directive.
+    if (Style.isVerilog() && !Keywords.isVerilogPPDirective(*CurrentToken))
+      return LT_Invalid;
     switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
     case tok::pp_include:
     case tok::pp_include_next:
@@ -1385,8 +1389,14 @@ class AnnotatingParser {
     if (!CurrentToken)
       return LT_Invalid;
     NonTemplateLess.clear();
-    if (CurrentToken->is(tok::hash))
-      return parsePreprocessorDirective();
+    if (CurrentToken->is(tok::hash)) {
+      // We were not yet allowed to use C++17 optional when this was being
+      // written. So we used LT_Invalid to mark that the line is not a
+      // preprocessor directive.
+      auto Type = parsePreprocessorDirective();
+      if (Type != LT_Invalid)
+        return Type;
+    }
 
     // Directly allow to 'import <string-literal>' to support protocol buffer
     // definitions (github.com/google/protobuf) or missing "#" (either way we
@@ -3663,8 +3673,9 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
   if (Left.Finalized)
     return Right.hasWhitespaceBefore();
 
-  if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo())
-    return true; // Never ever merge two identifiers.
+  // Never ever merge two words.
+  if (Keywords.isWordLike(Right) && Keywords.isWordLike(Left))
+    return true;
 
   // Leave a space between * and /* to avoid C4138 `comment end` found outside
   // of comment.
@@ -3931,6 +3942,21 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
         Right.is(TT_TemplateOpener)) {
       return true;
     }
+  } else if (Style.isVerilog()) {
+    // Don't add space within a delay like `#0`.
+    if (!Left.is(TT_BinaryOperator) &&
+        Left.isOneOf(Keywords.kw_verilogHash, Keywords.kw_verilogHashHash)) {
+      return false;
+    }
+    // Add space after a delay.
+    if (!Right.is(tok::semi) &&
+        (Left.endsSequence(tok::numeric_constant, Keywords.kw_verilogHash) ||
+         Left.endsSequence(tok::numeric_constant,
+                           Keywords.kw_verilogHashHash) ||
+         (Left.is(tok::r_paren) && Left.MatchingParen &&
+          Left.MatchingParen->endsSequence(tok::l_paren, tok::at)))) {
+      return true;
+    }
   }
   if (Left.is(TT_ImplicitStringLiteral))
     return Right.hasWhitespaceBefore();

diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 11e51da32ac1..d3383292f7a3 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -1909,9 +1909,18 @@ void UnwrappedLineParser::parseStructuralElement(
       if (Style.isJavaScript())
         break;
 
-      TokenCount = Line->Tokens.size();
-      if (TokenCount == 1 ||
-          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
+      auto OneTokenSoFar = [&]() {
+        const UnwrappedLineNode *Tok = &Line->Tokens.front(),
+                                *End = Tok + Line->Tokens.size();
+        while (Tok != End && Tok->Tok->is(tok::comment))
+          ++Tok;
+        // In Verilog, macro invocations start with a backtick which the code
+        // treats as a hash.  Skip it.
+        if (Style.isVerilog() && Tok != End && Tok->Tok->is(tok::hash))
+          ++Tok;
+        return End - Tok == 1;
+      };
+      if (OneTokenSoFar()) {
         if (FormatTok->is(tok::colon) && !Line->MustBeDeclaration) {
           Line->Tokens.begin()->Tok->MustBreakBefore = true;
           parseLabel(!Style.IndentGotoLabels);
@@ -4283,6 +4292,8 @@ void UnwrappedLineParser::readToken(int LevelDifference) {
     PreviousWasComment = FormatTok->is(tok::comment);
 
     while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
+           (!Style.isVerilog() ||
+            Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
            FirstNonCommentOnLine) {
       distributeComments(Comments, FormatTok);
       Comments.clear();

diff --git a/clang/unittests/Format/FormatTestVerilog.cpp b/clang/unittests/Format/FormatTestVerilog.cpp
index 49e55fff4e9c..3c48dfad4d1a 100644
--- a/clang/unittests/Format/FormatTestVerilog.cpp
+++ b/clang/unittests/Format/FormatTestVerilog.cpp
@@ -30,7 +30,9 @@ class FormatTestVerilog : public ::testing::Test {
     return *Result;
   }
 
-  static std::string format(llvm::StringRef Code, const FormatStyle &Style) {
+  static std::string
+  format(llvm::StringRef Code,
+         const FormatStyle &Style = getLLVMStyle(FormatStyle::LK_Verilog)) {
     return format(Code, 0, Code.size(), Style);
   }
 
@@ -43,6 +45,29 @@ class FormatTestVerilog : public ::testing::Test {
   }
 };
 
+TEST_F(FormatTestVerilog, Delay) {
+  // Delay by the default unit.
+  verifyFormat("#0;");
+  verifyFormat("#1;");
+  verifyFormat("#10;");
+  verifyFormat("#1.5;");
+  // Explicit unit.
+  verifyFormat("#1fs;");
+  verifyFormat("#1.5fs;");
+  verifyFormat("#1ns;");
+  verifyFormat("#1.5ns;");
+  verifyFormat("#1us;");
+  verifyFormat("#1.5us;");
+  verifyFormat("#1ms;");
+  verifyFormat("#1.5ms;");
+  verifyFormat("#1s;");
+  verifyFormat("#1.5s;");
+  // The following expression should be on the same line.
+  verifyFormat("#1 x = x;");
+  EXPECT_EQ("#1 x = x;", format("#1\n"
+                                "x = x;"));
+}
+
 TEST_F(FormatTestVerilog, If) {
   verifyFormat("if (x)\n"
                "  x = x;");
@@ -114,5 +139,113 @@ TEST_F(FormatTestVerilog, If) {
                "  {x} = {x};");
 }
 
+TEST_F(FormatTestVerilog, Preprocessor) {
+  auto Style = getLLVMStyle(FormatStyle::LK_Verilog);
+  Style.ColumnLimit = 20;
+
+  // Macro definitions.
+  EXPECT_EQ("`define X          \\\n"
+            "  if (x)           \\\n"
+            "    x = x;",
+            format("`define X if(x)x=x;", Style));
+  EXPECT_EQ("`define X(x)       \\\n"
+            "  if (x)           \\\n"
+            "    x = x;",
+            format("`define X(x) if(x)x=x;", Style));
+  EXPECT_EQ("`define X          \\\n"
+            "  x = x;           \\\n"
+            "  x = x;",
+            format("`define X x=x;x=x;", Style));
+  // Macro definitions with invocations inside.
+  EXPECT_EQ("`define LIST       \\\n"
+            "  `ENTRY           \\\n"
+            "  `ENTRY",
+            format("`define LIST \\\n"
+                   "`ENTRY \\\n"
+                   "`ENTRY",
+                   Style));
+  EXPECT_EQ("`define LIST       \\\n"
+            "  `x = `x;         \\\n"
+            "  `x = `x;",
+            format("`define LIST \\\n"
+                   "`x = `x; \\\n"
+                   "`x = `x;",
+                   Style));
+  EXPECT_EQ("`define LIST       \\\n"
+            "  `x = `x;         \\\n"
+            "  `x = `x;",
+            format("`define LIST `x=`x;`x=`x;", Style));
+  // Macro invocations.
+  verifyFormat("`x = (`x1 + `x2 + x);");
+  // Lines starting with a preprocessor directive should not be indented.
+  std::string Directives[] = {
+      "begin_keywords",
+      "celldefine",
+      "default_nettype",
+      "define",
+      "else",
+      "elsif",
+      "end_keywords",
+      "endcelldefine",
+      "endif",
+      "ifdef",
+      "ifndef",
+      "include",
+      "line",
+      "nounconnected_drive",
+      "pragma",
+      "resetall",
+      "timescale",
+      "unconnected_drive",
+      "undef",
+      "undefineall",
+  };
+  for (auto &Name : Directives) {
+    EXPECT_EQ("if (x)\n"
+              "`" +
+                  Name +
+                  "\n"
+                  "  ;",
+              format("if (x)\n"
+                     "`" +
+                         Name +
+                         "\n"
+                         ";",
+                     Style));
+  }
+  // Lines starting with a regular macro invocation should be indented as a
+  // normal line.
+  EXPECT_EQ("if (x)\n"
+            "  `x = `x;\n"
+            "`timescale 1ns / 1ps",
+            format("if (x)\n"
+                   "`x = `x;\n"
+                   "`timescale 1ns / 1ps",
+                   Style));
+  EXPECT_EQ("if (x)\n"
+            "`timescale 1ns / 1ps\n"
+            "  `x = `x;",
+            format("if (x)\n"
+                   "`timescale 1ns / 1ps\n"
+                   "`x = `x;",
+                   Style));
+  std::string NonDirectives[] = {
+      // For `__FILE__` and `__LINE__`, although the standard classifies them as
+      // preprocessor directives, they are used like regular macros.
+      "__FILE__", "__LINE__", "elif", "foo", "x",
+  };
+  for (auto &Name : NonDirectives) {
+    EXPECT_EQ("if (x)\n"
+              "  `" +
+                  Name + ";",
+              format("if (x)\n"
+                     "`" +
+                         Name +
+                         "\n"
+                         ";",
+                     Style));
+  }
+}
+
 } // namespace format
 } // end namespace clang


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to