Added language detection and a test. Addressed comments.

Hi djasper, klimek,

http://llvm-reviews.chandlerc.com/D2242

CHANGE SINCE LAST DIFF
  http://llvm-reviews.chandlerc.com/D2242?vs=5799&id=5812#toc

Files:
  include/clang/Format/Format.h
  lib/Format/Format.cpp
  test/Format/language-detection.cpp
  unittests/Format/FormatTest.cpp
Index: include/clang/Format/Format.h
===================================================================
--- include/clang/Format/Format.h
+++ include/clang/Format/Format.h
@@ -30,6 +30,23 @@
 /// \brief The \c FormatStyle is used to configure the formatting to follow
 /// specific guidelines.
 struct FormatStyle {
+  /// \brief Supported languages. When stored in a configuration file, specifies
+  /// the language that the configuration targets. When passed to the
+  /// reformat() function, enables syntax features specific to that language.
+  enum LanguageKind {
+    /// Should be used for C, C++, Objective-C and Objective-C++.
+    LK_Cpp,
+    /// Should be used for JavaScript.
+    LK_JavaScript,
+    /// When stored in a configuration file, means that the configuration
+    /// may be used for all languages. Should not be passed to the reformat()
+    /// function.
+    LK_Any
+  };
+
+  /// \brief The language this format style is targeted at.
+  LanguageKind Language;
+
   /// \brief The column limit.
   ///
   /// A column limit of \c 0 means that there is no column limit. In this case,
@@ -284,6 +301,7 @@
            IndentFunctionDeclarationAfterType ==
                R.IndentFunctionDeclarationAfterType &&
            IndentWidth == R.IndentWidth &&
+           Language == R.Language &&
            MaxEmptyLinesToKeep == R.MaxEmptyLinesToKeep &&
            NamespaceIndentation == R.NamespaceIndentation &&
            ObjCSpaceBeforeProtocolList == R.ObjCSpaceBeforeProtocolList &&
Index: lib/Format/Format.cpp
===================================================================
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -34,6 +34,17 @@
 namespace llvm {
 namespace yaml {
 template <>
+struct ScalarEnumerationTraits<clang::format::FormatStyle::LanguageKind> {
+  static void enumeration(IO &IO,
+                          clang::format::FormatStyle::LanguageKind &Value) {
+    IO.enumCase(Value, "Cpp", clang::format::FormatStyle::LK_Cpp);
+    IO.enumCase(Value, "JavaScript",
+                clang::format::FormatStyle::LK_JavaScript);
+    IO.enumCase(Value, "Any", clang::format::FormatStyle::LK_Any);
+  }
+};
+
+template <>
 struct ScalarEnumerationTraits<clang::format::FormatStyle::LanguageStandard> {
   static void enumeration(IO &IO,
                           clang::format::FormatStyle::LanguageStandard &Value) {
@@ -106,6 +117,7 @@
         }
     }
 
+    IO.mapOptional("Language", Style.Language);
     IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
     IO.mapOptional("ConstructorInitializerIndentWidth",
                    Style.ConstructorInitializerIndentWidth);
@@ -173,6 +185,26 @@
     IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
   }
 };
+
+// Allows reading a vector<FormatStyle> using the 0th element as a default
+// value (so the vector must not be empty). Elements are written or read
+// starting from the 1st element. When writing, the 0th element is ignored.
+// When reading, keys that are not present in the serialized form are copied
+// from the 0th element of the vector.
+template <>
+struct DocumentListTraits<std::vector<clang::format::FormatStyle> > {
+  static size_t size(IO &io, std::vector<clang::format::FormatStyle> &Seq) {
+    return Seq.size() - 1; // Would wrap around if Seq were empty.
+  }
+  static clang::format::FormatStyle &
+  element(IO &io, std::vector<clang::format::FormatStyle> &Seq, size_t Index) {
+    if (Index + 2 > Seq.size()) {
+      clang::format::FormatStyle Template = Seq[0];
+      Seq.resize(Index + 2, Template); // New slots start as copies of Seq[0].
+    }
+    return Seq[Index + 1];
+  }
+};
 }
 }
 
@@ -188,6 +220,7 @@
 
 FormatStyle getLLVMStyle() {
   FormatStyle LLVMStyle;
+  LLVMStyle.Language= FormatStyle::LK_Cpp;
   LLVMStyle.AccessModifierOffset = -2;
   LLVMStyle.AlignEscapedNewlinesLeft = false;
   LLVMStyle.AlignTrailingComments = true;
@@ -236,6 +269,7 @@
 
 FormatStyle getGoogleStyle() {
   FormatStyle GoogleStyle;
+  GoogleStyle.Language= FormatStyle::LK_Cpp;
   GoogleStyle.AccessModifierOffset = -1;
   GoogleStyle.AlignEscapedNewlinesLeft = true;
   GoogleStyle.AlignTrailingComments = true;
@@ -339,9 +373,27 @@
 llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
   if (Text.trim().empty())
     return llvm::make_error_code(llvm::errc::invalid_argument);
+  // Styles[0] carries the caller's style: it supplies both the defaults for
+  // keys missing from a document and the language we are looking for.
+  std::vector<FormatStyle> Styles;
+  Styles.push_back(*Style);
   llvm::yaml::Input Input(Text);
-  Input >> *Style;
-  return Input.error();
+  Input >> Styles;
+  // Report malformed input as such rather than as "language not supported",
+  // and never apply a style taken from a document that failed to parse.
+  if (Input.error())
+    return Input.error();
+  // Use the first document written for the requested language (or for any).
+  // FIXME: Error out on multiple configurations for the same language.
+  for (unsigned i = 1; i < Styles.size(); ++i) {
+    if (Styles[i].Language == Styles[0].Language ||
+        Styles[i].Language == FormatStyle::LK_Any) {
+      *Style = Styles[i];
+      Style->Language = Styles[0].Language;
+      return llvm::make_error_code(llvm::errc::success);
+    }
+  }
+  return llvm::make_error_code(llvm::errc::not_supported);
 }
 
 std::string configurationAsText(const FormatStyle &Style) {
@@ -979,24 +1024,44 @@
 
 private:
   void tryMergePreviousTokens() {
-    tryMerge_TMacro() || tryMergeJavaScriptIdentityOperators();
+    if (tryMerge_TMacro()) // At most one merge is performed per call.
+      return;
+
+    if (Style.Language == FormatStyle::LK_JavaScript) {
+      static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal };
+      static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal };
+      static tok::TokenKind JSShiftEqual[] = { tok::greater, tok::greater,
+                                               tok::greaterequal };
+      // FIXME: We probably need to change token type to mimic operator with the
+      // correct priority.
+      if (tryMergeTokens(JSIdentity)) // "==" "=" becomes "===".
+        return;
+      if (tryMergeTokens(JSNotIdentity)) // "!=" "=" becomes "!==".
+        return;
+      if (tryMergeTokens(JSShiftEqual)) // ">" ">" ">=" becomes ">>>=".
+        return;
+    }
   }
 
-  bool tryMergeJavaScriptIdentityOperators() {
-    if (Tokens.size() < 2)
+  bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds) {
+    if (Tokens.size() < Kinds.size())
       return false;
-    FormatToken &First = *Tokens[Tokens.size() - 2];
-    if (!First.isOneOf(tok::exclaimequal, tok::equalequal))
-      return false;
-    FormatToken &Second = *Tokens.back();
-    if (!Second.is(tok::equal))
-      return false;
-    if (Second.WhitespaceRange.getBegin() != Second.WhitespaceRange.getEnd())
+
+    SmallVectorImpl<FormatToken *>::const_iterator First =
+        Tokens.end() - Kinds.size(); // Start of the last Kinds.size() tokens.
+    if (!First[0]->is(Kinds[0]))
       return false;
-    First.TokenText =
-        StringRef(First.TokenText.data(), First.TokenText.size() + 1);
-    First.ColumnWidth += 1;
-    Tokens.pop_back();
+    unsigned AddLength = 0; // Total text length of the tokens folded in.
+    for (unsigned i = 1; i < Kinds.size(); ++i) {
+      if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() !=
+                                         First[i]->WhitespaceRange.getEnd())
+        return false; // Wrong kind, or a non-empty whitespace range.
+      AddLength += First[i]->TokenText.size();
+    }
+    Tokens.resize(Tokens.size() - Kinds.size() + 1); // Keep only First[0].
+    First[0]->TokenText = StringRef(First[0]->TokenText.data(),
+                                    First[0]->TokenText.size() + AddLength);
+    First[0]->ColumnWidth += AddLength;
     return true;
   }
 
@@ -1194,6 +1259,17 @@
   }
 };
 
+static const char *getLanguageName(FormatStyle::LanguageKind Language) {
+  switch (Language) { // Names are for diagnostics and debug output.
+  case FormatStyle::LK_Cpp:
+    return "C++";
+  case FormatStyle::LK_JavaScript:
+    return "JavaScript";
+  default: // LK_Any, and any value not handled above.
+    return "Any";
+  }
+}
+
 class Formatter : public UnwrappedLineConsumer {
 public:
   Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
@@ -1206,6 +1282,9 @@
                        << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
                                                                : "unknown")
                        << "\n");
+    assert(Style.Language != FormatStyle::LK_Any);
+    DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
+                       << "\n");
   }
 
   tooling::Replacements format() {
@@ -1495,11 +1574,26 @@
     "parameters, e.g.:\n"
     "  -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
 
+static void fillLanguageByFileName(StringRef FileName, FormatStyle *Style) {
+  if (FileName.endswith_lower(".c") || FileName.endswith_lower(".h") ||
+      FileName.endswith_lower(".cpp") || FileName.endswith_lower(".hpp") ||
+      FileName.endswith_lower(".cc") || FileName.endswith_lower(".hh") ||
+      FileName.endswith_lower(".cxx") || FileName.endswith_lower(".hxx") ||
+      FileName.endswith_lower(".m") || FileName.endswith_lower(".mm")) {
+    Style->Language = FormatStyle::LK_Cpp; // C, C++ and Objective-C(++).
+  }
+  if (FileName.endswith_lower(".js")) { // Suffix checks ignore case.
+    Style->Language = FormatStyle::LK_JavaScript;
+  } // Any other extension leaves Style->Language as the caller set it.
+}
+
 FormatStyle getStyle(StringRef StyleName, StringRef FileName) {
+  // FIXME: Configure fallback style from outside (add a command line option).
   // Fallback style in case the rest of this function can't determine a style.
   StringRef FallbackStyle = "LLVM";
   FormatStyle Style;
   getPredefinedStyle(FallbackStyle, &Style);
+  fillLanguageByFileName(FileName, &Style);
 
   if (StyleName.startswith("{")) {
     // Parse YAML/JSON style from the command line.
@@ -1514,9 +1608,11 @@
     if (!getPredefinedStyle(StyleName, &Style))
       llvm::errs() << "Invalid value for -style, using " << FallbackStyle
                    << " style\n";
+    fillLanguageByFileName(FileName, &Style);
     return Style;
   }
 
+  SmallString<128> UnsuitableConfigFiles;
   SmallString<128> Path(FileName);
   llvm::sys::fs::make_absolute(Path);
   for (StringRef Directory = Path; !Directory.empty();
@@ -1548,16 +1644,27 @@
         continue;
       }
       if (llvm::error_code ec = parseConfiguration(Text->getBuffer(), &Style)) {
-        llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
-                     << "\n";
+        if (ec == llvm::errc::not_supported) {
+          if (!UnsuitableConfigFiles.empty())
+            UnsuitableConfigFiles.append(", ");
+          UnsuitableConfigFiles.append(ConfigFile);
+        } else {
+          llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
+                       << "\n";
+        }
         continue;
       }
       DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
       return Style;
     }
   }
   llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle
                << " style\n";
+  if (!UnsuitableConfigFiles.empty()) {
+    llvm::errs() << "Configuration file(s) do(es) not support "
+                 << getLanguageName(Style.Language) << ": "
+                 << UnsuitableConfigFiles << "\n";
+  }
   return Style;
 }
 
Index: test/Format/language-detection.cpp
===================================================================
--- /dev/null
+++ test/Format/language-detection.cpp
@@ -0,0 +1,7 @@
+// RUN: grep -Ev "// *[A-Z0-9_]+:" %s > %t.js
+// RUN: grep -Ev "// *[A-Z0-9_]+:" %s > %t.cpp
+// RUN: clang-format -style=llvm %t.js | FileCheck -strict-whitespace -check-prefix=CHECK1 %s
+// RUN: clang-format -style=llvm %t.cpp | FileCheck -strict-whitespace -check-prefix=CHECK2 %s
+// CHECK1: {{^a >>>= b;$}}
+// CHECK2: {{^a >> >= b;$}}
+a >>>= b;
Index: unittests/Format/FormatTest.cpp
===================================================================
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -6913,6 +6913,61 @@
   CHECK_PARSE("Standard: C++11", Standard, FormatStyle::LS_Cpp11);
   CHECK_PARSE("Standard: Auto", Standard, FormatStyle::LS_Auto);
 
+  Style.Language= FormatStyle::LK_Cpp;
+  CHECK_PARSE("Language: Cpp\n"
+              "IndentWidth: 12", IndentWidth, 12u);
+  EXPECT_EQ(parseConfiguration("Language: JavaScript\n"
+                               "IndentWidth: 34",
+                               &Style),
+            llvm::errc::not_supported);
+  EXPECT_EQ(Style.IndentWidth, 12u);
+  CHECK_PARSE("Language: Any\n"
+              "IndentWidth: 56", IndentWidth, 56u);
+  EXPECT_EQ(Style.Language, FormatStyle::LK_Cpp);
+
+  Style.Language = FormatStyle::LK_JavaScript;
+  CHECK_PARSE("Language: JavaScript\n"
+              "IndentWidth: 12", IndentWidth, 12u);
+  EXPECT_EQ(parseConfiguration("Language: Cpp\n"
+                               "IndentWidth: 34",
+                               &Style),
+            llvm::errc::not_supported);
+  EXPECT_EQ(Style.IndentWidth, 12u);
+  CHECK_PARSE("Language: Any\n"
+              "IndentWidth: 56", IndentWidth, 56u);
+  EXPECT_EQ(Style.Language, FormatStyle::LK_JavaScript);
+
+  CHECK_PARSE("---\n"
+              "Language: JavaScript\n"
+              "IndentWidth: 12\n"
+              "---\n"
+              "Language: Cpp\n"
+              "IndentWidth: 34\n"
+              "...\n",
+              IndentWidth, 12u);
+
+  Style.Language = FormatStyle::LK_Cpp;
+  CHECK_PARSE("---\n"
+              "Language: JavaScript\n"
+              "IndentWidth: 12\n"
+              "---\n"
+              "Language: Cpp\n"
+              "IndentWidth: 34\n"
+              "...\n",
+              IndentWidth, 34u);
+  CHECK_PARSE("---\n"
+              "Language: JavaScript\n"
+              "IndentWidth: 56\n"
+              "---\n"
+              "Language: Any\n"
+              "IndentWidth: 78\n"
+              "...\n",
+              IndentWidth, 78u);
+
+  // FIXME: Error out on multiple configs for the same language.
+
+  EXPECT_EQ(Style.Language, FormatStyle::LK_Cpp);
+
   Style.UseTab = FormatStyle::UT_ForIndentation;
   CHECK_PARSE("UseTab: false", UseTab, FormatStyle::UT_Never);
   CHECK_PARSE("UseTab: true", UseTab, FormatStyle::UT_Always);
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Reply via email to