MyDeveloperDay updated this revision to Diff 352744.
MyDeveloperDay added a comment.

I have reworked this patch to utilise the fact that clang-format can format a 
JSON object in javascript reasonably well.

This technique adds for JSON files only a replacement to the front of the JSON 
transforming:

  {
      "A": 1,
      "B": [ 2,3 ]
  }

into

  x = {
      "A": 1,
      "B": [ 2,3 ]
  }

This is then parsed by clang-format the a more traditional sense, allowing a 
similar technique as would be used when formatting the code, (but with some 
JSON specific rules, to make it follow primarily the style used by 
https://github.com/kapilratnani/JSON-Viewer (Notepad++ plugin, which at least 
I've used extensively)

After formatter, a subsequent replacement is merged to remove the x = from the 
front of the json, rendering it back to its original form.

  {
      "A": 1,
      "B": [ 2,3 ]
  }

I prefer to use this approach rather than our Support/JSON libraries which 
aren't really setup for formatting options, this give us greater flexibility 
later on if users want custom rules from brace wrapping.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D93528/new/

https://reviews.llvm.org/D93528

Files:
  clang/docs/ClangFormat.rst
  clang/docs/ClangFormatStyleOptions.rst
  clang/include/clang/Format/Format.h
  clang/lib/Format/Format.cpp
  clang/lib/Format/TokenAnnotator.cpp
  clang/tools/clang-format/ClangFormat.cpp
  clang/unittests/Format/CMakeLists.txt

Index: clang/unittests/Format/CMakeLists.txt
===================================================================
--- clang/unittests/Format/CMakeLists.txt
+++ clang/unittests/Format/CMakeLists.txt
@@ -9,6 +9,7 @@
   FormatTestCSharp.cpp
   FormatTestJS.cpp
   FormatTestJava.cpp
+  FormatTestJson.cpp
   FormatTestObjC.cpp
   FormatTestProto.cpp
   FormatTestRawStrings.cpp
Index: clang/tools/clang-format/ClangFormat.cpp
===================================================================
--- clang/tools/clang-format/ClangFormat.cpp
+++ clang/tools/clang-format/ClangFormat.cpp
@@ -411,6 +411,17 @@
   unsigned CursorPosition = Cursor;
   Replacements Replaces = sortIncludes(*FormatStyle, Code->getBuffer(), Ranges,
                                        AssumedFileName, &CursorPosition);
+
+  // To format JSON insert a variable to trick the code into thinking its
+  // JavaScript.
+  if (FormatStyle->isJson()) {
+    auto Err = Replaces.add(tooling::Replacement(
+        tooling::Replacement(AssumedFileName, 0, 0, "x = ")));
+    if (Err) {
+      llvm::errs() << "Bad Json variable insertion\n";
+    }
+  }
+
   auto ChangedCode = tooling::applyAllReplacements(Code->getBuffer(), Replaces);
   if (!ChangedCode) {
     llvm::errs() << llvm::toString(ChangedCode.takeError()) << "\n";
@@ -506,7 +517,8 @@
   cl::SetVersionPrinter(PrintVersion);
   cl::ParseCommandLineOptions(
       argc, argv,
-      "A tool to format C/C++/Java/JavaScript/Objective-C/Protobuf/C# code.\n\n"
+      "A tool to format C/C++/Java/JavaScript/JSON/Objective-C/Protobuf/C# "
+      "code.\n\n"
       "If no arguments are specified, it formats the code from standard input\n"
       "and writes the result to the standard output.\n"
       "If <file>s are given, it reformats the files. If -i is specified\n"
Index: clang/lib/Format/TokenAnnotator.cpp
===================================================================
--- clang/lib/Format/TokenAnnotator.cpp
+++ clang/lib/Format/TokenAnnotator.cpp
@@ -2900,6 +2900,8 @@
                                           const FormatToken &Right) {
   if (Left.is(tok::kw_return) && Right.isNot(tok::semi))
     return true;
+  if (Style.isJson() && Left.is(tok::string_literal) && Right.is(tok::colon))
+    return false;
   if (Left.is(Keywords.kw_assert) && Style.Language == FormatStyle::LK_Java)
     return true;
   if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
@@ -3221,6 +3223,9 @@
     // and "%d %d"
     if (Left.is(tok::numeric_constant) && Right.is(tok::percent))
       return HasExistingWhitespace();
+  } else if (Style.isJson()) {
+    if (Right.is(tok::colon))
+      return false;
   } else if (Style.isCSharp()) {
     // Require spaces around '{' and  before '}' unless they appear in
     // interpolated strings. Interpolated strings are merged into a single token
@@ -3695,6 +3700,26 @@
       return true;
   }
 
+  // Basic JSON newline processing.
+  if (Style.isJson()) {
+    // Always break after a JSON record opener.
+    // {
+    // }
+    if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace))
+      return true;
+    // Always break after a JSON array opener.
+    // [
+    // ]
+    if (Left.is(TT_ArrayInitializerLSquare) && Left.is(tok::l_square) &&
+        !Right.is(tok::r_square))
+      return true;
+    // Always break afer successive entries.
+    // 1,
+    // 2
+    if (Left.is(tok::comma))
+      return true;
+  }
+
   // If the last token before a '}', ']', or ')' is a comma or a trailing
   // comment, the intention is to insert a line break after it in order to make
   // shuffling around entries easier. Import statements, especially in
Index: clang/lib/Format/Format.cpp
===================================================================
--- clang/lib/Format/Format.cpp
+++ clang/lib/Format/Format.cpp
@@ -63,6 +63,7 @@
     IO.enumCase(Value, "TableGen", FormatStyle::LK_TableGen);
     IO.enumCase(Value, "TextProto", FormatStyle::LK_TextProto);
     IO.enumCase(Value, "CSharp", FormatStyle::LK_CSharp);
+    IO.enumCase(Value, "Json", FormatStyle::LK_Json);
   }
 };
 
@@ -2795,6 +2796,25 @@
   if (Expanded.Language == FormatStyle::LK_JavaScript && isMpegTS(Code))
     return {tooling::Replacements(), 0};
 
+  // JSON only needs the formatting passing.
+  if (Style.isJson()) {
+    std::vector<tooling::Range> Ranges(1, tooling::Range(0, Code.size()));
+    auto Env =
+        std::make_unique<Environment>(Code, FileName, Ranges, FirstStartColumn,
+                                      NextStartColumn, LastStartColumn);
+    // Perform the actual formatting pass.
+    tooling::Replacements Replaces =
+        Formatter(*Env, Style, Status).process().first;
+    // add a replacement to remove the "x = " from the result.
+    if (!Replaces.add(tooling::Replacement(FileName, 0, 4, ""))) {
+      // apply the reformatting changes and the removal of "x = ".
+      if (applyAllReplacements(Code, Replaces)) {
+        return {Replaces, 0};
+      }
+    }
+    return {tooling::Replacements(), 0};
+  }
+
   typedef std::function<std::pair<tooling::Replacements, unsigned>(
       const Environment &)>
       AnalyzerPass;
@@ -2960,6 +2980,8 @@
     return FormatStyle::LK_TableGen;
   if (FileName.endswith_lower(".cs"))
     return FormatStyle::LK_CSharp;
+  if (FileName.endswith_lower(".json"))
+    return FormatStyle::LK_Json;
   return FormatStyle::LK_Cpp;
 }
 
Index: clang/include/clang/Format/Format.h
===================================================================
--- clang/include/clang/Format/Format.h
+++ clang/include/clang/Format/Format.h
@@ -2473,6 +2473,8 @@
     LK_Java,
     /// Should be used for JavaScript.
     LK_JavaScript,
+    /// Should be used for JSON.
+    LK_Json,
     /// Should be used for Objective-C, Objective-C++.
     LK_ObjC,
     /// Should be used for Protocol Buffers
@@ -2486,6 +2488,7 @@
   };
   bool isCpp() const { return Language == LK_Cpp || Language == LK_ObjC; }
   bool isCSharp() const { return Language == LK_CSharp; }
+  bool isJson() const { return Language == LK_Json; }
 
   /// Language, this format style is targeted at.
   LanguageKind Language;
@@ -3715,6 +3718,8 @@
     return "Java";
   case FormatStyle::LK_JavaScript:
     return "JavaScript";
+  case FormatStyle::LK_Json:
+    return "Json";
   case FormatStyle::LK_Proto:
     return "Proto";
   case FormatStyle::LK_TableGen:
Index: clang/docs/ClangFormatStyleOptions.rst
===================================================================
--- clang/docs/ClangFormatStyleOptions.rst
+++ clang/docs/ClangFormatStyleOptions.rst
@@ -2861,6 +2861,9 @@
   * ``LK_JavaScript`` (in configuration: ``JavaScript``)
     Should be used for JavaScript.
 
+  * ``LK_Json`` (in configuration: ``Json``)
+    Should be used for JSON.
+
   * ``LK_ObjC`` (in configuration: ``ObjC``)
     Should be used for Objective-C, Objective-C++.
 
Index: clang/docs/ClangFormat.rst
===================================================================
--- clang/docs/ClangFormat.rst
+++ clang/docs/ClangFormat.rst
@@ -11,12 +11,12 @@
 ===============
 
 :program:`clang-format` is located in `clang/tools/clang-format` and can be used
-to format C/C++/Java/JavaScript/Objective-C/Protobuf/C# code.
+to format C/C++/Java/JavaScript/JSON/Objective-C/Protobuf/C# code.
 
 .. code-block:: console
 
   $ clang-format -help
-  OVERVIEW: A tool to format C/C++/Java/JavaScript/Objective-C/Protobuf/C# code.
+  OVERVIEW: A tool to format C/C++/Java/JavaScript/JSON/Objective-C/Protobuf/C# code.
 
   If no arguments are specified, it formats the code from standard input
   and writes the result to the standard output.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to