Re: [cfe-commits] [PATCH] Initial version of formatting library

Daniel Jasper Fri, 16 Nov 2012 11:47:48 -0800

Hi silvas, klimek, doug.gregor,

http://llvm-reviews.chandlerc.com/D80


CHANGE SINCE LAST DIFF
  http://llvm-reviews.chandlerc.com/D80?vs=333&id=337#toc

Files:
  include/clang/Format/Format.h
  lib/CMakeLists.txt
  lib/Format/CMakeLists.txt
  lib/Format/Format.cpp
  lib/Format/Makefile
  lib/Format/UnwrappedLineParser.cpp
  lib/Format/UnwrappedLineParser.h
  lib/Makefile
  unittests/CMakeLists.txt
  unittests/Format/CMakeLists.txt
  unittests/Format/FormatTest.cpp
  unittests/Format/Makefile

Index: include/clang/Format/Format.h
===================================================================
--- /dev/null
+++ include/clang/Format/Format.h
@@ -0,0 +1,47 @@
+//===--- Format.h - Format C++ code -----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  \file
+//  Various functions to configurably format source code.
+//
+//  This is EXPERIMENTAL code under heavy development. It is not yet in a state
+//  where it can be used to format real code.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FORMAT_FORMAT_H
+#define LLVM_CLANG_FORMAT_FORMAT_H
+
+#include "clang/Frontend/FrontendAction.h"
+#include "clang/Tooling/Refactoring.h"
+
+namespace clang {
+
+class Lexer;
+class SourceManager;
+
+namespace format {
+
+/// \brief A character range of source code.
+struct CodeRange {
+  CodeRange(unsigned Offset, unsigned Length)
+    : Offset(Offset), Length(Length) {}
+
+  unsigned Offset;
+  unsigned Length;
+};
+
+/// \brief Reformats the given Ranges in the token stream coming out of \c Lex.
+tooling::Replacements reformat(Lexer &Lex, SourceManager &SourceMgr,
+                               std::vector<CodeRange> Ranges);
+
+} // end namespace format
+} // end namespace clang
+
+#endif // LLVM_CLANG_FORMAT_FORMAT_H
Index: lib/CMakeLists.txt
===================================================================
--- lib/CMakeLists.txt
+++ lib/CMakeLists.txt
@@ -16,3 +16,4 @@
 add_subdirectory(FrontendTool)
 add_subdirectory(Tooling)
 add_subdirectory(StaticAnalyzer)
+add_subdirectory(Format)
Index: lib/Format/CMakeLists.txt
===================================================================
--- /dev/null
+++ lib/Format/CMakeLists.txt
@@ -0,0 +1,24 @@
+set(LLVM_LINK_COMPONENTS support)
+
+add_clang_library(clangFormat
+  UnwrappedLineParser.cpp
+  Format.cpp
+  )
+
+add_dependencies(clangFormat
+  ClangAttrClasses
+  ClangAttrList
+  ClangDeclNodes
+  ClangDiagnosticCommon
+  ClangDiagnosticFrontend
+  ClangStmtNodes
+  )
+
+target_link_libraries(clangFormat
+  clangBasic
+  clangFrontend
+  clangAST
+  clangASTMatchers
+  clangRewriteCore
+  clangRewriteFrontend
+  )
Index: lib/Format/Format.cpp
===================================================================
--- /dev/null
+++ lib/Format/Format.cpp
@@ -0,0 +1,252 @@
+//===--- Format.cpp - Format C++ code -------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  \file
+//  \brief This file implements functions declared in Format.h. This will be
+//  split into separate files as we go.
+//
+//  This is EXPERIMENTAL code under heavy development. It is not yet in a state
+//  where it can be used to format real code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Format/Format.h"
+
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Lexer.h"
+
+#include "UnwrappedLineParser.h"
+
+namespace clang {
+namespace format {
+
+using llvm::MutableArrayRef;
+
+class UnwrappedLineFormatter {
+public:
+  UnwrappedLineFormatter(SourceManager &SourceMgr,
+                         const UnwrappedLine &Line,
+                         tooling::Replacements &Replaces)
+      : SourceMgr(SourceMgr), Line(Line), Replaces(Replaces) {}
+
+  void format() {
+    addNewline(Line.Tokens[0], Line.Level);
+    count = 0;
+    IndentState State;
+    State.ParenLevel = 0;
+    State.Column = Line.Level * 2 + Line.Tokens[0].Tok.getLength();
+
+    State.UsedIndent.push_back(Line.Level * 2);
+    State.Indent.push_back(Line.Level * 2 + 4);
+
+    // Start iterating at 1 as we have already formatted token #0 above.
+    for (unsigned i = 1, n = Line.Tokens.size(); i != n; ++i) {
+      bool InsertNewLine = Line.Tokens[i].NewlinesBefore > 0;
+      if (!InsertNewLine) {
+        unsigned NoBreak = numLines(State, false, i + 1,
+                               Line.Tokens.size()-1, 100000);
+        unsigned Break = numLines(State, true, i + 1, Line.Tokens.size()-1, 100000);
+        InsertNewLine = Break < NoBreak;
+      }
+      addToken(i, InsertNewLine, false, State);
+    }
+  }
+
+private:
+  /// \brief The current state when indenting an unwrapped line.
+  ///
+  /// As the indenting tries different combinations this is copied by value.
+  struct IndentState {
+    /// \brief The current parenthesis level, i.e. the number of opening minus
+    /// the number of closing parentheses left of the current position.
+    unsigned ParenLevel;
+
+    /// \brief The number of used columns in the current line.
+    unsigned Column;
+
+    /// \brief The position to which a specific parenthesis level needs to be
+    /// indented.
+    std::vector<unsigned> Indent;
+
+    /// \brief The indents actively used by a parenthesis level.
+    ///
+    /// This is used to prevent situations like:
+    /// \code
+    ///   callA(callB(
+    ///       callC()),
+    ///         callD()).
+    /// \endcode
+    /// We might (configurably) not want callC() to be indented less than
+    /// callD() as it has a higher indent level.
+    std::vector<unsigned> UsedIndent;
+  };
+
+  /// \brief Appends the token at \p Index to \p State; edits only if !DryRun.
+  void addToken(unsigned Index, bool Newline, bool DryRun, IndentState &State) {
+    if (Line.Tokens[Index].Tok.getKind() == tok::l_paren) {
+      State.UsedIndent.push_back(State.UsedIndent.back());
+      State.Indent.push_back(State.UsedIndent.back() + 4);
+      ++State.ParenLevel;
+    }
+    if (Newline) {
+      if (!DryRun)
+        replaceWhitespace(Line.Tokens[Index], 1,
+                          State.Indent[State.ParenLevel]);
+      State.Column = State.Indent[State.ParenLevel] +
+          Line.Tokens[Index].Tok.getLength();
+      State.UsedIndent[State.ParenLevel] = State.Indent[State.ParenLevel];
+    } else {
+      bool Space = spaceRequiredBetween(Line.Tokens[Index - 1].Tok,
+                                        Line.Tokens[Index].Tok);
+      // The spacing is derived from the two tokens; whatever whitespace the
+      // input had between them is replaced by zero or one space.
+      if (!DryRun)
+        replaceWhitespace(Line.Tokens[Index], 0, Space ? 1 : 0);
+      if (Line.Tokens[Index - 1].Tok.getKind() == tok::l_paren)
+        State.Indent[State.ParenLevel] = State.Column;
+      State.Column += Line.Tokens[Index].Tok.getLength() + (Space ? 1 : 0);
+    }
+
+    if (Line.Tokens[Index].Tok.getKind() == tok::r_paren) {
+      // FIXME: We should be able to handle this kind of code.
+      assert(State.ParenLevel != 0 && "Unexpected ')'.");
+      --State.ParenLevel;
+      State.Indent.pop_back(); // NOTE(review): UsedIndent not popped?
+    }
+  }
+
+  bool canBreakAfter(Token Tok) { // Breaks are allowed after ',' ';' and '('.
+    const tok::TokenKind Kind = Tok.getKind();
+    return Kind == tok::comma || Kind == tok::semi || Kind == tok::l_paren;
+  }
+
+  /// \brief Calculate the number of lines needed to format the remaining part
+  /// of the unwrapped line; returns 10000 for a dead end (column limit or
+  /// line budget exceeded).
+  ///
+  /// Assumes that formatting the tokens before token \p Index - 1 has led to
+  /// the \c IndentState \p State. If \p NewLine is set, a new line is added
+  /// in front of that token.
+  ///
+  /// \param EndIndex is the last token belonging to the unwrapped line.
+  /// \param StopAt is used for optimization. If we can determine that we'll
+  /// definitely need at least \p StopAt additional lines, we already know of a
+  /// better solution.
+  unsigned numLines(IndentState State, bool NewLine, unsigned Index,
+                    unsigned EndIndex, unsigned StopAt) {
+    count++;
+
+    // We are at the end of the unwrapped line, so we don't need any more lines.
+    if (Index > EndIndex)
+      return 0;
+
+    addToken(Index - 1, NewLine, true, State);
+    // Exceeding 80 columns is bad.
+    if (State.Column > 80)
+      return 10000;
+    // A break costs one line. Check first: StopAt is unsigned and may be 0
+    // here, so decrementing it unconditionally would wrap and defeat pruning.
+    if (StopAt < (NewLine ? 2u : 1u))
+      return 10000;
+    if (NewLine)
+      --StopAt;
+
+    unsigned NoBreak = numLines(State, false, Index + 1, EndIndex, StopAt);
+    if (!canBreakAfter(Line.Tokens[Index - 1].Tok))
+      return NoBreak + (NewLine ? 1 : 0);
+    unsigned Break = numLines(State, true, Index + 1, EndIndex,
+                              std::min(StopAt, NoBreak));
+    return std::min(NoBreak, Break) + (NewLine ? 1 : 0);
+  }
+
+  /// \brief Replaces the whitespace in front of \p Tok. Only call once for
+  /// each \c FormatToken.
+  void replaceWhitespace(const FormatToken &Tok, unsigned NewLines,
+                         unsigned Spaces) {
+    Replaces.insert(tooling::Replacement(SourceMgr, Tok.WhiteSpaceStart,
+                                         Tok.WhiteSpaceLength,
+                                         std::string(NewLines, '\n') +
+                                         std::string(Spaces, ' ')));
+  }
+
+  bool isIfForOrWhile(Token Tok) {
+    if (Tok.isNot(tok::raw_identifier))
+      return false;
+    const char *Begin = SourceMgr.getCharacterData(Tok.getLocation());
+    StringRef Spelling(Begin, Tok.getLength());
+    return Spelling == "if" || Spelling == "for" || Spelling == "while";
+  }
+
+  bool spaceRequiredBetween(Token Left, Token Right) {
+    // No space around '.' and ':', after '(' or before ')', ';' and ','.
+    if (Left.is(tok::period) || Left.is(tok::colon) || Left.is(tok::l_paren))
+      return false;
+    if (Right.is(tok::period) || Right.is(tok::colon) ||
+        Right.is(tok::r_paren) || Right.is(tok::semi) || Right.is(tok::comma))
+      return false;
+
+    // No space between "++" and an identifier that follows it.
+    if (Left.is(tok::plusplus) && Right.is(tok::raw_identifier))
+      return false;
+
+    // Only "if", "for" and "while" are separated from their '('.
+    if (Right.is(tok::l_paren))
+      return isIfForOrWhile(Left);
+    return true;
+  }
+
+  /// \brief Add a new line and the required indent before \p Token.
+  void addNewline(const FormatToken &Token, unsigned Level) {
+    // Force a line break unless the whitespace starts at file offset 0.
+    if (Token.WhiteSpaceStart.isValid()) {
+      unsigned Newlines = Token.NewlinesBefore;
+      unsigned Offset = SourceMgr.getFileOffset(Token.WhiteSpaceStart);
+      if (Newlines == 0 && Offset != 0)
+        Newlines = 1;
+      replaceWhitespace(Token, Newlines, Level * 2);
+    }
+  }
+
+  SourceManager &SourceMgr;
+  const UnwrappedLine &Line;
+  tooling::Replacements &Replaces;
+  unsigned int count;
+};
+
+class Formatter : public UnwrappedLineConsumer {
+public:
+  Formatter(Lexer &Lex, SourceManager &SourceMgr,
+            const std::vector<CodeRange> &Ranges) // FIXME: Ranges is ignored.
+      : Lex(Lex), SourceMgr(SourceMgr) {}
+
+  tooling::Replacements format() { // Parses and formats all of the input.
+    UnwrappedLineParser Parser(Lex, SourceMgr, *this);
+    Parser.parse();
+    return Replaces;
+  }
+
+private:
+  virtual void formatUnwrappedLine(const UnwrappedLine &TheLine) {
+    UnwrappedLineFormatter Formatter(SourceMgr, TheLine, Replaces);
+    Formatter.format();
+  }
+
+  Lexer &Lex;
+  SourceManager &SourceMgr;
+  tooling::Replacements Replaces;
+};
+
+tooling::Replacements reformat(Lexer &Lex, SourceManager &SourceMgr,
+                               std::vector<CodeRange> Ranges) {
+  Formatter formatter(Lex, SourceMgr, Ranges);
+  return formatter.format();
+}
+
+}  // namespace format
+}  // namespace clang
Index: lib/Format/Makefile
===================================================================
--- /dev/null
+++ lib/Format/Makefile
@@ -0,0 +1,13 @@
+##===- clang/lib/Format/Makefile ----------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+CLANG_LEVEL := ../..
+LIBRARYNAME := clangFormat
+
+include $(CLANG_LEVEL)/Makefile
Index: lib/Format/UnwrappedLineParser.cpp
===================================================================
--- /dev/null
+++ lib/Format/UnwrappedLineParser.cpp
@@ -0,0 +1,204 @@
+//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  \file
+//  \brief This file contains the implementation of the UnwrappedLineParser,
+//  which turns a stream of tokens into UnwrappedLines.
+//
+//  This is EXPERIMENTAL code under heavy development. It is not yet in a state
+//  where it can be used to format real code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "UnwrappedLineParser.h"
+
+#include "llvm/Support/raw_ostream.h"
+
+namespace clang {
+namespace format {
+
+UnwrappedLineParser::UnwrappedLineParser(Lexer &Lex, SourceManager &SourceMgr,
+                                         UnwrappedLineConsumer &Callback)
+    : Lex(Lex), SourceMgr(SourceMgr), Callback(Callback) {
+  Lex.SetKeepWhitespaceMode(true);
+}
+
+void UnwrappedLineParser::parse() {
+  parseToken();
+  parseLevel();
+}
+
+void UnwrappedLineParser::parseLevel() {
+  do {
+    switch(FormatTok.Tok.getKind()) {
+      case tok::hash:
+        parsePPDirective();
+        break;
+      case tok::comment:
+        parseComment();
+        break;
+      case tok::l_brace:
+        parseBlock();
+        addUnwrappedLine();
+        break;
+      case tok::r_brace:
+        return;
+      default:
+        parseStatement();
+        break;
+    }
+  } while (!eof());
+}
+
+void UnwrappedLineParser::parseBlock() {
+  nextToken();
+  addUnwrappedLine();
+  ++Line.Level;
+  parseLevel();
+  --Line.Level;
+  if (FormatTok.Tok.getKind() != tok::r_brace) abort();
+  nextToken();
+  if (FormatTok.Tok.getKind() == tok::semi)
+    nextToken();
+}
+
+void UnwrappedLineParser::parsePPDirective() {
+  while (!eof()) {
+    nextToken();
+    if (FormatTok.NewlinesBefore > 0) return;
+  }
+}
+
+void UnwrappedLineParser::parseComment() {
+  while (!eof()) {
+    nextToken();
+    if (FormatTok.NewlinesBefore > 0) {
+      addUnwrappedLine();
+      return;
+    }
+  }
+}
+
+void UnwrappedLineParser::parseStatement() {
+  do {
+    switch (FormatTok.Tok.getKind()) {
+      case tok::semi:
+        nextToken();
+        addUnwrappedLine();
+        return;
+      case tok::l_paren:
+        parseParens();
+        break;
+      case tok::l_brace:
+        parseBlock();
+        addUnwrappedLine();
+        return;
+      case tok::raw_identifier:
+        if (tokenText() == "if") {
+          parseIfThenElse();
+          return;
+        } // Otherwise fall through: consume it like any other token.
+      default:
+        nextToken();
+        break;
+    }
+  } while (!eof());
+} 
+
+void UnwrappedLineParser::parseParens() {
+  assert(FormatTok.Tok.getKind() == tok::l_paren && "'(' expected.");
+  nextToken();
+  do {
+    switch (FormatTok.Tok.getKind()) {
+      case tok::l_paren:
+        parseParens();
+        break;
+      case tok::r_paren:
+        nextToken();
+        return;
+      default:
+        nextToken();
+        break;
+    }
+  } while (!eof());
+}
+
+void UnwrappedLineParser::parseIfThenElse() {
+  assert(FormatTok.Tok.getKind() == tok::raw_identifier &&
+         "Identifier expected");
+  nextToken();
+  parseParens();
+  bool NeedsUnwrappedLine = false;
+  if (FormatTok.Tok.getKind() == tok::l_brace) {
+    parseBlock();
+    NeedsUnwrappedLine = true;
+  } else {
+    addUnwrappedLine();
+    ++Line.Level;
+    parseStatement();
+    --Line.Level;
+  }
+  if (FormatTok.Tok.is(tok::raw_identifier) && tokenText() == "else") {
+    nextToken();
+    if (FormatTok.Tok.getKind() == tok::l_brace) {
+      parseBlock();
+      addUnwrappedLine();
+    } else {
+      addUnwrappedLine();
+      ++Line.Level;
+      parseStatement();
+      --Line.Level;
+    }
+  } else if (NeedsUnwrappedLine) {
+    addUnwrappedLine();
+  }
+}
+
+void UnwrappedLineParser::addUnwrappedLine() {
+  Callback.formatUnwrappedLine(Line);
+  Line.Tokens.clear();
+}
+
+bool UnwrappedLineParser::eof() const {
+  return FormatTok.Tok.getKind() == tok::eof;
+}
+
+void UnwrappedLineParser::nextToken() {
+  if (eof())
+    return;
+  Line.Tokens.push_back(FormatTok);
+  parseToken();
+}
+
+/// Lexes the next token into FormatTok and records the whitespace before it.
+void UnwrappedLineParser::parseToken() {
+  FormatTok = FormatToken();
+  Lex.LexFromRawLexer(FormatTok.Tok);
+  FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation();
+
+  // tok::unknown tokens are treated as whitespace; accumulate them until a
+  // significant token is found.
+  while (FormatTok.Tok.getKind() == tok::unknown) {
+    // Count every line break, not merely whether the run contains one.
+    FormatTok.NewlinesBefore += tokenText().count('\n');
+    FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength();
+
+    if (eof()) return;
+    Lex.LexFromRawLexer(FormatTok.Tok);
+  }
+}
+
+StringRef UnwrappedLineParser::tokenText() {
+  const Token &Current = FormatTok.Tok;
+  const char *Begin = SourceMgr.getCharacterData(Current.getLocation());
+  return StringRef(Begin, Current.getLength());
+}
+
+} // end namespace format
+} // end namespace clang
Index: lib/Format/UnwrappedLineParser.h
===================================================================
--- /dev/null
+++ lib/Format/UnwrappedLineParser.h
@@ -0,0 +1,108 @@
+//===--- UnwrappedLineParser.h - Format C++ code ----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  \file
+//  \brief This file contains the declaration of the UnwrappedLineParser,
+//  which turns a stream of tokens into UnwrappedLines.
+//
+//  This is EXPERIMENTAL code under heavy development. It is not yet in a state
+//  where it can be used to format real code.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H
+#define LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H
+
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Lexer.h"
+
+namespace clang {
+namespace format {
+
+/// \brief A wrapper around a \c Token storing information about the
+/// whitespace characters preceding it.
+struct FormatToken {
+  FormatToken() : NewlinesBefore(0), WhiteSpaceLength(0) {}
+
+  /// \brief The \c Token.
+  Token Tok;
+
+  /// \brief The number of newlines immediately before the \c Token.
+  ///
+  /// This can be used to determine what the user wrote in the original code
+  /// and thereby e.g. leave an empty line between two function definitions.
+  unsigned NewlinesBefore;
+
+  /// \brief The location of the start of the whitespace immediately preceding
+  /// the \c Token.
+  ///
+  /// Used together with \c WhiteSpaceLength to create a \c Replacement.
+  SourceLocation WhiteSpaceStart;
+
+  /// \brief The length in characters of the whitespace immediately preceding
+  /// the \c Token.
+  unsigned WhiteSpaceLength;
+};
+
+/// \brief An unwrapped line is a sequence of \c Token, that we would like to
+/// put on a single line if there was no column limit.
+///
+/// This is used as a main interface between the \c UnwrappedLineParser and the
+/// \c UnwrappedLineFormatter. The key property is that changing the formatting
+/// within an unwrapped line does not affect any other unwrapped lines.
+struct UnwrappedLine {
+  UnwrappedLine() : Level(0) {}
+
+  /// \brief The \c Token comprising this \c UnwrappedLine.
+  SmallVector<FormatToken, 16> Tokens;
+
+  /// \brief The indent level of the \c UnwrappedLine.
+  unsigned Level;
+};
+
+class UnwrappedLineConsumer { // NOTE(review): polymorphic, no virtual dtor.
+public:
+  virtual void formatUnwrappedLine(const UnwrappedLine &Line) = 0;
+};
+
+class UnwrappedLineParser {
+public:
+  UnwrappedLineParser(Lexer &Lex, SourceManager &SourceMgr,
+                     UnwrappedLineConsumer &Callback);
+
+  void parse();
+
+private:
+  void parseLevel();
+  void parseBlock();
+  void parsePPDirective();
+  void parseComment();
+  void parseStatement();
+  void parseParens();
+  void parseIfThenElse();
+  void addUnwrappedLine();
+  bool eof() const;
+  void nextToken();
+  void parseToken();
+
+  /// Returns the text of \c FormatTok.
+  StringRef tokenText();
+
+  UnwrappedLine Line;
+  FormatToken FormatTok;
+
+  Lexer &Lex;
+  SourceManager &SourceMgr;
+  UnwrappedLineConsumer &Callback;
+};
+
+} // end namespace format
+} // end namespace clang
+
+#endif // LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H
Index: lib/Makefile
===================================================================
--- lib/Makefile
+++ lib/Makefile
@@ -10,7 +10,7 @@
 
 PARALLEL_DIRS = Headers Basic Lex Parse AST ASTMatchers Sema CodeGen Analysis \
                 StaticAnalyzer Edit Rewrite ARCMigrate Serialization Frontend \
-                FrontendTool Tooling Driver
+                FrontendTool Tooling Driver Format
 
 include $(CLANG_LEVEL)/Makefile
 
Index: unittests/CMakeLists.txt
===================================================================
--- unittests/CMakeLists.txt
+++ unittests/CMakeLists.txt
@@ -15,3 +15,4 @@
 add_subdirectory(Lex)
 add_subdirectory(Frontend)
 add_subdirectory(Tooling)
+add_subdirectory(Format)
Index: unittests/Format/CMakeLists.txt
===================================================================
--- /dev/null
+++ unittests/Format/CMakeLists.txt
@@ -0,0 +1,17 @@
+set(LLVM_LINK_COMPONENTS
+  ${LLVM_TARGETS_TO_BUILD}
+  asmparser
+  support
+  mc
+  )
+
+add_clang_unittest(FormatTests
+  FormatTest.cpp
+  )
+
+target_link_libraries(FormatTests
+  clangAST
+  clangFormat
+  clangTooling
+  clangRewriteCore
+  )
Index: unittests/Format/FormatTest.cpp
===================================================================
--- /dev/null
+++ unittests/Format/FormatTest.cpp
@@ -0,0 +1,162 @@
+//===- unittest/Format/FormatTest.cpp - Formatting unit tests -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../Tooling/RewriterTestContext.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Format/Format.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace format {
+
+class FormatTest : public ::testing::Test {
+protected:
+  std::string format(llvm::StringRef Code, unsigned offset, unsigned length) {
+    RewriterTestContext Context;
+    FileID ID = Context.createInMemoryFile("input.cc", Code);
+    std::vector<CodeRange> Ranges(1, CodeRange(offset, length));
+    Lexer Lex(ID, Context.Sources.getBuffer(ID), Context.Sources,
+              LangOptions());
+    tooling::Replacements Replace = reformat(Lex, Context.Sources, Ranges);
+    EXPECT_TRUE(applyAllReplacements(Replace, Context.Rewrite));
+    //llvm::outs() << Context.getRewrittenText(ID) << "\n";
+    return Context.getRewrittenText(ID);
+  }
+};
+
+TEST_F(FormatTest, DoesNotChangeCorrectlyFormatedCode) {
+  EXPECT_EQ(";", format(";", 0, 1));
+}
+
+TEST_F(FormatTest, FormatsGlobalStatementsAt0) {
+  EXPECT_EQ("int i;", format("  int i;", 0, 1));
+  EXPECT_EQ("\nint i;", format(" \n\t \r  int i;", 0, 1));
+  EXPECT_EQ("int i;\nint j;", format("    int i; int j;", 0, 1));
+  EXPECT_EQ("int i;\nint j;", format("    int i;\n  int j;", 0, 1));
+}
+
+TEST_F(FormatTest, FormatsUnwrappedLinesAtFirstFormat) {
+  EXPECT_EQ("int\n    i;", format("int\ni;", 0, 1));
+}
+
+TEST_F(FormatTest, FormatsNestedBlockStatements) {
+  EXPECT_EQ("{\n  {\n    {\n    }\n  }\n}", format("{{{}}}", 0, 1));
+}
+
+TEST_F(FormatTest, FormatsForLoop) {
+  EXPECT_EQ("for (int i = 0; i < 10; ++i);",
+            format("for(int i=0;i<10;++i);", 0 , 1));
+  EXPECT_EQ("for (int i = 0;\n     i < 10;\n     ++i);",
+            format("for(int i=0;\ni<10;\n++i);", 0 , 1));
+}
+
+TEST_F(FormatTest, FormatsWhileLoop) {
+  EXPECT_EQ("while (true) {\n}", format("while(true){}", 0, 1));
+}
+
+TEST_F(FormatTest, FormatsNestedCall) {
+  EXPECT_EQ("Method(1,\n"
+            "       2(\n"
+            "           3));",
+            format("Method(1,\n2(\n3));", 0, 1));
+  EXPECT_EQ("Method(1(2,\n"
+            "         3()));", format("Method(1(2,\n3()));", 0, 1));
+}
+
+TEST_F(FormatTest, FormatsAwesomeMethodCall) {
+  EXPECT_EQ(
+      "SomeLongMethodName(SomeReallyLongMethod(CallOtherReallyLongMethod(\n"
+      "    parameter, parameter, parameter)), SecondLongCall(some_parameter));",
+      format(
+          "SomeLongMethodName(SomeReallyLongMethod(CallOtherReallyLongMethod(\n"
+          "parameter , parameter, parameter)), SecondLongCall("
+          "some_parameter) );", 0, 1));
+  EXPECT_EQ(
+      "SomeLongMethodName(SomeReallyLongMethod(CallOtherReallyLongMethod(\n"
+      "    parameter, parameter, parameter)), SecondLongCall(some_parameter));",
+      format(
+          "SomeLongMethodName(SomeReallyLongMethod(CallOtherReallyLongMethod("
+          "parameter,parameter,parameter)),SecondLongCall("
+          "some_parameter) );", 0, 1));
+}
+
+TEST_F(FormatTest, FormatsFunctionDefinition) {
+  EXPECT_EQ(
+      "void f(int a, int b, int c, int d, int e, int f, int g,"
+      " int h, int j, int f,\n       int c, int ddddddddddddd) {\n}",
+      format("void f(int a, int b, int c, int d, int e, int f, int g,"
+        "int h, int j, int f, int c, int ddddddddddddd) {}", 0, 1));
+}
+
+TEST_F(FormatTest, FormatIfWithoutCompountStatement) {
+  EXPECT_EQ(
+      "if (true)\n  f();\ng();",
+      format("if (true) f(); g();", 0, 1));
+  EXPECT_EQ(
+      "if (a)\n  if (b)\n    if (c)\n      g();\nh();",
+      format("if(a)if(b)if(c)g();h();", 0, 1));
+  EXPECT_EQ(
+      "if (a)\n  if (b) {\n    f();\n  }\ng();",
+      format("if(a)if(b) {f();}g();", 0, 1));
+}
+
+TEST_F(FormatTest, ParseIfThenElse) {
+  EXPECT_EQ(
+      "if (true)\n"
+      "  if (true)\n"
+      "    if (true)\n"
+      "      f();\n"
+      "    else\n"
+      "      g();\n"
+      "  else\n"
+      "    h();\n"
+      "else\n"
+      "  i();",
+      format("if(true)\nif(true)\nif(true)\nf();\n"
+             "else\ng();\nelse\nh();\nelse\ni();", 0, 1));
+  EXPECT_EQ(
+      "if (true)\n"
+      "  if (true)\n"
+      "    if (true) {\n"
+      "      if (true)\n"
+      "        f();\n"
+      "    } else {\n"
+      "      g();\n"
+      "    }\n"
+      "  else\n"
+      "    h();\n"
+      "else {\n"
+      "  i();\n"
+      "}",
+      format("if(true)\nif(true)\nif(true){\nif(true)f();\n"
+             "}else{\ng();\n}\nelse\nh();\nelse{\ni();\n}", 0, 1));
+}
+
+TEST_F(FormatTest, UnderstandsSingleLineComments) {
+  EXPECT_EQ(
+      "// line 1\n// line 2\nvoid f() {\n}\n",
+      format("// line 1\n// line 2\nvoid f() {}\n", 0, 1));
+
+  EXPECT_EQ(
+      "void f() {\n  // Doesn't do anything\n}",
+      format("void f() {\n// Doesn't do anything\n}", 0, 1));
+}
+
+TEST_F(FormatTest, DoesNotBreakSemiAfterClassDecl) {
+  EXPECT_EQ(
+      "class A {\n};\n", format("class A{};\n", 0, 1));
+}
+
+TEST_F(FormatTest, UnderstandsPPKeywords) {
+  EXPECT_EQ(
+      "#include <a.h>\\\nest\nb\n", format("#include <a.h>\\\nest\nb\n", 0, 1));
+}
+
+} // end namespace format
+} // end namespace clang
Index: unittests/Format/Makefile
===================================================================
--- /dev/null
+++ unittests/Format/Makefile
@@ -0,0 +1,19 @@
+##===- unittests/Format/Makefile ---------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+CLANG_LEVEL = ../..
+TESTNAME = Format
+include $(CLANG_LEVEL)/../../Makefile.config
+LINK_COMPONENTS := $(TARGETS_TO_BUILD) asmparser support mc
+USEDLIBS = clangFormat.a clangTooling.a clangFrontend.a clangSerialization.a \
+           clangDriver.a clangParse.a clangRewriteCore.a \
+           clangRewriteFrontend.a clangSema.a clangAnalysis.a clangEdit.a \
+           clangAST.a clangASTMatchers.a clangLex.a clangBasic.a
+
+include $(CLANG_LEVEL)/unittests/Makefile

_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Re: [cfe-commits] [PATCH] Initial version of formatting library

Reply via email to