Hi klimek,
This formatting library will be used by a stand-alone clang-format tool and can
also be used when writing other refactorings.
Manuel's original design document:
https://docs.google.com/a/google.com/document/d/1gpckL2U_6QuU9YW2L1ABsc4Fcogn5UngKk7fE5dDOoA/edit
This is still far from finished and not yet useful for formatting real code.
However, I think it should be checked in so that we can get as much feedback
as possible and collaborate on certain parts.
http://llvm-reviews.chandlerc.com/D80
Files:
include/clang/Format/Format.h
lib/CMakeLists.txt
lib/Format/CMakeLists.txt
lib/Format/Format.cpp
lib/Format/Makefile
lib/Makefile
unittests/CMakeLists.txt
unittests/Format/CMakeLists.txt
unittests/Format/FormatTest.cpp
unittests/Format/Makefile
Index: include/clang/Format/Format.h
===================================================================
--- /dev/null
+++ include/clang/Format/Format.h
@@ -0,0 +1,46 @@
+//===--- Format.h - Format C++ code -----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is EXPERIMENTAL code under heavy development. It is not yet in a state
+// where it can be used to format real code.
+//
+// Various functions to configurably format source code.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FORMAT_FORMAT_H
+#define LLVM_CLANG_FORMAT_FORMAT_H
+
+#include "clang/Frontend/FrontendAction.h"
+#include "clang/Tooling/Refactoring.h"
+
+namespace clang {
+
+class Lexer;
+class SourceManager;
+
+namespace format {
+
+/// \brief A range of source characters, given as a start offset and a length.
+struct CodeRange {
+ unsigned Offset; ///< Start offset as passed to the constructor.
+ unsigned Length; ///< Length as passed to the constructor.
+
+ CodeRange(unsigned Offset, unsigned Length)
+ : Offset(Offset), Length(Length) {}
+};
+
+/// \brief Reformats the given Ranges in the token stream coming out of \c Lex.
+tooling::Replacements reformat(Lexer &Lex, SourceManager &Sources,
+ std::vector<CodeRange> Ranges); // Returns the edits; nothing is applied here.
+
+} // end namespace format
+} // end namespace clang
+
+#endif // LLVM_CLANG_FORMAT_FORMAT_H
Index: lib/CMakeLists.txt
===================================================================
--- lib/CMakeLists.txt
+++ lib/CMakeLists.txt
@@ -16,3 +16,4 @@
add_subdirectory(FrontendTool)
add_subdirectory(Tooling)
add_subdirectory(StaticAnalyzer)
+add_subdirectory(Format)
Index: lib/Format/CMakeLists.txt
===================================================================
--- /dev/null
+++ lib/Format/CMakeLists.txt
@@ -0,0 +1,23 @@
+set(LLVM_LINK_COMPONENTS support)
+
+add_clang_library(clangFormat
+ Format.cpp
+ )
+
+add_dependencies(clangFormat
+ ClangAttrClasses
+ ClangAttrList
+ ClangDeclNodes
+ ClangDiagnosticCommon
+ ClangDiagnosticFrontend
+ ClangStmtNodes
+ )
+
+target_link_libraries(clangFormat
+ clangBasic
+ clangFrontend
+ clangAST
+ clangASTMatchers
+ clangRewriteCore
+ clangRewriteFrontend
+ )
Index: lib/Format/Format.cpp
===================================================================
--- /dev/null
+++ lib/Format/Format.cpp
@@ -0,0 +1,305 @@
+//===--- Format.cpp - Format C++ code -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is EXPERIMENTAL code under heavy development. It is not yet in a state
+// where it can be used to format real code.
+//
+// Implements Format.h.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Format/Format.h"
+
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Lexer.h"
+
+namespace clang {
+namespace format {
+
+// An unbreakable unit of tokens.
+// All characters between two such entities will be up for reformatting.
+struct UnbreakableEntity {
+ UnbreakableEntity() : Length(0), NewlinesBefore(0), WhiteSpaceLength(0) {}
+ unsigned Length;
+ std::vector<Token> Tokens;
+ unsigned NewlinesBefore;
+
+ unsigned WhiteSpaceLength;
+ SourceLocation WhiteSpaceStart;
+};
+
+struct FormatToken { // A token plus the whitespace run recorded in front of it.
+ Token Tok; // The lexed token itself.
+
+ unsigned NewlinesBefore; // Skipped tok::unknown tokens containing a '\n'.
+ unsigned WhiteSpaceLength; // Summed length of the skipped tok::unknown tokens.
+ SourceLocation WhiteSpaceStart; // Location where that skipped run begins.
+};
+
+class Formatter {
+public:
+ Formatter(Lexer &Lex, SourceManager &Sources,
+ const std::vector<CodeRange> &Ranges) // NOTE: Ranges is not used yet.
+ : Lex(Lex), Sources(Sources), EndOfFile(false), count(0) {}
+
+ tooling::Replacements format() { // Lexes every token, then formats them.
+ Lex.SetKeepWhitespaceMode(true);
+
+ FormatToken NextToken;
+ NextToken.WhiteSpaceLength = 0;
+
+ // Read token stream and turn tokens into FormatTokens.
+ while (!EndOfFile) {
+ NextToken.Tok = getNextToken();
+ // A zero WhiteSpaceLength means nothing has been skipped for this token
+ // yet, so the whitespace bookkeeping restarts at the current location.
+ if (NextToken.WhiteSpaceLength == 0) {
+ NextToken.WhiteSpaceStart = NextToken.Tok.getLocation();
+ NextToken.NewlinesBefore = 0;
+ }
+ if (NextToken.Tok.getKind() == tok::unknown) { // Skipped as whitespace.
+ StringRef Data(Sources.getCharacterData(NextToken.Tok.getLocation()),
+ NextToken.Tok.getLength());
+ if (std::find(Data.begin(), Data.end(), '\n') != Data.end())
+ ++NextToken.NewlinesBefore;
+ NextToken.WhiteSpaceLength += NextToken.Tok.getLength();
+ continue;
+ }
+ Tokens.push_back(NextToken);
+ NextToken.WhiteSpaceLength = 0;
+ }
+
+ splitAndFormatContinuations();
+
+ return Replaces;
+ }
+
+private:
+ /// \brief Split token stream into continuations, i.e. something that we'd
+ /// put on a single line if we didn't have a column limit.
+ void splitAndFormatContinuations() {
+ unsigned Level = 0; // Block nesting depth; passed on as the indent level.
+ unsigned ParenLevel = 0; // Depth of currently open parentheses.
+ unsigned ContinuationStart = 0; // First token of the current continuation.
+ std::vector<bool> IsCompound; // Per level: true for '{', false for if/for/while.
+ IsCompound.push_back(true);
+ for (unsigned i = 0; i < Tokens.size(); i++) {
+ if (Tokens[i].Tok.getKind() == tok::l_paren) {
+ ++ParenLevel;
+ } else if (Tokens[i].Tok.getKind() == tok::r_paren) {
+ --ParenLevel; // NOTE(review): wraps around on an unmatched ')'.
+ } else if (ParenLevel == 0) {
+ if (Tokens[i].Tok.getKind() == tok::l_brace ||
+ Tokens[i].Tok.getKind() == tok::r_brace ||
+ Tokens[i].Tok.getKind() == tok::semi) {
+ if (Tokens[i].Tok.getKind() == tok::r_brace) {
+ --Level; // NOTE(review): wraps around on an unmatched '}'.
+ IsCompound.pop_back();
+ addNewline(ContinuationStart, Level);
+ formatContinuation(ContinuationStart, i, Level);
+ // A closed block also ends every brace-less level directly around it.
+ while (!IsCompound.back()) {
+ --Level;
+ IsCompound.pop_back();
+ }
+ } else {
+ addNewline(ContinuationStart, Level);
+ formatContinuation(ContinuationStart, i, Level);
+ }
+ // A ';' ends every brace-less if/for/while level it was nested in.
+ while (Tokens[i].Tok.getKind() == tok::semi && !IsCompound.back()) {
+ --Level;
+ IsCompound.pop_back();
+ }
+ // A '{' opens a new compound level for the tokens that follow.
+ if (Tokens[i].Tok.getKind() == tok::l_brace) {
+ ++Level;
+ IsCompound.push_back(true);
+ }
+ // The next continuation starts right after this token.
+ ContinuationStart = i + 1;
+ }
+ // Otherwise a later token outside parentheses ends an if/for/while header.
+ else if (i != ContinuationStart) {
+ if (isIfForOrWhile(Tokens[ContinuationStart].Tok)) {
+ addNewline(ContinuationStart, Level);
+ formatContinuation(ContinuationStart, i - 1, Level);
+ ++Level;
+ IsCompound.push_back(false);
+ ContinuationStart = i;
+ }
+ }
+ }
+ }
+ }
+
+ // The current state when indenting a continuation.
+ struct IndentState {
+ unsigned ParenLevel; // Number of currently open parentheses.
+ unsigned Column; // Column reached after the last token that was added.
+ std::vector<unsigned> Indent; // Per paren level: indent for a line break.
+ std::vector<unsigned> UsedIndent; // Per paren level: indent of its last break.
+ };
+
+ // Append the token at 'Index' to 'State'; if 'DryRun', emit no replacement.
+ void addToken(unsigned Index, bool Newline, bool DryRun, IndentState &State) {
+ if (Tokens[Index].Tok.getKind() == tok::l_paren) {
+ State.UsedIndent.push_back(State.UsedIndent.back());
+ State.Indent.push_back(State.UsedIndent.back() + 4);
+ ++State.ParenLevel;
+ }
+ if (Newline) {
+ if (!DryRun)
+ setWhitespace(Tokens[Index], 1, State.Indent[State.ParenLevel]);
+ State.Column = State.Indent[State.ParenLevel] +
+ Tokens[Index].Tok.getLength();
+ State.UsedIndent[State.ParenLevel] = State.Indent[State.ParenLevel];
+ } else {
+ bool Space = spaceRequiredBetween(Tokens[Index - 1].Tok,
+ Tokens[Index].Tok);
+ if (!DryRun)
+ setWhitespace(Tokens[Index], 0, Space ? 1 : 0);
+ if (Tokens[Index - 1].Tok.getKind() == tok::l_paren)
+ State.Indent[State.ParenLevel] = State.Column;
+ State.Column += Tokens[Index].Tok.getLength() + (Space ? 1 : 0);
+ }
+ if (Tokens[Index].Tok.getKind() == tok::r_paren) {
+ --State.ParenLevel;
+ State.Indent.pop_back();
+ State.UsedIndent.pop_back(); // Mirrors the push done for the matching '('.
+ }
+ }
+
+ bool canBreakAfter(Token tok) { // Breaks may follow ',', ';' and '(' only.
+ const tok::TokenKind Kind = tok.getKind();
+ return Kind == tok::comma || Kind == tok::semi || Kind == tok::l_paren;
+ }
+
+ // Calculate the number of lines needed to format the remaining part of the
+ // continuation starting in the state 'State'. If 'NewLine' is set, a new line
+ // will be added after the previous token.
+ // 'EndIndex' is the last token belonging to the continuation.
+ // 'StopAt' is used for optimization. If we can determine that we'll
+ // definitely need more than 'StopAt' additional lines, we already know of a
+ // better solution.
+ int numLines(IndentState State, bool NewLine, unsigned Index,
+ unsigned EndIndex, int StopAt) {
+ count++; // Statistics only: one more state visited.
+
+ // We are at the end of the continuation, so we don't need any more lines.
+ if (Index > EndIndex)
+ return 0;
+ // Dry run on the by-value copy of 'State': the token placed is Index - 1.
+ addToken(Index - 1, NewLine, true, State);
+ if (NewLine)
+ --StopAt;
+
+ // Exceeding 80 columns is bad.
+ if (State.Column > 80)
+ return 10000;
+ // The line budget is used up; report the same prohibitive cost.
+ if (StopAt < 1)
+ return 10000;
+ // Cost without a break next; with a break only if one is allowed here.
+ int NoBreak = numLines(State, false, Index + 1, EndIndex, StopAt);
+ if (!canBreakAfter(Tokens[Index - 1].Tok))
+ return NoBreak + (NewLine ? 1 : 0);
+ int Break = numLines(State, true, Index + 1, EndIndex,
+ std::min(StopAt, NoBreak));
+ return std::min(NoBreak, Break) + (NewLine ? 1 : 0);
+ }
+
+ void formatContinuation(unsigned StartIndex, unsigned EndIndex,
+ unsigned Level) { // Lays out tokens StartIndex + 1 .. EndIndex.
+ count = 0; // Restart the visited-state statistics for this continuation.
+ IndentState State;
+ State.ParenLevel = 0;
+ State.Column = Level * 2 + Tokens[StartIndex].Tok.getLength();
+ State.UsedIndent.push_back(Level * 2); // Indent of the first line.
+ State.Indent.push_back(Level * 2 + 4); // Indent of wrapped lines.
+ for (unsigned i = StartIndex + 1; i <= EndIndex; ++i) {
+ bool InsertNewLine = Tokens[i].NewlinesBefore > 0; // Keep existing breaks.
+ if (!InsertNewLine) {
+ int NoBreak = numLines(State, false, i + 1, EndIndex, 100000);
+ int Break = numLines(State, true, i + 1, EndIndex, 100000);
+ InsertNewLine = Break < NoBreak; // Break only if it is strictly cheaper.
+ }
+ addToken(i, InsertNewLine, false, State);
+ }
+ llvm::outs() << "Tried combinations: " << count << "\n"; // Debug output.
+ }
+
+ void setWhitespace(const FormatToken& Tok, unsigned NewLines,
+ unsigned Spaces) {
+ // The new text is NewLines line breaks followed by Spaces blanks.
+ std::string Text = std::string(NewLines, '\n').append(Spaces, ' ');
+ Replaces.insert(tooling::Replacement(Sources, Tok.WhiteSpaceStart,
+ Tok.WhiteSpaceLength, Text));
+ }
+
+ bool isIfForOrWhile(Token Tok) {
+ // True iff Tok is a raw identifier spelled "if", "for" or "while".
+ if (!Tok.is(tok::raw_identifier)) return false;
+ const StringRef Spelling(Sources.getCharacterData(Tok.getLocation()),
+ Tok.getLength());
+ return Spelling == "if" || Spelling == "for" || Spelling == "while";
+ }
+
+ bool spaceRequiredBetween(Token Left, Token Right) {
+ // Decides whether a single space separates two adjacent tokens.
+ // Never around '.' or ':'.
+ const bool AroundTight = Left.is(tok::period) || Right.is(tok::period) ||
+ Left.is(tok::colon) || Right.is(tok::colon);
+ // Never after '(' nor between "++" and an identifier.
+ const bool AfterTight = Left.is(tok::l_paren) ||
+ (Left.is(tok::plusplus) && Right.is(tok::raw_identifier));
+ // Never before ')', ';' or ','.
+ const bool BeforeTight = Right.is(tok::r_paren) || Right.is(tok::semi) ||
+ Right.is(tok::comma);
+ if (AroundTight || AfterTight || BeforeTight)
+ return false;
+ // Before '(' only when it follows "if", "for" or "while".
+ return !Right.is(tok::l_paren) || isIfForOrWhile(Left);
+ }
+
+ Token getNextToken() { // Lexes one raw token and refreshes EndOfFile.
+ Token Next;
+ EndOfFile = Lex.LexFromRawLexer(Next);
+ return Next;
+ }
+
+ /// \brief Put token \c Index on a line of its own, indented for \c Level.
+ void addNewline(unsigned Index, unsigned Level) {
+ const FormatToken &Entry = Tokens[Index];
+ if (!Entry.WhiteSpaceStart.isValid())
+ return;
+ // Existing breaks are kept; every token but the first gets at least one.
+ const bool MustBreak = Entry.NewlinesBefore == 0 && Index != 0;
+ setWhitespace(Entry, MustBreak ? 1u : Entry.NewlinesBefore, Level * 2);
+ }
+
+ Lexer &Lex; // Raw lexer the tokens are read from.
+ SourceManager &Sources; // Used for token text and for building replacements.
+ bool EndOfFile; // Result of the most recent LexFromRawLexer call.
+ tooling::Replacements Replaces; // Whitespace edits collected so far.
+ std::vector<UnbreakableEntity> Entities; // Not referenced by the visible code.
+ std::vector<FormatToken> Tokens; // Every lexed token except tok::unknown ones.
+
+ // Count number of tried states visited when formatting a continuation.
+ unsigned int count;
+};
+
+tooling::Replacements reformat(Lexer &Lex, SourceManager &Sources,
+ std::vector<CodeRange> Ranges) {
+ // All work is delegated to a one-shot Formatter instance.
+ return Formatter(Lex, Sources, Ranges).format();
+}
+
+} // namespace format
+} // namespace clang
Index: lib/Format/Makefile
===================================================================
--- /dev/null
+++ lib/Format/Makefile
@@ -0,0 +1,13 @@
+##===- clang/lib/Format/Makefile ----------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+CLANG_LEVEL := ../..
+LIBRARYNAME := clangFormat
+# The name above has to match the clangFormat.a that the unit tests link.
+include $(CLANG_LEVEL)/Makefile
Index: lib/Makefile
===================================================================
--- lib/Makefile
+++ lib/Makefile
@@ -10,7 +10,7 @@
PARALLEL_DIRS = Headers Basic Lex Parse AST ASTMatchers Sema CodeGen Analysis \
StaticAnalyzer Edit Rewrite ARCMigrate Serialization Frontend \
- FrontendTool Tooling Driver
+ FrontendTool Tooling Driver Format
include $(CLANG_LEVEL)/Makefile
Index: unittests/CMakeLists.txt
===================================================================
--- unittests/CMakeLists.txt
+++ unittests/CMakeLists.txt
@@ -15,3 +15,4 @@
add_subdirectory(Lex)
add_subdirectory(Frontend)
add_subdirectory(Tooling)
+add_subdirectory(Format)
Index: unittests/Format/CMakeLists.txt
===================================================================
--- /dev/null
+++ unittests/Format/CMakeLists.txt
@@ -0,0 +1,17 @@
+set(LLVM_LINK_COMPONENTS
+ ${LLVM_TARGETS_TO_BUILD}
+ asmparser
+ support
+ mc
+ )
+
+add_clang_unittest(FormatTests
+ FormatTest.cpp
+ )
+
+target_link_libraries(FormatTests
+ clangAST
+ clangFormat
+ clangTooling
+ clangRewriteCore
+ )
Index: unittests/Format/FormatTest.cpp
===================================================================
--- /dev/null
+++ unittests/Format/FormatTest.cpp
@@ -0,0 +1,110 @@
+//===- unittest/Format/FormatTest.cpp - Formatting unit tests -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../Tooling/RewriterTestContext.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Format/Format.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace format {
+
+class FormatTest : public ::testing::Test { // Shared helper for all tests below.
+protected:
+ std::string format(llvm::StringRef Code, unsigned offset, unsigned length) {
+ RewriterTestContext Context;
+ FileID ID = Context.createInMemoryFile("input.cc", Code);
+ std::vector<CodeRange> Ranges(1, CodeRange(offset, length)); // One range.
+ Lexer Lex(ID, Context.Sources.getBuffer(ID), Context.Sources,
+ LangOptions());
+ tooling::Replacements Replace = reformat(Lex, Context.Sources, Ranges);
+ EXPECT_TRUE(applyAllReplacements(Replace, Context.Rewrite));
+ llvm::outs() << Context.getRewrittenText(ID) << "\n"; // Debug output.
+ return Context.getRewrittenText(ID); // The code after applying the edits.
+ }
+};
+
+TEST_F(FormatTest, DoesNotChangeCorrectlyFormatedCode) { // ';' stays as it is.
+ EXPECT_EQ(";", format(";", 0, 1));
+}
+
+TEST_F(FormatTest, FormatsGlobalStatementsAt0) { // Column 0, one per line.
+ EXPECT_EQ("int i;", format(" int i;", 0, 1));
+ EXPECT_EQ("\nint i;", format(" \n\t \r int i;", 0, 1));
+ EXPECT_EQ("int i;\nint j;", format(" int i; int j;", 0, 1));
+ EXPECT_EQ("int i;\nint j;", format(" int i;\n int j;", 0, 1));
+}
+
+TEST_F(FormatTest, FormatsContinuationsAtFirstFormat) { // Break kept, indented.
+ EXPECT_EQ("int\n i;", format("int\ni;", 0, 1));
+}
+
+TEST_F(FormatTest, FormatsNestedBlockStatements) { // Each brace on its own line.
+ EXPECT_EQ("{\n {\n {\n }\n }\n}", format("{{{}}}", 0, 1));
+}
+
+TEST_F(FormatTest, FormatsForLoop) { // Spacing is added; input breaks are kept.
+ EXPECT_EQ("for (int i = 0; i < 10; ++i);",
+ format("for(int i=0;i<10;++i);", 0 , 1));
+ EXPECT_EQ("for (int i = 0;\n i < 10;\n ++i);",
+ format("for(int i=0;\ni<10;\n++i);", 0 , 1));
+}
+
+TEST_F(FormatTest, FormatsWhileLoop) { // '{' stays on the line of the header.
+ EXPECT_EQ("while (true) {\n}", format("while(true){}", 0, 1));
+}
+
+TEST_F(FormatTest, FormatsNestedCall) { // Breaks inside call arguments are kept.
+ EXPECT_EQ("Method(1,\n"
+ " 2(\n"
+ " 3));",
+ format("Method(1,\n2(\n3));", 0, 1));
+ EXPECT_EQ("Method(1(2,\n"
+ " 3()));", format("Method(1(2,\n3()));", 0, 1));
+}
+
+TEST_F(FormatTest, FormatsAwesomeMethodCall) { // Same result with/without break.
+ EXPECT_EQ(
+ "SomeLongMethodName(SomeReallyLongMethod(CallOtherReallyLongMethod(\n"
+ " parameter, parameter, parameter)), SecondLongCall(some_parameter));",
+ format(
+ "SomeLongMethodName(SomeReallyLongMethod(CallOtherReallyLongMethod(\n"
+ "parameter , parameter, parameter)), SecondLongCall("
+ "some_parameter) );", 0, 1));
+ EXPECT_EQ(
+ "SomeLongMethodName(SomeReallyLongMethod(CallOtherReallyLongMethod(\n"
+ " parameter, parameter, parameter)), SecondLongCall(some_parameter));",
+ format(
+ "SomeLongMethodName(SomeReallyLongMethod(CallOtherReallyLongMethod("
+ "parameter,parameter,parameter)),SecondLongCall("
+ "some_parameter) );", 0, 1));
+}
+
+TEST_F(FormatTest, FormatsFunctionDefinition) { // Long parameter list is wrapped.
+ EXPECT_EQ(
+ "void f(int a, int b, int c, int d, int e, int f, int g,"
+ " int h, int j, int f,\n int c, int ddddddddddddd) {\n}",
+ format("void f(int a, int b, int c, int d, int e, int f, int g,"
+ "int h, int j, int f, int c, int ddddddddddddd) {}", 0, 1));
+}
+
+TEST_F(FormatTest, FormatIfWithoutCompountStatement) { // Brace-less if bodies.
+ EXPECT_EQ(
+ "if (true)\n f();\ng();",
+ format("if (true) f(); g();", 0, 1));
+ EXPECT_EQ(
+ "if (a)\n if (b)\n if (c)\n g();\nh();",
+ format("if(a)if(b)if(c)g();h();", 0, 1));
+ EXPECT_EQ(
+ "if (a)\n if (b) {\n f();\n }\ng();",
+ format("if(a)if(b) {f();}g();", 0, 1));
+}
+
+} // end namespace format
+} // end namespace clang
Index: unittests/Format/Makefile
===================================================================
--- /dev/null
+++ unittests/Format/Makefile
@@ -0,0 +1,19 @@
+##===- unittests/Format/Makefile ---------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+CLANG_LEVEL = ../..
+TESTNAME = Format
+include $(CLANG_LEVEL)/../../Makefile.config
+LINK_COMPONENTS := $(TARGETS_TO_BUILD) asmparser support mc
+USEDLIBS = clangFormat.a clangTooling.a clangFrontend.a clangSerialization.a \
+ clangDriver.a clangParse.a clangRewriteCore.a \
+ clangRewriteFrontend.a clangSema.a clangAnalysis.a clangEdit.a \
+ clangAST.a clangASTMatchers.a clangLex.a clangBasic.a
+
+include $(CLANG_LEVEL)/unittests/Makefile
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits