[PATCH] D63835: [Syntax] Add nodes for most common statements

2019-11-06 Thread pre-merge checks [bot] via Phabricator via cfe-commits
merge_guards_bot added a comment.

Build result: fail - 59843 tests passed, 21 failed and 768 were skipped.

  failed: lld.ELF/linkerscript/filename-spec.s
  failed: Clang.Index/index-module-with-vfs.m
  failed: Clang.Modules/double-quotes.m
  failed: Clang.Modules/framework-public-includes-private.m
  failed: Clang.VFS/external-names.c
  failed: Clang.VFS/framework-import.m
  failed: Clang.VFS/implicit-include.c
  failed: Clang.VFS/include-mixed-real-and-virtual.c
  failed: Clang.VFS/include-real-from-virtual.c
  failed: Clang.VFS/include-virtual-from-real.c
  failed: Clang.VFS/include.c
  failed: Clang.VFS/incomplete-umbrella.m
  failed: Clang.VFS/module-import.m
  failed: Clang.VFS/module_missing_vfs.m
  failed: Clang.VFS/real-path-found-first.m
  failed: Clang.VFS/relative-path.c
  failed: Clang.VFS/test_nonmodular.c
  failed: Clang.VFS/umbrella-framework-import-skipnonexist.m
  failed: Clang.VFS/vfsroot-include.c
  failed: Clang.VFS/vfsroot-module.m
  failed: Clang.VFS/vfsroot-with-overlay.c

Log files: console-log.txt 
,
 CMakeCache.txt 



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63835/new/

https://reviews.llvm.org/D63835



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63835: [Syntax] Add nodes for most common statements

2019-11-06 Thread Ilya Biryukov via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG58fa50f43701: [Syntax] Add nodes for most common statements 
(authored by ilya-biryukov).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63835/new/

https://reviews.llvm.org/D63835

Files:
  clang/include/clang/Tooling/Syntax/Nodes.h
  clang/lib/Tooling/Syntax/BuildTree.cpp
  clang/lib/Tooling/Syntax/Nodes.cpp
  clang/lib/Tooling/Syntax/Tree.cpp
  clang/unittests/Tooling/Syntax/TreeTest.cpp

Index: clang/unittests/Tooling/Syntax/TreeTest.cpp
===
--- clang/unittests/Tooling/Syntax/TreeTest.cpp
+++ clang/unittests/Tooling/Syntax/TreeTest.cpp
@@ -41,8 +41,8 @@
 
   void HandleTranslationUnit(ASTContext ) override {
 Arena = std::make_unique(Ctx.getSourceManager(),
- Ctx.getLangOpts(),
- std::move(*Tokens).consume());
+Ctx.getLangOpts(),
+std::move(*Tokens).consume());
 Tokens = nullptr; // make sure we fail if this gets called twice.
 Root = syntax::buildSyntaxTree(*Arena, *Ctx.getTranslationUnitDecl());
   }
@@ -65,7 +65,7 @@
 auto Tokens =
 std::make_unique(CI.getPreprocessor());
 return std::make_unique(Root, Arena,
-  std::move(Tokens));
+ std::move(Tokens));
   }
 
 private:
@@ -136,18 +136,315 @@
 | |-(
 | |-)
 | `-CompoundStatement
-|   |-2: {
-|   `-3: }
+|   |-{
+|   `-}
 `-TopLevelDeclaration
   |-void
   |-foo
   |-(
   |-)
   `-CompoundStatement
-|-2: {
-`-3: }
+|-{
+`-}
 )txt"},
-  };
+  // if.
+  {
+  R"cpp(
+int main() {
+  if (true) {}
+  if (true) {} else if (false) {}
+}
+)cpp",
+  R"txt(
+*: TranslationUnit
+`-TopLevelDeclaration
+  |-int
+  |-main
+  |-(
+  |-)
+  `-CompoundStatement
+|-{
+|-IfStatement
+| |-if
+| |-(
+| |-UnknownExpression
+| | `-true
+| |-)
+| `-CompoundStatement
+|   |-{
+|   `-}
+|-IfStatement
+| |-if
+| |-(
+| |-UnknownExpression
+| | `-true
+| |-)
+| |-CompoundStatement
+| | |-{
+| | `-}
+| |-else
+| `-IfStatement
+|   |-if
+|   |-(
+|   |-UnknownExpression
+|   | `-false
+|   |-)
+|   `-CompoundStatement
+| |-{
+| `-}
+`-}
+)txt"},
+  // for.
+  {R"cpp(
+void test() {
+  for (;;)  {}
+}
+)cpp",
+   R"txt(
+*: TranslationUnit
+`-TopLevelDeclaration
+  |-void
+  |-test
+  |-(
+  |-)
+  `-CompoundStatement
+|-{
+|-ForStatement
+| |-for
+| |-(
+| |-;
+| |-;
+| |-)
+| `-CompoundStatement
+|   |-{
+|   `-}
+`-}
+)txt"},
+  // declaration statement.
+  {"void test() { int a = 10; }",
+   R"txt(
+*: TranslationUnit
+`-TopLevelDeclaration
+  |-void
+  |-test
+  |-(
+  |-)
+  `-CompoundStatement
+|-{
+|-DeclarationStatement
+| |-int
+| |-a
+| |-=
+| |-10
+| `-;
+`-}
+)txt"},
+  {"void test() { ; }", R"txt(
+*: TranslationUnit
+`-TopLevelDeclaration
+  |-void
+  |-test
+  |-(
+  |-)
+  `-CompoundStatement
+|-{
+|-EmptyStatement
+| `-;
+`-}
+)txt"},
+  // switch, case and default.
+  {R"cpp(
+void test() {
+  switch (true) {
+case 0:
+default:;
+  }
+}
+)cpp",
+   R"txt(
+*: TranslationUnit
+`-TopLevelDeclaration
+  |-void
+  |-test
+  |-(
+  |-)
+  `-CompoundStatement
+|-{
+|-SwitchStatement
+| |-switch
+| |-(
+| |-UnknownExpression
+| | `-true
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-CaseStatement
+|   | |-case
+|   | |-UnknownExpression
+|   | | `-0
+|   | |-:
+|   | `-DefaultStatement
+|   |   |-default
+|   |   |-:
+|   |   `-EmptyStatement
+|   | `-;
+|   `-}
+`-}
+)txt"},
+  // while.
+  {R"cpp(
+void test() {
+  while (true) { continue; break; }
+}
+)cpp",
+   R"txt(
+*: TranslationUnit
+`-TopLevelDeclaration
+  |-void
+  |-test
+  |-(
+  |-)
+  `-CompoundStatement
+|-{
+|-WhileStatement
+| |-while
+| |-(
+| |-UnknownExpression
+| | `-true
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-ContinueStatement
+|   | |-continue
+|   | `-;
+|   |-BreakStatement
+|   | |-break
+|   | `-;
+|   `-}
+`-}
+)txt"},
+  // return.
+  {R"cpp(
+int test() { return 1; }
+  )cpp",
+   R"txt(
+*: TranslationUnit
+`-TopLevelDeclaration
+  |-int
+  |-test
+  |-(
+  |-)
+  `-CompoundStatement
+|-{
+|-ReturnStatement
+| |-return
+| |-UnknownExpression
+| | `-1
+| `-;
+`-}
+)txt"},
+  // Range-based for.

[PATCH] D63835: [Syntax] Add nodes for most common statements

2019-11-06 Thread Ilya Biryukov via Phabricator via cfe-commits
ilya-biryukov added inline comments.



Comment at: clang/include/clang/Tooling/Syntax/Nodes.h:193
+  syntax::Statement *thenStatement();
+  syntax::Leaf *elseKeyword();
+  syntax::Statement *elseStatement();

sammccall wrote:
> ilya-biryukov wrote:
> > sammccall wrote:
> > > I think throughout it's important to mark which of these are:
> > >  - nullable in correct code
> > >  - nullable in code generated by recovery
> > I would suggest to only mark the nodes that are nullable in the correct 
> > code. For recovery, I would assume the following rule (please tell me if 
> > I'm wrong):
> > 
> > On a construct whose parsing involved recovery:
> > - if the node has an introducing token (`if`, `try`, etc.), the 
> > corresponding child cannot be null.
> > - any other child can be null.
> Agree with this strategy, and the fact that it doesn't need to be documented 
> on every node/occurrence.
> 
> But it should definitely be documented somewhere at a high level! (With clang 
> AST, this sort of thing feels like tribal knowledge)
Added a corresponding comment to the file header.



Comment at: clang/include/clang/Tooling/Syntax/Nodes.h:79
+  ExpressionStatement_expression,
+  CompoundStatement_statement
 };

sammccall wrote:
> As discussed offline, there's some options about how abstract/concrete these 
> roles should be.
> 
> e.g. for a list of function args, this could be 
> FunctionOpenParen/FunctionArgExpr/FunctionArgComma/FunctionCloseParen 
> (specific) <-> OpenParen/Arg/Comma/CloseParen <-> Open/Item/Separator/Close.
> 
> The more specific ones are somewhat redundant with the parent/child type (but 
> easy to assign systematically), and the more generic ones are more orthogonal 
> (but require more design and may be hard to always make consistent).
> 
> The concrete advantage of the generic roles is being able to write code like 
> `getTrailingSemicolon(Tree*)` or `findLoopBreak(Stmt*)` or 
> `removeListItem(Tree*, int)` in a fairly generic way, without resorting to 
> adding a `Loop` base class or handling each case with separate code.
> 
> This is up to you, though.
I definitely agree that writing generic functions is simpler with the proposed 
approach.

However, I am aiming for safer APIs here, albeit less generic. E.g. we'll have 
something like 
`removeFunctionArgument(ArgumentList*, int)` and 
`removeInitializer(InitializerList*, int)`
rather than `removeListItem(Tree*, int)` in the public API.

Reasons are discoverability of the operations for particular node types.

Generic functions might still make sense as an implementation detail to share 
the code.
I'll keep as is for now, but will keep the suggestion in mind.



Comment at: clang/include/clang/Tooling/Syntax/Nodes.h:265
+  syntax::Leaf *returnKeyword();
+  syntax::Expression *value();
+};

sammccall wrote:
> nullable, marked somehow
> 
> Optional is tempting as a systematic and hard-to-ignore way of 
> documenting that.
> And it reflects the fact that there are three conceptual states for children: 
> present, legally missing, brokenly missing.
> 
> At the same time, I'm not sure how to feel about the fact that in practice 
> this can't be present but null, and the fact that *other* non-optional 
> pointers can be null.
Having `Optional` models the problem space better, but is much 
harder to use on the client side.
I'd keep as is, the file comment explains that one should assume all accessors 
can return null.

Update the comment here to indicate both `return;` and `return <expr>;` are 
represented by this node.



Comment at: clang/lib/Tooling/Syntax/BuildTree.cpp:99
+  /// semicolon when needed.
   llvm::ArrayRef getRange(const Stmt *S) const {
+auto Tokens = getRange(S->getBeginLoc(), S->getEndLoc());

sammccall wrote:
> since Expr : Stmt, we need to be a bit wary of overloading based on static 
> type.
> 
> It's tempting to say it's correct here: if we statically know E is an Expr, 
> then maybe it's never correct to consume the semicolon. But is the converse 
> true? e.g. if we're traversing using RAV and call getRange() in visitstmt...
> 
> (The alternatives seem to be a) removing the expr version of the function, 
> and having the stmt version take a `bool ConsumeSemi` or b) change the stmt 
> version to have (dynamic) expr behave like the expr overload, and handle it 
> specially when forming exprstmt. More verbose, genuinely conflicted here)
Using two functions with different names now.



Comment at: clang/lib/Tooling/Syntax/BuildTree.cpp:270
+
+  bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) {
+// We override to traverse range initializer as VarDecl.

sammccall wrote:
> maybe group with corresponding `WalkUpFromCXXForRangeStmt`?
> (Could also group all `Traverse*` together if you prefer. Current ordering 
> seems a little random)
Went for grouping `Traverse*` and 

[PATCH] D63835: [Syntax] Add nodes for most common statements

2019-11-06 Thread Ilya Biryukov via Phabricator via cfe-commits
ilya-biryukov updated this revision to Diff 228023.
ilya-biryukov marked 20 inline comments as done.
ilya-biryukov added a comment.

- Group Traverse* and Walk* together
- s/RAT/RAV
- Add a comment about nullability of the accessors
- Name function for consuming statements and expressions differently


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63835/new/

https://reviews.llvm.org/D63835

Files:
  clang/include/clang/Tooling/Syntax/Nodes.h
  clang/lib/Tooling/Syntax/BuildTree.cpp
  clang/lib/Tooling/Syntax/Nodes.cpp
  clang/lib/Tooling/Syntax/Tree.cpp
  clang/unittests/Tooling/Syntax/TreeTest.cpp

Index: clang/unittests/Tooling/Syntax/TreeTest.cpp
===
--- clang/unittests/Tooling/Syntax/TreeTest.cpp
+++ clang/unittests/Tooling/Syntax/TreeTest.cpp
@@ -41,8 +41,8 @@
 
   void HandleTranslationUnit(ASTContext ) override {
 Arena = std::make_unique(Ctx.getSourceManager(),
- Ctx.getLangOpts(),
- std::move(*Tokens).consume());
+Ctx.getLangOpts(),
+std::move(*Tokens).consume());
 Tokens = nullptr; // make sure we fail if this gets called twice.
 Root = syntax::buildSyntaxTree(*Arena, *Ctx.getTranslationUnitDecl());
   }
@@ -65,7 +65,7 @@
 auto Tokens =
 std::make_unique(CI.getPreprocessor());
 return std::make_unique(Root, Arena,
-  std::move(Tokens));
+ std::move(Tokens));
   }
 
 private:
@@ -136,18 +136,315 @@
 | |-(
 | |-)
 | `-CompoundStatement
-|   |-2: {
-|   `-3: }
+|   |-{
+|   `-}
 `-TopLevelDeclaration
   |-void
   |-foo
   |-(
   |-)
   `-CompoundStatement
-|-2: {
-`-3: }
+|-{
+`-}
 )txt"},
-  };
+  // if.
+  {
+  R"cpp(
+int main() {
+  if (true) {}
+  if (true) {} else if (false) {}
+}
+)cpp",
+  R"txt(
+*: TranslationUnit
+`-TopLevelDeclaration
+  |-int
+  |-main
+  |-(
+  |-)
+  `-CompoundStatement
+|-{
+|-IfStatement
+| |-if
+| |-(
+| |-UnknownExpression
+| | `-true
+| |-)
+| `-CompoundStatement
+|   |-{
+|   `-}
+|-IfStatement
+| |-if
+| |-(
+| |-UnknownExpression
+| | `-true
+| |-)
+| |-CompoundStatement
+| | |-{
+| | `-}
+| |-else
+| `-IfStatement
+|   |-if
+|   |-(
+|   |-UnknownExpression
+|   | `-false
+|   |-)
+|   `-CompoundStatement
+| |-{
+| `-}
+`-}
+)txt"},
+  // for.
+  {R"cpp(
+void test() {
+  for (;;)  {}
+}
+)cpp",
+   R"txt(
+*: TranslationUnit
+`-TopLevelDeclaration
+  |-void
+  |-test
+  |-(
+  |-)
+  `-CompoundStatement
+|-{
+|-ForStatement
+| |-for
+| |-(
+| |-;
+| |-;
+| |-)
+| `-CompoundStatement
+|   |-{
+|   `-}
+`-}
+)txt"},
+  // declaration statement.
+  {"void test() { int a = 10; }",
+   R"txt(
+*: TranslationUnit
+`-TopLevelDeclaration
+  |-void
+  |-test
+  |-(
+  |-)
+  `-CompoundStatement
+|-{
+|-DeclarationStatement
+| |-int
+| |-a
+| |-=
+| |-10
+| `-;
+`-}
+)txt"},
+  {"void test() { ; }", R"txt(
+*: TranslationUnit
+`-TopLevelDeclaration
+  |-void
+  |-test
+  |-(
+  |-)
+  `-CompoundStatement
+|-{
+|-EmptyStatement
+| `-;
+`-}
+)txt"},
+  // switch, case and default.
+  {R"cpp(
+void test() {
+  switch (true) {
+case 0:
+default:;
+  }
+}
+)cpp",
+   R"txt(
+*: TranslationUnit
+`-TopLevelDeclaration
+  |-void
+  |-test
+  |-(
+  |-)
+  `-CompoundStatement
+|-{
+|-SwitchStatement
+| |-switch
+| |-(
+| |-UnknownExpression
+| | `-true
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-CaseStatement
+|   | |-case
+|   | |-UnknownExpression
+|   | | `-0
+|   | |-:
+|   | `-DefaultStatement
+|   |   |-default
+|   |   |-:
+|   |   `-EmptyStatement
+|   | `-;
+|   `-}
+`-}
+)txt"},
+  // while.
+  {R"cpp(
+void test() {
+  while (true) { continue; break; }
+}
+)cpp",
+   R"txt(
+*: TranslationUnit
+`-TopLevelDeclaration
+  |-void
+  |-test
+  |-(
+  |-)
+  `-CompoundStatement
+|-{
+|-WhileStatement
+| |-while
+| |-(
+| |-UnknownExpression
+| | `-true
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-ContinueStatement
+|   | |-continue
+|   | `-;
+|   |-BreakStatement
+|   | |-break
+|   | `-;
+|   `-}
+`-}
+)txt"},
+  // return.
+  {R"cpp(
+int test() { return 1; }
+  )cpp",
+   R"txt(
+*: TranslationUnit
+`-TopLevelDeclaration
+  |-int
+  |-test
+  |-(
+  |-)
+  `-CompoundStatement
+|-{
+

[PATCH] D63835: [Syntax] Add nodes for most common statements

2019-08-05 Thread Sam McCall via Phabricator via cfe-commits
sammccall accepted this revision.
sammccall added inline comments.
This revision is now accepted and ready to land.



Comment at: clang/include/clang/Tooling/Syntax/Nodes.h:99
 /// An abstract node for C++ statements, e.g. 'while', 'if', etc.
 class Statement : public Tree {
 public:

ilya-biryukov wrote:
> sammccall wrote:
> > Do you want to expose the statement-ending-semicolon here?
> > 
> > (Not all statements have it, but common enough you may want it in the base 
> > class instead of all children)
> Yes, only "leaf" (i.e. the ones not having any statement children) have it.
> I was thinking about:
>   - having a separate class for non-composite statements and providing an 
> accessor there,
>   - providing an accessor in each of the leaf statements (would mean some 
> duplication, but, arguably, better discoverability).
> 
> But, from an offline conversation, we seem to disagree that inheritance is a 
> proper way to model this.
> Would it be ok to do this in a follow-up? I'll add a FIXME for now.
First: yes, let's not do this now.

Second: I'm wary of using standard is-a inheritance to model more than 
alternation in the grammar. That is, ForStatement is-a Statement is fine, 
ForStatement is-a LoopyStatement is suspect. This is to do with the fact that 
LoopyStatement is-a Statement seems obvious, and we may end up with 
diamond-shaped inheritance and some conceptual confusion.

This goes for all traits that aren't natural tree-shaped inheritance: 
HasTrailingSemicolon, LoopyStatement, ...

I think there are two concerns here:
 - we want to be able to get the trailing-semicolon if it exists
 - we want to be able to check if the trailing-semicolon is *expected* 
including via its static type

One way to do this (not the only one...):

```
// generic helper, or callers could even write this directly
Optional trailingSemi(Tree *Node) {
  return firstElement(Node->Children(NodeRole::TrailingSemi));
}

// mixin for trailing semi support. Note: doesn't inherit Statement!
// maybe need/want some CRTP magic
class TrailingSemicolon {
  Optional trailingSemi() const { return trailingSemi((const Node*)this; }
}

// Gets the trailingSemi() accessor.
ExprStmt : public Statement, TrailingSemicolon { ... }

```



Comment at: clang/include/clang/Tooling/Syntax/Nodes.h:193
+  syntax::Statement *thenStatement();
+  syntax::Leaf *elseKeyword();
+  syntax::Statement *elseStatement();

ilya-biryukov wrote:
> sammccall wrote:
> > I think throughout it's important to mark which of these are:
> >  - nullable in correct code
> >  - nullable in code generated by recovery
> I would suggest to only mark the nodes that are nullable in the correct code. 
> For recovery, I would assume the following rule (please tell me if I'm wrong):
> 
> On a construct whose parsing involved recovery:
> - if the node has an introducing token (`if`, `try`, etc.), the corresponding 
> child cannot be null.
> - any other child can be null.
Agree with this strategy, and the fact that it doesn't need to be documented on 
every node/occurrence.

But it should definitely be documented somewhere at a high level! (With clang 
AST, this sort of thing feels like tribal knowledge)



Comment at: clang/include/clang/Tooling/Syntax/Nodes.h:24
 
 /// A kind of a syntax node, used for implementing casts.
 enum class NodeKind : uint16_t {

Can you add a comment here saying the ordering/blocks must correspond to the 
Node inheritance hierarchy? This is *kind of* common knowledge, but I think 
this is normally handled by tablegen.



Comment at: clang/include/clang/Tooling/Syntax/Nodes.h:79
+  ExpressionStatement_expression,
+  CompoundStatement_statement
 };

As discussed offline, there's some options about how abstract/concrete these 
roles should be.

e.g. for a list of function args, this could be 
FunctionOpenParen/FunctionArgExpr/FunctionArgComma/FunctionCloseParen 
(specific) <-> OpenParen/Arg/Comma/CloseParen <-> Open/Item/Separator/Close.

The more specific ones are somewhat redundant with the parent/child type (but 
easy to assign systematically), and the more generic ones are more orthogonal 
(but require more design and may be hard to always make consistent).

The concrete advantage of the generic roles is being able to write code like 
`getTrailingSemicolon(Tree*)` or `findLoopBreak(Stmt*)` or 
`removeListItem(Tree*, int)` in a fairly generic way, without resorting to 
adding a `Loop` base class or handling each case with separate code.

This is up to you, though.



Comment at: clang/include/clang/Tooling/Syntax/Nodes.h:265
+  syntax::Leaf *returnKeyword();
+  syntax::Expression *value();
+};

nullable, marked somehow

Optional is tempting as a systematic and hard-to-ignore way of 
documenting that.
And it reflects the fact that there are three conceptual states for children: 
present, 

[PATCH] D63835: [Syntax] Add nodes for most common statements

2019-07-10 Thread Ilya Biryukov via Phabricator via cfe-commits
ilya-biryukov added a comment.

This is ready for another round




Comment at: clang/include/clang/Tooling/Syntax/Nodes.h:25
 /// A kind of a syntax node, used for implementing casts.
 enum class NodeKind : uint16_t {
   Leaf,

sammccall wrote:
> there are going to be many of these. I'd suggest either sorting them all, or 
> breaking them into blocks (e.g. statements vs declarations vs leaf/tu/etc) 
> and sorting those blocks.
I've added two blocks now - statements and expressions.
Did not sort, though, I find the semantic grouping (loop statements close to 
each other) more useful, but hard to keep consistent.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63835/new/

https://reviews.llvm.org/D63835



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63835: [Syntax] Add nodes for most common statements

2019-07-10 Thread Ilya Biryukov via Phabricator via cfe-commits
ilya-biryukov updated this revision to Diff 208995.
ilya-biryukov added a comment.

- Mark groups of kinds for statements and expressions


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63835/new/

https://reviews.llvm.org/D63835

Files:
  clang/include/clang/Tooling/Syntax/Nodes.h
  clang/lib/Tooling/Syntax/BuildTree.cpp
  clang/lib/Tooling/Syntax/Nodes.cpp
  clang/lib/Tooling/Syntax/Tree.cpp
  clang/unittests/Tooling/Syntax/TreeTest.cpp

Index: clang/unittests/Tooling/Syntax/TreeTest.cpp
===
--- clang/unittests/Tooling/Syntax/TreeTest.cpp
+++ clang/unittests/Tooling/Syntax/TreeTest.cpp
@@ -136,19 +136,326 @@
 | |-(
 | |-)
 | `-CompoundStatement
-|   |-2: {
-|   `-3: }
+|   |-{
+|   `-}
 |-TopLevelDeclaration
 | |-void
 | |-foo
 | |-(
 | |-)
 | `-CompoundStatement
-|   |-2: {
-|   `-3: }
+|   |-{
+|   `-}
 `-
 )txt"},
-  };
+  // if.
+  {
+  R"cpp(
+int main() {
+  if (true) {}
+  if (true) {} else if (false) {}
+}
+)cpp",
+  R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-int
+| |-main
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-IfStatement
+|   | |-if
+|   | |-(
+|   | |-UnknownExpression
+|   | | `-true
+|   | |-)
+|   | `-CompoundStatement
+|   |   |-{
+|   |   `-}
+|   |-IfStatement
+|   | |-if
+|   | |-(
+|   | |-UnknownExpression
+|   | | `-true
+|   | |-)
+|   | |-CompoundStatement
+|   | | |-{
+|   | | `-}
+|   | |-else
+|   | `-IfStatement
+|   |   |-if
+|   |   |-(
+|   |   |-UnknownExpression
+|   |   | `-false
+|   |   |-)
+|   |   `-CompoundStatement
+|   | |-{
+|   | `-}
+|   `-}
+`-
+)txt"},
+  // for.
+  {R"cpp(
+void test() {
+  for (;;)  {}
+}
+)cpp",
+   R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-ForStatement
+|   | |-for
+|   | |-(
+|   | |-;
+|   | |-;
+|   | |-)
+|   | `-CompoundStatement
+|   |   |-{
+|   |   `-}
+|   `-}
+`-
+)txt"},
+  // declaration statement.
+  {"void test() { int a = 10; }",
+   R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-DeclarationStatement
+|   | |-int
+|   | |-a
+|   | |-=
+|   | |-10
+|   | `-;
+|   `-}
+`-
+)txt"},
+  {"void test() { ; }", R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-EmptyStatement
+|   | `-;
+|   `-}
+`-
+)txt"},
+  // switch, case and default.
+  {R"cpp(
+void test() {
+  switch (true) {
+case 0:
+default:;
+  }
+}
+)cpp",
+   R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-SwitchStatement
+|   | |-switch
+|   | |-(
+|   | |-UnknownExpression
+|   | | `-true
+|   | |-)
+|   | `-CompoundStatement
+|   |   |-{
+|   |   |-CaseStatement
+|   |   | |-case
+|   |   | |-UnknownExpression
+|   |   | | `-0
+|   |   | |-:
+|   |   | `-DefaultStatement
+|   |   |   |-default
+|   |   |   |-:
+|   |   |   `-EmptyStatement
+|   |   | `-;
+|   |   `-}
+|   `-}
+`-
+)txt"},
+  // while.
+  {R"cpp(
+void test() {
+  while (true) { continue; break; }
+}
+)cpp",
+   R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-WhileStatement
+|   | |-while
+|   | |-(
+|   | |-UnknownExpression
+|   | | `-true
+|   | |-)
+|   | `-CompoundStatement
+|   |   |-{
+|   |   |-ContinueStatement
+|   |   | |-continue
+|   |   | `-;
+|   |   |-BreakStatement
+|   |   | |-break
+|   |   | `-;
+|   |   `-}
+|   `-}
+`-
+)txt"},
+  // return.
+  {R"cpp(
+int test() { return 1; }
+  )cpp",
+   R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-int
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-ReturnStatement
+|   | |-return
+|   | |-UnknownExpression
+|   | | `-1
+|   | `-;
+|   `-}
+`-
+   )txt"},
+  // Range-based for.
+  {R"cpp(
+void test() {
+  int a[3];
+  for (int x : a) ;
+}
+  )cpp",
+   R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-DeclarationStatement
+|   | |-int
+|   | |-a
+|   | |-[
+|   | |-3
+|   | |-]
+|   | `-;
+|   |-RangeBasedForStatement
+|   | |-for
+|   | |-(
+|   | |-int
+|   | |-x
+|   | |-:
+|   | |-UnknownExpression
+|   | | `-a
+|   | |-)
+|   | `-EmptyStatement
+|   |   `-;
+|   `-}
+`-
+   )txt"},
+  // Unhandled statements should end up as 'unknown statement'.
+  // This example uses a 'label statement', which does not yet have a syntax
+  // counterpart.
+  {"void main() { foo: return 100; }", R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-void
+| |-main
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-UnknownStatement
+|   | |-foo
+|   | |-:
+|   | 

[PATCH] D63835: [Syntax] Add nodes for most common statements

2019-07-10 Thread Ilya Biryukov via Phabricator via cfe-commits
ilya-biryukov added inline comments.



Comment at: clang/include/clang/Tooling/Syntax/Nodes.h:185
+/// if (cond) <then-statement> else <else-statement>
+class IfStatement final : public Statement {
+public:

sammccall wrote:
> I guess the missing cond here (and similar below) are due to complexities 
> around the variable declaring variants?
> 
> Warrants a FIXME I think
Yes. Added a FIXME



Comment at: clang/include/clang/Tooling/Syntax/Nodes.h:265
+/// return <expr>;
+class ReturnStatement final : public Statement {
+public:

sammccall wrote:
> (any reason we can't already have the expr here?)
Added a getter for it.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63835/new/

https://reviews.llvm.org/D63835



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63835: [Syntax] Add nodes for most common statements

2019-07-10 Thread Ilya Biryukov via Phabricator via cfe-commits
ilya-biryukov updated this revision to Diff 208937.
ilya-biryukov marked 5 inline comments as done.
ilya-biryukov added a comment.

- Rebase
- Address comments
- Restructure the roles
- Remove the role from tree dumps for now. With too many roles it is annoying to 
update the test outputs on incremental changes. I tried using the symbolic role 
names there, but they end up being too verbose.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63835/new/

https://reviews.llvm.org/D63835

Files:
  clang/include/clang/Tooling/Syntax/Nodes.h
  clang/lib/Tooling/Syntax/BuildTree.cpp
  clang/lib/Tooling/Syntax/Nodes.cpp
  clang/lib/Tooling/Syntax/Tree.cpp
  clang/unittests/Tooling/Syntax/TreeTest.cpp

Index: clang/unittests/Tooling/Syntax/TreeTest.cpp
===
--- clang/unittests/Tooling/Syntax/TreeTest.cpp
+++ clang/unittests/Tooling/Syntax/TreeTest.cpp
@@ -136,19 +136,326 @@
 | |-(
 | |-)
 | `-CompoundStatement
-|   |-2: {
-|   `-3: }
+|   |-{
+|   `-}
 |-TopLevelDeclaration
 | |-void
 | |-foo
 | |-(
 | |-)
 | `-CompoundStatement
-|   |-2: {
-|   `-3: }
+|   |-{
+|   `-}
 `-
 )txt"},
-  };
+  // if.
+  {
+  R"cpp(
+int main() {
+  if (true) {}
+  if (true) {} else if (false) {}
+}
+)cpp",
+  R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-int
+| |-main
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-IfStatement
+|   | |-if
+|   | |-(
+|   | |-UnknownExpression
+|   | | `-true
+|   | |-)
+|   | `-CompoundStatement
+|   |   |-{
+|   |   `-}
+|   |-IfStatement
+|   | |-if
+|   | |-(
+|   | |-UnknownExpression
+|   | | `-true
+|   | |-)
+|   | |-CompoundStatement
+|   | | |-{
+|   | | `-}
+|   | |-else
+|   | `-IfStatement
+|   |   |-if
+|   |   |-(
+|   |   |-UnknownExpression
+|   |   | `-false
+|   |   |-)
+|   |   `-CompoundStatement
+|   | |-{
+|   | `-}
+|   `-}
+`-
+)txt"},
+  // for.
+  {R"cpp(
+void test() {
+  for (;;)  {}
+}
+)cpp",
+   R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-ForStatement
+|   | |-for
+|   | |-(
+|   | |-;
+|   | |-;
+|   | |-)
+|   | `-CompoundStatement
+|   |   |-{
+|   |   `-}
+|   `-}
+`-
+)txt"},
+  // declaration statement.
+  {"void test() { int a = 10; }",
+   R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-DeclarationStatement
+|   | |-int
+|   | |-a
+|   | |-=
+|   | |-10
+|   | `-;
+|   `-}
+`-
+)txt"},
+  {"void test() { ; }", R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-EmptyStatement
+|   | `-;
+|   `-}
+`-
+)txt"},
+  // switch, case and default.
+  {R"cpp(
+void test() {
+  switch (true) {
+case 0:
+default:;
+  }
+}
+)cpp",
+   R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-SwitchStatement
+|   | |-switch
+|   | |-(
+|   | |-UnknownExpression
+|   | | `-true
+|   | |-)
+|   | `-CompoundStatement
+|   |   |-{
+|   |   |-CaseStatement
+|   |   | |-case
+|   |   | |-UnknownExpression
+|   |   | | `-0
+|   |   | |-:
+|   |   | `-DefaultStatement
+|   |   |   |-default
+|   |   |   |-:
+|   |   |   `-EmptyStatement
+|   |   | `-;
+|   |   `-}
+|   `-}
+`-
+)txt"},
+  // while.
+  {R"cpp(
+void test() {
+  while (true) { continue; break; }
+}
+)cpp",
+   R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-WhileStatement
+|   | |-while
+|   | |-(
+|   | |-UnknownExpression
+|   | | `-true
+|   | |-)
+|   | `-CompoundStatement
+|   |   |-{
+|   |   |-ContinueStatement
+|   |   | |-continue
+|   |   | `-;
+|   |   |-BreakStatement
+|   |   | |-break
+|   |   | `-;
+|   |   `-}
+|   `-}
+`-
+)txt"},
+  // return.
+  {R"cpp(
+int test() { return 1; }
+  )cpp",
+   R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-int
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-ReturnStatement
+|   | |-return
+|   | |-UnknownExpression
+|   | | `-1
+|   | `-;
+|   `-}
+`-
+   )txt"},
+  // Range-based for.
+  {R"cpp(
+void test() {
+  int a[3];
+  for (int x : a) ;
+}
+  )cpp",
+   R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-DeclarationStatement
+|   | |-int
+|   | |-a
+|   | |-[
+|   | |-3
+|   | |-]
+|   | `-;
+|   |-RangeBasedForStatement
+|   | |-for
+|   | |-(
+|   | |-int
+|   | |-x
+|   | |-:
+|   | |-UnknownExpression
+|   | | `-a
+|   | |-)
+|   | `-EmptyStatement
+|   |   `-;
+|   `-}
+`-
+   )txt"},
+  // Unhandled statements should end up as 'unknown statement'.
+  // This example uses a 'label statement', which does not yet have a 

[PATCH] D63835: [Syntax] Add nodes for most common statements

2019-07-09 Thread Ilya Biryukov via Phabricator via cfe-commits
ilya-biryukov marked 2 inline comments as done.
ilya-biryukov added a comment.

Submitting a few comments to start up the discussions.

The actual changes will follow.




Comment at: clang/include/clang/Tooling/Syntax/Nodes.h:99
 /// An abstract node for C++ statements, e.g. 'while', 'if', etc.
 class Statement : public Tree {
 public:

sammccall wrote:
> Do you want to expose the statement-ending-semicolon here?
> 
> (Not all statements have it, but common enough you may want it in the base 
> class instead of all children)
Yes, only "leaf" (i.e. the ones not having any statement children) have it.
I was thinking about:
  - having a separate class for non-composite statements and providing an 
accessor there,
  - providing an accessor in each of the leaf statements (would mean some 
duplication, but, arguably, better discoverability).

But, from an offline conversation, we seem to disagree that inheritance is a 
proper way to model this.
Would it be ok to do this in a follow-up? I'll add a FIXME for now.



Comment at: clang/include/clang/Tooling/Syntax/Nodes.h:160
+  syntax::Leaf *caseKeyword();
+  syntax::Statement *body();
+

sammccall wrote:
> syntactically, is it useful to model the body as a single statement? It's not 
> a CompoundStmt as it has no braces. Seems like a sequence...
> 
> Or is the idea that the first following statement is the body (might be 
> nothing), and subsequent ones aren't part of the body? Why is this more 
> useful than making the body a sibling?
This models the structure of the C++ grammar (and clang AST).
Getting from a switch statement to all its `case` and `default` labels seems 
useful, but should be addressed by a separate API that traverses the 
corresponding syntax tree nodes.

Marking as done, from an offline conversation we seem to agree here.
Feel free to reopen if needed.



Comment at: clang/include/clang/Tooling/Syntax/Nodes.h:176
+  syntax::Leaf *defaultKeyword();
+  syntax::Statement *body();
+

sammccall wrote:
> might be handy to unify this with CaseStatement somehow (a base class, or 
> make it literally a CaseStatement with a null body and a bool isDefaultCase() 
> that looks at the keyword token)
> 
> Mostly thinking about code that's going to iterate over case statements.
I would model this with a base class, but let's agree whether that's the right 
way to approach this.



Comment at: clang/include/clang/Tooling/Syntax/Nodes.h:193
+  syntax::Statement *thenStatement();
+  syntax::Leaf *elseKeyword();
+  syntax::Statement *elseStatement();

sammccall wrote:
> I think throughout it's important to mark which of these are:
>  - nullable in correct code
>  - nullable in code generated by recovery
I would suggest to only mark the nodes that are nullable in the correct code. 
For recovery, I would assume the following rule (please tell me if I'm wrong):

On a construct whose parsing involved recovery:
- if the node has an introducing token (`if`, `try`, etc.), the corresponding 
child cannot be null.
- any other child can be null.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63835/new/

https://reviews.llvm.org/D63835



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63835: [Syntax] Add nodes for most common statements

2019-07-08 Thread Sam McCall via Phabricator via cfe-commits
sammccall added inline comments.



Comment at: clang/include/clang/Tooling/Syntax/Nodes.h:25
 /// A kind of a syntax node, used for implementing casts.
 enum class NodeKind : uint16_t {
   Leaf,

there are going to be many of these. I'd suggest either sorting them all, or 
breaking them into blocks (e.g. statements vs declarations vs leaf/tu/etc) and 
sorting those blocks.



Comment at: clang/include/clang/Tooling/Syntax/Nodes.h:99
 /// An abstract node for C++ statements, e.g. 'while', 'if', etc.
 class Statement : public Tree {
 public:

Do you want to expose the statement-ending-semicolon here?

(Not all statements have it, but common enough you may want it in the base 
class instead of all children)



Comment at: clang/include/clang/Tooling/Syntax/Nodes.h:144
+  syntax::Leaf *switchKeyword();
+  syntax::Statement *body();
+

The fact that this can be an arbitrary statement is kind of shocking. But 
apparently true!

In the long run, we're probably going to be able to find the case statements 
somehow, even though they're not part of the immediate grammar. Not sure 
whether this should be via the regular AST or by adding links here. Anyway, 
problem for another day.



Comment at: clang/include/clang/Tooling/Syntax/Nodes.h:159
+  }
+  syntax::Leaf *caseKeyword();
+  syntax::Statement *body();

expression for the value?



Comment at: clang/include/clang/Tooling/Syntax/Nodes.h:160
+  syntax::Leaf *caseKeyword();
+  syntax::Statement *body();
+

syntactically, is it useful to model the body as a single statement? It's not a 
CompoundStmt as it has no braces. Seems like a sequence...

Or is the idea that the first following statement is the body (might be 
nothing), and subsequent ones aren't part of the body? Why is this more useful 
than making the body a sibling?



Comment at: clang/include/clang/Tooling/Syntax/Nodes.h:176
+  syntax::Leaf *defaultKeyword();
+  syntax::Statement *body();
+

might be handy to unify this with CaseStatement somehow (a base class, or make 
it literally a CaseStatement with a null body and a bool isDefaultCase() that 
looks at the keyword token)

Mostly thinking about code that's going to iterate over case statements.



Comment at: clang/include/clang/Tooling/Syntax/Nodes.h:185
+/// if (cond) <then-statement> else <else-statement>
+class IfStatement final : public Statement {
+public:

I guess the missing cond here (and similar below) are due to complexities 
around the variable declaring variants?

Warrants a FIXME I think



Comment at: clang/include/clang/Tooling/Syntax/Nodes.h:193
+  syntax::Statement *thenStatement();
+  syntax::Leaf *elseKeyword();
+  syntax::Statement *elseStatement();

I think throughout it's important to mark which of these are:
 - nullable in correct code
 - nullable in code generated by recovery



Comment at: clang/include/clang/Tooling/Syntax/Nodes.h:265
+/// return <expr>;
+class ReturnStatement final : public Statement {
+public:

(any reason we can't already have the expr here?)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63835/new/

https://reviews.llvm.org/D63835



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63835: [Syntax] Add nodes for most common statements

2019-06-26 Thread Ilya Biryukov via Phabricator via cfe-commits
ilya-biryukov added a comment.

This change mostly aims to illustrate that `TreeBuilder` seems to be powerful 
enough to go beyond basic nodes.
But it also introduces enough nodes to make the syntax trees minimally useful 
for traversing statement nodes. Hopefully that could become a good basis to 
define other APIs (mutations, etc).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63835/new/

https://reviews.llvm.org/D63835



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63835: [Syntax] Add nodes for most common statements

2019-06-26 Thread Ilya Biryukov via Phabricator via cfe-commits
ilya-biryukov created this revision.
ilya-biryukov added a reviewer: sammccall.
Herald added a project: clang.
ilya-biryukov added a parent revision: D61637: [Syntax] Introduce syntax trees.

Most of the statements mirror the ones provided by clang AST.
Major differences are:

- expressions are wrapped into 'ExpressionStatement' instead of being a 
subclass of statement,
- semicolons are always consumed by the leaf expressions (return, expression 
statement, etc),
- some clang statements are not handled yet, we wrap those into an 
UnknownStatement class, which is not present in clang.

We also define 'Expression' and 'UnknownExpression' classes in order
to produce 'ExpressionStatement' where needed. The actual implementation
of expressions is not yet ready, it will follow later.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D63835

Files:
  clang/include/clang/Tooling/Syntax/Nodes.h
  clang/lib/Tooling/Syntax/BuildTree.cpp
  clang/lib/Tooling/Syntax/Nodes.cpp
  clang/unittests/Tooling/Syntax/TreeTest.cpp

Index: clang/unittests/Tooling/Syntax/TreeTest.cpp
===
--- clang/unittests/Tooling/Syntax/TreeTest.cpp
+++ clang/unittests/Tooling/Syntax/TreeTest.cpp
@@ -137,7 +137,7 @@
 | |-)
 | `-CompoundStatement
 |   |-1: {
-|   `-2: }
+|   `-3: }
 |-TopLevelDeclaration
 | |-void
 | |-foo
@@ -145,10 +145,317 @@
 | |-)
 | `-CompoundStatement
 |   |-1: {
-|   `-2: }
+|   `-3: }
 `-1: 
 )txt"},
-  };
+  // if.
+  {
+  R"cpp(
+int main() {
+  if (true) {}
+  if (true) {} else if (false) {}
+}
+)cpp",
+  R"txt(
+*: TranslationUnitDeclaration
+|-TopLevelDeclaration
+| |-int
+| |-main
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-1: {
+|   |-2: IfStatement
+|   | |-1: if
+|   | |-(
+|   | |-UnknownExpression
+|   | | `-true
+|   | |-)
+|   | `-2: CompoundStatement
+|   |   |-1: {
+|   |   `-3: }
+|   |-2: IfStatement
+|   | |-1: if
+|   | |-(
+|   | |-UnknownExpression
+|   | | `-true
+|   | |-)
+|   | |-2: CompoundStatement
+|   | | |-1: {
+|   | | `-3: }
+|   | |-3: else
+|   | `-4: IfStatement
+|   |   |-1: if
+|   |   |-(
+|   |   |-UnknownExpression
+|   |   | `-false
+|   |   |-)
+|   |   `-2: CompoundStatement
+|   | |-1: {
+|   | `-3: }
+|   `-3: }
+`-1: 
+)txt"},
+  // for.
+  {R"cpp(
+void test() {
+  for (;;)  {}
+}
+)cpp",
+   R"txt(
+*: TranslationUnitDeclaration
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-1: {
+|   |-2: ForStatement
+|   | |-1: for
+|   | |-(
+|   | |-;
+|   | |-;
+|   | |-)
+|   | `-2: CompoundStatement
+|   |   |-1: {
+|   |   `-3: }
+|   `-3: }
+`-1: 
+)txt"},
+  // declaration statement.
+  {"void test() { int a = 10; }",
+   R"txt(
+*: TranslationUnitDeclaration
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-1: {
+|   |-2: DeclarationStatement
+|   | |-int
+|   | |-a
+|   | |-=
+|   | |-10
+|   | `-;
+|   `-3: }
+`-1: 
+)txt"},
+  {"void test() { ; }", R"txt(
+*: TranslationUnitDeclaration
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-1: {
+|   |-2: EmptyStatement
+|   | `-;
+|   `-3: }
+`-1: 
+)txt"},
+  // switch, case and default.
+  {R"cpp(
+void test() {
+  switch (true) {
+case 0:
+default:;
+  }
+}
+)cpp",
+   R"txt(
+*: TranslationUnitDeclaration
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-1: {
+|   |-2: SwitchStatement
+|   | |-1: switch
+|   | |-(
+|   | |-UnknownExpression
+|   | | `-true
+|   | |-)
+|   | `-2: CompoundStatement
+|   |   |-1: {
+|   |   |-2: CaseStatement
+|   |   | |-1: case
+|   |   | |-UnknownExpression
+|   |   | | `-0
+|   |   | |-:
+|   |   | `-2: DefaultStatement
+|   |   |   |-1: default
+|   |   |   |-:
+|   |   |   `-2: EmptyStatement
+|   |   | `-;
+|   |   `-3: }
+|   `-3: }
+`-1: 
+)txt"},
+  // while.
+  {R"cpp(
+void test() {
+  while (true) { continue; break; }
+}
+)cpp",
+   R"txt(
+*: TranslationUnitDeclaration
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-1: {
+|   |-2: WhileStatement
+|   | |-1: while
+|   | |-(
+|   | |-UnknownExpression
+|   | | `-true
+|   | |-)
+|   | `-2: CompoundStatement
+|   |   |-1: {
+|   |   |-2: ContinueStatement
+|   |   | |-1: continue
+|   |   | `-;
+|   |   |-2: BreakStatement
+|   |   | |-1: break
+|   |   | `-;
+|   |   `-3: }
+|   `-3: }
+`-1: 
+)txt"},
+  // return.
+  {R"cpp(
+int test() { return 1; }
+  )cpp",
+   R"txt(
+*: TranslationUnitDeclaration
+|-TopLevelDeclaration
+| |-int
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-1: {
+|   |-2: ReturnStatement
+|   | |-1: return
+|   | |-UnknownExpression
+|   | | `-1
+|   | `-;
+|   `-3: }
+`-1: 
+   )txt"},
+  // Range-based for.
+  {R"cpp(
+void test() {
+  int a[3];
+  for (int x : a) ;
+}
+  )cpp",
+