Re: [PATCH] D90023: [Syntax] Add iterators over children of syntax trees.

2020-10-28 Thread Sam McCall via cfe-commits
Ah, thanks a lot, sorry for missing that!

On Wed, Oct 28, 2020 at 4:41 PM Simon Pilgrim via Phabricator <
revi...@reviews.llvm.org> wrote:

> RKSimon added a comment.
>
> @sammccall I've pushed rGc0053c62d9a0 <
> https://reviews.llvm.org/rGc0053c62d9a0b798b42686499de9bb2e7391b111> to
> fix MSVC builds which were struggling to find a default constructor for
> ConstChildIterator
>
> http://lab.llvm.org:8011/#/builders/83
>
>
> Repository:
>   rG LLVM Github Monorepo
>
> CHANGES SINCE LAST ACTION
>   https://reviews.llvm.org/D90023/new/
>
> https://reviews.llvm.org/D90023
>
>
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90023: [Syntax] Add iterators over children of syntax trees.

2020-10-28 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

@sammccall I've pushed rGc0053c62d9a0 
 to fix 
MSVC builds which were struggling to find a default constructor for 
ConstChildIterator

http://lab.llvm.org:8011/#/builders/83


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90023/new/

https://reviews.llvm.org/D90023

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90023: [Syntax] Add iterators over children of syntax trees.

2020-10-28 Thread Sam McCall via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGd4934eb5f876: [Syntax] Add iterators over children of syntax 
trees. (authored by sammccall).

Changed prior to commit:
  https://reviews.llvm.org/D90023?vs=300691&id=301239#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90023/new/

https://reviews.llvm.org/D90023

Files:
  clang/include/clang/Tooling/Syntax/Tree.h
  clang/lib/Tooling/Syntax/Tree.cpp
  clang/unittests/Tooling/Syntax/TreeTest.cpp
  clang/unittests/Tooling/Syntax/TreeTestBase.h

Index: clang/unittests/Tooling/Syntax/TreeTestBase.h
===
--- clang/unittests/Tooling/Syntax/TreeTestBase.h
+++ clang/unittests/Tooling/Syntax/TreeTestBase.h
@@ -20,7 +20,9 @@
 #include "clang/Tooling/Syntax/Tokens.h"
 #include "clang/Tooling/Syntax/Tree.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ScopedPrinter.h"
 #include "llvm/Testing/Support/Annotations.h"
+#include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
 namespace clang {
@@ -53,6 +55,14 @@
 };
 
 std::vector allTestClangConfigs();
+
+MATCHER_P(role, R, "") {
+  if (arg.getRole() == R)
+return true;
+  *result_listener << "role is " << llvm::to_string(arg.getRole());
+  return false;
+}
+
 } // namespace syntax
 } // namespace clang
 #endif // LLVM_CLANG_UNITTESTS_TOOLING_SYNTAX_TREETESTBASE_H
Index: clang/unittests/Tooling/Syntax/TreeTest.cpp
===
--- clang/unittests/Tooling/Syntax/TreeTest.cpp
+++ clang/unittests/Tooling/Syntax/TreeTest.cpp
@@ -8,6 +8,7 @@
 
 #include "clang/Tooling/Syntax/Tree.h"
 #include "TreeTestBase.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Tooling/Syntax/BuildTree.h"
 #include "clang/Tooling/Syntax/Nodes.h"
 #include "llvm/ADT/STLExtras.h"
@@ -17,6 +18,7 @@
 using namespace clang::syntax;
 
 namespace {
+using testing::ElementsAre;
 
 class TreeTest : public SyntaxTreeTest {
 private:
@@ -124,6 +126,56 @@
   }
 }
 
+TEST_F(TreeTest, Iterators) {
+  buildTree("", allTestClangConfigs().front());
+  std::vector Children = {createLeaf(*Arena, tok::identifier, "a"),
+  createLeaf(*Arena, tok::identifier, "b"),
+  createLeaf(*Arena, tok::identifier, "c")};
+  auto *Tree = syntax::createTree(*Arena,
+  {{Children[0], NodeRole::LeftHandSide},
+   {Children[1], NodeRole::OperatorToken},
+   {Children[2], NodeRole::RightHandSide}},
+  NodeKind::TranslationUnit);
+  const auto *ConstTree = Tree;
+
+  auto Range = Tree->getChildren();
+  EXPECT_THAT(Range, ElementsAre(role(NodeRole::LeftHandSide),
+ role(NodeRole::OperatorToken),
+ role(NodeRole::RightHandSide)));
+
+  auto ConstRange = ConstTree->getChildren();
+  EXPECT_THAT(ConstRange, ElementsAre(role(NodeRole::LeftHandSide),
+  role(NodeRole::OperatorToken),
+  role(NodeRole::RightHandSide)));
+
+  // FIXME: mutate and observe no invalidation. Mutations are private for now...
+  auto It = Range.begin();
+  auto CIt = ConstRange.begin();
+  static_assert(std::is_same::value,
+"mutable range");
+  static_assert(std::is_same::value,
+"const range");
+
+  for (unsigned I = 0; I < 3; ++I) {
+EXPECT_EQ(It, CIt);
+EXPECT_TRUE(It);
+EXPECT_TRUE(CIt);
+EXPECT_EQ(It.asPointer(), Children[I]);
+EXPECT_EQ(CIt.asPointer(), Children[I]);
+EXPECT_EQ(&*It, Children[I]);
+EXPECT_EQ(&*CIt, Children[I]);
+++It;
+++CIt;
+  }
+  EXPECT_EQ(It, CIt);
+  EXPECT_EQ(It, Tree::ChildIterator());
+  EXPECT_EQ(CIt, Tree::ConstChildIterator());
+  EXPECT_FALSE(It);
+  EXPECT_FALSE(CIt);
+  EXPECT_EQ(nullptr, It.asPointer());
+  EXPECT_EQ(nullptr, CIt.asPointer());
+}
+
 class ListTest : public SyntaxTreeTest {
 private:
   std::string dumpQuotedTokensOrNull(const Node *N) {
Index: clang/lib/Tooling/Syntax/Tree.cpp
===
--- clang/lib/Tooling/Syntax/Tree.cpp
+++ clang/lib/Tooling/Syntax/Tree.cpp
@@ -19,8 +19,8 @@
 static void traverse(const syntax::Node *N,
  llvm::function_ref Visit) {
   if (auto *T = dyn_cast(N)) {
-for (const auto *C = T->getFirstChild(); C; C = C->getNextSibling())
-  traverse(C, Visit);
+for (const syntax::Node &C : T->getChildren())
+  traverse(&C, Visit);
   }
   Visit(N);
 }
@@ -194,21 +194,21 @@
   DumpExtraInfo(N);
   OS << "\n";
 
-  for (const auto *It = T->getFirstChild(); It; It = It->getNextSibling()) {
+  for (const syntax::Node &It : T->getChildren()) {
 for (bool Filled : IndentMask) {

[PATCH] D90023: [Syntax] Add iterators over children of syntax trees.

2020-10-26 Thread Eduardo Caldas via Phabricator via cfe-commits
eduucaldas accepted this revision.
eduucaldas added a comment.

Thanks for the instructive replies


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90023/new/

https://reviews.llvm.org/D90023

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90023: [Syntax] Add iterators over children of syntax trees.

2020-10-26 Thread Dmitri Gribenko via Phabricator via cfe-commits
gribozavr2 added inline comments.



Comment at: clang/include/clang/Tooling/Syntax/Tree.h:182
+/// The element, or nullptr if past-the-end.
+NodeT *asPointer() const { return N; }
+  };

sammccall wrote:
> gribozavr2 wrote:
> > "nodePointer()" ?
> I find this a little confusing; it suggests to me there's also something 
> *other* than the node here.
> What aspect does it help with?
> 
> I can live with it, though currently would prefer `pointer()`, `getPointer()` 
> or `asPointer()`.
> it suggests to me there's also something *other* than the node here.

Not in this case, but there often are other pointers -- LLVM types often have 
an "opaque pointer" representation, and that was the first thing that I thought 
about when I read `asPointer()`. However, it is strongly typed, so one can 
figure it out. Up to you.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90023/new/

https://reviews.llvm.org/D90023

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90023: [Syntax] Add iterators over children of syntax trees.

2020-10-26 Thread Sam McCall via Phabricator via cfe-commits
sammccall updated this revision to Diff 300691.
sammccall marked an inline comment as done.
sammccall added a comment.

Simplify ConstChildIterator cross-constructor


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90023/new/

https://reviews.llvm.org/D90023

Files:
  clang/include/clang/Tooling/Syntax/Tree.h
  clang/lib/Tooling/Syntax/Tree.cpp
  clang/unittests/Tooling/Syntax/TreeTest.cpp
  clang/unittests/Tooling/Syntax/TreeTestBase.h

Index: clang/unittests/Tooling/Syntax/TreeTestBase.h
===
--- clang/unittests/Tooling/Syntax/TreeTestBase.h
+++ clang/unittests/Tooling/Syntax/TreeTestBase.h
@@ -20,7 +20,9 @@
 #include "clang/Tooling/Syntax/Tokens.h"
 #include "clang/Tooling/Syntax/Tree.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ScopedPrinter.h"
 #include "llvm/Testing/Support/Annotations.h"
+#include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
 namespace clang {
@@ -53,6 +55,14 @@
 };
 
 std::vector allTestClangConfigs();
+
+MATCHER_P(role, R, "") {
+  if (arg.getRole() == R)
+return true;
+  *result_listener << "role is " << llvm::to_string(arg.getRole());
+  return false;
+}
+
 } // namespace syntax
 } // namespace clang
 #endif // LLVM_CLANG_UNITTESTS_TOOLING_SYNTAX_TREETESTBASE_H
Index: clang/unittests/Tooling/Syntax/TreeTest.cpp
===
--- clang/unittests/Tooling/Syntax/TreeTest.cpp
+++ clang/unittests/Tooling/Syntax/TreeTest.cpp
@@ -8,6 +8,7 @@
 
 #include "clang/Tooling/Syntax/Tree.h"
 #include "TreeTestBase.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Tooling/Syntax/BuildTree.h"
 #include "clang/Tooling/Syntax/Nodes.h"
 #include "llvm/ADT/STLExtras.h"
@@ -17,6 +18,7 @@
 using namespace clang::syntax;
 
 namespace {
+using testing::ElementsAre;
 
 class TreeTest : public SyntaxTreeTest {
 private:
@@ -124,6 +126,56 @@
   }
 }
 
+TEST_F(TreeTest, Iterators) {
+  buildTree("", allTestClangConfigs().front());
+  std::vector Children = {createLeaf(*Arena, tok::identifier, "a"),
+  createLeaf(*Arena, tok::identifier, "b"),
+  createLeaf(*Arena, tok::identifier, "c")};
+  auto *Tree = syntax::createTree(*Arena,
+  {{Children[0], NodeRole::LeftHandSide},
+   {Children[1], NodeRole::OperatorToken},
+   {Children[2], NodeRole::RightHandSide}},
+  NodeKind::TranslationUnit);
+  const auto *ConstTree = Tree;
+
+  auto Range = Tree->getChildren();
+  EXPECT_THAT(Range, ElementsAre(role(NodeRole::LeftHandSide),
+ role(NodeRole::OperatorToken),
+ role(NodeRole::RightHandSide)));
+
+  auto ConstRange = ConstTree->getChildren();
+  EXPECT_THAT(ConstRange, ElementsAre(role(NodeRole::LeftHandSide),
+  role(NodeRole::OperatorToken),
+  role(NodeRole::RightHandSide)));
+
+  // FIXME: mutate and observe no invalidation. Mutations are private for now...
+  auto It = Range.begin();
+  auto CIt = ConstRange.begin();
+  static_assert(std::is_same::value,
+"mutable range");
+  static_assert(std::is_same::value,
+"const range");
+
+  for (unsigned I = 0; I < 3; ++I) {
+EXPECT_EQ(It, CIt);
+EXPECT_TRUE(It);
+EXPECT_TRUE(CIt);
+EXPECT_EQ(It.asPointer(), Children[I]);
+EXPECT_EQ(CIt.asPointer(), Children[I]);
+EXPECT_EQ(&*It, Children[I]);
+EXPECT_EQ(&*CIt, Children[I]);
+++It;
+++CIt;
+  }
+  EXPECT_EQ(It, CIt);
+  EXPECT_EQ(It, Tree::ChildIterator());
+  EXPECT_EQ(CIt, Tree::ConstChildIterator());
+  EXPECT_FALSE(It);
+  EXPECT_FALSE(CIt);
+  EXPECT_EQ(nullptr, It.asPointer());
+  EXPECT_EQ(nullptr, CIt.asPointer());
+}
+
 class ListTest : public SyntaxTreeTest {
 private:
   std::string dumpQuotedTokensOrNull(const Node *N) {
Index: clang/lib/Tooling/Syntax/Tree.cpp
===
--- clang/lib/Tooling/Syntax/Tree.cpp
+++ clang/lib/Tooling/Syntax/Tree.cpp
@@ -19,8 +19,8 @@
 static void traverse(const syntax::Node *N,
  llvm::function_ref Visit) {
   if (auto *T = dyn_cast(N)) {
-for (const auto *C = T->getFirstChild(); C; C = C->getNextSibling())
-  traverse(C, Visit);
+for (const syntax::Node &C : T->getChildren())
+  traverse(&C, Visit);
   }
   Visit(N);
 }
@@ -194,21 +194,21 @@
   DumpExtraInfo(N);
   OS << "\n";
 
-  for (const auto *It = T->getFirstChild(); It; It = It->getNextSibling()) {
+  for (const syntax::Node &It : T->getChildren()) {
 for (bool Filled : IndentMask) {
   if (Filled)
 OS << "| ";
   else
 OS << "  ";
 }
-if (!It->getNextSibling()) {
+if (!It.getNextSibling()) {
   OS << "`-"

[PATCH] D90023: [Syntax] Add iterators over children of syntax trees.

2020-10-26 Thread Sam McCall via Phabricator via cfe-commits
sammccall marked 2 inline comments as done.
sammccall added inline comments.



Comment at: clang/include/clang/Tooling/Syntax/Tree.h:182
+/// The element, or nullptr if past-the-end.
+NodeT *asPointer() const { return N; }
+  };

gribozavr2 wrote:
> "nodePointer()" ?
I find this a little confusing; it suggests to me there's also something 
*other* than the node here.
What aspect does it help with?

I can live with it, though currently would prefer `pointer()`, `getPointer()` 
or `asPointer()`.



Comment at: clang/include/clang/Tooling/Syntax/Tree.h:202-219
+  /// child_iterator is not invalidated by mutations.
+  struct child_iterator : child_iterator_base {
+using Base::Base;
+  };
+  struct const_child_iterator
+  : child_iterator_base {
+using Base::Base;

gribozavr2 wrote:
> sammccall wrote:
> > eduucaldas wrote:
> > > TL;DR:
> > > `child_iterator` -> `ChildIterator`
> > > `const_child_iterator` -> `ConstChildIterator`
> > > `children` -> `getChildren`
> > > 
> > > I see you followed the naming convention of the ClangAST, which makes 
> > > loads of sense. 
> > > 
> > > In syntax trees we follow the naming conventions of the [conding 
> > > standard](https://llvm.org/docs/CodingStandards.html#name-types-functions-variables-and-enumerators-properly)
> > >  -- just because we want consistency and we had to pick a guideline to 
> > > follow -- according to that, functions should be verb like and names 
> > > should be camel case.
> > > 
> > > If like us you chose `const_child_iterator` and `children` kind of "just 
> > > because", could you change it to follow `ConstChildIterator` and 
> > > `getChildren` respectively. 
> > > In syntax trees we follow the naming conventions of the conding standard
> > 
> > I see that's been used in some of the newer classes, and that you changed 
> > this file to follow that style in 4c14ee61b73746b314d83e7c52e03d6527b78105. 
> > However it's not the original style we used here and elsewhere in libSyntax.
> > It's a bit frustrating to hear the argument about consistency, because we 
> > were quite careful and deliberate about this style, which was IMO fairly 
> > consistent prior to that change.
> > 
> > I'm willing to change these to match the local style in this file. However 
> > `*_iterator`, `*_begin/end/range()` is more common than FooIterator etc in 
> > LLVM overall (I think for consistency with `iterator` etc, which is 
> > endorsed by the style guide). So I don't think this change is particularly 
> > an improvement, even in terms of consistency.
> It can go either way:
> 
> > As an exception, classes that mimic STL classes can have member names in 
> > STL’s style of lower-case words separated by underscores
> 
Yeah, though read narrowly that doesn't apply (Tree doesn't really mimic STL, 
and the rule applies to Tree::ChildIterator's members rather than the class 
itself).

I think it's rare enough to spell out iterator class names that those don't 
matter at all, and getChildren() is necessary given getParent() etc.



Comment at: clang/lib/Tooling/Syntax/Tree.cpp:22-23
   if (auto *T = dyn_cast(N)) {
-for (const auto *C = T->getFirstChild(); C; C = C->getNextSibling())
-  traverse(C, Visit);
+for (const syntax::Node &C : T->children())
+  traverse(&C, Visit);
   }

gribozavr2 wrote:
> sammccall wrote:
> > eduucaldas wrote:
> > > Hmm... 
> > > That looks funny, if the user uses a range-based for loop and forgets the 
> > > `&`, then we would be copying the Node???
> > > 
> > > Also in many places we had to get the address to the node. That is not 
> > > really consistent with how the syntax trees API's were designed, they 
> > > generally take pointers instead of references. (that said we could 
> > > obviously reconsider those design choices)
> > > That looks funny, if the user uses a range-based for loop and forgets the 
> > > &, then we would be copying the Node???
> > 
> > It doesn't look funny to me - foreach usually iterates over values rather 
> > than pointers. This raises a good point - it looks like Node is copyable 
> > and shouldn't be. (The default copy operation violates the tree invariants, 
> > e.g. the copied node will not be a child of its parent). I'll send a 
> > patch...
> > 
> > (That said this is always the case with copyable objects in range-based for 
> > loops, and indeed using references - that doesn't usually mean we avoid 
> > using them)
> > 
> > > Also in many places we had to get the address to the node. That is not 
> > > really consistent with how the syntax trees API's were designed, they 
> > > generally take pointers instead of references
> > 
> > I'm not convinced that taking the address is itself a burden, any more than 
> > using `->` instead of `.` is a burden.
> > Sometimes the use of pointer vs reference intends to convey something about 
> > address stability, but here this goes with t

[PATCH] D90023: [Syntax] Add iterators over children of syntax trees.

2020-10-26 Thread Dmitri Gribenko via Phabricator via cfe-commits
gribozavr2 accepted this revision.
gribozavr2 added inline comments.
This revision is now accepted and ready to land.



Comment at: clang/include/clang/Tooling/Syntax/Tree.h:182
+/// The element, or nullptr if past-the-end.
+NodeT *asPointer() const { return N; }
+  };

"nodePointer()" ?



Comment at: clang/include/clang/Tooling/Syntax/Tree.h:202-219
+  /// child_iterator is not invalidated by mutations.
+  struct child_iterator : child_iterator_base {
+using Base::Base;
+  };
+  struct const_child_iterator
+  : child_iterator_base {
+using Base::Base;

sammccall wrote:
> eduucaldas wrote:
> > TL;DR:
> > `child_iterator` -> `ChildIterator`
> > `const_child_iterator` -> `ConstChildIterator`
> > `children` -> `getChildren`
> > 
> > I see you followed the naming convention of the ClangAST, which makes loads 
> > of sense. 
> > 
> > In syntax trees we follow the naming conventions of the [conding 
> > standard](https://llvm.org/docs/CodingStandards.html#name-types-functions-variables-and-enumerators-properly)
> >  -- just because we want consistency and we had to pick a guideline to 
> > follow -- according to that, functions should be verb like and names should 
> > be camel case.
> > 
> > If like us you chose `const_child_iterator` and `children` kind of "just 
> > because", could you change it to follow `ConstChildIterator` and 
> > `getChildren` respectively. 
> > In syntax trees we follow the naming conventions of the conding standard
> 
> I see that's been used in some of the newer classes, and that you changed 
> this file to follow that style in 4c14ee61b73746b314d83e7c52e03d6527b78105. 
> However it's not the original style we used here and elsewhere in libSyntax.
> It's a bit frustrating to hear the argument about consistency, because we 
> were quite careful and deliberate about this style, which was IMO fairly 
> consistent prior to that change.
> 
> I'm willing to change these to match the local style in this file. However 
> `*_iterator`, `*_begin/end/range()` is more common than FooIterator etc in 
> LLVM overall (I think for consistency with `iterator` etc, which is endorsed 
> by the style guide). So I don't think this change is particularly an 
> improvement, even in terms of consistency.
It can go either way:

> As an exception, classes that mimic STL classes can have member names in 
> STL’s style of lower-case words separated by underscores




Comment at: clang/include/clang/Tooling/Syntax/Tree.h:210
+const_child_iterator(const child_iterator &I)
+: Base(I == child_iterator() ? nullptr : &*I) {}
+  };





Comment at: clang/lib/Tooling/Syntax/Tree.cpp:22-23
   if (auto *T = dyn_cast(N)) {
-for (const auto *C = T->getFirstChild(); C; C = C->getNextSibling())
-  traverse(C, Visit);
+for (const syntax::Node &C : T->children())
+  traverse(&C, Visit);
   }

sammccall wrote:
> eduucaldas wrote:
> > Hmm... 
> > That looks funny, if the user uses a range-based for loop and forgets the 
> > `&`, then we would be copying the Node???
> > 
> > Also in many places we had to get the address to the node. That is not 
> > really consistent with how the syntax trees API's were designed, they 
> > generally take pointers instead of references. (that said we could 
> > obviously reconsider those design choices)
> > That looks funny, if the user uses a range-based for loop and forgets the 
> > &, then we would be copying the Node???
> 
> It doesn't look funny to me - foreach usually iterates over values rather 
> than pointers. This raises a good point - it looks like Node is copyable and 
> shouldn't be. (The default copy operation violates the tree invariants, e.g. 
> the copied node will not be a child of its parent). I'll send a patch...
> 
> (That said this is always the case with copyable objects in range-based for 
> loops, and indeed using references - that doesn't usually mean we avoid using 
> them)
> 
> > Also in many places we had to get the address to the node. That is not 
> > really consistent with how the syntax trees API's were designed, they 
> > generally take pointers instead of references
> 
> I'm not convinced that taking the address is itself a burden, any more than 
> using `->` instead of `.` is a burden.
> Sometimes the use of pointer vs reference intends to convey something about 
> address stability, but here this goes with the type as all nodes are 
> allocated on the arena. (IMO this is a part of the AST design that works 
> well).
> The other thing it conveys is nullability, and this seems valuable enough 
> that IMO APIs that take *non-nullable* nodes by pointer should be 
> reconsidered.
> 
> For the iterators specifically, the main alternative here I guess is having 
> Tree::children() act as a container of *pointers to* children, instead of the 
> children themselves. This *is* highly unconventiona

[PATCH] D90023: [Syntax] Add iterators over children of syntax trees.

2020-10-26 Thread Sam McCall via Phabricator via cfe-commits
sammccall updated this revision to Diff 300674.
sammccall marked an inline comment as done.
sammccall added a comment.

Style changes


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90023/new/

https://reviews.llvm.org/D90023

Files:
  clang/include/clang/Tooling/Syntax/Tree.h
  clang/lib/Tooling/Syntax/Tree.cpp
  clang/unittests/Tooling/Syntax/TreeTest.cpp
  clang/unittests/Tooling/Syntax/TreeTestBase.h

Index: clang/unittests/Tooling/Syntax/TreeTestBase.h
===
--- clang/unittests/Tooling/Syntax/TreeTestBase.h
+++ clang/unittests/Tooling/Syntax/TreeTestBase.h
@@ -20,7 +20,9 @@
 #include "clang/Tooling/Syntax/Tokens.h"
 #include "clang/Tooling/Syntax/Tree.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ScopedPrinter.h"
 #include "llvm/Testing/Support/Annotations.h"
+#include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
 namespace clang {
@@ -53,6 +55,14 @@
 };
 
 std::vector allTestClangConfigs();
+
+MATCHER_P(role, R, "") {
+  if (arg.getRole() == R)
+return true;
+  *result_listener << "role is " << llvm::to_string(arg.getRole());
+  return false;
+}
+
 } // namespace syntax
 } // namespace clang
 #endif // LLVM_CLANG_UNITTESTS_TOOLING_SYNTAX_TREETESTBASE_H
Index: clang/unittests/Tooling/Syntax/TreeTest.cpp
===
--- clang/unittests/Tooling/Syntax/TreeTest.cpp
+++ clang/unittests/Tooling/Syntax/TreeTest.cpp
@@ -8,6 +8,7 @@
 
 #include "clang/Tooling/Syntax/Tree.h"
 #include "TreeTestBase.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Tooling/Syntax/BuildTree.h"
 #include "clang/Tooling/Syntax/Nodes.h"
 #include "llvm/ADT/STLExtras.h"
@@ -17,6 +18,7 @@
 using namespace clang::syntax;
 
 namespace {
+using testing::ElementsAre;
 
 class TreeTest : public SyntaxTreeTest {
 private:
@@ -124,6 +126,56 @@
   }
 }
 
+TEST_F(TreeTest, Iterators) {
+  buildTree("", allTestClangConfigs().front());
+  std::vector Children = {createLeaf(*Arena, tok::identifier, "a"),
+  createLeaf(*Arena, tok::identifier, "b"),
+  createLeaf(*Arena, tok::identifier, "c")};
+  auto *Tree = syntax::createTree(*Arena,
+  {{Children[0], NodeRole::LeftHandSide},
+   {Children[1], NodeRole::OperatorToken},
+   {Children[2], NodeRole::RightHandSide}},
+  NodeKind::TranslationUnit);
+  const auto *ConstTree = Tree;
+
+  auto Range = Tree->getChildren();
+  EXPECT_THAT(Range, ElementsAre(role(NodeRole::LeftHandSide),
+ role(NodeRole::OperatorToken),
+ role(NodeRole::RightHandSide)));
+
+  auto ConstRange = ConstTree->getChildren();
+  EXPECT_THAT(ConstRange, ElementsAre(role(NodeRole::LeftHandSide),
+  role(NodeRole::OperatorToken),
+  role(NodeRole::RightHandSide)));
+
+  // FIXME: mutate and observe no invalidation. Mutations are private for now...
+  auto It = Range.begin();
+  auto CIt = ConstRange.begin();
+  static_assert(std::is_same::value,
+"mutable range");
+  static_assert(std::is_same::value,
+"const range");
+
+  for (unsigned I = 0; I < 3; ++I) {
+EXPECT_EQ(It, CIt);
+EXPECT_TRUE(It);
+EXPECT_TRUE(CIt);
+EXPECT_EQ(It.asPointer(), Children[I]);
+EXPECT_EQ(CIt.asPointer(), Children[I]);
+EXPECT_EQ(&*It, Children[I]);
+EXPECT_EQ(&*CIt, Children[I]);
+++It;
+++CIt;
+  }
+  EXPECT_EQ(It, CIt);
+  EXPECT_EQ(It, Tree::ChildIterator());
+  EXPECT_EQ(CIt, Tree::ConstChildIterator());
+  EXPECT_FALSE(It);
+  EXPECT_FALSE(CIt);
+  EXPECT_EQ(nullptr, It.asPointer());
+  EXPECT_EQ(nullptr, CIt.asPointer());
+}
+
 class ListTest : public SyntaxTreeTest {
 private:
   std::string dumpQuotedTokensOrNull(const Node *N) {
Index: clang/lib/Tooling/Syntax/Tree.cpp
===
--- clang/lib/Tooling/Syntax/Tree.cpp
+++ clang/lib/Tooling/Syntax/Tree.cpp
@@ -19,8 +19,8 @@
 static void traverse(const syntax::Node *N,
  llvm::function_ref Visit) {
   if (auto *T = dyn_cast(N)) {
-for (const auto *C = T->getFirstChild(); C; C = C->getNextSibling())
-  traverse(C, Visit);
+for (const syntax::Node &C : T->getChildren())
+  traverse(&C, Visit);
   }
   Visit(N);
 }
@@ -194,21 +194,21 @@
   DumpExtraInfo(N);
   OS << "\n";
 
-  for (const auto *It = T->getFirstChild(); It; It = It->getNextSibling()) {
+  for (const syntax::Node &It : T->getChildren()) {
 for (bool Filled : IndentMask) {
   if (Filled)
 OS << "| ";
   else
 OS << "  ";
 }
-if (!It->getNextSibling()) {
+if (!It.getNextSibling()) {
   OS << "`-";
   IndentMask.push_back(fa

[PATCH] D90023: [Syntax] Add iterators over children of syntax trees.

2020-10-26 Thread Sam McCall via Phabricator via cfe-commits
sammccall added inline comments.



Comment at: clang/include/clang/Tooling/Syntax/Tree.h:157-184
+  /// Iterator over children (common base for const/non-const).
+  /// Not invalidated by tree mutations (holds a stable node pointer).
+  template 
+  class child_iterator_base
+  : public llvm::iterator_facade_base {
+  protected:

eduucaldas wrote:
> I think we should only have an iterator through `Node`s, not a general one 
> like this.
> 
> In general `Tree` has *heterogeneous* children:
> For instance, `1+2` yields the syntax tree:
> ```
> BinaryOperatorExpression
> |-  IntegerLiteralExpression
> |-  Leaf
> `- IntegerLiteralExpression
> ```
> 
> Very rarely will syntax trees have all their children of the same kind, so I 
> don't think it makes sense to try an provide an iterator template. That makes 
> sense for lists , because generally they *have* the same kind. But in that 
> case we provide special iterators for lists only.
> 
This does only iterate over nodes, note this class is private.
This is a common pattern when implementing iterators: you need `iterator` and 
`const_iterator`, they have almost identical implementations but operate on 
different types (`const T` vs `T`). Thus a private template.

Sometimes it's used directly (`using iterator = iterator_base`) but that 
means the template needs to be visible and also leads to worse compiler 
diagnostics when the canonical type is printed. Inheritance lets us prevent 
direct use of the base class and give the iterator classes distinct names.

---

That said, in future, I think we should (later) have a facility to iterate over 
children of a certain type. This would explicitly be a filtering operation, 
ignoring nodes not of that type.
Providing these typed operations for concrete subclasses of List makes sense, 
but in clangd outside of specific refactorings we've found generic facilities 
to often be more useful. When we designed syntax trees, providing a simple and 
generic data structure that could be used directly was an explicit goal.



Comment at: clang/include/clang/Tooling/Syntax/Tree.h:202-219
+  /// child_iterator is not invalidated by mutations.
+  struct child_iterator : child_iterator_base {
+using Base::Base;
+  };
+  struct const_child_iterator
+  : child_iterator_base {
+using Base::Base;

eduucaldas wrote:
> TL;DR:
> `child_iterator` -> `ChildIterator`
> `const_child_iterator` -> `ConstChildIterator`
> `children` -> `getChildren`
> 
> I see you followed the naming convention of the ClangAST, which makes loads 
> of sense. 
> 
> In syntax trees we follow the naming conventions of the [conding 
> standard](https://llvm.org/docs/CodingStandards.html#name-types-functions-variables-and-enumerators-properly)
>  -- just because we want consistency and we had to pick a guideline to follow 
> -- according to that, functions should be verb like and names should be camel 
> case.
> 
> If like us you chose `const_child_iterator` and `children` kind of "just 
> because", could you change it to follow `ConstChildIterator` and 
> `getChildren` respectively. 
> In syntax trees we follow the naming conventions of the conding standard

I see that's been used in some of the newer classes, and that you changed this 
file to follow that style in 4c14ee61b73746b314d83e7c52e03d6527b78105. However 
it's not the original style we used here and elsewhere in libSyntax.
It's a bit frustrating to hear the argument about consistency, because we were 
quite careful and deliberate about this style, which was IMO fairly consistent 
prior to that change.

I'm willing to change these to match the local style in this file. However 
`*_iterator`, `*_begin/end/range()` is more common than FooIterator etc in LLVM 
overall (I think for consistency with `iterator` etc, which is endorsed by the 
style guide). So I don't think this change is particularly an improvement, even 
in terms of consistency.



Comment at: clang/lib/Tooling/Syntax/Tree.cpp:22-23
   if (auto *T = dyn_cast(N)) {
-for (const auto *C = T->getFirstChild(); C; C = C->getNextSibling())
-  traverse(C, Visit);
+for (const syntax::Node &C : T->children())
+  traverse(&C, Visit);
   }

eduucaldas wrote:
> Hmm... 
> That looks funny, if the user uses a range-based for loop and forgets the 
> `&`, then we would be copying the Node???
> 
> Also in many places we had to get the address to the node. That is not really 
> consistent with how the syntax trees API's were designed, they generally take 
> pointers instead of references. (that said we could obviously reconsider 
> those design choices)
> That looks funny, if the user uses a range-based for loop and forgets the &, 
> then we would be copying the Node???

It doesn't look funny to me - foreach usually iterates over values rather than 
pointers. This raises a good point - it looks like Node is copyable and 
shouldn't be. (The 

[PATCH] D90023: [Syntax] Add iterators over children of syntax trees.

2020-10-23 Thread Eduardo Caldas via Phabricator via cfe-commits
eduucaldas added a comment.

Thanks Sam! I learned a lot from your patch ^^




Comment at: clang/include/clang/Tooling/Syntax/Tree.h:157-184
+  /// Iterator over children (common base for const/non-const).
+  /// Not invalidated by tree mutations (holds a stable node pointer).
+  template 
+  class child_iterator_base
+  : public llvm::iterator_facade_base {
+  protected:

I think we should only have an iterator through `Node`s, not a general one like 
this.

In general `Tree` has *heterogeneous* children:
For instance, `1+2` yields the syntax tree:
```
BinaryOperatorExpression
|-  IntegerLiteralExpression
|-  Leaf
`- IntegerLiteralExpression
```

Very rarely will syntax trees have all their children of the same kind, so I 
don't think it makes sense to try an provide an iterator template. That makes 
sense for lists , because generally they *have* the same kind. But in that case 
we provide special iterators for lists only.




Comment at: clang/include/clang/Tooling/Syntax/Tree.h:202-219
+  /// child_iterator is not invalidated by mutations.
+  struct child_iterator : child_iterator_base {
+using Base::Base;
+  };
+  struct const_child_iterator
+  : child_iterator_base {
+using Base::Base;

TL;DR:
`child_iterator` -> `ChildIterator`
`const_child_iterator` -> `ConstChildIterator`
`children` -> `getChildren`

I see you followed the naming convention of the ClangAST, which makes loads of 
sense. 

In syntax trees we follow the naming conventions of the [conding 
standard](https://llvm.org/docs/CodingStandards.html#name-types-functions-variables-and-enumerators-properly)
 -- just because we want consistency and we had to pick a guideline to follow 
-- according to that, functions should be verb like and names should be camel 
case.

If like us you chose `const_child_iterator` and `children` kind of "just 
because", could you change it to follow `ConstChildIterator` and `getChildren` 
respectively. 



Comment at: clang/lib/Tooling/Syntax/Tree.cpp:22-23
   if (auto *T = dyn_cast(N)) {
-for (const auto *C = T->getFirstChild(); C; C = C->getNextSibling())
-  traverse(C, Visit);
+for (const syntax::Node &C : T->children())
+  traverse(&C, Visit);
   }

Hmm... 
That looks funny, if the user uses a range-based for loop and forgets the `&`, 
then we would be copying the Node???

Also in many places we had to get the address to the node. That is not really 
consistent with how the syntax trees API's were designed, they generally take 
pointers instead of references. (that said we could obviously reconsider those 
design choices)



Comment at: clang/unittests/Tooling/Syntax/TreeTest.cpp:129
 
+TEST_F(TreeTest, Iterators) {
+  buildTree("", allTestClangConfigs().front());

Suggestion:
I would split this into `Iterators` and `ConstIterators`.



Comment at: clang/unittests/Tooling/Syntax/TreeTest.cpp:139
+  NodeKind::TranslationUnit);
+  const syntax::Tree *ConstTree = Tree;
+

nit, feel free to ignore.



Comment at: clang/unittests/Tooling/Syntax/TreeTest.cpp:159-169
+  for (unsigned I = 0; I < 3; ++I) {
+EXPECT_EQ(It, CIt);
+EXPECT_TRUE(It);
+EXPECT_TRUE(CIt);
+EXPECT_EQ(It.asPointer(), Children[I]);
+EXPECT_EQ(CIt.asPointer(), Children[I]);
+EXPECT_EQ(&*It, Children[I]);

Is there a reason why you didn't use a range-based for loop over Children?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90023/new/

https://reviews.llvm.org/D90023

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90023: [Syntax] Add iterators over children of syntax trees.

2020-10-23 Thread Sam McCall via Phabricator via cfe-commits
sammccall created this revision.
sammccall added reviewers: gribozavr, eduucaldas.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.
sammccall requested review of this revision.

This gives us slightly nicer syntax (foreach) for idioms currently expressed
as a loop, and the option to use range algorithms where it makes sense
(e.g. llvm::all_of et al encapsulate the needed flow control in a useful way).

It's also a building block for iteration over filtered views (e.g. iterate over
all Stmt children, with the right type):
for (const Statement &S : filter(N.children()))

  ...

I realize the recent direction has been mostly towards strongly-typed
node-specific facilities, but I think it's important we have convenient
generic facilities too.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D90023

Files:
  clang/include/clang/Tooling/Syntax/Tree.h
  clang/lib/Tooling/Syntax/Tree.cpp
  clang/unittests/Tooling/Syntax/TreeTest.cpp
  clang/unittests/Tooling/Syntax/TreeTestBase.h

Index: clang/unittests/Tooling/Syntax/TreeTestBase.h
===
--- clang/unittests/Tooling/Syntax/TreeTestBase.h
+++ clang/unittests/Tooling/Syntax/TreeTestBase.h
@@ -20,7 +20,9 @@
 #include "clang/Tooling/Syntax/Tokens.h"
 #include "clang/Tooling/Syntax/Tree.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ScopedPrinter.h"
 #include "llvm/Testing/Support/Annotations.h"
+#include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
 namespace clang {
@@ -53,6 +55,14 @@
 };
 
 std::vector allTestClangConfigs();
+
+MATCHER_P(role, R, "") {
+  if (arg.getRole() == R)
+return true;
+  *result_listener << "role is " << llvm::to_string(arg.getRole());
+  return false;
+}
+
 } // namespace syntax
 } // namespace clang
 #endif // LLVM_CLANG_UNITTESTS_TOOLING_SYNTAX_TREETESTBASE_H
Index: clang/unittests/Tooling/Syntax/TreeTest.cpp
===
--- clang/unittests/Tooling/Syntax/TreeTest.cpp
+++ clang/unittests/Tooling/Syntax/TreeTest.cpp
@@ -8,6 +8,7 @@
 
 #include "clang/Tooling/Syntax/Tree.h"
 #include "TreeTestBase.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Tooling/Syntax/BuildTree.h"
 #include "clang/Tooling/Syntax/Nodes.h"
 #include "llvm/ADT/STLExtras.h"
@@ -17,6 +18,7 @@
 using namespace clang::syntax;
 
 namespace {
+using testing::ElementsAre;
 
 class TreeTest : public SyntaxTreeTest {
 private:
@@ -124,6 +126,56 @@
   }
 }
 
+TEST_F(TreeTest, Iterators) {
+  buildTree("", allTestClangConfigs().front());
+  std::vector Children = {createLeaf(*Arena, tok::identifier, "a"),
+  createLeaf(*Arena, tok::identifier, "b"),
+  createLeaf(*Arena, tok::identifier, "c")};
+  auto *Tree = syntax::createTree(*Arena,
+  {{Children[0], NodeRole::LeftHandSide},
+   {Children[1], NodeRole::OperatorToken},
+   {Children[2], NodeRole::RightHandSide}},
+  NodeKind::TranslationUnit);
+  const syntax::Tree *ConstTree = Tree;
+
+  auto Range = Tree->children();
+  EXPECT_THAT(Range, ElementsAre(role(NodeRole::LeftHandSide),
+ role(NodeRole::OperatorToken),
+ role(NodeRole::RightHandSide)));
+
+  auto ConstRange = ConstTree->children();
+  EXPECT_THAT(ConstRange, ElementsAre(role(NodeRole::LeftHandSide),
+  role(NodeRole::OperatorToken),
+  role(NodeRole::RightHandSide)));
+
+  // FIXME: mutate and observe no invalidation. Mutations are private for now...
+  auto It = Range.begin();
+  auto CIt = ConstRange.begin();
+  static_assert(std::is_same::value,
+"mutable range");
+  static_assert(std::is_same::value,
+"const range");
+
+  for (unsigned I = 0; I < 3; ++I) {
+EXPECT_EQ(It, CIt);
+EXPECT_TRUE(It);
+EXPECT_TRUE(CIt);
+EXPECT_EQ(It.asPointer(), Children[I]);
+EXPECT_EQ(CIt.asPointer(), Children[I]);
+EXPECT_EQ(&*It, Children[I]);
+EXPECT_EQ(&*CIt, Children[I]);
+++It;
+++CIt;
+  }
+  EXPECT_EQ(It, CIt);
+  EXPECT_EQ(It, Tree::child_iterator());
+  EXPECT_EQ(CIt, Tree::const_child_iterator());
+  EXPECT_FALSE(It);
+  EXPECT_FALSE(CIt);
+  EXPECT_EQ(nullptr, It.asPointer());
+  EXPECT_EQ(nullptr, CIt.asPointer());
+}
+
 class ListTest : public SyntaxTreeTest {
 private:
   std::string dumpQuotedTokensOrNull(const Node *N) {
Index: clang/lib/Tooling/Syntax/Tree.cpp
===
--- clang/lib/Tooling/Syntax/Tree.cpp
+++ clang/lib/Tooling/Syntax/Tree.cpp
@@ -19,8 +19,8 @@
 static void traverse(const syntax::Node *N,
  llvm::function_ref Visit) {
   if (auto *T = dyn_cast(N)) {
-for (const auto *C = T->getFirstChild(); C; C