[PATCH] D82386: [clangd] Config: Fragments and parsing from YAML

2020-06-25 Thread Sam McCall via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
sammccall marked an inline comment as done.
Closed by commit rGe9fb1506b83d: [clangd] Config: Fragments and parsing from 
YAML (authored by sammccall).

Changed prior to commit:
  https://reviews.llvm.org/D82386?vs=273254&id=273512#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82386/new/

https://reviews.llvm.org/D82386

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/ConfigFragment.h
  clang-tools-extra/clangd/ConfigYAML.cpp
  clang-tools-extra/clangd/unittests/CMakeLists.txt
  clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp

Index: clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp
@@ -0,0 +1,160 @@
+//===-- ConfigYAMLTests.cpp ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "Annotations.h"
+#include "ConfigFragment.h"
+#include "Matchers.h"
+#include "Protocol.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/SourceMgr.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "gtest/internal/gtest-internal.h"
+
+namespace clang {
+namespace clangd {
+namespace config {
+template  void PrintTo(const Located &V, std::ostream *OS) {
+  *OS << ::testing::PrintToString(*V);
+}
+
+namespace {
+using ::testing::AllOf;
+using ::testing::ElementsAre;
+using ::testing::IsEmpty;
+
+MATCHER_P(Val, Value, "") {
+  if (*arg == Value)
+return true;
+  *result_listener << "value is " << *arg;
+  return false;
+}
+
+Position toPosition(llvm::SMLoc L, const llvm::SourceMgr &SM) {
+  auto LineCol = SM.getLineAndColumn(L);
+  Position P;
+  P.line = LineCol.first - 1;
+  P.character = LineCol.second - 1;
+  return P;
+}
+
+Range toRange(llvm::SMRange R, const llvm::SourceMgr &SM) {
+  return Range{toPosition(R.Start, SM), toPosition(R.End, SM)};
+}
+
+struct CapturedDiags {
+  std::function callback() {
+return [this](const llvm::SMDiagnostic &D) {
+  Diagnostics.emplace_back();
+  Diag &Out = Diagnostics.back();
+  Out.Message = D.getMessage().str();
+  Out.Kind = D.getKind();
+  Out.Pos.line = D.getLineNo() - 1;
+  Out.Pos.character = D.getColumnNo(); // Zero-based - bug in SourceMgr?
+  if (!D.getRanges().empty()) {
+const auto &R = D.getRanges().front();
+Out.Range.emplace();
+Out.Range->start.line = Out.Range->end.line = Out.Pos.line;
+Out.Range->start.character = R.first;
+Out.Range->end.character = R.second;
+  }
+};
+  }
+  struct Diag {
+std::string Message;
+llvm::SourceMgr::DiagKind Kind;
+Position Pos;
+llvm::Optional Range;
+
+friend void PrintTo(const Diag &D, std::ostream *OS) {
+  *OS << (D.Kind == llvm::SourceMgr::DK_Error ? "error: " : "warning: ")
+  << D.Message << "@" << llvm::to_string(D.Pos);
+}
+  };
+  std::vector Diagnostics;
+};
+
+MATCHER_P(DiagMessage, M, "") { return arg.Message == M; }
+MATCHER_P(DiagKind, K, "") { return arg.Kind == K; }
+MATCHER_P(DiagPos, P, "") { return arg.Pos == P; }
+MATCHER_P(DiagRange, R, "") { return arg.Range == R; }
+
+TEST(ParseYAML, SyntacticForms) {
+  CapturedDiags Diags;
+  const char *YAML = R"yaml(
+If:
+  PathMatch:
+- 'abc'
+CompileFlags: { Add: [foo, bar] }
+---
+CompileFlags:
+  Add: |
+b
+az
+  )yaml";
+  auto Results = Fragment::parseYAML(YAML, "config.yaml", Diags.callback());
+  EXPECT_THAT(Diags.Diagnostics, IsEmpty());
+  ASSERT_EQ(Results.size(), 2u);
+  EXPECT_FALSE(Results.front().Condition.HasUnrecognizedCondition);
+  EXPECT_THAT(Results.front().Condition.PathMatch, ElementsAre(Val("abc")));
+  EXPECT_THAT(Results.front().CompileFlags.Add,
+  ElementsAre(Val("foo"), Val("bar")));
+
+  EXPECT_THAT(Results.back().CompileFlags.Add, ElementsAre(Val("b\naz\n")));
+}
+
+TEST(ParseYAML, Locations) {
+  CapturedDiags Diags;
+  Annotations YAML(R"yaml(
+If:
+  PathMatch: [['???bad***regex(((']]
+  )yaml");
+  auto Results =
+  Fragment::parseYAML(YAML.code(), "config.yaml", Diags.callback());
+  EXPECT_THAT(Diags.Diagnostics, IsEmpty());
+  ASSERT_EQ(Results.size(), 1u);
+  ASSERT_NE(Results.front().Source.Manager, nullptr);
+  EXPECT_EQ(toRange(Results.front().Condition.PathMatch.front().Range,
+*Results.front().Source.Manager),
+YAML.range());
+}
+
+TEST(ParseYAML, Diagnostics) {
+  CapturedDiags Diags;
+  Annotations YAML(R"yaml(
+If:
+  [[UnknownCondition]]: "foo"
+CompileFlags:
+  Add: 'first'
+---
+CompileF

[PATCH] D82386: [clangd] Config: Fragments and parsing from YAML

2020-06-25 Thread Sam McCall via Phabricator via cfe-commits
sammccall marked 9 inline comments as done.
sammccall added a comment.

Thanks for the careful review!




Comment at: clang-tools-extra/clangd/ConfigFragment.h:73
+/// Only valid if SourceManager is set.
+llvm::SMLoc Location;
+  };

kadircet wrote:
> sammccall wrote:
> > kadircet wrote:
> > > what is the use of this ?
> > There may be multiple fragments in a file.
> > 
> > When tracking how fragments are used in the logs, there should be enough 
> > information to identify the particular fragment, without having to guess.
> >  - during parsing, we can log in some source-specific way
> >  - in the Fragment form, we can log this location to identify the fragment
> >  - during compilation, we log this location and the pointer address of the 
> > new CompiledFragment
> >  - subsequently we can just log the CompiledFragment address
> right, I was thinking that we already have location information available for 
> the start of the fragment during parsing and wasn't sure how useful it would 
> be in later on stages as we know the exact location of the relevant entries 
> (keys, scalars etc.) for logging.
> 
> I wasn't against the idea of storing it, just wanted to learn if it has any 
> significant importance for the upcoming stages. thanks for the info!
Yeah, storing the start of the fragment given a slightly more correct location 
to represent the fragment itself, but more importantly we don't have to hunt 
around the config to find something that has a location.



Comment at: clang-tools-extra/clangd/ConfigFragment.h:77
+
+  struct ConditionFragment {
+std::vector> PathMatch;

kadircet wrote:
> sammccall wrote:
> > kadircet wrote:
> > > comments?
> > Whoops, done. There's an argument to be made that we shouldn't maintain 
> > docs in two places, and the user docs should be canonical.
> > Something to think about later, maybe we can generate them.
> > 
> > I'm always unsure whether to put the doc for "blocks" like this on the 
> > struct or the field...
> I suppose it makes sense to put the part starting with `  /// Conditions 
> based on a file's path use the following form:` above the field rather than 
> the struct, especially if we are planning to generate docs.
> 
> also:
> s/ConfigFragment/Fragment/
Yeah - the counter-argument is that if you *are* reading the code then having 
the general documentation above the specific stuff reads better. And we can 
teach a doc generator/extractor to grab the right text easily enough. I'll 
leave it for now, I think.

> s/ConfigFragment/Fragment/

Doh, I did update this everywhere, but apparently not in my brain, so it keeps 
coming back...



Comment at: clang-tools-extra/clangd/ConfigFragment.h:81
+/// The condition will evaluate to false.
+bool UnrecognizedCondition;
+  };

kadircet wrote:
> sammccall wrote:
> > kadircet wrote:
> > > `HasUnrecognizedCondition` ?
> > > 
> > > also default init to `true` maybe?
> > Renamed. I guess you meant `false` here? (an empty condition should not 
> > have this set)
> I was actually suggesting `true` to make sure this is explicitly set by the 
> parser in case of success and not forgotten when something slips away. but 
> not that important.
The problem is the only sensible implementation for a parser is to set it to 
false again, then set it to true once you hit an unknown key.
Particularly now that condition is not Optional. I think the invariant that a 
default-initialized Fragment corresponds to an empty document is more valuable.



Comment at: clang-tools-extra/clangd/ConfigFragment.h:87
+std::vector> Add;
+  } CompileFlags;
+};

kadircet wrote:
> sammccall wrote:
> > kadircet wrote:
> > > maybe make this an llvm:Optional too. Even though emptiness of `Add` 
> > > would be OK in this case, for non-container "blocks" we might need to 
> > > introduce an optional, and homogeneity would be nice when it happens.
> > I'd rather push this the other way: the presence of an empty block like 
> > this shouldn't have any semantic effect - I think we can be consistent 
> > about this and it helps the schema feel regular/extensible/predictable.
> > 
> > Condition was the only tricky case, but I don't think we need it, so I've 
> > removed the Optional on the condition instead. it's an AND, so an empty set 
> > of conditions evaluates to "unconditionally true" anyway.
> sure SG, I was rather afraid of introducing an `int` option one day, then we 
> might need to signal it's existence with an `Optional` but one might also 
> argue that we would likely have a "default" value for such options, hence 
> instead of marking it as `None` we could just make use of the default in 
> absence of that option.
Oh definitely I think we'll want `Optional` for singular leaf fields, like 
`Optional BackgroundIndexBlock::Enabled` or whatever.
You want a tristate of yes/no/inherit-existing and Optional

[PATCH] D82386: [clangd] Config: Fragments and parsing from YAML

2020-06-25 Thread Kadir Cetinkaya via Phabricator via cfe-commits
kadircet marked 5 inline comments as done.
kadircet added a comment.

LGTM with couple of nits.

Regarding clang-format, I was mostly confused by `template  class 
Located {` being on a single line, but apparently that's the style :D




Comment at: clang-tools-extra/clangd/ConfigFragment.h:61
+  /// BufferName is used for the SourceMgr and diagnostics.
+  static std::vector parseYAML(llvm::StringRef YAML,
+ llvm::StringRef BufferName,

sammccall wrote:
> kadircet wrote:
> > what about `fromYAML` and returning `vector` ?
> > what about fromYAML
> 
> I can live with it if you like, but I prefer parse:
>  - Fragment::fromYAML reads funny as it returns multiple fragments in general
>  - parse describes the action more clearly and so hints toward thinking about 
> error handling etc
> 
> > returning vector ?
> why? the resulting vector is slower (has to copy instead of move on 
> reallocation).
> It prevents us from moving from the fragment when we compile it (which we do 
> in some places)
> I can live with it if you like, but I prefer parse:

I was mainly unhappy about it because `parse` doesn't reflect the construction 
bit of the process, but it is clear from the signature and you are right about 
returning multiple fragments contradicting with the idea of `from`. I am more 
inclined towards `from` but I am fine with `parse` too if it feels more natural 
to you.

> why? the resulting vector is slower (has to copy instead of move on 
> reallocation).

It was to signal the fact that these correspond to some `immutable` input 
received, but I giving up on move semantics sounds like a nasty consequence. So 
let's keep it that way (unless you want to hide the members and provide only 
const accessors, which is some plumbing for a simple struct like this and might 
hinder readability)



Comment at: clang-tools-extra/clangd/ConfigFragment.h:73
+/// Only valid if SourceManager is set.
+llvm::SMLoc Location;
+  };

sammccall wrote:
> kadircet wrote:
> > what is the use of this ?
> There may be multiple fragments in a file.
> 
> When tracking how fragments are used in the logs, there should be enough 
> information to identify the particular fragment, without having to guess.
>  - during parsing, we can log in some source-specific way
>  - in the Fragment form, we can log this location to identify the fragment
>  - during compilation, we log this location and the pointer address of the 
> new CompiledFragment
>  - subsequently we can just log the CompiledFragment address
right, I was thinking that we already have location information available for 
the start of the fragment during parsing and wasn't sure how useful it would be 
in later on stages as we know the exact location of the relevant entries (keys, 
scalars etc.) for logging.

I wasn't against the idea of storing it, just wanted to learn if it has any 
significant importance for the upcoming stages. thanks for the info!



Comment at: clang-tools-extra/clangd/ConfigFragment.h:75
+  };
+  SourceInfo Source;
+

sammccall wrote:
> kadircet wrote:
> > why not make this const ? i don't think it makes sense to modify these 
> > after creation.
> const members are a pain, as they prevent moving the object (and vectors of 
> them are doing copies, etc). For that reason I generally don't find "I don't 
> semantically need to modify this" a good enough reason to mark a member const.
> 
> Adding a constructor muddles the idea of a dumb struct and presents an 
> inconsistent API - the other fields *also* don't really change during the 
> lifetime (parseYAML() is basically a big constructor)
this was in parallel with the idea of returning `vector`. so 
agreed.



Comment at: clang-tools-extra/clangd/ConfigFragment.h:77
+
+  struct ConditionFragment {
+std::vector> PathMatch;

sammccall wrote:
> kadircet wrote:
> > comments?
> Whoops, done. There's an argument to be made that we shouldn't maintain docs 
> in two places, and the user docs should be canonical.
> Something to think about later, maybe we can generate them.
> 
> I'm always unsure whether to put the doc for "blocks" like this on the struct 
> or the field...
I suppose it makes sense to put the part starting with `  /// Conditions based 
on a file's path use the following form:` above the field rather than the 
struct, especially if we are planning to generate docs.

also:
s/ConfigFragment/Fragment/



Comment at: clang-tools-extra/clangd/ConfigFragment.h:81
+/// The condition will evaluate to false.
+bool UnrecognizedCondition;
+  };

sammccall wrote:
> kadircet wrote:
> > `HasUnrecognizedCondition` ?
> > 
> > also default init to `true` maybe?
> Renamed. I guess you meant `false` here? (an empty condition should not have 
> this set)
I was actually suggesting `true` to make sure this is explic

[PATCH] D82386: [clangd] Config: Fragments and parsing from YAML

2020-06-25 Thread Sam McCall via Phabricator via cfe-commits
sammccall added a comment.

Thanks!

> Apart from the comments(IIRC which are mostly minor), it felt like there were 
> a few lines that aren't clang-format'd. Please make sure to run clang-format 
> on the files at some point.

git-clang-format thinks it's clean, so please point out any examples you see!




Comment at: clang-tools-extra/clangd/ConfigFragment.h:9
+//
+// Various clangd features have configurable behaviour (or can be disabled).
+// The configuration system allows users to control this:

kadircet wrote:
> i think this paragraph belongs to `Config.h`
This first sentence does, and I'll put it there too, but I think one redundant 
sentence to establish the context is reasonable.

The next three lines belong in this file because they describe how users 
control config (which they do through ConfigFragment, not through Config).



Comment at: clang-tools-extra/clangd/ConfigFragment.h:16
+// configuration as obtained from a source like a file.
+// This is distinct from how the config is interpreted (CompiledFragment),
+// combined (ConfigProvider) and exposed to the rest of clangd (Config).

kadircet wrote:
> again i don't think these are relevant here.
> 
> maybe provide more details about any new config option should start 
> propagating from here, as this is the closest layer to user written config,
> again i don't think these are relevant here.

I think how this code relates to the rest of the system is a pretty important 
thing for documentation to cover! I wish docs in general did a better job of 
this.
Relying on xrefs in the code is pretty noisy and low-level, and guessing at 
filenames doesn't scale well.

> maybe provide more details about any new config option should start 
> propagating from here, as this is the closest layer to user written config,

Yeah - it depends whether you're looking at "here is a feature I'd like to make 
configurable" vs "here is some configuration syntax I want to support".
I've moved it here, but I'm not 100% sure it's the right call.



Comment at: clang-tools-extra/clangd/ConfigFragment.h:61
+  /// BufferName is used for the SourceMgr and diagnostics.
+  static std::vector parseYAML(llvm::StringRef YAML,
+ llvm::StringRef BufferName,

kadircet wrote:
> what about `fromYAML` and returning `vector` ?
> what about fromYAML

I can live with it if you like, but I prefer parse:
 - Fragment::fromYAML reads funny as it returns multiple fragments in general
 - parse describes the action more clearly and so hints toward thinking about 
error handling etc

> returning vector ?
why? the resulting vector is slower (has to copy instead of move on 
reallocation).
It prevents us from moving from the fragment when we compile it (which we do in 
some places)



Comment at: clang-tools-extra/clangd/ConfigFragment.h:65
+
+  struct SourceInfo {
+/// Retains a buffer of the original source this fragment was parsed from.

kadircet wrote:
> why the bundling?
Oops, this is so I could give them a collective comment, and I forgot to do 
it...

The idea is everything else represents the directly-user-specified "body" of 
the configuration, but these fields are different. (The associated directory 
also goes here, though wasn't needed in this patch).
I wanted to wall these off a bit as the outer struct will gain many fields and 
so this may become less obvious. (Also makes it easier to skip over if we're 
generating docs etc)



Comment at: clang-tools-extra/clangd/ConfigFragment.h:69
+/// Shared because multiple fragments are often parsed from one (YAML) 
file.
+/// May be null, then all locations are ignored.
+std::shared_ptr Manager;

kadircet wrote:
> maybe `invalid/absent` rather than `ignored` (or change it to say should be 
> ignored) ? I am assuming this is referring to locations of config parameters 
> like `Add`s and conditions.
Changed to "should be ignored". guaranteeing that the SMRanges is stronger but 
not actually useful.



Comment at: clang-tools-extra/clangd/ConfigFragment.h:73
+/// Only valid if SourceManager is set.
+llvm::SMLoc Location;
+  };

kadircet wrote:
> what is the use of this ?
There may be multiple fragments in a file.

When tracking how fragments are used in the logs, there should be enough 
information to identify the particular fragment, without having to guess.
 - during parsing, we can log in some source-specific way
 - in the Fragment form, we can log this location to identify the fragment
 - during compilation, we log this location and the pointer address of the new 
CompiledFragment
 - subsequently we can just log the CompiledFragment address



Comment at: clang-tools-extra/clangd/ConfigFragment.h:75
+  };
+  SourceInfo Source;
+

kadircet wrote:
> why not mak

[PATCH] D82386: [clangd] Config: Fragments and parsing from YAML

2020-06-25 Thread Sam McCall via Phabricator via cfe-commits
sammccall updated this revision to Diff 273254.
sammccall marked 38 inline comments as done.
sammccall added a comment.

Address review comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82386/new/

https://reviews.llvm.org/D82386

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/ConfigFragment.h
  clang-tools-extra/clangd/ConfigYAML.cpp
  clang-tools-extra/clangd/unittests/CMakeLists.txt
  clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp

Index: clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp
@@ -0,0 +1,160 @@
+//===-- ConfigYAMLTests.cpp ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "Annotations.h"
+#include "ConfigFragment.h"
+#include "Matchers.h"
+#include "Protocol.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/SourceMgr.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "gtest/internal/gtest-internal.h"
+
+namespace clang {
+namespace clangd {
+namespace config {
+template  void PrintTo(const Located &V, std::ostream *OS) {
+  *OS << ::testing::PrintToString(*V);
+}
+
+namespace {
+using ::testing::AllOf;
+using ::testing::ElementsAre;
+using ::testing::IsEmpty;
+
+MATCHER_P(Val, Value, "") {
+  if (*arg == Value)
+return true;
+  *result_listener << "value is " << *arg;
+  return false;
+}
+
+Position toPosition(llvm::SMLoc L, const llvm::SourceMgr &SM) {
+  auto LineCol = SM.getLineAndColumn(L);
+  Position P;
+  P.line = LineCol.first - 1;
+  P.character = LineCol.second - 1;
+  return P;
+}
+
+Range toRange(llvm::SMRange R, const llvm::SourceMgr &SM) {
+  return Range{toPosition(R.Start, SM), toPosition(R.End, SM)};
+}
+
+struct CapturedDiags {
+  std::function callback() {
+return [this](const llvm::SMDiagnostic &D) {
+  Diagnostics.emplace_back();
+  Diag &Out = Diagnostics.back();
+  Out.Message = D.getMessage().str();
+  Out.Kind = D.getKind();
+  Out.Pos.line = D.getLineNo() - 1;
+  Out.Pos.character = D.getColumnNo(); // Zero-based - bug in SourceMgr?
+  if (!D.getRanges().empty()) {
+const auto &R = D.getRanges().front();
+Out.Range.emplace();
+Out.Range->start.line = Out.Range->end.line = Out.Pos.line;
+Out.Range->start.character = R.first;
+Out.Range->end.character = R.second;
+  }
+};
+  }
+  struct Diag {
+std::string Message;
+llvm::SourceMgr::DiagKind Kind;
+Position Pos;
+llvm::Optional Range;
+
+friend void PrintTo(const Diag &D, std::ostream *OS) {
+  *OS << (D.Kind == llvm::SourceMgr::DK_Error ? "error: " : "warning: ")
+  << D.Message << "@" << llvm::to_string(D.Pos);
+}
+  };
+  std::vector Diagnostics;
+};
+
+MATCHER_P(DiagMessage, M, "") { return arg.Message == M; }
+MATCHER_P(DiagKind, K, "") { return arg.Kind == K; }
+MATCHER_P(DiagPos, P, "") { return arg.Pos == P; }
+MATCHER_P(DiagRange, R, "") { return arg.Range == R; }
+
+TEST(ParseYAML, SyntacticForms) {
+  CapturedDiags Diags;
+  const char *YAML = R"yaml(
+If:
+  PathMatch:
+- 'abc'
+CompileFlags: { Add: [foo, bar] }
+---
+CompileFlags:
+  Add: |
+b
+az
+  )yaml";
+  auto Results = Fragment::parseYAML(YAML, "config.yaml", Diags.callback());
+  EXPECT_THAT(Diags.Diagnostics, IsEmpty());
+  ASSERT_EQ(Results.size(), 2u);
+  EXPECT_FALSE(Results.front().Condition.HasUnrecognizedCondition);
+  EXPECT_THAT(Results.front().Condition.PathMatch, ElementsAre(Val("abc")));
+  EXPECT_THAT(Results.front().CompileFlags.Add,
+  ElementsAre(Val("foo"), Val("bar")));
+
+  EXPECT_THAT(Results.back().CompileFlags.Add, ElementsAre(Val("b\naz\n")));
+}
+
+TEST(ParseYAML, Locations) {
+  CapturedDiags Diags;
+  Annotations YAML(R"yaml(
+If:
+  PathMatch: [['???bad***regex(((']]
+  )yaml");
+  auto Results =
+  Fragment::parseYAML(YAML.code(), "config.yaml", Diags.callback());
+  EXPECT_THAT(Diags.Diagnostics, IsEmpty());
+  ASSERT_EQ(Results.size(), 1u);
+  ASSERT_NE(Results.front().Source.Manager, nullptr);
+  EXPECT_EQ(toRange(Results.front().Condition.PathMatch.front().Range,
+*Results.front().Source.Manager),
+YAML.range());
+}
+
+TEST(ParseYAML, Diagnostics) {
+  CapturedDiags Diags;
+  Annotations YAML(R"yaml(
+If:
+  [[UnknownCondition]]: "foo"
+CompileFlags:
+  Add: 'first'
+---
+CompileFlags: {^
+)yaml");
+  auto Results =
+  Fragment::parseYAML(YAML.code(), "config.yaml", Diags.callback());
+
+  ASSERT_THAT(
+  Diags.Diagnostics,
+  El

[PATCH] D82386: [clangd] Config: Fragments and parsing from YAML

2020-06-24 Thread Kadir Cetinkaya via Phabricator via cfe-commits
kadircet added a comment.

Thanks! Looks great, I've finally managed to finish all my iterations :D

Apart from the comments(IIRC which are mostly minor), it felt like there were a 
few lines that aren't clang-format'd. Please make sure to run clang-format on 
the files at some point.




Comment at: clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp:58
+  Out.Pos.character = D.getColumnNo(); // Zero-based - bug in SourceMgr?
+  if (!D.getRanges().empty()) {
+const auto &R = D.getRanges().front();

is there an explanation to what these ranges are ?

All I could find is somewhere in the implementation making sure these are 
relative to the `D.getLineNo` and nothing else :/



Comment at: clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp:73
+
+friend void PrintTo(const Diag &D, std::ostream *OS) { *OS << D.Message; }
+  };

why no print others ?

`Kind: D.Message @ Pos - Range`



Comment at: clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp:133
+  ASSERT_THAT(Diags.Diagnostics,
+  ElementsAre(DiagMessage("Unknown Condition key 
UnknownCondition"),
+  DiagMessage("Unexpected token. Expected Key, Flow "

i think having a full matcher(i.e with kind, pos and ranges) might be more 
expressive here, up to you though


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82386/new/

https://reviews.llvm.org/D82386



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D82386: [clangd] Config: Fragments and parsing from YAML

2020-06-24 Thread Kadir Cetinkaya via Phabricator via cfe-commits
kadircet added inline comments.



Comment at: clang-tools-extra/clangd/ConfigYAML.cpp:29
+  llvm::SourceMgr &SM;
+  llvm::SmallString<256> Buf;
+

a comment for this buf, especially the reason why it's a member rather than a 
function-local when needed. (I suppose to get rid of const/dest penalties?)



Comment at: clang-tools-extra/clangd/ConfigYAML.cpp:40
+Dict.handle("If", [&](Node &N) {
+  F.Condition.emplace();
+  return parse(*F.Condition, N);

It is a little bit subtle that "at most once" execution of this handler is 
assured by DictParser. Can we put some comments explaining it to either 
DictParser itself or to `handle` member ?



Comment at: clang-tools-extra/clangd/ConfigYAML.cpp:74
+llvm::StringRef Description;
+std::vector>> Keys;
+std::function Unknown;

maybe a comment for these two, suggesting the latter is used for unknown keys.

and also mention it is handlers responsibility emit any diagnostics in case of 
failures?



Comment at: clang-tools-extra/clangd/ConfigYAML.cpp:84
+void handle(llvm::StringLiteral Key, std::function Parse) {
+  Keys.emplace_back(Key, std::move(Parse));
+}

maybe assert on duplicate keys in debug builds?



Comment at: clang-tools-extra/clangd/ConfigYAML.cpp:100
+if (!K)
+  return false;
+auto Key = Outer->scalarValue(*K, "Dictionary key");

i suppose yamlparser has already emitted a diagnostic by then ? can you add a 
comment about it if that's the case.

and if not, shouldn't we emit something about a broken stream?

same goes for `KV.getValue()` below.



Comment at: clang-tools-extra/clangd/ConfigYAML.cpp:105
+if (!Seen.insert(**Key).second) {
+  Outer->warning("Duplicate key " + **Key, *K);
+  continue;

maybe `Ignoring duplicate key` instead of just `Duplicate key` ?



Comment at: clang-tools-extra/clangd/ConfigYAML.cpp:136
+else if (auto *BS = llvm::dyn_cast(&N))
+  return Located(S->getValue(Buf).str(), N.getSourceRange());
+warning(Desc + " should be scalar", N);

s/S->getValue(Buf)/BS->getValue()/  (or change `auto *BS` to `auto *S`)

and a test



Comment at: clang-tools-extra/clangd/ConfigYAML.cpp:150
+  for (auto &Child : *S) {
+if (auto Value = scalarValue(Child, "List item"))
+  Result.push_back(std::move(*Value));

shouldn't we error/warn out if not ?



Comment at: clang-tools-extra/clangd/ConfigYAML.cpp:161
+  // Report a "hard" error, reflecting a config file that can never be valid.
+  bool error(const llvm::Twine &Msg, const Node &N) {
+SM.PrintMessage(N.getSourceRange().Start, llvm::SourceMgr::DK_Error, Msg,

do we really want to return a boolean here (and not at the warning ?)

I can see that it's nice to `return error("xxx")` but it looks surprising as we 
are not returning the error but just false.
I would make this void, up to you though.



Comment at: clang-tools-extra/clangd/ConfigYAML.cpp:202
+  // SM has two entries: "main" non-owning buffer, and ignored owning buffer.
+  SM->AddNewSourceBuffer(std::move(Buf), llvm::SMLoc());
+  return Result;

*sigh*

what a mess, Scanner in YamlParser already adds this buffer. It is unclear why 
though, as it adds a non-owning buffer and that buffer is never accessed.  
(well apart from SMLoc to Buffer conversions when emitting diags)
It rather makes use of pointers to the underlying data, obtained before adding 
that buffer. So it is the caller keeping the data alive anyways.

should we consider having an entry point that takes an SMLoc and a SourceMgr 
for `yaml::Stream` instead of this? That way we can add the owning entry to 
sourcemanager and start the stream at beginning of that buffer.

i am afraid this might bite us in the future, as SM is exposed publicly and one 
might try to iterate over buffers, which might come as a shock. we can postpone 
until that happens though, so feel free to leave it as it is.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82386/new/

https://reviews.llvm.org/D82386



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D82386: [clangd] Config: Fragments and parsing from YAML

2020-06-23 Thread Kadir Cetinkaya via Phabricator via cfe-commits
kadircet added inline comments.



Comment at: clang-tools-extra/clangd/ConfigFragment.h:9
+//
+// Various clangd features have configurable behaviour (or can be disabled).
+// The configuration system allows users to control this:

i think this paragraph belongs to `Config.h`



Comment at: clang-tools-extra/clangd/ConfigFragment.h:14
+//
+// This file defines the config::Fragment structure which is models one piece 
of
+// configuration as obtained from a source like a file.

s/which is models/which models/



Comment at: clang-tools-extra/clangd/ConfigFragment.h:16
+// configuration as obtained from a source like a file.
+// This is distinct from how the config is interpreted (CompiledFragment),
+// combined (ConfigProvider) and exposed to the rest of clangd (Config).

again i don't think these are relevant here.

maybe provide more details about any new config option should start propagating 
from here, as this is the closest layer to user written config,



Comment at: clang-tools-extra/clangd/ConfigFragment.h:61
+  /// BufferName is used for the SourceMgr and diagnostics.
+  static std::vector parseYAML(llvm::StringRef YAML,
+ llvm::StringRef BufferName,

what about `fromYAML` and returning `vector` ?



Comment at: clang-tools-extra/clangd/ConfigFragment.h:65
+
+  struct SourceInfo {
+/// Retains a buffer of the original source this fragment was parsed from.

why the bundling?



Comment at: clang-tools-extra/clangd/ConfigFragment.h:69
+/// Shared because multiple fragments are often parsed from one (YAML) 
file.
+/// May be null, then all locations are ignored.
+std::shared_ptr Manager;

maybe `invalid/absent` rather than `ignored` (or change it to say should be 
ignored) ? I am assuming this is referring to locations of config parameters 
like `Add`s and conditions.



Comment at: clang-tools-extra/clangd/ConfigFragment.h:73
+/// Only valid if SourceManager is set.
+llvm::SMLoc Location;
+  };

what is the use of this ?



Comment at: clang-tools-extra/clangd/ConfigFragment.h:75
+  };
+  SourceInfo Source;
+

why not make this const ? i don't think it makes sense to modify these after 
creation.



Comment at: clang-tools-extra/clangd/ConfigFragment.h:77
+
+  struct ConditionFragment {
+std::vector> PathMatch;

comments?



Comment at: clang-tools-extra/clangd/ConfigFragment.h:78
+  struct ConditionFragment {
+std::vector> PathMatch;
+/// An unrecognized key was found while parsing the condition.

some comments? especially around `fragment applies to file matching all (or 
any) of the pathmatches`



Comment at: clang-tools-extra/clangd/ConfigFragment.h:81
+/// The condition will evaluate to false.
+bool UnrecognizedCondition;
+  };

`HasUnrecognizedCondition` ?

also default init to `true` maybe?



Comment at: clang-tools-extra/clangd/ConfigFragment.h:85
+
+  struct CompileFlagsFragment {
+std::vector> Add;

some comments.

I am not sure if putting `Fragment` suffix to all of these makes sense, as they 
already reside inside `Fragment`. Maybe `section` or `block` ? (same goes for 
ConditionFragment)



Comment at: clang-tools-extra/clangd/ConfigFragment.h:87
+std::vector> Add;
+  } CompileFlags;
+};

maybe make this an llvm:Optional too. Even though emptiness of `Add` would be 
OK in this case, for non-container "blocks" we might need to introduce an 
optional, and homogeneity would be nice when it happens.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82386/new/

https://reviews.llvm.org/D82386



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D82386: [clangd] Config: Fragments and parsing from YAML

2020-06-23 Thread Sam McCall via Phabricator via cfe-commits
sammccall created this revision.
sammccall added reviewers: kadircet, adamcz.
Herald added subscribers: cfe-commits, usaxena95, arphaman, jkorous, MaskRay, 
ilya-biryukov, mgorny.
Herald added a project: clang.

This is a piece from the design in https://tinyurl.com/clangd-config
https://reviews.llvm.org/D82335 is a draft with all the bits linked together.
It doesn't do anything yet, it's just a library + tests.

It deliberately implements a minimal set of actual configuration options.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D82386

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/ConfigFragment.h
  clang-tools-extra/clangd/ConfigYAML.cpp
  clang-tools-extra/clangd/unittests/CMakeLists.txt
  clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp

Index: clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp
@@ -0,0 +1,154 @@
+//===-- ConfigYAMLTests.cpp ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "Annotations.h"
+#include "ConfigFragment.h"
+#include "Matchers.h"
+#include "Protocol.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/SourceMgr.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "gtest/internal/gtest-internal.h"
+
+namespace clang {
+namespace clangd {
+namespace config {
+template  void PrintTo(const Located &V, std::ostream *OS) {
+  *OS << ::testing::PrintToString(*V);
+}
+
+namespace {
+using ::testing::ElementsAre;
+using ::testing::IsEmpty;
+
+MATCHER_P(Val, Value, "") {
+  if (*arg == Value)
+return true;
+  *result_listener << "value is " << *arg;
+  return false;
+}
+
+Position toPosition(llvm::SMLoc L, const llvm::SourceMgr &SM) {
+  auto LineCol = SM.getLineAndColumn(L);
+  Position P;
+  P.line = LineCol.first - 1;
+  P.character = LineCol.second - 1;
+  return P;
+}
+
+Range toRange(llvm::SMRange R, const llvm::SourceMgr &SM) {
+  return Range{toPosition(R.Start, SM), toPosition(R.End, SM)};
+}
+
+struct CapturedDiags {
+  std::function callback() {
+return [this](const llvm::SMDiagnostic &D) {
+  Diagnostics.emplace_back();
+  Diag &Out = Diagnostics.back();
+  Out.Message = D.getMessage().str();
+  Out.Kind = D.getKind();
+  Out.Pos.line = D.getLineNo() - 1;
+  Out.Pos.character = D.getColumnNo(); // Zero-based - bug in SourceMgr?
+  if (!D.getRanges().empty()) {
+const auto &R = D.getRanges().front();
+Out.Range.emplace();
+Out.Range->start.line = Out.Range->end.line = Out.Pos.line;
+Out.Range->start.character = R.first;
+Out.Range->end.character = R.second;
+  }
+};
+  }
+  struct Diag {
+std::string Message;
+llvm::SourceMgr::DiagKind Kind;
+Position Pos;
+llvm::Optional Range;
+
+friend void PrintTo(const Diag &D, std::ostream *OS) { *OS << D.Message; }
+  };
+  std::vector Diagnostics;
+};
+
+MATCHER_P(DiagMessage, M, "") { return arg.Message == M; }
+
+TEST(ParseYAML, SemanticForms) {
+  CapturedDiags Diags;
+  const char *YAML = R"yaml(
+If:
+  PathMatch:
+- 'abc'
+CompileFlags: { Add: [foo, bar] }
+---
+CompileFlags:
+  Add: baz
+  )yaml";
+  auto Results = Fragment::parseYAML(YAML, "config.yaml", Diags.callback());
+  EXPECT_THAT(Diags.Diagnostics, IsEmpty());
+  ASSERT_EQ(Results.size(), 2u);
+  EXPECT_FALSE(Results.front().Condition->UnrecognizedCondition);
+  EXPECT_THAT(Results.front().Condition->PathMatch, ElementsAre(Val("abc")));
+  EXPECT_THAT(Results.front().CompileFlags.Add,
+  ElementsAre(Val("foo"), Val("bar")));
+
+  EXPECT_FALSE(Results.back().Condition);
+  EXPECT_THAT(Results.back().CompileFlags.Add, ElementsAre(Val("baz")));
+}
+
+TEST(ParseYAML, Locations) {
+  CapturedDiags Diags;
+  Annotations YAML(R"yaml(
+If:
+  PathMatch: [['???bad***regex(((']]
+  )yaml");
+  auto Results =
+  Fragment::parseYAML(YAML.code(), "config.yaml", Diags.callback());
+  EXPECT_THAT(Diags.Diagnostics, IsEmpty());
+  ASSERT_EQ(Results.size(), 1u);
+  ASSERT_NE(Results.front().Source.Manager, nullptr);
+  EXPECT_EQ(toRange(Results.front().Condition->PathMatch.front().Range,
+*Results.front().Source.Manager),
+YAML.range());
+}
+
+TEST(ParseYAML, Diagnostics) {
+  CapturedDiags Diags;
+  Annotations YAML(R"yaml(
+If:
+  [[UnknownCondition]]: "foo"
+CompileFlags:
+  Add: 'first'
+---
+CompileFlags: {^
+)yaml");
+  auto Results =
+  Fragment::parseYAML(YAML.code(), "config.yaml", Diags.callback());
+
+  ASSERT_THAT(Diags.Diagnostics,
+  ElementsAre(DiagMessage("Unknown Condition key U