[PATCH] D83814: [clangd] Add Random Forest runtime for code completion.

2020-09-18 Thread UTKARSH SAXENA via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG9b6765e784b3: [clangd] Add Random Forest runtime for code 
completion. (authored by usaxena95).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83814/new/

https://reviews.llvm.org/D83814

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/quality/CompletionModel.cmake
  clang-tools-extra/clangd/quality/CompletionModelCodegen.py
  clang-tools-extra/clangd/quality/README.md
  clang-tools-extra/clangd/quality/model/features.json
  clang-tools-extra/clangd/quality/model/forest.json
  clang-tools-extra/clangd/unittests/CMakeLists.txt
  clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
  clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
  clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
  clang-tools-extra/clangd/unittests/decision_forest_model/features.json
  clang-tools-extra/clangd/unittests/decision_forest_model/forest.json

Index: clang-tools-extra/clangd/unittests/decision_forest_model/forest.json
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/decision_forest_model/forest.json
@@ -0,0 +1,52 @@
+[
+{
+"operation": "if_greater",
+"feature": "ANumber",
+"threshold": 200.0,
+"then": {
+"operation": "if_greater",
+"feature": "AFloat",
+"threshold": -1,
+"then": {
+"operation": "boost",
+"score": 10.0
+},
+"else": {
+"operation": "boost",
+"score": -20.0
+}
+},
+"else": {
+"operation": "if_member",
+"feature": "ACategorical",
+"set": [
+"A",
+"C"
+],
+"then": {
+"operation": "boost",
+"score": 3.0
+},
+"else": {
+"operation": "boost",
+"score": -4.0
+}
+}
+},
+{
+"operation": "if_member",
+"feature": "ACategorical",
+"set": [
+"A",
+"B"
+],
+"then": {
+"operation": "boost",
+"score": 5.0
+},
+"else": {
+"operation": "boost",
+"score": -6.0
+}
+}
+]
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/decision_forest_model/features.json
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/decision_forest_model/features.json
@@ -0,0 +1,16 @@
+[
+{
+"name": "ANumber",
+"kind": "NUMBER"
+},
+{
+"name": "AFloat",
+"kind": "NUMBER"
+},
+{
+"name": "ACategorical",
+"kind": "ENUM",
+"type": "ns1::ns2::TestEnum",
+"header": "decision_forest_model/CategoricalFeature.h"
+}
+]
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
@@ -0,0 +1,5 @@
+namespace ns1 {
+namespace ns2 {
+enum TestEnum { A, B, C, D };
+} // namespace ns2
+} // namespace ns1
Index: clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
@@ -0,0 +1,29 @@
+#include "DecisionForestRuntimeTest.h"
+#include "decision_forest_model/CategoricalFeature.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace clangd {
+
+TEST(DecisionForestRuntime, Evaluate) {
+  using Example = ::ns1::ns2::test::Example;
+  using Cat = ::ns1::ns2::TestEnum;
+  using ::ns1::ns2::test::Evaluate;
+
+  Example E;
+  E.setANumber(200); // True
+  E.setAFloat(0);// True: +10.0
+  E.setACategorical(Cat::A); // True: +5.0
+  EXPECT_EQ(Evaluate(E), 15.0);
+
+  E.setANumber(200); // True
+  E.setAFloat(-2.5); // False: -20.0
+  E.setACategorical(Cat::B); // True: +5.0
+  EXPECT_EQ(Evaluate(E), -15.0);
+
+  E.setANumber(100); // False
+  E.setACategorical(Cat::C); // True: +3.0, False: -6.0
+  EXPECT_EQ(Evaluate(E), -3.0);
+}
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
===
--- clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -10,6 +10,7 @@
 #include "ClangdServer.h"
 #include "CodeComplete.h"
 #include "Compiler.h"
+#include 

[PATCH] D83814: [clangd] Add Random Forest runtime for code completion.

2020-09-18 Thread UTKARSH SAXENA via Phabricator via cfe-commits
usaxena95 updated this revision to Diff 292828.
usaxena95 added a comment.

Fixed output_dir cmake variable. Clean build succeeds now.
Ready to land.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83814/new/

https://reviews.llvm.org/D83814

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/quality/CompletionModel.cmake
  clang-tools-extra/clangd/quality/CompletionModelCodegen.py
  clang-tools-extra/clangd/quality/README.md
  clang-tools-extra/clangd/quality/model/features.json
  clang-tools-extra/clangd/quality/model/forest.json
  clang-tools-extra/clangd/unittests/CMakeLists.txt
  clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
  clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
  clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
  clang-tools-extra/clangd/unittests/decision_forest_model/features.json
  clang-tools-extra/clangd/unittests/decision_forest_model/forest.json

Index: clang-tools-extra/clangd/unittests/decision_forest_model/forest.json
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/decision_forest_model/forest.json
@@ -0,0 +1,52 @@
+[
+{
+"operation": "if_greater",
+"feature": "ANumber",
+"threshold": 200.0,
+"then": {
+"operation": "if_greater",
+"feature": "AFloat",
+"threshold": -1,
+"then": {
+"operation": "boost",
+"score": 10.0
+},
+"else": {
+"operation": "boost",
+"score": -20.0
+}
+},
+"else": {
+"operation": "if_member",
+"feature": "ACategorical",
+"set": [
+"A",
+"C"
+],
+"then": {
+"operation": "boost",
+"score": 3.0
+},
+"else": {
+"operation": "boost",
+"score": -4.0
+}
+}
+},
+{
+"operation": "if_member",
+"feature": "ACategorical",
+"set": [
+"A",
+"B"
+],
+"then": {
+"operation": "boost",
+"score": 5.0
+},
+"else": {
+"operation": "boost",
+"score": -6.0
+}
+}
+]
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/decision_forest_model/features.json
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/decision_forest_model/features.json
@@ -0,0 +1,16 @@
+[
+{
+"name": "ANumber",
+"kind": "NUMBER"
+},
+{
+"name": "AFloat",
+"kind": "NUMBER"
+},
+{
+"name": "ACategorical",
+"kind": "ENUM",
+"type": "ns1::ns2::TestEnum",
+"header": "decision_forest_model/CategoricalFeature.h"
+}
+]
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
@@ -0,0 +1,5 @@
+namespace ns1 {
+namespace ns2 {
+enum TestEnum { A, B, C, D };
+} // namespace ns2
+} // namespace ns1
Index: clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
@@ -0,0 +1,29 @@
+#include "DecisionForestRuntimeTest.h"
+#include "decision_forest_model/CategoricalFeature.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace clangd {
+
+TEST(DecisionForestRuntime, Evaluate) {
+  using Example = ::ns1::ns2::test::Example;
+  using Cat = ::ns1::ns2::TestEnum;
+  using ::ns1::ns2::test::Evaluate;
+
+  Example E;
+  E.setANumber(200); // True
+  E.setAFloat(0);// True: +10.0
+  E.setACategorical(Cat::A); // True: +5.0
+  EXPECT_EQ(Evaluate(E), 15.0);
+
+  E.setANumber(200); // True
+  E.setAFloat(-2.5); // False: -20.0
+  E.setACategorical(Cat::B); // True: +5.0
+  EXPECT_EQ(Evaluate(E), -15.0);
+
+  E.setANumber(100); // False
+  E.setACategorical(Cat::C); // True: +3.0, False: -6.0
+  EXPECT_EQ(Evaluate(E), -3.0);
+}
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
===
--- clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -10,6 +10,7 @@
 #include "ClangdServer.h"
 #include "CodeComplete.h"
 #include "Compiler.h"
+#include "CompletionModel.h"
 #include "Matchers.h"
 #include "Protocol.h"
 #include "Quality.h"
@@ -47,6 +48,7 @@
 

[PATCH] D83814: [clangd] Add Random Forest runtime for code completion.

2020-09-18 Thread UTKARSH SAXENA via Phabricator via cfe-commits
usaxena95 updated this revision to Diff 292818.
usaxena95 added a comment.

Removed generated (for review) files.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83814/new/

https://reviews.llvm.org/D83814

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/quality/CompletionModel.cmake
  clang-tools-extra/clangd/quality/CompletionModelCodegen.py
  clang-tools-extra/clangd/quality/README.md
  clang-tools-extra/clangd/quality/model/features.json
  clang-tools-extra/clangd/quality/model/forest.json
  clang-tools-extra/clangd/unittests/CMakeLists.txt
  clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
  clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
  clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
  clang-tools-extra/clangd/unittests/decision_forest_model/features.json
  clang-tools-extra/clangd/unittests/decision_forest_model/forest.json

Index: clang-tools-extra/clangd/unittests/decision_forest_model/forest.json
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/decision_forest_model/forest.json
@@ -0,0 +1,52 @@
+[
+{
+"operation": "if_greater",
+"feature": "ANumber",
+"threshold": 200.0,
+"then": {
+"operation": "if_greater",
+"feature": "AFloat",
+"threshold": -1,
+"then": {
+"operation": "boost",
+"score": 10.0
+},
+"else": {
+"operation": "boost",
+"score": -20.0
+}
+},
+"else": {
+"operation": "if_member",
+"feature": "ACategorical",
+"set": [
+"A",
+"C"
+],
+"then": {
+"operation": "boost",
+"score": 3.0
+},
+"else": {
+"operation": "boost",
+"score": -4.0
+}
+}
+},
+{
+"operation": "if_member",
+"feature": "ACategorical",
+"set": [
+"A",
+"B"
+],
+"then": {
+"operation": "boost",
+"score": 5.0
+},
+"else": {
+"operation": "boost",
+"score": -6.0
+}
+}
+]
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/decision_forest_model/features.json
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/decision_forest_model/features.json
@@ -0,0 +1,16 @@
+[
+{
+"name": "ANumber",
+"kind": "NUMBER"
+},
+{
+"name": "AFloat",
+"kind": "NUMBER"
+},
+{
+"name": "ACategorical",
+"kind": "ENUM",
+"type": "ns1::ns2::TestEnum",
+"header": "decision_forest_model/CategoricalFeature.h"
+}
+]
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
@@ -0,0 +1,5 @@
+namespace ns1 {
+namespace ns2 {
+enum TestEnum { A, B, C, D };
+} // namespace ns2
+} // namespace ns1
Index: clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
@@ -0,0 +1,29 @@
+#include "DecisionForestRuntimeTest.h"
+#include "decision_forest_model/CategoricalFeature.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace clangd {
+
+TEST(DecisionForestRuntime, Evaluate) {
+  using Example = ::ns1::ns2::test::Example;
+  using Cat = ::ns1::ns2::TestEnum;
+  using ::ns1::ns2::test::Evaluate;
+
+  Example E;
+  E.setANumber(200); // True
+  E.setAFloat(0);// True: +10.0
+  E.setACategorical(Cat::A); // True: +5.0
+  EXPECT_EQ(Evaluate(E), 15.0);
+
+  E.setANumber(200); // True
+  E.setAFloat(-2.5); // False: -20.0
+  E.setACategorical(Cat::B); // True: +5.0
+  EXPECT_EQ(Evaluate(E), -15.0);
+
+  E.setANumber(100); // False
+  E.setACategorical(Cat::C); // True: +3.0, False: -6.0
+  EXPECT_EQ(Evaluate(E), -3.0);
+}
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
===
--- clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -10,6 +10,7 @@
 #include "ClangdServer.h"
 #include "CodeComplete.h"
 #include "Compiler.h"
+#include "CompletionModel.h"
 #include "Matchers.h"
 #include "Protocol.h"
 #include "Quality.h"
@@ -47,6 +48,7 @@
 using ::testing::IsEmpty;
 using 

[PATCH] D83814: [clangd] Add Random Forest runtime for code completion.

2020-09-18 Thread Sam McCall via Phabricator via cfe-commits
sammccall accepted this revision.
sammccall added a comment.
This revision is now accepted and ready to land.

LG from my side!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83814/new/

https://reviews.llvm.org/D83814

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83814: [clangd] Add Random Forest runtime for code completion.

2020-09-18 Thread UTKARSH SAXENA via Phabricator via cfe-commits
usaxena95 updated this revision to Diff 292717.
usaxena95 marked 12 inline comments as done.
usaxena95 added a comment.

Addressed comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83814/new/

https://reviews.llvm.org/D83814

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/for-review-only/CompletionModel.cpp
  clang-tools-extra/clangd/for-review-only/CompletionModel.h
  clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.cpp
  clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.h
  clang-tools-extra/clangd/quality/CompletionModel.cmake
  clang-tools-extra/clangd/quality/CompletionModelCodegen.py
  clang-tools-extra/clangd/quality/README.md
  clang-tools-extra/clangd/quality/model/features.json
  clang-tools-extra/clangd/quality/model/forest.json
  clang-tools-extra/clangd/unittests/CMakeLists.txt
  clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
  clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
  clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
  clang-tools-extra/clangd/unittests/decision_forest_model/features.json
  clang-tools-extra/clangd/unittests/decision_forest_model/forest.json

Index: clang-tools-extra/clangd/unittests/decision_forest_model/forest.json
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/decision_forest_model/forest.json
@@ -0,0 +1,52 @@
+[
+{
+"operation": "if_greater",
+"feature": "ANumber",
+"threshold": 200.0,
+"then": {
+"operation": "if_greater",
+"feature": "AFloat",
+"threshold": -1,
+"then": {
+"operation": "boost",
+"score": 10.0
+},
+"else": {
+"operation": "boost",
+"score": -20.0
+}
+},
+"else": {
+"operation": "if_member",
+"feature": "ACategorical",
+"set": [
+"A",
+"C"
+],
+"then": {
+"operation": "boost",
+"score": 3.0
+},
+"else": {
+"operation": "boost",
+"score": -4.0
+}
+}
+},
+{
+"operation": "if_member",
+"feature": "ACategorical",
+"set": [
+"A",
+"B"
+],
+"then": {
+"operation": "boost",
+"score": 5.0
+},
+"else": {
+"operation": "boost",
+"score": -6.0
+}
+}
+]
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/decision_forest_model/features.json
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/decision_forest_model/features.json
@@ -0,0 +1,16 @@
+[
+{
+"name": "ANumber",
+"kind": "NUMBER"
+},
+{
+"name": "AFloat",
+"kind": "NUMBER"
+},
+{
+"name": "ACategorical",
+"kind": "ENUM",
+"type": "ns1::ns2::TestEnum",
+"header": "decision_forest_model/CategoricalFeature.h"
+}
+]
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
@@ -0,0 +1,5 @@
+namespace ns1 {
+namespace ns2 {
+enum TestEnum { A, B, C, D };
+} // namespace ns2
+} // namespace ns1
Index: clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
@@ -0,0 +1,29 @@
+#include "DecisionForestRuntimeTest.h"
+#include "decision_forest_model/CategoricalFeature.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace clangd {
+
+TEST(DecisionForestRuntime, Evaluate) {
+  using Example = ::ns1::ns2::test::Example;
+  using Cat = ::ns1::ns2::TestEnum;
+  using ::ns1::ns2::test::Evaluate;
+
+  Example E;
+  E.setANumber(200); // True
+  E.setAFloat(0);// True: +10.0
+  E.setACategorical(Cat::A); // True: +5.0
+  EXPECT_EQ(Evaluate(E), 15.0);
+
+  E.setANumber(200); // True
+  E.setAFloat(-2.5); // False: -20.0
+  E.setACategorical(Cat::B); // True: +5.0
+  EXPECT_EQ(Evaluate(E), -15.0);
+
+  E.setANumber(100); // False
+  E.setACategorical(Cat::C); // True: +3.0, False: -6.0
+  EXPECT_EQ(Evaluate(E), -3.0);
+}
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
===
--- clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ 

[PATCH] D83814: [clangd] Add Random Forest runtime for code completion.

2020-09-18 Thread UTKARSH SAXENA via Phabricator via cfe-commits
usaxena95 marked an inline comment as done.
usaxena95 added inline comments.



Comment at: clang-tools-extra/clangd/quality/CompletionModel.cmake:13
+  
+  set(output_dir ${CMAKE_BINARY_DIR}/generated/decision_forest)
+  set(header_file ${output_dir}/${filename}.h)

sammccall wrote:
> /generated/decision_forest seems redundant considering ${CMAKE_BINARY_DIR} is 
> already the generated-files tree for the directory of the calling CMakeLists.
> 
> Can't we just use ${CMAKE_BINARY_DIR} directly and avoid the 
> DECISION_FOREST_OUTPUT_DIR variable?
Changed `CMAKE_BINARY_DIR` to `CMAKE_CURRENT_BINARY_DIR` and removed 
/generated/decision_forest to avoid the `DECISION_FOREST_OUTPUT_DIR` variable.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83814/new/

https://reviews.llvm.org/D83814

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83814: [clangd] Add Random Forest runtime for code completion.

2020-09-16 Thread Sam McCall via Phabricator via cfe-commits
sammccall added a comment.

Just some naming and doc nits. This looks really solid now, nice job!

In D83814#2261458, @jkorous wrote:

> Hi @usaxena95 and @sammccall,
>
> I am wondering about couple high-level things.
>
> Do you guys intend to open-source also the training part of the model 
> pipeline or publish a model trained on generic-enough training set so it 
> could be reasonably used on "any" codebase?

@adamcz and I were talking about this too... I think it's important we do as 
much of this as possible. I was the one not finding time to do it though, and I 
think Adam may do better :-)

- the existing training stuff is using internal tech, but AFAIK it's nothing 
LightGBM can't do (it trains on a single machine). So we should be able to 
open-source the training setup and actually use that.
- training data generation is harder to open, because it involves sampling a 
large diverse body of code and parsing lots of it. The core code that embeds 
clangd and extracts completion candidates should be very doable, so one could 
run over LLVM on one machine. The framework to run at a larger scale is coupled 
to internal infra though, and we're currently training on both public and 
non-public code.




Comment at: clang-tools-extra/clangd/quality/CompletionModel.cmake:10
+
+  set(model_json ${model}/forest.json)
+  set(model_features ${model}/features.json)

these vars are used only once, I'd suggest inlining them for readability



Comment at: clang-tools-extra/clangd/quality/CompletionModel.cmake:13
+  
+  set(output_dir ${CMAKE_BINARY_DIR}/generated/decision_forest)
+  set(header_file ${output_dir}/${filename}.h)

/generated/decision_forest seems redundant considering ${CMAKE_BINARY_DIR} is 
already the generated-files tree for the directory of the calling CMakeLists.

Can't we just use ${CMAKE_BINARY_DIR} directly and avoid the 
DECISION_FOREST_OUTPUT_DIR variable?



Comment at: clang-tools-extra/clangd/quality/CompletionModelCodegen.py:150
+# Class definition.
+code += f"class {cpp_class.name} {{\n"
+code += "public:\n"

you can use triple-quoted f-strings, which I think would be more readable than 
blocks of "code +="

```
code += f"""class {cpp_class.name} {{
public:
  {"\n   ".join(setters)}

private:
  {"\n  ".join(class_members)}
"""
```

etc

may even do the whole thing in one go.



Comment at: clang-tools-extra/clangd/quality/CompletionModelCodegen.py:217
+
+code = "\n".join(f'#include <{h}>' for h in angled_include) + "\n\n"
+code += "\n".join(f'#include "{h}"' for h in quoted_include) + "\n\n"

again, making this one big triple-quoted f-string may be nicer, up to you



Comment at: clang-tools-extra/clangd/quality/README.md:4
+## Decision Forest
+A **decision forest** is a collection of many decision trees. A **decision 
tree** is a full binary tree where every non-leaf node has exactly **2** 
children. 
+

The second half of this sentence simply restates the first.

Maybe we can combine this with the second paragraph: "A decision tree is a full 
binary tree that provides a quality prediction for an input (code completion 
item). Internal nodes represent a binary decision based on the input data, and 
leaf nodes represent a prediction."



Comment at: clang-tools-extra/clangd/quality/README.md:8
+
+At every non-leaf node, we evaluate the condition present in the node.  The 
condition refers to exactly one **feature**. It uses the value of this 
attribute from the code completion item to evaluate the condition.
+Based on the condition, we move to its true child or the false child.

Nit: I think it's worth separating out defining features vs conditions.

e.g.
"An input (code completion candidate) is characterized as a set of 
**features**, such as the type of symbol or the number of existing references.

At every non-leaf node, we evaluate the condition to decide whether to go left 
or right. The condition compares one feature of the input against a constant. 
It is either:
...".



Comment at: clang-tools-extra/clangd/quality/README.md:16
+A leaf node only contains the value **score**.
+Once we know the set of leaves (one from each decision tree), we add the 
**score** values from each of the leaves to get the final relevance score.
+

nit: rather than alternating between describing traversing all trees and one 
tree, I'd just say "To compute an overall quality score, we traverse each tree 
in this way and add up the scores".



Comment at: clang-tools-extra/clangd/quality/README.md:26
+  "name": "a_numerical_feature",
+  "type": "NUMBER"
+}

This is a little prone to confusion with C++ type.

Consider "kind" instead?



Comment at: 

[PATCH] D83814: [clangd] Add Random Forest runtime for code completion.

2020-09-16 Thread UTKARSH SAXENA via Phabricator via cfe-commits
usaxena95 added inline comments.



Comment at: clang-tools-extra/clangd/quality/CompletionModelCodegen.py:39
+'''Returns the header guard for the generated header.'''
+return "GENERATED_DECISION_FOREST_MODEL_{}_H".format(filename.upper())
+

adamcz wrote:
> Why GENERATED_DECISION_FOREST_MODEL instead of output_dir, to be consistent 
> with header guards for other files? Doesn't matter much for generated code, 
> but if someone opens this in vim they'll see warnings.
The output_dir is the absolute path and not a project relative path.
I tried to stick with a special prefix for header guard as done in other 
Generated headers (e.g. protos)

If someone opens this in vim, there would be many other warnings that they would 
see, like "unused_label" ;)
I don't think that would be a concern since it would be opened for inspection 
and not for editing.



Comment at: clang-tools-extra/clangd/quality/CompletionModelCodegen.py:177
+
+code += "  friend float Evaluate(const {}&);\n".format(cpp_class.name)
+code += "};\n"

adamcz wrote:
> Is there a reason to make this a friend free-function instead of static 
> method on the Example class? The problem is that you now end up with 
> clang::clangd::Evaluate, so if we ever re-use this code gen for another 
> model we'll have a name collision.
The class name ("Example" currently) would be different for a different model, 
and therefore there would be another overload for `Evaluate(const 
AnotherClass&)` even if the namespaces are the same (`clang::clangd`).



Comment at: clang-tools-extra/clangd/unittests/model/CategoricalFeature.h:1
+namespace ns1 {
+namespace ns2 {

adamcz wrote:
> Can we rename this directory? quality/model makes some sense (although it 
> would be better to include something about code completion there), but 
> unittests/model is not very descriptive - what model?
> 
> How about unittests/decision_forest_model/ or something like that? Or go with 
> the Input/TEST_NAME pattern.
You are right "quality" wasn't indicative of code completion here but we 
decided to be consistent with the current naming. The current heuristics for 
the ranking are in Quality.h and Quality.cpp ;-)

changed the dir name in unittests.




Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83814/new/

https://reviews.llvm.org/D83814

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83814: [clangd] Add Random Forest runtime for code completion.

2020-09-16 Thread UTKARSH SAXENA via Phabricator via cfe-commits
usaxena95 updated this revision to Diff 292188.
usaxena95 marked 8 inline comments as done.
usaxena95 added a comment.

Addressed comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83814/new/

https://reviews.llvm.org/D83814

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/for-review-only/CompletionModel.cpp
  clang-tools-extra/clangd/for-review-only/CompletionModel.h
  clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.cpp
  clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.h
  clang-tools-extra/clangd/quality/CompletionModel.cmake
  clang-tools-extra/clangd/quality/CompletionModelCodegen.py
  clang-tools-extra/clangd/quality/README.md
  clang-tools-extra/clangd/quality/model/features.json
  clang-tools-extra/clangd/quality/model/forest.json
  clang-tools-extra/clangd/unittests/CMakeLists.txt
  clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
  clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
  clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
  clang-tools-extra/clangd/unittests/decision_forest_model/features.json
  clang-tools-extra/clangd/unittests/decision_forest_model/forest.json

Index: clang-tools-extra/clangd/unittests/decision_forest_model/forest.json
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/decision_forest_model/forest.json
@@ -0,0 +1,52 @@
+[
+{
+"operation": "if_greater",
+"feature": "ANumber",
+"threshold": 200.0,
+"then": {
+"operation": "if_greater",
+"feature": "AFloat",
+"threshold": -1,
+"then": {
+"operation": "boost",
+"score": 10.0
+},
+"else": {
+"operation": "boost",
+"score": -20.0
+}
+},
+"else": {
+"operation": "if_member",
+"feature": "ACategorical",
+"set": [
+"A",
+"C"
+],
+"then": {
+"operation": "boost",
+"score": 3.0
+},
+"else": {
+"operation": "boost",
+"score": -4.0
+}
+}
+},
+{
+"operation": "if_member",
+"feature": "ACategorical",
+"set": [
+"A",
+"B"
+],
+"then": {
+"operation": "boost",
+"score": 5.0
+},
+"else": {
+"operation": "boost",
+"score": -6.0
+}
+}
+]
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/decision_forest_model/features.json
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/decision_forest_model/features.json
@@ -0,0 +1,16 @@
+[
+{
+"name": "ANumber",
+"type": "NUMBER"
+},
+{
+"name": "AFloat",
+"type": "NUMBER"
+},
+{
+"name": "ACategorical",
+"type": "ENUM",
+"enum": "ns1::ns2::TestEnum",
+"header": "decision_forest_model/CategoricalFeature.h"
+}
+]
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
@@ -0,0 +1,5 @@
+namespace ns1 {
+namespace ns2 {
+enum TestEnum { A, B, C, D };
+} // namespace ns2
+} // namespace ns1
Index: clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
@@ -0,0 +1,29 @@
+#include "DecisionForestRuntimeTest.h"
+#include "decision_forest_model/CategoricalFeature.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace clangd {
+
+TEST(DecisionForestRuntime, Evaluate) {
+  using Example = ::ns1::ns2::test::Example;
+  using Cat = ::ns1::ns2::TestEnum;
+  using ::ns1::ns2::test::Evaluate;
+
+  Example E;
+  E.setANumber(200); // True
+  E.setAFloat(0);// True: +10.0
+  E.setACategorical(Cat::A); // True: +5.0
+  EXPECT_EQ(Evaluate(E), 15.0);
+
+  E.setANumber(200); // True
+  E.setAFloat(-2.5); // False: -20.0
+  E.setACategorical(Cat::B); // True: +5.0
+  EXPECT_EQ(Evaluate(E), -15.0);
+
+  E.setANumber(100); // False
+  E.setACategorical(Cat::C); // True: +3.0, False: -6.0
+  EXPECT_EQ(Evaluate(E), -3.0);
+}
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
===
--- clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ 

[PATCH] D83814: [clangd] Add Random Forest runtime for code completion.

2020-09-15 Thread Adam Czachorowski via Phabricator via cfe-commits
adamcz added a comment.

Looks good to me overall, some minor style comments included ;-)

Do we expect this to be a generic model code generator that gets reused for 
other things? If not, maybe we can hardcode more (like the namespace, class 
name, etc), but if you think there's other use cases for this then this LGTM.




Comment at: clang-tools-extra/clangd/quality/CompletionModelCodegen.py:39
+'''Returns the header guard for the generated header.'''
+return "GENERATED_DECISION_FOREST_MODEL_{}_H".format(filename.upper())
+

Why GENERATED_DECISION_FOREST_MODEL instead of output_dir, to be consistent with 
header guards for other files? Doesn't matter much for generated code, but if 
someone opens this in vim they'll see warnings.



Comment at: clang-tools-extra/clangd/quality/CompletionModelCodegen.py:57
+Control falls through if condition is evaluated to false."""
+return "{label}: if(E.{feature} >= {encoded} /*{threshold}*/) goto 
{true_label};".format(
+label=label,

nit: add space after if for readability (also below)



Comment at: clang-tools-extra/clangd/quality/CompletionModelCodegen.py:93
+def tree(t, tree_num: int, node_num: int):
+"""Returns code for inferencing a Decision Tree.
+

Please extend the comment to mention the second return value (size of the tree)



Comment at: clang-tools-extra/clangd/quality/CompletionModelCodegen.py:105
+"""
+label = "t{tree}_n{node}".format(tree=tree_num, node=node_num)
+code = []

This is a good place to use a Python f-string.

Also in a few places below.



Comment at: clang-tools-extra/clangd/quality/CompletionModelCodegen.py:126
+
+return code + false_code + true_code, 1 + false_size+true_size
+

style nit: be consistent with spaces around +



Comment at: clang-tools-extra/clangd/quality/CompletionModelCodegen.py:177
+
+code += "  friend float Evaluate(const {}&);\n".format(cpp_class.name)
+code += "};\n"

Is there a reason to make this a friend free-function instead of static method 
on the Example class? The problem is that you now end up with 
clang::clangd::Evaluate, so if we ever re-use this code gen for another model 
we'll have a name collision.



Comment at: clang-tools-extra/clangd/quality/CompletionModelCodegen.py:271
+parser = argparse.ArgumentParser('DecisionForestCodegen')
+parser.add_argument('--filename', help='output file name.')
+parser.add_argument('--output_dir', help='output directory')

nit: be consistent about putting a "." at the end of the help text or not.



Comment at: clang-tools-extra/clangd/unittests/model/CategoricalFeature.h:1
+namespace ns1 {
+namespace ns2 {

Can we rename this directory? quality/model makes some sense (although it would 
be better to include something about code completion there), but 
unittests/model is not very descriptive - what model?

How about unittests/decision_forest_model/ or something like that? Or go with 
the Input/TEST_NAME pattern.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83814/new/

https://reviews.llvm.org/D83814

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83814: [clangd] Add Random Forest runtime for code completion.

2020-09-15 Thread UTKARSH SAXENA via Phabricator via cfe-commits
usaxena95 updated this revision to Diff 291909.
usaxena95 added a comment.

Fixed namespace ending.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83814/new/

https://reviews.llvm.org/D83814

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/for-review-only/CompletionModel.cpp
  clang-tools-extra/clangd/for-review-only/CompletionModel.h
  clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.cpp
  clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.h
  clang-tools-extra/clangd/quality/CompletionModel.cmake
  clang-tools-extra/clangd/quality/CompletionModelCodegen.py
  clang-tools-extra/clangd/quality/README.md
  clang-tools-extra/clangd/quality/model/features.json
  clang-tools-extra/clangd/quality/model/forest.json
  clang-tools-extra/clangd/unittests/CMakeLists.txt
  clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
  clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
  clang-tools-extra/clangd/unittests/model/CategoricalFeature.h
  clang-tools-extra/clangd/unittests/model/features.json
  clang-tools-extra/clangd/unittests/model/forest.json

Index: clang-tools-extra/clangd/unittests/model/forest.json
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/model/forest.json
@@ -0,0 +1,52 @@
+[
+{
+"operation": "if_greater",
+"feature": "ANumber",
+"threshold": 200.0,
+"then": {
+"operation": "if_greater",
+"feature": "AFloat",
+"threshold": -1,
+"then": {
+"operation": "boost",
+"score": 10.0
+},
+"else": {
+"operation": "boost",
+"score": -20.0
+}
+},
+"else": {
+"operation": "if_member",
+"feature": "ACategorical",
+"set": [
+"A",
+"C"
+],
+"then": {
+"operation": "boost",
+"score": 3.0
+},
+"else": {
+"operation": "boost",
+"score": -4.0
+}
+}
+},
+{
+"operation": "if_member",
+"feature": "ACategorical",
+"set": [
+"A",
+"B"
+],
+"then": {
+"operation": "boost",
+"score": 5.0
+},
+"else": {
+"operation": "boost",
+"score": -6.0
+}
+}
+]
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/model/features.json
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/model/features.json
@@ -0,0 +1,16 @@
+[
+{
+"name": "ANumber",
+"type": "NUMBER"
+},
+{
+"name": "AFloat",
+"type": "NUMBER"
+},
+{
+"name": "ACategorical",
+"type": "ENUM",
+"enum": "ns1::ns2::TestEnum",
+"header": "model/CategoricalFeature.h"
+}
+]
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/model/CategoricalFeature.h
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/model/CategoricalFeature.h
@@ -0,0 +1,5 @@
+namespace ns1 {
+namespace ns2 {
+enum TestEnum { A, B, C, D };
+} // namespace ns2
+} // namespace ns1
Index: clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
@@ -0,0 +1,29 @@
+#include "DecisionForestRuntimeTest.h"
+#include "model/CategoricalFeature.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace clangd {
+
+TEST(DecisionForestRuntime, Evaluate) {
+  using Example = ::ns1::ns2::test::Example;
+  using Cat = ::ns1::ns2::TestEnum;
+  using ::ns1::ns2::test::Evaluate;
+
+  Example E;
+  E.setANumber(200); // True
+  E.setAFloat(0);// True: +10.0
+  E.setACategorical(Cat::A); // True: +5.0
+  EXPECT_EQ(Evaluate(E), 15.0);
+
+  E.setANumber(200); // True
+  E.setAFloat(-2.5); // False: -20.0
+  E.setACategorical(Cat::B); // True: +5.0
+  EXPECT_EQ(Evaluate(E), -15.0);
+
+  E.setANumber(100); // False
+  E.setACategorical(Cat::C); // True: +3.0, False: -6.0
+  EXPECT_EQ(Evaluate(E), -3.0);
+}
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
===
--- clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -10,6 +10,7 @@
 #include "ClangdServer.h"
 #include "CodeComplete.h"
 #include "Compiler.h"
+#include "CompletionModel.h"
 #include "Matchers.h"
 #include 

[PATCH] D83814: [clangd] Add Random Forest runtime for code completion.

2020-09-15 Thread UTKARSH SAXENA via Phabricator via cfe-commits
usaxena95 updated this revision to Diff 291813.
usaxena95 added a comment.

Fixed namespace.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83814/new/

https://reviews.llvm.org/D83814

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/for-review-only/CompletionModel.cpp
  clang-tools-extra/clangd/for-review-only/CompletionModel.h
  clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.cpp
  clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.h
  clang-tools-extra/clangd/quality/CompletionModel.cmake
  clang-tools-extra/clangd/quality/CompletionModelCodegen.py
  clang-tools-extra/clangd/quality/README.md
  clang-tools-extra/clangd/quality/model/features.json
  clang-tools-extra/clangd/quality/model/forest.json
  clang-tools-extra/clangd/unittests/CMakeLists.txt
  clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
  clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
  clang-tools-extra/clangd/unittests/model/CategoricalFeature.h
  clang-tools-extra/clangd/unittests/model/features.json
  clang-tools-extra/clangd/unittests/model/forest.json

Index: clang-tools-extra/clangd/unittests/model/forest.json
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/model/forest.json
@@ -0,0 +1,52 @@
+[
+{
+"operation": "if_greater",
+"feature": "ANumber",
+"threshold": 200.0,
+"then": {
+"operation": "if_greater",
+"feature": "AFloat",
+"threshold": -1,
+"then": {
+"operation": "boost",
+"score": 10.0
+},
+"else": {
+"operation": "boost",
+"score": -20.0
+}
+},
+"else": {
+"operation": "if_member",
+"feature": "ACategorical",
+"set": [
+"A",
+"C"
+],
+"then": {
+"operation": "boost",
+"score": 3.0
+},
+"else": {
+"operation": "boost",
+"score": -4.0
+}
+}
+},
+{
+"operation": "if_member",
+"feature": "ACategorical",
+"set": [
+"A",
+"B"
+],
+"then": {
+"operation": "boost",
+"score": 5.0
+},
+"else": {
+"operation": "boost",
+"score": -6.0
+}
+}
+]
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/model/features.json
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/model/features.json
@@ -0,0 +1,16 @@
+[
+{
+"name": "ANumber",
+"type": "NUMBER"
+},
+{
+"name": "AFloat",
+"type": "NUMBER"
+},
+{
+"name": "ACategorical",
+"type": "ENUM",
+"enum": "ns1::ns2::TestEnum",
+"header": "model/CategoricalFeature.h"
+}
+]
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/model/CategoricalFeature.h
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/model/CategoricalFeature.h
@@ -0,0 +1,5 @@
+namespace ns1 {
+namespace ns2 {
+enum TestEnum { A, B, C, D };
+} // namespace ns2
+} // namespace ns1
Index: clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
@@ -0,0 +1,29 @@
+#include "DecisionForestRuntimeTest.h"
+#include "model/CategoricalFeature.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace clangd {
+
+TEST(DecisionForestRuntime, Evaluate) {
+  using Example = ::ns1::ns2::test::Example;
+  using Cat = ::ns1::ns2::TestEnum;
+  using ::ns1::ns2::test::Evaluate;
+
+  Example E;
+  E.setANumber(200); // True
+  E.setAFloat(0);// True: +10.0
+  E.setACategorical(Cat::A); // True: +5.0
+  EXPECT_EQ(Evaluate(E), 15.0);
+
+  E.setANumber(200); // True
+  E.setAFloat(-2.5); // False: -20.0
+  E.setACategorical(Cat::B); // True: +5.0
+  EXPECT_EQ(Evaluate(E), -15.0);
+
+  E.setANumber(100); // False
+  E.setACategorical(Cat::C); // True: +3.0, False: -6.0
+  EXPECT_EQ(Evaluate(E), -3.0);
+}
+} // namespace clangd
+} // namespace clangd
Index: clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
===
--- clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -10,6 +10,7 @@
 #include "ClangdServer.h"
 #include "CodeComplete.h"
 #include "Compiler.h"
+#include "CompletionModel.h"
 #include "Matchers.h"
 #include "Protocol.h"
 

[PATCH] D83814: [clangd] Add Random Forest runtime for code completion.

2020-09-14 Thread Adam Czachorowski via Phabricator via cfe-commits
adamcz added inline comments.



Comment at: clang-tools-extra/clangd/unittests/DecisionForestTests.cpp:5
+
+namespace clangd {
+namespace clangd {

This is supposed to be "namespace clang", right?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83814/new/

https://reviews.llvm.org/D83814

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83814: [clangd] Add Random Forest runtime for code completion.

2020-09-11 Thread UTKARSH SAXENA via Phabricator via cfe-commits
usaxena95 updated this revision to Diff 291205.
usaxena95 added a comment.

Added README.md for the code completion model.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83814/new/

https://reviews.llvm.org/D83814

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/for-review-only/CompletionModel.cpp
  clang-tools-extra/clangd/for-review-only/CompletionModel.h
  clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.cpp
  clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.h
  clang-tools-extra/clangd/quality/CompletionModel.cmake
  clang-tools-extra/clangd/quality/CompletionModelCodegen.py
  clang-tools-extra/clangd/quality/README.md
  clang-tools-extra/clangd/quality/model/features.json
  clang-tools-extra/clangd/quality/model/forest.json
  clang-tools-extra/clangd/unittests/CMakeLists.txt
  clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
  clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
  clang-tools-extra/clangd/unittests/model/CategoricalFeature.h
  clang-tools-extra/clangd/unittests/model/features.json
  clang-tools-extra/clangd/unittests/model/forest.json

Index: clang-tools-extra/clangd/unittests/model/forest.json
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/model/forest.json
@@ -0,0 +1,52 @@
+[
+{
+"operation": "if_greater",
+"feature": "ANumber",
+"threshold": 200.0,
+"then": {
+"operation": "if_greater",
+"feature": "AFloat",
+"threshold": -1,
+"then": {
+"operation": "boost",
+"score": 10.0
+},
+"else": {
+"operation": "boost",
+"score": -20.0
+}
+},
+"else": {
+"operation": "if_member",
+"feature": "ACategorical",
+"set": [
+"A",
+"C"
+],
+"then": {
+"operation": "boost",
+"score": 3.0
+},
+"else": {
+"operation": "boost",
+"score": -4.0
+}
+}
+},
+{
+"operation": "if_member",
+"feature": "ACategorical",
+"set": [
+"A",
+"B"
+],
+"then": {
+"operation": "boost",
+"score": 5.0
+},
+"else": {
+"operation": "boost",
+"score": -6.0
+}
+}
+]
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/model/features.json
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/model/features.json
@@ -0,0 +1,16 @@
+[
+{
+"name": "ANumber",
+"type": "NUMBER"
+},
+{
+"name": "AFloat",
+"type": "NUMBER"
+},
+{
+"name": "ACategorical",
+"type": "ENUM",
+"enum": "ns1::ns2::TestEnum",
+"header": "model/CategoricalFeature.h"
+}
+]
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/model/CategoricalFeature.h
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/model/CategoricalFeature.h
@@ -0,0 +1,5 @@
+namespace ns1 {
+namespace ns2 {
+enum TestEnum { A, B, C, D };
+} // namespace ns2
+} // namespace ns1
Index: clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
@@ -0,0 +1,29 @@
+#include "DecisionForestRuntimeTest.h"
+#include "model/CategoricalFeature.h"
+#include "gtest/gtest.h"
+
+namespace clangd {
+namespace clangd {
+
+TEST(DecisionForestRuntime, Evaluate) {
+  using Example = ::ns1::ns2::test::Example;
+  using Cat = ::ns1::ns2::TestEnum;
+  using ::ns1::ns2::test::Evaluate;
+
+  Example E;
+  E.setANumber(200); // True
+  E.setAFloat(0);// True: +10.0
+  E.setACategorical(Cat::A); // True: +5.0
+  EXPECT_EQ(Evaluate(E), 15.0);
+
+  E.setANumber(200); // True
+  E.setAFloat(-2.5); // False: -20.0
+  E.setACategorical(Cat::B); // True: +5.0
+  EXPECT_EQ(Evaluate(E), -15.0);
+
+  E.setANumber(100); // False
+  E.setACategorical(Cat::C); // True: +3.0, False: -6.0
+  EXPECT_EQ(Evaluate(E), -3.0);
+}
+} // namespace clangd
+} // namespace clangd
Index: clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
===
--- clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -10,6 +10,7 @@
 #include "ClangdServer.h"
 #include "CodeComplete.h"
 #include "Compiler.h"
+#include "CompletionModel.h"
 #include 

[PATCH] D83814: [clangd] Add Random Forest runtime for code completion.

2020-09-10 Thread UTKARSH SAXENA via Phabricator via cfe-commits
usaxena95 added a comment.

Hi @jkorous

> Do you guys intend to open-source also the training part of the model 
> pipeline ?

Open-sourcing the training part (both dataset generation and using an 
open-source DecisionForest-based framework for training) has been on our radar, 
although gathering capacity for this task has been difficult lately.

> Publish a model trained on generic-enough training set so it could be 
> reasonably used on "any" codebase?

Although the current model has not been trained on a generic codebase, the 
features involved don't capture code style/conventions/variable names, so it 
is likely that it performs well on generic codebases as well. This remains to 
be tested.

> Do you still intend to support the heuristic that is currently powering 
> clangd in the future?

Currently we are planning to use this model behind a flag. Initially we would 
be focusing on comparing the two. Since maintaining and developing signals is 
easier for an ML model, we might end up deprecating the heuristics.

Thanks,
Utkarsh.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83814/new/

https://reviews.llvm.org/D83814

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83814: [clangd] Add Random Forest runtime for code completion.

2020-09-10 Thread UTKARSH SAXENA via Phabricator via cfe-commits
usaxena95 added inline comments.



Comment at: clang-tools-extra/clangd/CMakeLists.txt:30
+  
+set(DF_COMPILER ${CMAKE_CURRENT_SOURCE_DIR}/CompletionModelCodegen.py)
+include(${CMAKE_CURRENT_SOURCE_DIR}/CompletionModel.cmake)

sammccall wrote:
> if you want the compiler script to be a parameter, make it an argument to the 
> function rather than a magic variable. 
> 
> But really, I think the CompletionModel.cmake is tightly coupled to the 
> python script, I think it can be hardcoded there.
Hardcoded it in .cmake file.



Comment at: clang-tools-extra/clangd/CompletionModel.cmake:1
+# Run the Completion Model Codegenerator on the model in the 
+# ${model} directory.

sammccall wrote:
> I think there's some confusion in the naming.
> 
> You've got the code split into two locations: the generic generator and the 
> specific code completion model.
> 
> However the directory named just "model" contains the specific stuff, and the 
> generic parts are named "completionmodel!".
> 
> I'd suggest either:
>  - don't generalize, and put everything in clangd/quality or so
>  - split into clangd/quality/ and clangd/forest/ for the specific/generic 
> parts
Moved .cmake, codegen and model in quality dir.



Comment at: clang-tools-extra/clangd/CompletionModel.cmake:5
+# ${CMAKE_BINARY_DIR}/generated/decision_forest. The generated header
+# will define a C++ class called ${cpp_class} - which may be a
+# namespace-qualified class name.

sammccall wrote:
> what does the class do?
The class specifies the name and scope of the Feature class. 
`clang::clangd::Example` in this case.



Comment at: clang-tools-extra/clangd/CompletionModel.cmake:17
+COMMAND "${Python3_EXECUTABLE}" ${DF_COMPILER} 
+  --model ${model}
+  --output_dir ${output_dir}

sammccall wrote:
> I'd suggest passing the component filenames explicitly here since you're 
> computing them anyway
This allows the generated cc file to include the header using "filename.h".
If we give the filepath as input, we would have to strip out the filename from 
it. 

Although I like the current notion of being explicit that the output_dir 
contains the two files. We need to add output_dir to include path to use this 
library.



Comment at: clang-tools-extra/clangd/CompletionModel.cmake:29
+GENERATED 1
+COMPILE_FLAGS -Wno-unused-label)
+

sammccall wrote:
> this needs to be guarded based on the compiler - other compilers use 
> different flags
> 
> I'd suggest just -Wno-unused
only MSVC needed a different flag. `-Wno-unused` works with Clang and GCC.
https://godbolt.org/z/Gvdne7




Comment at: clang-tools-extra/clangd/CompletionModel.cmake:31
+
+  set(GENERATED_CC ${df_cpp} PARENT_SCOPE)
+  set(DF_INCLUDE ${output_dir} PARENT_SCOPE)  

sammccall wrote:
> It'd be nice to avoid passing data out by setting magic variables with 
> generic names.
> 
> The caller is already passing in the filename they want, so they know what it 
> is.
We can avoid `GENERATED_CC`. But I still wanted to keep the output directory as 
a detail in this function itself and not as an input parameter.
Changed the name to the more specific `DECISION_FOREST_OUTPUT_DIR`. 




Comment at: clang-tools-extra/clangd/CompletionModelCodegen.py:9
+
+class Feature:
+class Type(Enum):

sammccall wrote:
> Hmm, do these classes really pay for themselves compared to just using the 
> JSON-decoded data structures directly and writing functions?
> 
> e.g.
> 
> ```
> def setter(feature):
>   if (feature['kind'] == KIND_CATEGORICAL)
> return "void Set{feature}(float V) {{ {feature} = OrderEncode(V); 
> }}".format(feature['name'])
>   ...
> ```
Removed the Feature class and Tree.
CppClass calculates and holds the namespaces, which I found convenient. 



Comment at: clang-tools-extra/clangd/CompletionModelCodegen.py:52
+self.fname = fname
+assert not cpp_class.startswith(
+"::"), "Do not fully qualify class name."

sammccall wrote:
> why not assert this on self.ns so that "::Foo" will work fine?
Allowed fully qualified names of classes.



Comment at: clang-tools-extra/clangd/CompletionModelCodegen.py:70
+return "GENERATED_CODE_COMPLETION_MODEL_{}_H".format(
+reduce(lambda x, y: x + ('_' if y.isupper() else '') + y,
+   self.fname).upper())

sammccall wrote:
> `''.join('_' if x in '-' else x.upper() for x in self.fname)` ?
Sorry for making this complicated. The filename was assumed to be in PascalCase 
(and to not contain '-' at all).
I wanted to convert it to UPPER_SNAKE_CASE. To avoid unnecessary complexity, 
let's simply convert it to upper case.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83814/new/


[PATCH] D83814: [clangd] Add Random Forest runtime for code completion.

2020-09-10 Thread UTKARSH SAXENA via Phabricator via cfe-commits
usaxena95 updated this revision to Diff 291024.
usaxena95 marked 23 inline comments as done.
usaxena95 added a comment.

Addressed comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83814/new/

https://reviews.llvm.org/D83814

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/for-review-only/CompletionModel.cpp
  clang-tools-extra/clangd/for-review-only/CompletionModel.h
  clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.cpp
  clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.h
  clang-tools-extra/clangd/quality/CompletionModel.cmake
  clang-tools-extra/clangd/quality/CompletionModelCodegen.py
  clang-tools-extra/clangd/quality/README.md
  clang-tools-extra/clangd/quality/model/features.json
  clang-tools-extra/clangd/quality/model/forest.json
  clang-tools-extra/clangd/unittests/CMakeLists.txt
  clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
  clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
  clang-tools-extra/clangd/unittests/model/CategoricalFeature.h
  clang-tools-extra/clangd/unittests/model/features.json
  clang-tools-extra/clangd/unittests/model/forest.json

Index: clang-tools-extra/clangd/unittests/model/forest.json
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/model/forest.json
@@ -0,0 +1,52 @@
+[
+{
+"operation": "if_greater",
+"feature": "ANumber",
+"threshold": 200.0,
+"then": {
+"operation": "if_greater",
+"feature": "AFloat",
+"threshold": -1,
+"then": {
+"operation": "boost",
+"score": 10.0
+},
+"else": {
+"operation": "boost",
+"score": -20.0
+}
+},
+"else": {
+"operation": "if_member",
+"feature": "ACategorical",
+"set": [
+"A",
+"C"
+],
+"then": {
+"operation": "boost",
+"score": 3.0
+},
+"else": {
+"operation": "boost",
+"score": -4.0
+}
+}
+},
+{
+"operation": "if_member",
+"feature": "ACategorical",
+"set": [
+"A",
+"B"
+],
+"then": {
+"operation": "boost",
+"score": 5.0
+},
+"else": {
+"operation": "boost",
+"score": -6.0
+}
+}
+]
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/model/features.json
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/model/features.json
@@ -0,0 +1,16 @@
+[
+{
+"name": "ANumber",
+"type": "NUMBER"
+},
+{
+"name": "AFloat",
+"type": "NUMBER"
+},
+{
+"name": "ACategorical",
+"type": "ENUM",
+"enum": "ns1::ns2::TestEnum",
+"header": "model/CategoricalFeature.h"
+}
+]
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/model/CategoricalFeature.h
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/model/CategoricalFeature.h
@@ -0,0 +1,5 @@
+namespace ns1 {
+namespace ns2 {
+enum TestEnum { A, B, C, D };
+} // namespace ns2
+} // namespace ns1
Index: clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
@@ -0,0 +1,29 @@
+#include "DecisionForestRuntimeTest.h"
+#include "model/CategoricalFeature.h"
+#include "gtest/gtest.h"
+
+namespace clangd {
+namespace clangd {
+
+TEST(DecisionForestRuntime, Evaluate) {
+  using Example = ::ns1::ns2::test::Example;
+  using Cat = ::ns1::ns2::TestEnum;
+  using ::ns1::ns2::test::Evaluate;
+
+  Example E;
+  E.setANumber(200); // True
+  E.setAFloat(0);// True: +10.0
+  E.setACategorical(Cat::A); // True: +5.0
+  EXPECT_EQ(Evaluate(E), 15.0);
+
+  E.setANumber(200); // True
+  E.setAFloat(-2.5); // False: -20.0
+  E.setACategorical(Cat::B); // True: +5.0
+  EXPECT_EQ(Evaluate(E), -15.0);
+
+  E.setANumber(100); // False
+  E.setACategorical(Cat::C); // True: +3.0, False: -6.0
+  EXPECT_EQ(Evaluate(E), -3.0);
+}
+} // namespace clangd
+} // namespace clangd
Index: clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
===
--- clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -10,6 +10,7 @@
 #include "ClangdServer.h"
 #include "CodeComplete.h"
 #include "Compiler.h"
+#include "CompletionModel.h"
 

[PATCH] D83814: [clangd] Add Random Forest runtime for code completion.

2020-09-08 Thread Jan Korous via Phabricator via cfe-commits
jkorous added subscribers: dexonsmith, akyrtzi.
jkorous added a comment.

Hi @usaxena95 and @sammccall,

I am wondering about a couple of high-level things.

Do you guys intend to open-source also the training part of the model pipeline 
or publish a model trained on generic-enough training set so it could be 
reasonably used on "any" codebase?

Do you still intend to support the heuristic that is currently powering clangd 
in the future?

Thanks!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83814/new/

https://reviews.llvm.org/D83814

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83814: [clangd] Add Random Forest runtime for code completion.

2020-07-24 Thread Sam McCall via Phabricator via cfe-commits
sammccall added a comment.

In D83814#2166349, @usaxena95 wrote:

> This is still WIP and I will provide more details in the description once 
> this is finalized.


This really should have high level documentation - I don't think Nathan will be 
the only one to ask these questions.
We need a description of what problem this solves, what the concepts are 
(trees, features, scores), inference, training, data sources, generated API.

Given that the implementation is necessarily split across CMake, generated 
code, generator, and data (but not a lot of hand-written C++) I think it's 
probably best documented in a README.md or so in the model/ dir.

Seems fine to do that in this patch, or ahead of time (before the 
implementation)... I wouldn't wait to do it after, it'll help with the review.

---

One question we haven't discussed is how workspace/symbol should work. (This 
uses the hand-tuned score function with some different logic). This is relevant 
to naming: it's not the "completion model" if it also has other features.




Comment at: clang-tools-extra/clangd/CMakeLists.txt:30
+  
+set(DF_COMPILER ${CMAKE_CURRENT_SOURCE_DIR}/CompletionModelCodegen.py)
+include(${CMAKE_CURRENT_SOURCE_DIR}/CompletionModel.cmake)

if you want the compiler script to be a parameter, make it an argument to the 
function rather than a magic variable. 

But really, I think the CompletionModel.cmake is tightly coupled to the python 
script, I think it can be hardcoded there.



Comment at: clang-tools-extra/clangd/CompletionModel.cmake:1
+# Run the Completion Model Codegenerator on the model in the 
+# ${model} directory.

I think there's some confusion in the naming.

You've got the code split into two locations: the generic generator and the 
specific code completion model.

However the directory named just "model" contains the specific stuff, and the 
generic parts are named "completionmodel!".

I'd suggest either:
 - don't generalize, and put everything in clangd/quality or so
 - split into clangd/quality/ and clangd/forest/ for the specific/generic parts



Comment at: clang-tools-extra/clangd/CompletionModel.cmake:5
+# ${CMAKE_BINARY_DIR}/generated/decision_forest. The generated header
+# will define a C++ class called ${cpp_class} - which may be a
+# namespace-qualified class name.

what does the class do?



Comment at: clang-tools-extra/clangd/CompletionModel.cmake:7
+# namespace-qualified class name.
+function(df_compile model fname cpp_class)  
+  set(model_json ${model}/forest.json)

df is cryptic.
decision_forest or gen_decision_forest?



Comment at: clang-tools-extra/clangd/CompletionModel.cmake:17
+COMMAND "${Python3_EXECUTABLE}" ${DF_COMPILER} 
+  --model ${model}
+  --output_dir ${output_dir}

I'd suggest passing the component filenames explicitly here since you're 
computing them anyway



Comment at: clang-tools-extra/clangd/CompletionModel.cmake:19
+  --output_dir ${output_dir}
+  --fname ${fname}
+  --cpp_class ${cpp_class}

fname -> filename



Comment at: clang-tools-extra/clangd/CompletionModel.cmake:29
+GENERATED 1
+COMPILE_FLAGS -Wno-unused-label)
+

this needs to be guarded based on the compiler - other compilers use different 
flags

I'd suggest just -Wno-unused



Comment at: clang-tools-extra/clangd/CompletionModel.cmake:31
+
+  set(GENERATED_CC ${df_cpp} PARENT_SCOPE)
+  set(DF_INCLUDE ${output_dir} PARENT_SCOPE)  

It'd be nice to avoid passing data out by setting magic variables with generic 
names.

The caller is already passing in the filename they want, so they know what it 
is.



Comment at: clang-tools-extra/clangd/CompletionModelCodegen.py:1
+import argparse
+import json

this needs documentation throughout



Comment at: clang-tools-extra/clangd/CompletionModelCodegen.py:9
+
+class Feature:
+class Type(Enum):

Hmm, do these classes really pay for themselves compared to just using the 
JSON-decoded data structures directly and writing functions?

e.g.

```
def setter(feature):
  if (feature['kind'] == KIND_CATEGORICAL)
return "void Set{feature}(float V) {{ {feature} = OrderEncode(V); 
}}".format(feature['name'])
  ...
```



Comment at: clang-tools-extra/clangd/CompletionModelCodegen.py:10
+class Feature:
+class Type(Enum):
+NUMERICAL = 1

These labels seem a bit abstract, what do you think about "number"/"enum"?



Comment at: clang-tools-extra/clangd/CompletionModelCodegen.py:21
+if self.type == Feature.Type.CATEGORICAL:
+assert 'header' in feature_json, "Header not found in categorical 
feature."
+

[PATCH] D83814: [clangd] Add Random Forest runtime for code completion.

2020-07-24 Thread UTKARSH SAXENA via Phabricator via cfe-commits
usaxena95 updated this revision to Diff 280455.
usaxena95 edited the summary of this revision.
usaxena95 added a comment.

Better formatting in generated files.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83814/new/

https://reviews.llvm.org/D83814

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/CompletionModel.cmake
  clang-tools-extra/clangd/CompletionModelCodegen.py
  clang-tools-extra/clangd/for-review-only/CompletionModel.cpp
  clang-tools-extra/clangd/for-review-only/CompletionModel.h
  clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.cpp
  clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.h
  clang-tools-extra/clangd/model/features.json
  clang-tools-extra/clangd/model/forest.json
  clang-tools-extra/clangd/unittests/CMakeLists.txt
  clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
  clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
  clang-tools-extra/clangd/unittests/model/CategoricalFeature.h
  clang-tools-extra/clangd/unittests/model/features.json
  clang-tools-extra/clangd/unittests/model/forest.json

Index: clang-tools-extra/clangd/unittests/model/forest.json
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/model/forest.json
@@ -0,0 +1,52 @@
+[
+{
+"operation": "if_greater",
+"feature": "ANumber",
+"threshold": 200.0,
+"then": {
+"operation": "if_greater",
+"feature": "AFloat",
+"threshold": -1,
+"then": {
+"operation": "boost",
+"score": 10.0
+},
+"else": {
+"operation": "boost",
+"score": -20.0
+}
+},
+"else": {
+"operation": "if_member",
+"feature": "ACategorical",
+"set": [
+"A",
+"C"
+],
+"then": {
+"operation": "boost",
+"score": 3.0
+},
+"else": {
+"operation": "boost",
+"score": -4.0
+}
+}
+},
+{
+"operation": "if_member",
+"feature": "ACategorical",
+"set": [
+"A",
+"B"
+],
+"then": {
+"operation": "boost",
+"score": 5.0
+},
+"else": {
+"operation": "boost",
+"score": -6.0
+}
+}
+]
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/model/features.json
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/model/features.json
@@ -0,0 +1,16 @@
+[
+{
+"name": "ANumber",
+"type": "NUMERICAL"
+},
+{
+"name": "AFloat",
+"type": "NUMERICAL"
+},
+{
+"name": "ACategorical",
+"type": "CATEGORICAL",
+"enum": "ns1::ns2::TestEnum",
+"header": "model/CategoricalFeature.h"
+}
+]
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/model/CategoricalFeature.h
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/model/CategoricalFeature.h
@@ -0,0 +1,5 @@
+namespace ns1 {
+namespace ns2 {
+enum TestEnum { A, B, C, D };
+} // namespace ns2
+} // namespace ns1
Index: clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
@@ -0,0 +1,29 @@
+#include "DecisionForestRuntimeTest.h"
+#include "model/CategoricalFeature.h"
+#include "gtest/gtest.h"
+
+namespace clangd {
+namespace clangd {
+
+TEST(DecisionForestRuntime, Evaluate) {
+  using Example = ::ns1::ns2::test::Example;
+  using Cat = ::ns1::ns2::TestEnum;
+  using ::ns1::ns2::test::Evaluate;
+
+  Example E;
+  E.SetANumber(200); // True
+  E.SetAFloat(0);// True: +10.0
+  E.SetACategorical(Cat::A); // True: +5.0
+  EXPECT_EQ(Evaluate(E), 15.0);
+
+  E.SetANumber(200); // True
+  E.SetAFloat(-2.5); // False: -20.0
+  E.SetACategorical(Cat::B); // True: +5.0
+  EXPECT_EQ(Evaluate(E), -15.0);
+
+  E.SetANumber(100); // False
+  E.SetACategorical(Cat::C); // True: +3.0, False: -6.0
+  EXPECT_EQ(Evaluate(E), -3.0);
+}
+} // namespace clangd
+} // namespace clangd
Index: clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
===
--- clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -10,6 +10,7 @@
 #include "ClangdServer.h"
 #include "CodeComplete.h"
 #include "Compiler.h"
+#include "CompletionModel.h"
 #include "Matchers.h"
 #include "Protocol.h"

[PATCH] D83814: [clangd] Add Random Forest runtime for code completion.

2020-07-24 Thread UTKARSH SAXENA via Phabricator via cfe-commits
usaxena95 updated this revision to Diff 280406.
usaxena95 added a comment.

Addressed offline comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83814/new/

https://reviews.llvm.org/D83814

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/CompletionModel.cmake
  clang-tools-extra/clangd/CompletionModelCodegen.py
  clang-tools-extra/clangd/for-review-only/CompletionModel.cpp
  clang-tools-extra/clangd/for-review-only/CompletionModel.h
  clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.cpp
  clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.h
  clang-tools-extra/clangd/model/features.json
  clang-tools-extra/clangd/model/forest.json
  clang-tools-extra/clangd/unittests/CMakeLists.txt
  clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
  clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
  clang-tools-extra/clangd/unittests/model/CategoricalFeature.h
  clang-tools-extra/clangd/unittests/model/features.json
  clang-tools-extra/clangd/unittests/model/forest.json

Index: clang-tools-extra/clangd/unittests/model/forest.json
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/model/forest.json
@@ -0,0 +1,52 @@
+[
+{
+"operation": "if_greater",
+"feature": "ANumber",
+"threshold": 200.0,
+"then": {
+"operation": "if_greater",
+"feature": "AFloat",
+"threshold": -1,
+"then": {
+"operation": "boost",
+"score": 10.0
+},
+"else": {
+"operation": "boost",
+"score": -20.0
+}
+},
+"else": {
+"operation": "if_member",
+"feature": "ACategorical",
+"set": [
+"A",
+"C"
+],
+"then": {
+"operation": "boost",
+"score": 3.0
+},
+"else": {
+"operation": "boost",
+"score": -4.0
+}
+}
+},
+{
+"operation": "if_member",
+"feature": "ACategorical",
+"set": [
+"A",
+"B"
+],
+"then": {
+"operation": "boost",
+"score": 5.0
+},
+"else": {
+"operation": "boost",
+"score": -6.0
+}
+}
+]
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/model/features.json
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/model/features.json
@@ -0,0 +1,16 @@
+[
+{
+"name": "ANumber",
+"type": "NUMERICAL"
+},
+{
+"name": "AFloat",
+"type": "NUMERICAL"
+},
+{
+"name": "ACategorical",
+"type": "CATEGORICAL",
+"enum": "ns1::ns2::TestEnum",
+"header": "model/CategoricalFeature.h"
+}
+]
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/model/CategoricalFeature.h
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/model/CategoricalFeature.h
@@ -0,0 +1,5 @@
+namespace ns1 {
+namespace ns2 {
+enum TestEnum { A, B, C, D };
+} // namespace ns2
+} // namespace ns1
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
@@ -0,0 +1,30 @@
+
+#include "DecisionForestRuntimeTest.h"
+#include "model/CategoricalFeature.h"
+#include "gtest/gtest.h"
+
+namespace clangd {
+namespace clangd {
+
+TEST(DecisionForestRuntime, Evaluate) {
+  using Example = ::ns1::ns2::test::Example;
+  using Cat = ::ns1::ns2::TestEnum;
+  using ::ns1::ns2::test::Evaluate;
+
+  Example E;
+  E.SetANumber(200); // True
+  E.SetAFloat(0);// True: +10.0
+  E.SetACategorical(Cat::A); // True: +5.0
+  EXPECT_EQ(Evaluate(E), 15.0);
+
+  E.SetANumber(200); // True
+  E.SetAFloat(-2.5); // False: -20.0
+  E.SetACategorical(Cat::B); // True: +5.0
+  EXPECT_EQ(Evaluate(E), -15.0);
+
+  E.SetANumber(100); // False
+  E.SetACategorical(Cat::C); // True: +3.0, False: -6.0
+  EXPECT_EQ(Evaluate(E), -3.0);
+}
+} // namespace clangd
+} // namespace clangd
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
===
--- clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -10,6 +10,7 @@
 #include "ClangdServer.h"
 #include "CodeComplete.h"
 #include "Compiler.h"
+#include "CompletionModel.h"
 #include "Matchers.h"
 #include 

[PATCH] D83814: [clangd] Add Random Forest runtime for code completion.

2020-07-22 Thread UTKARSH SAXENA via Phabricator via cfe-commits
usaxena95 added a comment.

The features refer to the code completion signals in 
https://github.com/llvm/llvm-project/blob/master/clang-tools-extra/clangd/Quality.h
These signals are currently used to map the code completion candidates to a 
relevance score using hand-coded heuristics. 
We intend to replace the heuristics with a decision forest model. This patch 
introduces a dummy model and the corresponding runtime that will be used to 
run inference on this model.

This is still WIP and I will provide more details in the description once this 
is finalized.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83814/new/

https://reviews.llvm.org/D83814



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83814: [clangd] Add Random Forest runtime for code completion.

2020-07-21 Thread Nathan Ridge via Phabricator via cfe-commits
nridge added a comment.

Is there some additional context for what this patch is aiming to do? The 
description sounds interesting but I don't really understand it.

What is a "feature" in this context?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83814/new/

https://reviews.llvm.org/D83814



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83814: [clangd] Add Random Forest runtime for code completion.

2020-07-14 Thread UTKARSH SAXENA via Phabricator via cfe-commits
usaxena95 created this revision.
Herald added subscribers: cfe-commits, kadircet, arphaman, jkorous, MaskRay, 
ilya-biryukov, mgorny.
Herald added a project: clang.

[WIP]

- Proposes a JSON format for representing a Random Forest model.
- Proposes a way to test the generated runtime using a test model.

TODO:

- Add generated source code snippet for easier review.
- Fix unused label warning.
- Figure out required using declarations for CATEGORICAL columns from 
Features.json.
- Necessary Google3 internal modifications for blaze before landing.
- Add documentation for format of the model.
- Document more.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D83814

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/CompletionModelCodegen.py
  clang-tools-extra/clangd/model/features.json
  clang-tools-extra/clangd/model/tree.json
  clang-tools-extra/clangd/unittests/CMakeLists.txt
  clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
  clang-tools-extra/clangd/unittests/model/features.json
  clang-tools-extra/clangd/unittests/model/tree.json

Index: clang-tools-extra/clangd/unittests/model/tree.json
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/model/tree.json
@@ -0,0 +1,52 @@
+[
+{
+"operation": "if_greater",
+"feature": "NumReferences",
+"threshold": 200.0,
+"then": {
+"operation": "if_greater",
+"feature": "FileProximityDistance",
+"threshold": -1,
+"then": {
+"operation": "boost",
+"score": 10.0
+},
+"else": {
+"operation": "boost",
+"score": -20.0
+}
+},
+"else": {
+"operation": "if_member",
+"feature": "ContextKind",
+"set": [
+"Kind::CCC_DotMemberAccess",
+"Kind::CCC_ArrowMemberAccess"
+],
+"then": {
+"operation": "boost",
+"score": 3.0
+},
+"else": {
+"operation": "boost",
+"score": -4.0
+}
+}
+},
+{
+"operation": "if_member",
+"feature": "ContextKind",
+"set": [
+"Kind::CCC_Namespace",
+"Kind::CCC_ArrowMemberAccess"
+],
+"then": {
+"operation": "boost",
+"score": 5.0
+},
+"else": {
+"operation": "boost",
+"score": -6.0
+}
+}
+]
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/model/features.json
===
--- /dev/null
+++ clang-tools-extra/clangd/unittests/model/features.json
@@ -0,0 +1,14 @@
+[
+{
+"name": "NumReferences",
+"type": "NUMERICAL"
+},
+{
+"name": "FileProximityDistance",
+"type": "NUMERICAL"
+},
+{
+"name": "ContextKind",
+"type": "CATEGORICAL"
+}
+]
\ No newline at end of file
Index: clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
===
--- clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -10,6 +10,8 @@
 #include "ClangdServer.h"
 #include "CodeComplete.h"
 #include "Compiler.h"
+#include "CompletionModel.h"
+#include "CompletionModelTest.h"
 #include "Matchers.h"
 #include "Protocol.h"
 #include "Quality.h"
@@ -47,6 +49,7 @@
 using ::testing::IsEmpty;
 using ::testing::Not;
 using ::testing::UnorderedElementsAre;
+using ContextKind = CodeCompletionContext::Kind;
 
 // GMock helpers for matching completion items.
 MATCHER_P(Named, Name, "") { return arg.Name == Name; }
@@ -161,6 +164,36 @@
   return S;
 }
 
+TEST(DecisionForestRuntime, Evaluate) {
+  using Example = clangd::test::Example;
+  using clangd::test::Evaluate;
+
+  Example E;
+  E.SetNumReferences(200);  // True
+  E.SetFileProximityDistance(0);// True: +10.0
+  E.SetContextKind(ContextKind::CCC_ArrowMemberAccess); // True: +5.0
+  EXPECT_EQ(Evaluate(E), 15.0);
+
+  E.SetNumReferences(200);  // True
+  E.SetFileProximityDistance(-2);   // False: -20.0
+  E.SetContextKind(ContextKind::CCC_Namespace); // True: +5.0
+  EXPECT_EQ(Evaluate(E), -15.0);
+
+  E.SetNumReferences(100);// False
+  E.SetContextKind(ContextKind::CCC_DotMemberAccess); // True: +3.0, False: -6.0
+  EXPECT_EQ(Evaluate(E), -3.0);
+}
+
+TEST(DecisionForestRuntime, SanityTest) {
+  using Example = clangd::Example;
+  using clangd::Evaluate;
+  Example E1;
+  E1.SetContextKind(ContextKind::CCC_ArrowMemberAccess);
+  Example E2;
+  E2.SetContextKind(ContextKind::CCC_SymbolOrNewName);
+