[GitHub] [nifi-minifi-cpp] arpadboda commented on a change in pull request #716: MINIFICPP-1127 - Provenance repo performance should be improved

2020-02-05 Thread GitBox
arpadboda commented on a change in pull request #716: MINIFICPP-1127 - 
Provenance repo performance should be improved
URL: https://github.com/apache/nifi-minifi-cpp/pull/716#discussion_r375235671
 
 

 ##
 File path: libminifi/test/rocksdb-tests/DBProvenanceRepositoryTests.cpp
 ##
 @@ -0,0 +1,122 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ProvenanceRepository.h"
+#include "../TestBase.h"
+#include 
+#include 
+#include 
+#include 
+
+#define TEST_PROVENANCE_STORAGE_SIZE (1024*100)  // 100 KB
+#define TEST_MAX_PROVENANCE_STORAGE_SIZE (100*1024*1024)  // 100 MB
+
+#define TEST_PROVENANCE_ENTRY_LIFE_TIME (1000)  // 1 sec
+
+void generateData(std::vector& data) {
+  std::random_device rd;
+  std::mt19937 eng(rd());
+
+  std::uniform_int_distribution<> distr(std::numeric_limits::min(), 
std::numeric_limits::max());
+  auto rand = std::bind(distr, eng);
+  std::generate_n(data.begin(), data.size(), rand);
+}
+
+void provisionRepo(minifi::provenance::ProvenanceRepository& repo, size_t 
count, size_t size) {
+  for (int i = 0; i < count; ++i) {
+std::vector v(size);
+generateData(v);
+REQUIRE(repo.Put(std::to_string(i), reinterpret_cast(v.data()), v.size()));
+  }
+}
+
+void verifyMaxKeyCount(const minifi::provenance::ProvenanceRepository& repo, 
uint64_t keyCount) {
+  uint64_t k = keyCount;
+
+  for (int i = 0; i < 5; ++i) {
+std::this_thread::sleep_for(std::chrono::seconds(1));
+k = std::min(k, repo.getKeyCount());
+if (k < keyCount) {
+  break;
+}
+  }
+
+  REQUIRE(k < keyCount);
+}
+
+TEST_CASE("Test size limit", "[sizeLimitTest]") {
+  TestController testController;
+
+  char dirtemplate[] = "/tmp/db.XX";
+  auto temp_dir = testController.createTempDirectory(dirtemplate);
+  REQUIRE(!temp_dir.empty());
+
+  // 20 sec, 100kb - going to exceed the latter
+  minifi::provenance::ProvenanceRepository provdb("TestProvRepo", temp_dir,
+  MAX_PROVENANCE_ENTRY_LIFE_TIME, TEST_PROVENANCE_STORAGE_SIZE, 1000);
+
+  auto configuration = 
std::make_shared();
+  
configuration->set(minifi::Configure::nifi_dbcontent_repository_directory_default,
 temp_dir);
+
+  REQUIRE(provdb.initialize(configuration));
+
+  uint64_t keyCount = 500;
+
+  provisionRepo(provdb, keyCount, 10240);
+
+  verifyMaxKeyCount(provdb, 200);
+}
+
+TEST_CASE("Test time limit", "[timeLimitTest]") {
+  TestController testController;
+
+  char dirtemplate[] = "/tmp/db.XX";
+  auto temp_dir = testController.createTempDirectory(dirtemplate);
+  REQUIRE(!temp_dir.empty());
+
+  // 20 sec, 100kb - going to exceed the latter
+  minifi::provenance::ProvenanceRepository provdb("TestProvRepo", temp_dir,
+  
TEST_PROVENANCE_ENTRY_LIFE_TIME, TEST_MAX_PROVENANCE_STORAGE_SIZE, 1000);
+
+  auto configuration = 
std::make_shared();
+  
configuration->set(minifi::Configure::nifi_dbcontent_repository_directory_default,
 temp_dir);
+
+  REQUIRE(provdb.initialize(configuration));
+
+  uint64_t keyCount = 500;
+
+  provisionRepo(provdb, keyCount / 2, 102400);
+
+  REQUIRE(provdb.getKeyCount() == 250);
+
+  /**
+   * Magic: TTL-based DB cleanup only triggers when writeBuffers are 
serialized to storage
+   * To achieve this 250 entries are put to DB with a total size that ensures 
at least one buffer is serialized
+   * Wait a sec to make sure the serialized records expire
 
 Review comment:
   Yes and no. 
   In theory, yes; on the other hand, the smaller the timeouts we use, the higher the 
risk of the test becoming unstable. 
   200ms is something that can easily be overrun by a simple IO operation. For 
example, assume parallel execution of a lot of testcases, many of them executing 
IO operations. 
   
   Changed the comment to reflect the code. 
   
   The execution of the two testcases together takes a bit more than 4 secs on 
my notebook. That means negligible additional time in case of parallel test 
execution (mostly because we have many testsuites taking 10+ secs). Without any 
visible gain I wouldn't risk lower timeouts and the effort spent on finetuning 
would simply be wasted. 


This is an automated message from the Apache Git 

[GitHub] [nifi-minifi-cpp] arpadboda commented on a change in pull request #716: MINIFICPP-1127 - Provenance repo performance should be improved

2020-02-05 Thread GitBox
arpadboda commented on a change in pull request #716: MINIFICPP-1127 - 
Provenance repo performance should be improved
URL: https://github.com/apache/nifi-minifi-cpp/pull/716#discussion_r375235671
 
 

 ##
 File path: libminifi/test/rocksdb-tests/DBProvenanceRepositoryTests.cpp
 ##
 @@ -0,0 +1,122 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ProvenanceRepository.h"
+#include "../TestBase.h"
+#include 
+#include 
+#include 
+#include 
+
+#define TEST_PROVENANCE_STORAGE_SIZE (1024*100)  // 100 KB
+#define TEST_MAX_PROVENANCE_STORAGE_SIZE (100*1024*1024)  // 100 MB
+
+#define TEST_PROVENANCE_ENTRY_LIFE_TIME (1000)  // 1 sec
+
+void generateData(std::vector& data) {
+  std::random_device rd;
+  std::mt19937 eng(rd());
+
+  std::uniform_int_distribution<> distr(std::numeric_limits::min(), 
std::numeric_limits::max());
+  auto rand = std::bind(distr, eng);
+  std::generate_n(data.begin(), data.size(), rand);
+}
+
+void provisionRepo(minifi::provenance::ProvenanceRepository& repo, size_t 
count, size_t size) {
+  for (int i = 0; i < count; ++i) {
+std::vector v(size);
+generateData(v);
+REQUIRE(repo.Put(std::to_string(i), reinterpret_cast(v.data()), v.size()));
+  }
+}
+
+void verifyMaxKeyCount(const minifi::provenance::ProvenanceRepository& repo, 
uint64_t keyCount) {
+  uint64_t k = keyCount;
+
+  for (int i = 0; i < 5; ++i) {
+std::this_thread::sleep_for(std::chrono::seconds(1));
+k = std::min(k, repo.getKeyCount());
+if (k < keyCount) {
+  break;
+}
+  }
+
+  REQUIRE(k < keyCount);
+}
+
+TEST_CASE("Test size limit", "[sizeLimitTest]") {
+  TestController testController;
+
+  char dirtemplate[] = "/tmp/db.XX";
+  auto temp_dir = testController.createTempDirectory(dirtemplate);
+  REQUIRE(!temp_dir.empty());
+
+  // 20 sec, 100kb - going to exceed the latter
+  minifi::provenance::ProvenanceRepository provdb("TestProvRepo", temp_dir,
+  MAX_PROVENANCE_ENTRY_LIFE_TIME, TEST_PROVENANCE_STORAGE_SIZE, 1000);
+
+  auto configuration = 
std::make_shared();
+  
configuration->set(minifi::Configure::nifi_dbcontent_repository_directory_default,
 temp_dir);
+
+  REQUIRE(provdb.initialize(configuration));
+
+  uint64_t keyCount = 500;
+
+  provisionRepo(provdb, keyCount, 10240);
+
+  verifyMaxKeyCount(provdb, 200);
+}
+
+TEST_CASE("Test time limit", "[timeLimitTest]") {
+  TestController testController;
+
+  char dirtemplate[] = "/tmp/db.XX";
+  auto temp_dir = testController.createTempDirectory(dirtemplate);
+  REQUIRE(!temp_dir.empty());
+
+  // 20 sec, 100kb - going to exceed the latter
+  minifi::provenance::ProvenanceRepository provdb("TestProvRepo", temp_dir,
+  
TEST_PROVENANCE_ENTRY_LIFE_TIME, TEST_MAX_PROVENANCE_STORAGE_SIZE, 1000);
+
+  auto configuration = 
std::make_shared();
+  
configuration->set(minifi::Configure::nifi_dbcontent_repository_directory_default,
 temp_dir);
+
+  REQUIRE(provdb.initialize(configuration));
+
+  uint64_t keyCount = 500;
+
+  provisionRepo(provdb, keyCount / 2, 102400);
+
+  REQUIRE(provdb.getKeyCount() == 250);
+
+  /**
+   * Magic: TTL-based DB cleanup only triggers when writeBuffers are 
serialized to storage
+   * To achieve this 250 entries are put to DB with a total size that ensures 
at least one buffer is serialized
+   * Wait a sec to make sure the serialized records expire
 
 Review comment:
   Yes and no. 
   In theory, yes; on the other hand, the smaller the timeouts we use, the higher the 
risk of the test becoming unstable. 
   200ms is something that can easily be overrun by a simple IO operation. For 
example, assume parallel execution of a lot of testcases, many of them executing 
IO operations. 
   
   Changed the comment to reflect the code. 
   
   The execution of the two testcases together takes a bit more than 4 secs on 
my notebook. That means negligible additional time in case of parallel test 
execution (mostly because we have many testsuites taking 10+ secs). Without any 
visible gain I wouldn't risk lower timeouts and the effort spent on finetuning 
would simply be wasted. 


This is an automated message from the Apache 

[GitHub] [nifi-minifi-cpp] arpadboda commented on a change in pull request #716: MINIFICPP-1127 - Provenance repo performance should be improved

2020-02-05 Thread GitBox
arpadboda commented on a change in pull request #716: MINIFICPP-1127 - 
Provenance repo performance should be improved
URL: https://github.com/apache/nifi-minifi-cpp/pull/716#discussion_r375229974
 
 

 ##
 File path: libminifi/test/rocksdb-tests/DBProvenanceRepositoryTests.cpp
 ##
 @@ -0,0 +1,122 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ProvenanceRepository.h"
+#include "../TestBase.h"
+#include 
+#include 
+#include 
+#include 
+
+#define TEST_PROVENANCE_STORAGE_SIZE (1024*100)  // 100 KB
+#define TEST_MAX_PROVENANCE_STORAGE_SIZE (100*1024*1024)  // 100 MB
+
+#define TEST_PROVENANCE_ENTRY_LIFE_TIME (1000)  // 1 sec
+
+void generateData(std::vector& data) {
+  std::random_device rd;
+  std::mt19937 eng(rd());
+
+  std::uniform_int_distribution<> distr(std::numeric_limits::min(), 
std::numeric_limits::max());
+  auto rand = std::bind(distr, eng);
+  std::generate_n(data.begin(), data.size(), rand);
+}
+
+void provisionRepo(minifi::provenance::ProvenanceRepository& repo, size_t 
count, size_t size) {
+  for (int i = 0; i < count; ++i) {
+std::vector v(size);
+generateData(v);
+REQUIRE(repo.Put(std::to_string(i), reinterpret_cast(v.data()), v.size()));
+  }
+}
+
+void verifyMaxKeyCount(const minifi::provenance::ProvenanceRepository& repo, 
uint64_t keyCount) {
+  uint64_t k = keyCount;
+
+  for (int i = 0; i < 5; ++i) {
+std::this_thread::sleep_for(std::chrono::seconds(1));
 
 Review comment:
   Done


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [nifi-minifi-cpp] arpadboda commented on a change in pull request #716: MINIFICPP-1127 - Provenance repo performance should be improved

2020-02-05 Thread GitBox
arpadboda commented on a change in pull request #716: MINIFICPP-1127 - 
Provenance repo performance should be improved
URL: https://github.com/apache/nifi-minifi-cpp/pull/716#discussion_r375230020
 
 

 ##
 File path: libminifi/test/rocksdb-tests/DBProvenanceRepositoryTests.cpp
 ##
 @@ -0,0 +1,122 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ProvenanceRepository.h"
+#include "../TestBase.h"
+#include 
+#include 
+#include 
+#include 
+
+#define TEST_PROVENANCE_STORAGE_SIZE (1024*100)  // 100 KB
+#define TEST_MAX_PROVENANCE_STORAGE_SIZE (100*1024*1024)  // 100 MB
+
+#define TEST_PROVENANCE_ENTRY_LIFE_TIME (1000)  // 1 sec
+
+void generateData(std::vector& data) {
+  std::random_device rd;
+  std::mt19937 eng(rd());
+
+  std::uniform_int_distribution<> distr(std::numeric_limits::min(), 
std::numeric_limits::max());
+  auto rand = std::bind(distr, eng);
+  std::generate_n(data.begin(), data.size(), rand);
+}
+
+void provisionRepo(minifi::provenance::ProvenanceRepository& repo, size_t 
count, size_t size) {
 
 Review comment:
   Done


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [nifi-minifi-cpp] arpadboda commented on a change in pull request #716: MINIFICPP-1127 - Provenance repo performance should be improved

2020-02-05 Thread GitBox
arpadboda commented on a change in pull request #716: MINIFICPP-1127 - 
Provenance repo performance should be improved
URL: https://github.com/apache/nifi-minifi-cpp/pull/716#discussion_r375228737
 
 

 ##
 File path: libminifi/test/rocksdb-tests/DBProvenanceRepositoryTests.cpp
 ##
 @@ -0,0 +1,122 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ProvenanceRepository.h"
+#include "../TestBase.h"
+#include 
+#include 
+#include 
+#include 
+
+#define TEST_PROVENANCE_STORAGE_SIZE (1024*100)  // 100 KB
+#define TEST_MAX_PROVENANCE_STORAGE_SIZE (100*1024*1024)  // 100 MB
+
+#define TEST_PROVENANCE_ENTRY_LIFE_TIME (1000)  // 1 sec
+
+void generateData(std::vector& data) {
 
 Review comment:
   The contract of this implementation implies that the caller is responsible 
for allocation by setting the size of the vector. In case the vector is 
returned an additional param would be needed to provide the size of the vector 
to return. 
   
   That could work as well, but in my opinion the current approach is much simpler 
for the given use-case. 
   
   In a nutshell: the current contract is just about filling the provided vector 
without taking care of allocation. 


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [nifi-minifi-cpp] arpadboda commented on a change in pull request #716: MINIFICPP-1127 - Provenance repo performance should be improved

2020-02-05 Thread GitBox
arpadboda commented on a change in pull request #716: MINIFICPP-1127 - 
Provenance repo performance should be improved
URL: https://github.com/apache/nifi-minifi-cpp/pull/716#discussion_r375228737
 
 

 ##
 File path: libminifi/test/rocksdb-tests/DBProvenanceRepositoryTests.cpp
 ##
 @@ -0,0 +1,122 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ProvenanceRepository.h"
+#include "../TestBase.h"
+#include 
+#include 
+#include 
+#include 
+
+#define TEST_PROVENANCE_STORAGE_SIZE (1024*100)  // 100 KB
+#define TEST_MAX_PROVENANCE_STORAGE_SIZE (100*1024*1024)  // 100 MB
+
+#define TEST_PROVENANCE_ENTRY_LIFE_TIME (1000)  // 1 sec
+
+void generateData(std::vector& data) {
 
 Review comment:
   The contract of this implementation implies that the caller is responsible 
for allocation by setting the size of the vector. In case the vector is 
returned an additional param would be needed to provide the size of the vector 
to return. 
   
   That could work as well, but in my opinion the current approach is much simpler 
for the given use-case. 


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [nifi-minifi-cpp] arpadboda commented on a change in pull request #716: MINIFICPP-1127 - Provenance repo performance should be improved

2020-02-05 Thread GitBox
arpadboda commented on a change in pull request #716: MINIFICPP-1127 - 
Provenance repo performance should be improved
URL: https://github.com/apache/nifi-minifi-cpp/pull/716#discussion_r375216809
 
 

 ##
 File path: libminifi/test/rocksdb-tests/DBProvenanceRepositoryTests.cpp
 ##
 @@ -0,0 +1,122 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ProvenanceRepository.h"
+#include "../TestBase.h"
+#include 
+#include 
+#include 
+#include 
+
+#define TEST_PROVENANCE_STORAGE_SIZE (1024*100)  // 100 KB
+#define TEST_MAX_PROVENANCE_STORAGE_SIZE (100*1024*1024)  // 100 MB
+
+#define TEST_PROVENANCE_ENTRY_LIFE_TIME (1000)  // 1 sec
+
+void generateData(std::vector& data) {
+  std::random_device rd;
+  std::mt19937 eng(rd());
+
+  std::uniform_int_distribution<> distr(std::numeric_limits::min(), 
std::numeric_limits::max());
+  auto rand = std::bind(distr, eng);
+  std::generate_n(data.begin(), data.size(), rand);
+}
+
+void provisionRepo(minifi::provenance::ProvenanceRepository& repo, size_t 
count, size_t size) {
+  for (int i = 0; i < count; ++i) {
+std::vector v(size);
+generateData(v);
+REQUIRE(repo.Put(std::to_string(i), reinterpret_cast(v.data()), v.size()));
+  }
+}
+
+void verifyMaxKeyCount(const minifi::provenance::ProvenanceRepository& repo, 
uint64_t keyCount) {
+  uint64_t k = keyCount;
+
+  for (int i = 0; i < 5; ++i) {
+std::this_thread::sleep_for(std::chrono::seconds(1));
+k = std::min(k, repo.getKeyCount());
+if (k < keyCount) {
+  break;
+}
+  }
+
+  REQUIRE(k < keyCount);
+}
+
+TEST_CASE("Test size limit", "[sizeLimitTest]") {
+  TestController testController;
+
+  char dirtemplate[] = "/tmp/db.XX";
+  auto temp_dir = testController.createTempDirectory(dirtemplate);
+  REQUIRE(!temp_dir.empty());
+
+  // 20 sec, 100kb - going to exceed the latter
+  minifi::provenance::ProvenanceRepository provdb("TestProvRepo", temp_dir,
 
 Review comment:
   Nowhere, wrong comment, fixed. 


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [nifi-minifi-cpp] arpadboda commented on a change in pull request #716: MINIFICPP-1127 - Provenance repo performance should be improved

2020-02-05 Thread GitBox
arpadboda commented on a change in pull request #716: MINIFICPP-1127 - 
Provenance repo performance should be improved
URL: https://github.com/apache/nifi-minifi-cpp/pull/716#discussion_r375216482
 
 

 ##
 File path: libminifi/test/rocksdb-tests/DBProvenanceRepositoryTests.cpp
 ##
 @@ -0,0 +1,122 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ProvenanceRepository.h"
+#include "../TestBase.h"
+#include 
+#include 
+#include 
+#include 
+
+#define TEST_PROVENANCE_STORAGE_SIZE (1024*100)  // 100 KB
+#define TEST_MAX_PROVENANCE_STORAGE_SIZE (100*1024*1024)  // 100 MB
+
+#define TEST_PROVENANCE_ENTRY_LIFE_TIME (1000)  // 1 sec
+
+void generateData(std::vector& data) {
+  std::random_device rd;
+  std::mt19937 eng(rd());
+
+  std::uniform_int_distribution<> distr(std::numeric_limits::min(), 
std::numeric_limits::max());
+  auto rand = std::bind(distr, eng);
+  std::generate_n(data.begin(), data.size(), rand);
+}
+
+void provisionRepo(minifi::provenance::ProvenanceRepository& repo, size_t 
count, size_t size) {
+  for (int i = 0; i < count; ++i) {
+std::vector v(size);
+generateData(v);
+REQUIRE(repo.Put(std::to_string(i), reinterpret_cast(v.data()), v.size()));
+  }
+}
+
+void verifyMaxKeyCount(const minifi::provenance::ProvenanceRepository& repo, 
uint64_t keyCount) {
+  uint64_t k = keyCount;
+
+  for (int i = 0; i < 5; ++i) {
+std::this_thread::sleep_for(std::chrono::seconds(1));
+k = std::min(k, repo.getKeyCount());
+if (k < keyCount) {
+  break;
+}
+  }
+
+  REQUIRE(k < keyCount);
+}
+
+TEST_CASE("Test size limit", "[sizeLimitTest]") {
+  TestController testController;
+
+  char dirtemplate[] = "/tmp/db.XX";
+  auto temp_dir = testController.createTempDirectory(dirtemplate);
+  REQUIRE(!temp_dir.empty());
+
+  // 20 sec, 100kb - going to exceed the latter
+  minifi::provenance::ProvenanceRepository provdb("TestProvRepo", temp_dir,
+  MAX_PROVENANCE_ENTRY_LIFE_TIME, TEST_PROVENANCE_STORAGE_SIZE, 1000);
+
+  auto configuration = 
std::make_shared();
+  
configuration->set(minifi::Configure::nifi_dbcontent_repository_directory_default,
 temp_dir);
+
+  REQUIRE(provdb.initialize(configuration));
+
+  uint64_t keyCount = 500;
+
+  provisionRepo(provdb, keyCount, 10240);
+
+  verifyMaxKeyCount(provdb, 200);
+}
+
+TEST_CASE("Test time limit", "[timeLimitTest]") {
+  TestController testController;
+
+  char dirtemplate[] = "/tmp/db.XX";
+  auto temp_dir = testController.createTempDirectory(dirtemplate);
+  REQUIRE(!temp_dir.empty());
+
+  // 20 sec, 100kb - going to exceed the latter
+  minifi::provenance::ProvenanceRepository provdb("TestProvRepo", temp_dir,
 
 Review comment:
   Copy-pasted my own comment that was obsolete even by the time I wrote it. :)
   Fixed. 


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [nifi-minifi-cpp] arpadboda commented on a change in pull request #716: MINIFICPP-1127 - Provenance repo performance should be improved

2020-02-04 Thread GitBox
arpadboda commented on a change in pull request #716: MINIFICPP-1127 - 
Provenance repo performance should be improved
URL: https://github.com/apache/nifi-minifi-cpp/pull/716#discussion_r374813056
 
 

 ##
 File path: extensions/rocksdb-repos/ProvenanceRepository.cpp
 ##
 @@ -17,72 +17,31 @@
  */
 
 #include "ProvenanceRepository.h"
-#include "rocksdb/write_batch.h"
 #include 
-#include 
-#include "rocksdb/options.h"
-#include "provenance/Provenance.h"
 namespace org {
 namespace apache {
 namespace nifi {
 namespace minifi {
 namespace provenance {
 
-void ProvenanceRepository::flush() {
-  rocksdb::WriteBatch batch;
-  std::string key;
-  std::string value;
-  rocksdb::ReadOptions options;
-  uint64_t decrement_total = 0;
-  while (keys_to_delete.size_approx() > 0) {
-if (keys_to_delete.try_dequeue(key)) {
-  db_->Get(options, key, );
-  decrement_total += value.size();
-  batch.Delete(key);
-  logger_->log_debug("Removing %s", key);
-}
-  }
-  if (db_->Write(rocksdb::WriteOptions(), ).ok()) {
-logger_->log_debug("Decrementing %u from a repo size of %u", 
decrement_total, repo_size_.load());
-if (decrement_total > repo_size_.load()) {
-  repo_size_ = 0;
-} else {
-  repo_size_ -= decrement_total;
-}
-  }
+void ProvenanceRepository::printStats() {
+  std::string key_count;
+  db_->GetProperty("rocksdb.estimate-num-keys", _count);
+
+  std::string table_readers;
+  db_->GetProperty("rocksdb.estimate-table-readers-mem", _readers);
+
+  std::string all_memtables;
+  db_->GetProperty("rocksdb.cur-size-all-mem-tables", _memtables);
+
+  logger_->log_info("Repository stats: key count: %zu, table readers size: 
%zu, all memory tables size: %zu",
+key_count, table_readers, all_memtables);
 }
 
 void ProvenanceRepository::run() {
   while (running_) {
-std::this_thread::sleep_for(std::chrono::milliseconds(purge_period_));
-uint64_t curTime = getTimeMillis();
-// threshold for purge
-uint64_t purgeThreshold = max_partition_bytes_ * 3 / 4;
-
-uint64_t size = getRepoSize();
-
-if (size >= purgeThreshold) {
-  rocksdb::Iterator* it = db_->NewIterator(rocksdb::ReadOptions());
-  for (it->SeekToFirst(); it->Valid(); it->Next()) {
-ProvenanceEventRecord eventRead;
-std::string key = it->key().ToString();
-uint64_t eventTime = 
eventRead.getEventTime(reinterpret_cast(const_cast(it->value().data())),
 it->value().size());
-if (eventTime > 0) {
-  if ((curTime - eventTime) > (uint64_t)max_partition_millis_)
-Delete(key);
-} else {
-  logger_->log_debug("NiFi Provenance retrieve event %s fail", key);
-  Delete(key);
-}
-  }
-  delete it;
-}
-flush();
-size = getRepoSize();
-if (size > (uint64_t)max_partition_bytes_)
-  repo_full_ = true;
-else
-  repo_full_ = false;
+std::this_thread::sleep_for(std::chrono::seconds(30));
 
 Review comment:
   Okay, added a temporary hack


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [nifi-minifi-cpp] arpadboda commented on a change in pull request #716: MINIFICPP-1127 - Provenance repo performance should be improved

2020-02-04 Thread GitBox
arpadboda commented on a change in pull request #716: MINIFICPP-1127 - 
Provenance repo performance should be improved
URL: https://github.com/apache/nifi-minifi-cpp/pull/716#discussion_r374678534
 
 

 ##
 File path: extensions/rocksdb-repos/ProvenanceRepository.cpp
 ##
 @@ -17,72 +17,31 @@
  */
 
 #include "ProvenanceRepository.h"
-#include "rocksdb/write_batch.h"
 #include 
-#include 
-#include "rocksdb/options.h"
-#include "provenance/Provenance.h"
 namespace org {
 namespace apache {
 namespace nifi {
 namespace minifi {
 namespace provenance {
 
-void ProvenanceRepository::flush() {
-  rocksdb::WriteBatch batch;
-  std::string key;
-  std::string value;
-  rocksdb::ReadOptions options;
-  uint64_t decrement_total = 0;
-  while (keys_to_delete.size_approx() > 0) {
-if (keys_to_delete.try_dequeue(key)) {
-  db_->Get(options, key, );
-  decrement_total += value.size();
-  batch.Delete(key);
-  logger_->log_debug("Removing %s", key);
-}
-  }
-  if (db_->Write(rocksdb::WriteOptions(), ).ok()) {
-logger_->log_debug("Decrementing %u from a repo size of %u", 
decrement_total, repo_size_.load());
-if (decrement_total > repo_size_.load()) {
-  repo_size_ = 0;
-} else {
-  repo_size_ -= decrement_total;
-}
-  }
+void ProvenanceRepository::printStats() {
+  std::string key_count;
+  db_->GetProperty("rocksdb.estimate-num-keys", _count);
+
+  std::string table_readers;
+  db_->GetProperty("rocksdb.estimate-table-readers-mem", _readers);
+
+  std::string all_memtables;
+  db_->GetProperty("rocksdb.cur-size-all-mem-tables", _memtables);
+
+  logger_->log_info("Repository stats: key count: %zu, table readers size: 
%zu, all memory tables size: %zu",
+key_count, table_readers, all_memtables);
 }
 
 void ProvenanceRepository::run() {
   while (running_) {
-std::this_thread::sleep_for(std::chrono::milliseconds(purge_period_));
-uint64_t curTime = getTimeMillis();
-// threshold for purge
-uint64_t purgeThreshold = max_partition_bytes_ * 3 / 4;
-
-uint64_t size = getRepoSize();
-
-if (size >= purgeThreshold) {
-  rocksdb::Iterator* it = db_->NewIterator(rocksdb::ReadOptions());
-  for (it->SeekToFirst(); it->Valid(); it->Next()) {
-ProvenanceEventRecord eventRead;
-std::string key = it->key().ToString();
-uint64_t eventTime = 
eventRead.getEventTime(reinterpret_cast(const_cast(it->value().data())),
 it->value().size());
-if (eventTime > 0) {
-  if ((curTime - eventTime) > (uint64_t)max_partition_millis_)
-Delete(key);
-} else {
-  logger_->log_debug("NiFi Provenance retrieve event %s fail", key);
-  Delete(key);
-}
-  }
-  delete it;
-}
-flush();
-size = getRepoSize();
-if (size > (uint64_t)max_partition_bytes_)
-  repo_full_ = true;
-else
-  repo_full_ = false;
+std::this_thread::sleep_for(std::chrono::seconds(30));
 
 Review comment:
   Yes, the same applied to the previous implementation, and it still applies 
(for 5 secs on average) in the FF repo (cleanup). 
   
   I also think that we should use callback timers, but I would prefer to do it 
in a unified way for all rocksdb repos in scope of 
https://issues.apache.org/jira/browse/MINIFICPP-1145


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [nifi-minifi-cpp] arpadboda commented on a change in pull request #716: MINIFICPP-1127 - Provenance repo performance should be improved

2020-02-04 Thread GitBox
arpadboda commented on a change in pull request #716: MINIFICPP-1127 - 
Provenance repo performance should be improved
URL: https://github.com/apache/nifi-minifi-cpp/pull/716#discussion_r374670873
 
 

 ##
 File path: libminifi/include/core/Repository.h
 ##
 @@ -85,6 +85,10 @@ class Repository : public virtual 
core::SerializableComponent, public core::Trac
 
   virtual void flush();
 
+  virtual void printStats() {
+return;
+  }
+
 
 Review comment:
   Wanted to remove and follow up in 
https://issues.apache.org/jira/browse/MINIFICPP-1145.
   
   Done. 


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [nifi-minifi-cpp] arpadboda commented on a change in pull request #716: MINIFICPP-1127 - Provenance repo performance should be improved

2020-02-04 Thread GitBox
arpadboda commented on a change in pull request #716: MINIFICPP-1127 - 
Provenance repo performance should be improved
URL: https://github.com/apache/nifi-minifi-cpp/pull/716#discussion_r374664752
 
 

 ##
 File path: extensions/rocksdb-repos/ProvenanceRepository.h
 ##
 @@ -99,9 +90,26 @@ class ProvenanceRepository : public core::Repository, 
public std::enable_shared_
 options.create_if_missing = true;
 options.use_direct_io_for_flush_and_compaction = true;
 options.use_direct_reads = true;
-rocksdb::Status status = rocksdb::DB::Open(options, directory_, &db_);
+int64_t max_buffer_size = 16 << 20;
 
 Review comment:
   Added comment. 


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [nifi-minifi-cpp] arpadboda commented on a change in pull request #716: MINIFICPP-1127 - Provenance repo performance should be improved

2020-02-04 Thread GitBox
arpadboda commented on a change in pull request #716: MINIFICPP-1127 - 
Provenance repo performance should be improved
URL: https://github.com/apache/nifi-minifi-cpp/pull/716#discussion_r374664831
 
 

 ##
 File path: extensions/rocksdb-repos/ProvenanceRepository.h
 ##
 @@ -56,21 +55,13 @@ class ProvenanceRepository : public core::Repository, 
public std::enable_shared_
 db_ = NULL;
   }
 
-  // Destructor
-  virtual ~ProvenanceRepository() {
-if (db_)
-  delete db_;
-  }
-
-  virtual void flush();
+  virtual void printStats();
 
 Review comment:
   Removed. 


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [nifi-minifi-cpp] arpadboda commented on a change in pull request #716: MINIFICPP-1127 - Provenance repo performance should be improved

2020-02-04 Thread GitBox
arpadboda commented on a change in pull request #716: MINIFICPP-1127 - 
Provenance repo performance should be improved
URL: https://github.com/apache/nifi-minifi-cpp/pull/716#discussion_r374664868
 
 

 ##
 File path: extensions/rocksdb-repos/ProvenanceRepository.cpp
 ##
 @@ -17,72 +17,31 @@
  */
 
 #include "ProvenanceRepository.h"
-#include "rocksdb/write_batch.h"
 #include 
-#include 
-#include "rocksdb/options.h"
-#include "provenance/Provenance.h"
 namespace org {
 namespace apache {
 namespace nifi {
 namespace minifi {
 namespace provenance {
 
-void ProvenanceRepository::flush() {
-  rocksdb::WriteBatch batch;
-  std::string key;
-  std::string value;
-  rocksdb::ReadOptions options;
-  uint64_t decrement_total = 0;
-  while (keys_to_delete.size_approx() > 0) {
-if (keys_to_delete.try_dequeue(key)) {
-  db_->Get(options, key, &value);
-  decrement_total += value.size();
-  batch.Delete(key);
-  logger_->log_debug("Removing %s", key);
-}
-  }
-  if (db_->Write(rocksdb::WriteOptions(), &batch).ok()) {
-logger_->log_debug("Decrementing %u from a repo size of %u", 
decrement_total, repo_size_.load());
-if (decrement_total > repo_size_.load()) {
-  repo_size_ = 0;
-} else {
-  repo_size_ -= decrement_total;
-}
-  }
+void ProvenanceRepository::printStats() {
+  logger_->log_info("ProvenanceRepository stats:");
+
+  std::string out;
+  db_->GetProperty("rocksdb.estimate-num-keys", &out);
+  logger_->log_info("\\--Estimated key count: %s", out);
+
+  db_->GetProperty("rocksdb.estimate-table-readers-mem", &out);
+  logger_->log_info("\\--Estimated table readers memory consumption: %s", out);
+
+  db_->GetProperty("rocksdb.cur-size-all-mem-tables", &out);
+  logger_->log_info("\\--Size of all memory tables: %s", out);
 
 Review comment:
   Done


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services