szaszm commented on a change in pull request #716: MINIFICPP-1127 - Provenance 
repo performance should be improved
URL: https://github.com/apache/nifi-minifi-cpp/pull/716#discussion_r375226770
 
 

 ##########
 File path: libminifi/test/rocksdb-tests/DBProvenanceRepositoryTests.cpp
 ##########
 @@ -0,0 +1,122 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ProvenanceRepository.h"
+#include "../TestBase.h"
+#include <array>
+#include <chrono>
+#include <vector>
+#include <random>
+
+#define TEST_PROVENANCE_STORAGE_SIZE (1024*100)  // 100 KB
+#define TEST_MAX_PROVENANCE_STORAGE_SIZE (100*1024*1024)  // 100 MB
+
+#define TEST_PROVENANCE_ENTRY_LIFE_TIME (1000)  // 1 sec
+
+void generateData(std::vector<char>& data) {
+  std::random_device rd;
+  std::mt19937 eng(rd());
+
+  std::uniform_int_distribution<> distr(std::numeric_limits<char>::min(), 
std::numeric_limits<char>::max());
+  auto rand = std::bind(distr, eng);
+  std::generate_n(data.begin(), data.size(), rand);
+}
+
+void provisionRepo(minifi::provenance::ProvenanceRepository& repo, size_t 
count, size_t size) {
+  for (int i = 0; i < count; ++i) {
+    std::vector<char> v(size);
+    generateData(v);
+    REQUIRE(repo.Put(std::to_string(i), reinterpret_cast<const 
uint8_t*>(v.data()), v.size()));
+  }
+}
+
+void verifyMaxKeyCount(const minifi::provenance::ProvenanceRepository& repo, 
uint64_t keyCount) {
+  uint64_t k = keyCount;
+
+  for (int i = 0; i < 5; ++i) {
+    std::this_thread::sleep_for(std::chrono::seconds(1));
+    k = std::min(k, repo.getKeyCount());
+    if (k < keyCount) {
+      break;
+    }
+  }
+
+  REQUIRE(k < keyCount);
+}
+
+TEST_CASE("Test size limit", "[sizeLimitTest]") {
+  TestController testController;
+
+  char dirtemplate[] = "/tmp/db.XXXXXX";
+  auto temp_dir = testController.createTempDirectory(dirtemplate);
+  REQUIRE(!temp_dir.empty());
+
+  // 20 sec, 100kb - going to exceed the latter
+  minifi::provenance::ProvenanceRepository provdb("TestProvRepo", temp_dir,
+      MAX_PROVENANCE_ENTRY_LIFE_TIME, TEST_PROVENANCE_STORAGE_SIZE, 1000);
+
+  auto configuration = 
std::make_shared<org::apache::nifi::minifi::Configure>();
+  
configuration->set(minifi::Configure::nifi_dbcontent_repository_directory_default,
 temp_dir);
+
+  REQUIRE(provdb.initialize(configuration));
+
+  uint64_t keyCount = 500;
+
+  provisionRepo(provdb, keyCount, 10240);
+
+  verifyMaxKeyCount(provdb, 200);
+}
+
+TEST_CASE("Test time limit", "[timeLimitTest]") {
+  TestController testController;
+
+  char dirtemplate[] = "/tmp/db.XXXXXX";
+  auto temp_dir = testController.createTempDirectory(dirtemplate);
+  REQUIRE(!temp_dir.empty());
+
+  // 20 sec, 100kb - going to exceed the latter
+  minifi::provenance::ProvenanceRepository provdb("TestProvRepo", temp_dir,
+                                                  
TEST_PROVENANCE_ENTRY_LIFE_TIME, TEST_MAX_PROVENANCE_STORAGE_SIZE, 1000);
+
+  auto configuration = 
std::make_shared<org::apache::nifi::minifi::Configure>();
+  
configuration->set(minifi::Configure::nifi_dbcontent_repository_directory_default,
 temp_dir);
+
+  REQUIRE(provdb.initialize(configuration));
+
+  uint64_t keyCount = 500;
+
+  provisionRepo(provdb, keyCount / 2, 102400);
+
+  REQUIRE(provdb.getKeyCount() == 250);
+
+  /**
+   * Magic: TTL-based DB cleanup only triggers when writeBuffers are 
serialized to storage
+   * To achieve this 250 entries are put to DB with a total size that ensures 
at least one buffer is serialized
+   * Wait a sec to make sure the serialized records expire
 
 Review comment:
   The comment says "Wait a sec", but the test actually waits 1500 ms.
   Also, if we can reconfigure the cleanup logic so that the test runs faster 
(e.g. waiting only about 200 ms instead of 1500 ms) without sacrificing 
stability, I'm strongly in favor of that. 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Reply via email to