szaszm commented on a change in pull request #1090: URL: https://github.com/apache/nifi-minifi-cpp/pull/1090#discussion_r663928848
########## File path: extensions/rocksdb-repos/encryption/RocksDbEncryptionProvider.cpp ########## @@ -0,0 +1,123 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "RocksDbEncryptionProvider.h" +#include "utils/crypto/ciphers/Aes256Ecb.h" +#include "logging/LoggerConfiguration.h" + +namespace org { +namespace apache { +namespace nifi { +namespace minifi { +namespace core { +namespace repository { + +using utils::crypto::Bytes; +using utils::crypto::Aes256EcbCipher; + +namespace { + +class AES256BlockCipher final : public rocksdb::BlockCipher { + static std::shared_ptr<logging::Logger> logger_; + public: + AES256BlockCipher(std::string database, Aes256EcbCipher cipher_impl) + : database_(std::move(database)), + cipher_impl_(std::move(cipher_impl)) {} + + const char *Name() const override { + return "AES256BlockCipher"; + } + + size_t BlockSize() override { + return Aes256EcbCipher::BLOCK_SIZE; + } + + bool equals(const AES256BlockCipher& other) const { + return cipher_impl_.equals(other.cipher_impl_); + } + + rocksdb::Status Encrypt(char *data) override; + + rocksdb::Status Decrypt(char *data) override; Review comment: I prefer using separate buffers for input and output, unless we can realize 
significant efficiency gains by reusing mutable memory. How do we ensure that the buffer size is sufficient for the output? Plaintext and cipher size can be different in either direction. Tip: use `gsl::span` for the input ########## File path: extensions/rocksdb-repos/DatabaseContentRepository.cpp ########## @@ -42,14 +43,31 @@ bool DatabaseContentRepository::initialize(const std::shared_ptr<minifi::Configu } else { directory_ = configuration->getHome() + "/dbcontentrepository"; } - auto set_db_opts = [] (internal::Writable<rocksdb::DBOptions>& db_opts) { + std::shared_ptr<rocksdb::Env> encrypted_env = [&] { + DbEncryptionOptions encryption_opts; + encryption_opts.database = directory_; + encryption_opts.encryption_key_name = ENCRYPTION_KEY_NAME; + auto env = createEncryptingEnv(utils::crypto::EncryptionManager{configuration->getHome()}, encryption_opts); + if (env) { + logger_->log_info("Using encrypted DatabaseContentRepository"); + } else { + logger_->log_info("Using plaintext DatabaseContentRepository"); + } + return env; + }(); Review comment: This looks like a lot of boilerplate for two lines of meaning. My preference is more dense code, but admittedly it results in longer lines. I find 2 lines repeated 5x less scary than 5 lines repeated 5x. ```suggestion const auto encrypted_env = createEncryptingEnv(utils::crypto::EncryptionManager{configuration->getHome()}, DbEncryptionOptions{directory_, ENCRYPTION_KEY_NAME}); logger_->log_info("Using %s DatabaseContentRepository", env ? 
"encrypted" : "plaintext"); ``` ########## File path: extensions/rocksdb-repos/FlowFileRepository.cpp ########## @@ -220,17 +240,21 @@ void FlowFileRepository::initialize_repository() { logger_->log_trace("Do not need checkpoint"); return; } - rocksdb::Checkpoint *checkpoint; // delete any previous copy - if (utils::file::FileUtils::delete_dir(checkpoint_dir_) >= 0 && opendb->NewCheckpoint(&checkpoint).ok()) { - if (checkpoint->CreateCheckpoint(checkpoint_dir_).ok()) { + if (utils::file::FileUtils::delete_dir(checkpoint_dir_) >= 0) { + rocksdb::Checkpoint* checkpoint = nullptr; + rocksdb::Status checkpoint_status = opendb->NewCheckpoint(&checkpoint); + if (checkpoint_status.ok()) { + checkpoint_status = checkpoint->CreateCheckpoint(checkpoint_dir_); + } + if (checkpoint_status.ok()) { checkpoint_ = std::unique_ptr<rocksdb::Checkpoint>(checkpoint); - logger_->log_trace("Created checkpoint directory"); + logger_->log_trace("Created checkpoint in directory '%s'", checkpoint_dir_); } else { - logger_->log_trace("Could not create checkpoint. Corrupt?"); + logger_->log_error("Could not create checkpoint: %s", checkpoint_status.ToString()); } } else - logger_->log_trace("Could not create checkpoint directory. Not properly deleted?"); + logger_->log_error("Could not delete existing checkpoint directory '%s'", checkpoint_dir_); Review comment: Did you consider extracting this to a function with early returns on errors in each step? I think it would further improve the flow of the code, keeping the happy path operations below each other on the same indentation level. ########## File path: libminifi/test/rocksdb-tests/EncryptionTests.cpp ########## @@ -0,0 +1,108 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../TestBase.h" +#include "utils/TestUtils.h" +#include "FlowFileRepository.h" +#include "utils/IntegrationTestUtils.h" + +using utils::Path; +using core::repository::FlowFileRepository; + +class FFRepoFixture : public TestController { + public: + FFRepoFixture() { + LogTestController::getInstance().setDebug<minifi::FlowFileRecord>(); + LogTestController::getInstance().setDebug<minifi::Connection>(); + LogTestController::getInstance().setTrace<FlowFileRepository>(); + home_ = createTempDirectory("/var/tmp/testRepo.XXXXXX"); + repo_dir_ = home_ / "flowfile_repo"; + checkpoint_dir_ = home_ / "checkpoint_dir"; + config_ = std::make_shared<minifi::Configure>(); + config_->setHome(home_.str()); + container_ = std::make_shared<minifi::Connection>(nullptr, nullptr, "container"); + content_repo_ = std::make_shared<core::repository::VolatileContentRepository>(); + content_repo_->initialize(config_); + } + + static void putFlowFile(const std::shared_ptr<minifi::FlowFileRecord>& flowfile, const std::shared_ptr<core::repository::FlowFileRepository>& repo) { + minifi::io::BufferStream buffer; + flowfile->Serialize(buffer); + REQUIRE(repo->Put(flowfile->getUUIDStr(), buffer.getBuffer(), buffer.size())); + } + + template<typename Fn> + void runWithNewRepository(Fn&& fn) { + auto repository = std::make_shared<FlowFileRepository>("ff", checkpoint_dir_.str(), repo_dir_.str()); + 
repository->initialize(config_); + std::map<std::string, std::shared_ptr<core::Connectable>> container_map; + container_map[container_->getUUIDStr()] = container_; + repository->setContainers(container_map); + repository->loadComponent(content_repo_); + repository->start(); + std::forward<Fn>(fn)(repository); + repository->stop(); + } + + protected: + std::shared_ptr<minifi::Connection> container_; + Path home_; + Path repo_dir_; + Path checkpoint_dir_; + std::shared_ptr<minifi::Configure> config_; + std::shared_ptr<core::repository::VolatileContentRepository> content_repo_; +}; + +TEST_CASE_METHOD(FFRepoFixture, "FlowFileRepository creates checkpoint and loads flowfiles") { + SECTION("Without encryption") { + // pass + } + SECTION("With encryption") { + utils::file::FileUtils::create_dir((home_ / "conf").str()); + std::ofstream{(home_ / "conf" / "bootstrap.conf").str()} + << static_cast<const char*>(FlowFileRepository::ENCRYPTION_KEY_NAME) << "=" + << "805D7B95EF44DC27C87FFBC4DFDE376DAE604D55DB2C5496DEEF5236362DE62E" + << "\n"; + } + + + runWithNewRepository([&] (const std::shared_ptr<core::repository::FlowFileRepository>& repo) { + auto flowfile = std::make_shared<minifi::FlowFileRecord>(); + flowfile->setAttribute("my little pony", "my horse is amazing"); + flowfile->setConnection(container_); + putFlowFile(flowfile, repo); + }); + + REQUIRE(container_->isEmpty()); + + runWithNewRepository([&] (const std::shared_ptr<core::repository::FlowFileRepository>& /*repo*/) { + // wait for the flowfiles to be loaded from the checkpoint + bool success = utils::verifyEventHappenedInPollTime(std::chrono::seconds{5}, [&] { + return !container_->isEmpty(); + }); + REQUIRE(success); + REQUIRE(utils::verifyLogLinePresenceInPollTime( + std::chrono::seconds{5}, + "Successfully opened checkpoint database at '" + checkpoint_dir_.str() + "'")); + std::set<std::shared_ptr<core::FlowFile>> expired; + auto flowfile = container_->poll(expired); + REQUIRE(expired.empty()); + 
+ REQUIRE(flowfile); + REQUIRE(flowfile->getAttribute("my little pony") == "my horse is amazing"); Review comment: Can we have an assertion on the ciphertext value in the repository? It would be a great proof that everything is indeed encrypted on disk and not just working. ########## File path: extensions/rocksdb-repos/encryption/RocksDbEncryptionProvider.cpp ########## @@ -0,0 +1,123 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "RocksDbEncryptionProvider.h" +#include "utils/crypto/ciphers/Aes256Ecb.h" +#include "logging/LoggerConfiguration.h" + +namespace org { +namespace apache { +namespace nifi { +namespace minifi { +namespace core { +namespace repository { + +using utils::crypto::Bytes; +using utils::crypto::Aes256EcbCipher; + +namespace { + +class AES256BlockCipher final : public rocksdb::BlockCipher { Review comment: [`systemd::libwrapper::DlopenJournal`](https://github.com/apache/nifi-minifi-cpp/blob/main/extensions/systemd/libwrapper/DlopenWrapper.cpp#L35) also started out with static linkage. The reason I changed it was that debuggers have a hard time showing symbols that are not there and this can make debugging much harder. 
I think I had to step through asm instructions at one point to debug something. If you think having type info for these types while debugging is worth the overhead, consider moving the classes out of the anonymous namespace. ########## File path: extensions/rocksdb-repos/encryption/RocksDbEncryptionProvider.cpp ########## @@ -0,0 +1,123 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "RocksDbEncryptionProvider.h" +#include "utils/crypto/ciphers/Aes256Ecb.h" +#include "logging/LoggerConfiguration.h" + +namespace org { +namespace apache { +namespace nifi { +namespace minifi { +namespace core { +namespace repository { + +using utils::crypto::Bytes; +using utils::crypto::Aes256EcbCipher; + +namespace { + +class AES256BlockCipher final : public rocksdb::BlockCipher { + static std::shared_ptr<logging::Logger> logger_; + public: + AES256BlockCipher(std::string database, Aes256EcbCipher cipher_impl) + : database_(std::move(database)), + cipher_impl_(std::move(cipher_impl)) {} + + const char *Name() const override { + return "AES256BlockCipher"; + } + + size_t BlockSize() override { + return Aes256EcbCipher::BLOCK_SIZE; + } + + bool equals(const AES256BlockCipher& other) const { + return cipher_impl_.equals(other.cipher_impl_); + } Review comment: Why not `operator==`? Same with `EncryptingEnv`. ########## File path: libminifi/include/utils/crypto/ciphers/Aes256Ecb.h ########## @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include <string> +#include <memory> +#include <utility> + +#include "utils/crypto/EncryptionUtils.h" +#include "Exception.h" +#include "core/logging/Logger.h" + +namespace org { +namespace apache { +namespace nifi { +namespace minifi { +namespace utils { +namespace crypto { + +class CipherError : public Exception { + public: + explicit CipherError(const std::string& error_msg) : Exception(ExceptionType::GENERAL_EXCEPTION, error_msg) {} +}; + +class Aes256EcbCipher { + static std::shared_ptr<core::logging::Logger> logger_; + public: + static constexpr size_t BLOCK_SIZE = 16; + static constexpr size_t KEY_SIZE = 32; + + explicit Aes256EcbCipher(Bytes encryption_key); + void encrypt(unsigned char* data) const; + void decrypt(unsigned char* data) const; + + static Bytes generateKey(); + + bool equals(const Aes256EcbCipher& other) const; + + private: + template<typename ...Args> + static void handleError(Args&& ...args) { + std::string error_msg = core::logging::format_string(-1, "%s", std::forward<Args>(args)...); + logger_->log_error("%s", error_msg); + throw CipherError(error_msg); + } Review comment: This seems to just print the passed first argument string as is, but it's passed a format string in the constructor. If this function is supposed to expect format strings, line 57 shouldn't have the `"%s"` argument. Consider making this function take a C string and handle the formatting in the constructor. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
