This is an automated email from the ASF dual-hosted git repository. szaszm pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/nifi-minifi-cpp.git
commit 2c7f989aea41d801ba22507b703664cae9dac1cc Author: Gabor Gyimesi <[email protected]> AuthorDate: Fri Feb 17 01:16:13 2023 +0100 MINIFICPP-2007 Add rocksdb compression options - Add bundled zstd and lz4 thirdparty libraries - Upgrade rocksdb to version 7.7.3 - Add compression options bzip2, zlib, zstd and lz4 on Unix and xpress on Windows Closes #1480 Signed-off-by: Marton Szasz <[email protected]> --- CMakeLists.txt | 6 ++ CONFIGURE.md | 9 ++ LICENSE | 102 ++++++++++++++++----- NOTICE | 2 + cmake/BundledRocksDB.cmake | 35 ++++++- cmake/BundledZLIB.cmake | 1 - cmake/LZ4.cmake | 53 +++++++++++ cmake/Zstd.cmake | 56 +++++++++++ cmake/lz4/dummy/Findlz4.cmake | 33 +++++++ cmake/zstd/dummy/Findzstd.cmake | 33 +++++++ conf/minifi.properties | 2 + extensions/libarchive/CMakeLists.txt | 6 -- .../rocksdb-repos/DatabaseContentRepository.cpp | 5 +- extensions/rocksdb-repos/FlowFileRepository.cpp | 5 +- extensions/rocksdb-repos/database/RocksDbUtils.cpp | 54 +++++++++++ extensions/rocksdb-repos/database/RocksDbUtils.h | 21 ++--- .../rocksdb-repos/database/StringAppender.cpp | 2 +- .../standard-processors/tests/unit/PutTCPTests.cpp | 4 +- libminifi/include/properties/Configuration.h | 2 + libminifi/src/Configuration.cpp | 2 + ...e_gcc_clang_compiler_options_from_windows.patch | 37 ++++++++ 21 files changed, 420 insertions(+), 50 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 513afe7ae..3eac599b1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -214,6 +214,12 @@ else() message(VERBOSE "No custom malloc implementation") endif() +if (NOT DISABLE_BZIP2 AND (NOT DISABLE_LIBARCHIVE OR (NOT DISABLE_ROCKSDB AND NOT WIN32))) + include(BundledBZip2) + use_bundled_bzip2(${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR}) + list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/bzip2/dummy") +endif() + if(NOT WIN32) if (ENABLE_JNI) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DENABLE_JNI") diff --git a/CONFIGURE.md b/CONFIGURE.md index 304407ecb..c9beff2c1 100644 --- 
a/CONFIGURE.md +++ b/CONFIGURE.md @@ -162,6 +162,15 @@ folder. You may specify your own path in place of these defaults. nifi.flowfile.repository.directory.default=${MINIFI_HOME}/flowfile_repository nifi.database.content.repository.directory.default=${MINIFI_HOME}/content_repository +### Configuring compression for rocksdb database + +Rocksdb has an option to set compression type for its database to use less disk space. +If content repository or flow file repository is set to use the rocksdb database as their storage, then we have the option to compress those repositories. On Unix operating systems `zlib`, `bzip2`, `zstd`, `lz4` and `lz4hc` compression types and on Windows `xpress` compression type is supported by MiNiFi C++. If the property is set to `auto` then `xpress` will be used on Windows, `zstd` on Unix operating systems. These options can be set in the minifi.properties file with the following pr [...] + + in minifi.properties + nifi.flowfile.repository.rocksdb.compression=zlib + nifi.content.repository.rocksdb.compression=auto + #### Shared database It is also possible to use a single database to store multiple repositories with the `minifidb://` scheme. diff --git a/LICENSE b/LICENSE index 823ac8688..e2697b9d3 100644 --- a/LICENSE +++ b/LICENSE @@ -3407,32 +3407,90 @@ For these and/or other purposes and motivations, and without any expectation of Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work. ``` --------------------------------------------------------------------------- - This product bundles 'prometheus-cpp' which is available under an MIT license. 
- MIT License + MIT License - Copyright (c) 2016-2021 Jupp Mueller - Copyright (c) 2017-2022 Gregor Jasny + Copyright (c) 2016-2021 Jupp Mueller + Copyright (c) 2017-2022 Gregor Jasny - And many contributors, see - https://github.com/jupp0r/prometheus-cpp/graphs/contributors + And many contributors, see + https://github.com/jupp0r/prometheus-cpp/graphs/contributors - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + +This product bundles 'Zstandard' which is available under a BSD License. + + BSD License + + For Zstandard software + + Copyright (c) 2016-present, Facebook, Inc. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name Facebook nor the names of its contributors may be used to + endorse or promote products derived from this software without specific + prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +This product bundles 'LZ4 Library' which is available under a BSD 2-Clause license. + + LZ4 Library + Copyright (c) 2011-2020, Yann Collet + All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/NOTICE b/NOTICE index 7502e6c9e..60476b637 100644 --- a/NOTICE +++ b/NOTICE @@ -68,6 +68,8 @@ This software includes third party software subject to the following copyrights: - abseil-cpp - Google Inc. - crc32c - Google Inc., Fangming Fang, Vadim Skipin, Rodrigo Tobar, Harry Mallon - prometheus-cpp - Copyright (c) 2016-2021 Jupp Mueller, Copyright (c) 2017-2022 Gregor Jasny +- Zstandard - Copyright (c) 2016-present, Facebook, Inc. All rights reserved. +- LZ4 Library - Copyright (c) 2011-2020, Yann Collet The licenses for these third party components are included in LICENSE.txt diff --git a/cmake/BundledRocksDB.cmake b/cmake/BundledRocksDB.cmake index 989204ebc..b44855ee1 100644 --- a/cmake/BundledRocksDB.cmake +++ b/cmake/BundledRocksDB.cmake @@ -18,6 +18,14 @@ function(use_bundled_rocksdb SOURCE_DIR BINARY_DIR) message("Using bundled RocksDB") + if (NOT WIN32) + include(Zstd) + list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/zstd/dummy") + + include(LZ4) + list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/lz4/dummy") + endif() + # Define byproducts if (WIN32) set(BYPRODUCT "lib/rocksdb.lib") @@ -43,18 +51,29 @@ function(use_bundled_rocksdb SOURCE_DIR BINARY_DIR) list(APPEND ROCKSDB_CMAKE_ARGS -DPORTABLE=ON) endif() if(WIN32) - list(APPEND ROCKSDB_CMAKE_ARGS -DROCKSDB_INSTALL_ON_WINDOWS=ON) + list(APPEND ROCKSDB_CMAKE_ARGS + -DROCKSDB_INSTALL_ON_WINDOWS=ON + -DWITH_XPRESS=ON) + else() + list(APPEND ROCKSDB_CMAKE_ARGS + -DWITH_ZLIB=ON + -DWITH_BZ2=ON + -DWITH_ZSTD=ON + -DWITH_LZ4=ON) endif() + append_third_party_passthrough_args(ROCKSDB_CMAKE_ARGS "${ROCKSDB_CMAKE_ARGS}") + # Build project ExternalProject_Add( rocksdb-external - URL "https://github.com/facebook/rocksdb/archive/refs/tags/v6.29.5.tar.gz" - URL_HASH "SHA256=ddbf84791f0980c0bbce3902feb93a2c7006f6f53bfd798926143e31d4d756f0" + URL "https://github.com/facebook/rocksdb/archive/refs/tags/v7.7.3.tar.gz" + URL_HASH 
"SHA256=b8ac9784a342b2e314c821f6d701148912215666ac5e9bdbccd93cf3767cb611" SOURCE_DIR "${BINARY_DIR}/thirdparty/rocksdb-src" CMAKE_ARGS ${ROCKSDB_CMAKE_ARGS} BUILD_BYPRODUCTS "${BINARY_DIR}/thirdparty/rocksdb-install/${BYPRODUCT}" EXCLUDE_FROM_ALL TRUE + LIST_SEPARATOR % # This is needed for passing semicolon-separated lists ) # Set variables @@ -66,11 +85,17 @@ function(use_bundled_rocksdb SOURCE_DIR BINARY_DIR) # Create imported targets add_library(RocksDB::RocksDB STATIC IMPORTED) set_target_properties(RocksDB::RocksDB PROPERTIES IMPORTED_LOCATION "${ROCKSDB_LIBRARY}") + if (NOT WIN32) + add_dependencies(rocksdb-external ZLIB::ZLIB BZip2::BZip2 zstd::zstd lz4::lz4) + endif() add_dependencies(RocksDB::RocksDB rocksdb-external) file(MAKE_DIRECTORY ${ROCKSDB_INCLUDE_DIR}) - set_property(TARGET RocksDB::RocksDB APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${ROCKSDB_INCLUDE_DIR}) + target_include_directories(RocksDB::RocksDB INTERFACE ${ROCKSDB_INCLUDE_DIR}) set_property(TARGET RocksDB::RocksDB APPEND PROPERTY INTERFACE_LINK_LIBRARIES Threads::Threads) + target_link_libraries(RocksDB::RocksDB INTERFACE Threads::Threads) if(WIN32) - set_property(TARGET RocksDB::RocksDB APPEND PROPERTY INTERFACE_LINK_LIBRARIES Rpcrt4.lib) + target_link_libraries(RocksDB::RocksDB INTERFACE Rpcrt4.lib Cabinet.lib) + else() + target_link_libraries(RocksDB::RocksDB INTERFACE ZLIB::ZLIB BZip2::BZip2 zstd::zstd lz4::lz4) endif() endfunction(use_bundled_rocksdb) diff --git a/cmake/BundledZLIB.cmake b/cmake/BundledZLIB.cmake index 8d2d3542f..16b77c907 100644 --- a/cmake/BundledZLIB.cmake +++ b/cmake/BundledZLIB.cmake @@ -69,6 +69,5 @@ function(use_bundled_zlib SOURCE_DIR BINARY_DIR) add_library(ZLIB::ZLIB STATIC IMPORTED) set_target_properties(ZLIB::ZLIB PROPERTIES IMPORTED_LOCATION "${ZLIB_LIBRARIES}") add_dependencies(ZLIB::ZLIB zlib-external) - file(MAKE_DIRECTORY ${ZLIB_INCLUDE_DIRS}) set_property(TARGET ZLIB::ZLIB APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${ZLIB_INCLUDE_DIRS}) 
endfunction(use_bundled_zlib) diff --git a/cmake/LZ4.cmake b/cmake/LZ4.cmake new file mode 100644 index 000000000..40260740b --- /dev/null +++ b/cmake/LZ4.cmake @@ -0,0 +1,53 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +include(FetchContent) + +set(LZ4_BUILD_CLI OFF CACHE BOOL "" FORCE) +set(LZ4_BUILD_LEGACY_LZ4C OFF CACHE BOOL "" FORCE) +set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE) +set(BUILD_STATIC_LIBS ON CACHE BOOL "" FORCE) + +FetchContent_Declare(lz4 + URL https://github.com/lz4/lz4/archive/refs/tags/v1.9.4.tar.gz + URL_HASH SHA256=0b0e3aa07c8c063ddf40b082bdf7e37a1562bda40a0ff5272957f3e987e0e54b +) + +# With CMake >= 3.18, this block could be replaced with FetchContent_MakeAvailable(lz4), +# if we add the `SOURCE_SUBDIR build/cmake` option to FetchContent_Declare() [this option is not available in CMake < 3.18]. +# As of July 2022, one of our supported platforms, Centos 7, comes with CMake 3.17. 
+FetchContent_GetProperties(lz4) +if(NOT lz4_POPULATED) + FetchContent_Populate(lz4) + # the top level doesn't contain CMakeLists.txt, it is in the "build/cmake" subdirectory + add_subdirectory(${lz4_SOURCE_DIR}/build/cmake ${lz4_BINARY_DIR}) +endif() + +add_library(lz4::lz4 ALIAS lz4_static) + +# Set variables +set(LZ4_FOUND "YES" CACHE STRING "" FORCE) +set(LZ4_INCLUDE_DIRS "${lz4_SOURCE_DIR}/lib" CACHE STRING "" FORCE) +if (WIN32) + set(LZ4_LIBRARIES "${lz4_BINARY_DIR}/lib/${CMAKE_BUILD_TYPE}/lz4_static.lib" CACHE STRING "" FORCE) +else() + set(LZ4_LIBRARIES "${lz4_BINARY_DIR}/liblz4.a" CACHE STRING "" FORCE) +endif() + +# Set exported variables for FindPackage.cmake +set(PASSTHROUGH_VARIABLES ${PASSTHROUGH_VARIABLES} "-DEXPORTED_LZ4_INCLUDE_DIRS=${LZ4_INCLUDE_DIRS}" CACHE STRING "" FORCE) +set(PASSTHROUGH_VARIABLES ${PASSTHROUGH_VARIABLES} "-DEXPORTED_LZ4_LIBRARIES=${LZ4_LIBRARIES}" CACHE STRING "" FORCE) diff --git a/cmake/Zstd.cmake b/cmake/Zstd.cmake new file mode 100644 index 000000000..7d8a2313a --- /dev/null +++ b/cmake/Zstd.cmake @@ -0,0 +1,56 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +include(FetchContent) + +set(ZSTD_BUILD_SHARED OFF CACHE BOOL "" FORCE) + +if (WIN32) + set(PATCH_FILE "${CMAKE_SOURCE_DIR}/thirdparty/zstd/exclude_gcc_clang_compiler_options_from_windows.patch") + set(PC "${Patch_EXECUTABLE}" -p1 -i "${PATCH_FILE}") +endif() + +FetchContent_Declare(zstd + URL https://github.com/facebook/zstd/archive/refs/tags/v1.5.2.tar.gz + URL_HASH SHA256=f7de13462f7a82c29ab865820149e778cbfe01087b3a55b5332707abf9db4a6e + PATCH_COMMAND "${PC}" +) + +# With CMake >= 3.18, this block could be replaced with FetchContent_MakeAvailable(zstd), +# if we add the `SOURCE_SUBDIR build/cmake` option to FetchContent_Declare() [this option is not available in CMake < 3.18]. +# As of July 2022, one of our supported platforms, Centos 7, comes with CMake 3.17. +FetchContent_GetProperties(zstd) +if(NOT zstd_POPULATED) + FetchContent_Populate(zstd) + # the top level doesn't contain CMakeLists.txt, it is in the "build/cmake" subdirectory + add_subdirectory(${zstd_SOURCE_DIR}/build/cmake ${zstd_BINARY_DIR}) +endif() + +add_library(zstd::zstd ALIAS libzstd_static) + +# Set variables +set(ZSTD_FOUND "YES" CACHE STRING "" FORCE) +set(ZSTD_INCLUDE_DIRS "${zstd_SOURCE_DIR}/lib" CACHE STRING "" FORCE) +if (WIN32) + set(ZSTD_LIBRARIES "${zstd_BINARY_DIR}/lib/${CMAKE_BUILD_TYPE}/zstd_static.lib" CACHE STRING "" FORCE) +else() + set(ZSTD_LIBRARIES "${zstd_BINARY_DIR}/lib/libzstd.a" CACHE STRING "" FORCE) +endif() + +# Set exported variables for FindPackage.cmake +set(PASSTHROUGH_VARIABLES ${PASSTHROUGH_VARIABLES} "-DEXPORTED_ZSTD_INCLUDE_DIRS=${ZSTD_INCLUDE_DIRS}" CACHE STRING "" FORCE) +set(PASSTHROUGH_VARIABLES ${PASSTHROUGH_VARIABLES} "-DEXPORTED_ZSTD_LIBRARIES=${ZSTD_LIBRARIES}" CACHE STRING "" FORCE) diff --git a/cmake/lz4/dummy/Findlz4.cmake b/cmake/lz4/dummy/Findlz4.cmake new file mode 100644 index 000000000..b5ab2e66e --- /dev/null +++ b/cmake/lz4/dummy/Findlz4.cmake @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more 
contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Dummy lz4 find for when we use bundled version +if(NOT LZ4_FOUND) + set(LZ4_FOUND "YES" CACHE STRING "" FORCE) + set(LZ4_INCLUDE_DIR "${EXPORTED_LZ4_INCLUDE_DIRS}" CACHE STRING "" FORCE) + set(LZ4_INCLUDE_DIRS "${EXPORTED_LZ4_INCLUDE_DIRS}" CACHE STRING "" FORCE) + set(LZ4_LIBRARIES "${EXPORTED_LZ4_LIBRARIES}" CACHE STRING "" FORCE) +endif() + +if(NOT TARGET lz4::lz4) + add_library(lz4::lz4 STATIC IMPORTED) + set_target_properties(lz4::lz4 PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${LZ4_INCLUDE_DIRS}") + set_target_properties(lz4::lz4 PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + IMPORTED_LOCATION "${LZ4_LIBRARIES}") +endif() diff --git a/cmake/zstd/dummy/Findzstd.cmake b/cmake/zstd/dummy/Findzstd.cmake new file mode 100644 index 000000000..4d857e32c --- /dev/null +++ b/cmake/zstd/dummy/Findzstd.cmake @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Dummy zstd find for when we use bundled version +if(NOT zstd_FOUND) + set(zstd_FOUND "YES" CACHE STRING "" FORCE) + set(ZSTD_INCLUDE_DIR "${EXPORTED_ZSTD_INCLUDE_DIRS}" CACHE STRING "" FORCE) + set(ZSTD_INCLUDE_DIRS "${EXPORTED_ZSTD_INCLUDE_DIRS}" CACHE STRING "" FORCE) + set(ZSTD_LIBRARIES "${EXPORTED_ZSTD_LIBRARIES}" CACHE STRING "" FORCE) +endif() + +if(NOT TARGET zstd::zstd) + add_library(zstd::zstd STATIC IMPORTED) + set_target_properties(zstd::zstd PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${ZSTD_INCLUDE_DIRS}") + set_target_properties(zstd::zstd PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + IMPORTED_LOCATION "${ZSTD_LIBRARIES}") +endif() diff --git a/conf/minifi.properties b/conf/minifi.properties index 491a94016..856de8fb1 100644 --- a/conf/minifi.properties +++ b/conf/minifi.properties @@ -28,9 +28,11 @@ nifi.provenance.repository.directory.default=${MINIFI_HOME}/provenance_repositor nifi.provenance.repository.max.storage.time=1 MIN nifi.provenance.repository.max.storage.size=1 MB nifi.flowfile.repository.directory.default=${MINIFI_HOME}/flowfile_repository +# nifi.flowfile.repository.rocksdb.compression=auto nifi.database.content.repository.directory.default=${MINIFI_HOME}/content_repository nifi.provenance.repository.class.name=NoOpRepository nifi.content.repository.class.name=DatabaseContentRepository +# nifi.content.repository.rocksdb.compression=auto #nifi.remote.input.secure=true #nifi.security.need.ClientAuth= diff --git a/extensions/libarchive/CMakeLists.txt b/extensions/libarchive/CMakeLists.txt index 674306dc7..2b89d3ea7 
100644 --- a/extensions/libarchive/CMakeLists.txt +++ b/extensions/libarchive/CMakeLists.txt @@ -27,12 +27,6 @@ if (NOT DISABLE_LZMA) list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/liblzma/dummy") endif() -if (NOT DISABLE_BZIP2) - include(BundledBZip2) - use_bundled_bzip2(${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR}) - list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/bzip2/dummy") -endif() - include(BundledLibArchive) use_bundled_libarchive(${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR}) diff --git a/extensions/rocksdb-repos/DatabaseContentRepository.cpp b/extensions/rocksdb-repos/DatabaseContentRepository.cpp index 881de1085..9beeb2498 100644 --- a/extensions/rocksdb-repos/DatabaseContentRepository.cpp +++ b/extensions/rocksdb-repos/DatabaseContentRepository.cpp @@ -53,10 +53,13 @@ bool DatabaseContentRepository::initialize(const std::shared_ptr<minifi::Configu db_opts.set(&rocksdb::DBOptions::env, rocksdb::Env::Default()); } }; - auto set_cf_opts = [] (rocksdb::ColumnFamilyOptions& cf_opts){ + auto set_cf_opts = [&configuration] (rocksdb::ColumnFamilyOptions& cf_opts) { cf_opts.OptimizeForPointLookup(4); cf_opts.merge_operator = std::make_shared<StringAppender>(); cf_opts.max_successive_merges = 0; + if (auto compression_type = minifi::internal::readConfiguredCompressionType(configuration, Configure::nifi_content_repository_rocksdb_compression)) { + cf_opts.compression = *compression_type; + } }; db_ = minifi::internal::RocksDatabase::create(set_db_opts, set_cf_opts, directory_); if (db_->open()) { diff --git a/extensions/rocksdb-repos/FlowFileRepository.cpp b/extensions/rocksdb-repos/FlowFileRepository.cpp index ecb9a1bcc..8637b4282 100644 --- a/extensions/rocksdb-repos/FlowFileRepository.cpp +++ b/extensions/rocksdb-repos/FlowFileRepository.cpp @@ -220,11 +220,14 @@ bool FlowFileRepository::initialize(const std::shared_ptr<Configure> &configure) // To avoid DB write issues during heavy load it's recommended to have high number of buffer. 
// Rocksdb's stall feature can also trigger in case the number of buffers is >= 3. // The more buffers we have the more memory rocksdb can utilize without significant memory consumption under low load. - auto cf_options = [] (rocksdb::ColumnFamilyOptions& cf_opts) { + auto cf_options = [&configure] (rocksdb::ColumnFamilyOptions& cf_opts) { cf_opts.OptimizeForPointLookup(4); cf_opts.write_buffer_size = 8ULL << 20U; cf_opts.max_write_buffer_number = 20; cf_opts.min_write_buffer_number_to_merge = 1; + if (auto compression_type = minifi::internal::readConfiguredCompressionType(configure, Configure::nifi_flow_repository_rocksdb_compression)) { + cf_opts.compression = *compression_type; + } }; db_ = minifi::internal::RocksDatabase::create(db_options, cf_options, directory_); if (db_->open()) { diff --git a/extensions/rocksdb-repos/database/RocksDbUtils.cpp b/extensions/rocksdb-repos/database/RocksDbUtils.cpp new file mode 100644 index 000000000..d3834dfef --- /dev/null +++ b/extensions/rocksdb-repos/database/RocksDbUtils.cpp @@ -0,0 +1,54 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "RocksDbUtils.h" + +#include <string> + +#include "Exception.h" + +namespace org::apache::nifi::minifi::internal { + +std::optional<rocksdb::CompressionType> readConfiguredCompressionType(const std::shared_ptr<Configure> &configuration, const std::string& config_key) { + std::string value; + if (!configuration->get(config_key, value) || value.empty()) { + return std::nullopt; + } +#ifdef WIN32 + if (value == "auto" || value == "xpress") { + return rocksdb::CompressionType::kXpressCompression; + } else { + throw Exception(REPOSITORY_EXCEPTION, "RocksDB compression type not supported: " + value); + } +#else + if (value == "zlib") { + return rocksdb::CompressionType::kZlibCompression; + } else if (value == "bzip2") { + return rocksdb::CompressionType::kBZip2Compression; + } else if (value == "auto" || value == "zstd") { + return rocksdb::CompressionType::kZSTD; + } else if (value == "lz4") { + return rocksdb::CompressionType::kLZ4Compression; + } else if (value == "lz4hc") { + return rocksdb::CompressionType::kLZ4HCCompression; + } else { + throw Exception(REPOSITORY_EXCEPTION, "RocksDB compression type not supported: " + value); + } +#endif +} + +} // namespace org::apache::nifi::minifi::internal diff --git a/extensions/rocksdb-repos/database/RocksDbUtils.h b/extensions/rocksdb-repos/database/RocksDbUtils.h index d42afce6a..b5748c117 100644 --- a/extensions/rocksdb-repos/database/RocksDbUtils.h +++ b/extensions/rocksdb-repos/database/RocksDbUtils.h @@ -15,19 +15,20 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #pragma once #include <functional> #include <algorithm> +#include <optional> +#include <memory> +#include <string> + #include "rocksdb/db.h" +#include "rocksdb/options.h" #include "utils/GeneralUtils.h" +#include "properties/Configure.h" -namespace org { -namespace apache { -namespace nifi { -namespace minifi { -namespace internal { +namespace org::apache::nifi::minifi::internal { enum class RocksDbMode { ReadOnly, @@ -69,8 +70,6 @@ class Writable { using DBOptionsPatch = std::function<void(Writable<rocksdb::DBOptions>&)>; using ColumnFamilyOptionsPatch = std::function<void(rocksdb::ColumnFamilyOptions&)>; -} // namespace internal -} // namespace minifi -} // namespace nifi -} // namespace apache -} // namespace org +std::optional<rocksdb::CompressionType> readConfiguredCompressionType(const std::shared_ptr<Configure> &configuration, const std::string& config_key); + +} // namespace org::apache::nifi::minifi::internal diff --git a/extensions/rocksdb-repos/database/StringAppender.cpp b/extensions/rocksdb-repos/database/StringAppender.cpp index 4580dcdbc..7f46e5943 100644 --- a/extensions/rocksdb-repos/database/StringAppender.cpp +++ b/extensions/rocksdb-repos/database/StringAppender.cpp @@ -39,7 +39,7 @@ bool StringAppender::Merge(const rocksdb::Slice& /*key*/, const rocksdb::Slice* return true; } -static auto string_appender_registrar = rocksdb::ObjectLibrary::Default()->Register<StringAppender>( +static auto string_appender_registrar = rocksdb::ObjectLibrary::Default()->AddFactory<StringAppender>( "StringAppender", [] (const std::string& /* uri */, std::unique_ptr<StringAppender>* out, std::string* /* errmsg */) { *out = std::make_unique<StringAppender>(); diff --git a/extensions/standard-processors/tests/unit/PutTCPTests.cpp b/extensions/standard-processors/tests/unit/PutTCPTests.cpp index ac44c4db8..524c451b2 100644 --- a/extensions/standard-processors/tests/unit/PutTCPTests.cpp +++ b/extensions/standard-processors/tests/unit/PutTCPTests.cpp @@ -202,8 
+202,8 @@ class PutTCPTestFixture { put_tcp_->setProperty(PutTCP::Port, utils::StringUtils::join_pack("${literal('", std::to_string(port), "')}")); } - void setPutTCPPort(std::string port_str) { - put_tcp_->setProperty(PutTCP::Port, std::move(port_str)); + void setPutTCPPort(const std::string& port_str) { + put_tcp_->setProperty(PutTCP::Port, port_str); } [[nodiscard]] uint16_t getSinglePort() const { diff --git a/libminifi/include/properties/Configuration.h b/libminifi/include/properties/Configuration.h index 25e6b5ebc..978600bb3 100644 --- a/libminifi/include/properties/Configuration.h +++ b/libminifi/include/properties/Configuration.h @@ -51,7 +51,9 @@ class Configuration : public Properties { static constexpr const char *nifi_server_name = "nifi.server.name"; static constexpr const char *nifi_configuration_class_name = "nifi.flow.configuration.class.name"; static constexpr const char *nifi_flow_repository_class_name = "nifi.flowfile.repository.class.name"; + static constexpr const char *nifi_flow_repository_rocksdb_compression = "nifi.flowfile.repository.rocksdb.compression"; static constexpr const char *nifi_content_repository_class_name = "nifi.content.repository.class.name"; + static constexpr const char *nifi_content_repository_rocksdb_compression = "nifi.content.repository.rocksdb.compression"; static constexpr const char *nifi_provenance_repository_class_name = "nifi.provenance.repository.class.name"; static constexpr const char *nifi_volatile_repository_options_flowfile_max_count = "nifi.volatile.repository.options.flowfile.max.count"; static constexpr const char *nifi_volatile_repository_options_flowfile_max_bytes = "nifi.volatile.repository.options.flowfile.max.bytes"; diff --git a/libminifi/src/Configuration.cpp b/libminifi/src/Configuration.cpp index 78ae790f7..c584cb574 100644 --- a/libminifi/src/Configuration.cpp +++ b/libminifi/src/Configuration.cpp @@ -36,7 +36,9 @@ const std::vector<core::ConfigurationProperty> Configuration::CONFIGURATION_PROP 
core::ConfigurationProperty{Configuration::nifi_server_name}, core::ConfigurationProperty{Configuration::nifi_configuration_class_name}, core::ConfigurationProperty{Configuration::nifi_flow_repository_class_name}, + core::ConfigurationProperty{Configuration::nifi_flow_repository_rocksdb_compression}, core::ConfigurationProperty{Configuration::nifi_content_repository_class_name}, + core::ConfigurationProperty{Configuration::nifi_content_repository_rocksdb_compression}, core::ConfigurationProperty{Configuration::nifi_provenance_repository_class_name}, core::ConfigurationProperty{Configuration::nifi_volatile_repository_options_flowfile_max_count, gsl::make_not_null(core::StandardValidators::get().UNSIGNED_INT_VALIDATOR.get())}, core::ConfigurationProperty{Configuration::nifi_volatile_repository_options_flowfile_max_bytes, gsl::make_not_null(core::StandardValidators::get().DATA_SIZE_VALIDATOR.get())}, diff --git a/thirdparty/zstd/exclude_gcc_clang_compiler_options_from_windows.patch b/thirdparty/zstd/exclude_gcc_clang_compiler_options_from_windows.patch new file mode 100644 index 000000000..c32f78f4f --- /dev/null +++ b/thirdparty/zstd/exclude_gcc_clang_compiler_options_from_windows.patch @@ -0,0 +1,37 @@ +diff --git a/build/cmake/tests/CMakeLists.txt b/build/cmake/tests/CMakeLists.txt +index 8bba6ea6..ce84ed8c 100644 +--- a/build/cmake/tests/CMakeLists.txt ++++ b/build/cmake/tests/CMakeLists.txt +@@ -57,7 +57,9 @@ target_link_libraries(datagen libzstd_static) + # fullbench + # + add_executable(fullbench ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${TESTS_DIR}/fullbench.c) +-set_property(TARGET fullbench APPEND PROPERTY COMPILE_OPTIONS "-Wno-deprecated-declarations") ++if (NOT WIN32) ++ set_property(TARGET fullbench APPEND PROPERTY COMPILE_OPTIONS "-Wno-deprecated-declarations") ++endif() + target_link_libraries(fullbench libzstd_static) + add_test(NAME fullbench COMMAND fullbench 
${ZSTD_FULLBENCH_FLAGS}) + +@@ -65,7 +67,9 @@ add_test(NAME fullbench COMMAND fullbench ${ZSTD_FULLBENCH_FLAGS}) + # fuzzer + # + add_executable(fuzzer ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/fuzzer.c) +-set_property(TARGET fuzzer APPEND PROPERTY COMPILE_OPTIONS "-Wno-deprecated-declarations") ++if (NOT WIN32) ++ set_property(TARGET fuzzer APPEND PROPERTY COMPILE_OPTIONS "-Wno-deprecated-declarations") ++endif() + target_link_libraries(fuzzer libzstd_static) + AddTestFlagsOption(ZSTD_FUZZER_FLAGS "$ENV{FUZZERTEST} $ENV{FUZZER_FLAGS}" + "Semicolon-separated list of flags to pass to the fuzzer test (see `fuzzer -h` for usage)") +@@ -78,7 +82,9 @@ add_test(NAME fuzzer COMMAND fuzzer ${ZSTD_FUZZER_FLAGS}) + # zstreamtest + # + add_executable(zstreamtest ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/seqgen.c ${TESTS_DIR}/zstreamtest.c) +-set_property(TARGET zstreamtest APPEND PROPERTY COMPILE_OPTIONS "-Wno-deprecated-declarations") ++if (NOT WIN32) ++ set_property(TARGET zstreamtest APPEND PROPERTY COMPILE_OPTIONS "-Wno-deprecated-declarations") ++endif() + target_link_libraries(zstreamtest libzstd_static) + AddTestFlagsOption(ZSTD_ZSTREAM_FLAGS "$ENV{ZSTREAM_TESTTIME} $ENV{FUZZER_FLAGS}" + "Semicolon-separated list of flags to pass to the zstreamtest test (see `zstreamtest -h` for usage)")
