This is an automated email from the ASF dual-hosted git repository. silver pushed a commit to branch cpp-more-operation in repository https://gitbox.apache.org/repos/asf/incubator-opendal.git
commit 4f7dad3ed3e339c09811a1c152e31321b84ab733 Author: silver-ymz <[email protected]> AuthorDate: Fri Sep 1 22:48:41 2023 +0800 feat(bindings/cpp): expose all api returned by value Signed-off-by: silver-ymz <[email protected]> --- .github/workflows/bindings_cpp.yml | 18 ++--- Cargo.lock | 1 + bindings/cpp/CMakeLists.txt | 13 +++- bindings/cpp/CONTRIBUTING.md | 18 +++-- bindings/cpp/Cargo.toml | 1 + bindings/cpp/include/opendal.hpp | 97 ++++++++++++++++++++++- bindings/cpp/src/lib.rs | 154 +++++++++++++++++++++++++++++++++++-- bindings/cpp/src/opendal.cpp | 104 ++++++++++++++++++++----- bindings/cpp/tests/basic_test.cpp | 50 ++++++++++-- 9 files changed, 404 insertions(+), 52 deletions(-) diff --git a/.github/workflows/bindings_cpp.yml b/.github/workflows/bindings_cpp.yml index 03cad7633..80d18a0e7 100644 --- a/.github/workflows/bindings_cpp.yml +++ b/.github/workflows/bindings_cpp.yml @@ -45,25 +45,23 @@ jobs: - uses: actions/checkout@v3 - name: Install dependencies run: | - sudo apt-get install libgtest-dev ninja-build - cd /usr/src/gtest - sudo cmake CMakeLists.txt - sudo make - sudo cp lib/*.a /usr/lib - sudo ln -s /usr/lib/libgtest.a /usr/local/lib/libgtest.a - sudo ln -s /usr/lib/libgtest_main.a /usr/local/lib/libgtest_main.a + sudo apt-get install libgtest-dev ninja-build libboost-all-dev valgrind - name: Setup Rust toolchain uses: ./.github/actions/setup - - name: Build Cpp binding + - name: Build Cpp binding && Run tests working-directory: "bindings/cpp" run: | mkdir build cd build cmake -GNinja .. ninja + ./opendal_cpp_test - - name: Run tests + - name: Run tests with valgrind working-directory: "bindings/cpp/build" - run: ./opendal_cpp_test + run: | + cmake -GNinja -DENABLE_ADDRESS_SANITIZER=OFF .. + ninja + valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --verbose ./opendal_cpp_test diff --git a/Cargo.lock b/Cargo.lock index 8fe3a49f3..6cf5fc04d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3599,6 +3599,7 @@ name = "opendal-cpp" version = "0.1.0" dependencies = [ "anyhow", + "chrono", "cxx", "cxx-build", "opendal", diff --git a/bindings/cpp/CMakeLists.txt b/bindings/cpp/CMakeLists.txt index ebffe0d5a..9603de8eb 100644 --- a/bindings/cpp/CMakeLists.txt +++ b/bindings/cpp/CMakeLists.txt @@ -25,6 +25,8 @@ if (NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Debug) endif() +option(ENABLE_ADDRESS_SANITIZER "Enable address sanitizer" ON) + # cargo target dir must be absolute, otherwise some build target cannot find it get_filename_component(CARGO_TARGET_DIR ${CMAKE_SOURCE_DIR}/../../target ABSOLUTE) set(CARGO_MANIFEST ${CMAKE_SOURCE_DIR}/Cargo.toml) @@ -42,9 +44,12 @@ add_custom_command( COMMENT "Running cargo..." ) +find_package(Boost REQUIRED COMPONENTS date_time) + add_library(opendal_cpp STATIC ${CPP_SOURCE_FILE} ${RUST_BRIDGE_CPP}) -target_include_directories(opendal_cpp PUBLIC ${CPP_INCLUDE_DIR}) -target_link_libraries(opendal_cpp PUBLIC ${RUST_LIB} ${CMAKE_DL_LIBS}) +target_include_directories(opendal_cpp PUBLIC ${CPP_INCLUDE_DIR} Boost::date_time) +target_link_libraries(opendal_cpp PUBLIC ${RUST_LIB}) +target_link_libraries(opendal_cpp PRIVATE ${CMAKE_DL_LIBS} Boost::date_time) set_target_properties(opendal_cpp PROPERTIES ADDITIONAL_CLEAN_FILES ${CARGO_TARGET_DIR} ) @@ -71,7 +76,9 @@ target_link_libraries(opendal_cpp_test ${GTEST_LDFLAGS} GTest::gtest_main openda target_compile_options(opendal_cpp_test PRIVATE ${GTEST_CFLAGS}) # enable address sanitizers -if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID MATCHES "GNU") +if (ENABLE_ADDRESS_SANITIZER) + target_compile_options(opendal_cpp PRIVATE -fsanitize=leak,address,undefined -fno-omit-frame-pointer -fno-common -O1) + target_link_options(opendal_cpp PRIVATE -fsanitize=leak,address,undefined) target_compile_options(opendal_cpp_test PRIVATE -fsanitize=leak,address,undefined -fno-omit-frame-pointer -fno-common -O1) target_link_options(opendal_cpp_test PRIVATE -fsanitize=leak,address,undefined) endif() diff --git a/bindings/cpp/CONTRIBUTING.md b/bindings/cpp/CONTRIBUTING.md index a6238d7b5..c76dcda44 100644 --- a/bindings/cpp/CONTRIBUTING.md +++ b/bindings/cpp/CONTRIBUTING.md @@ -32,6 +32,8 @@ To build OpenDAL C++ binding, the following is all you need: - **GTest(Google Test)**. It is used to run the tests. To see how to build, check [here](https://github.com/google/googletest). +- **Boost**. It is one dependency of this library. To see how to build, check [here](https://www.boost.org/doc/libs/1_76_0/more/getting_started/unix-variants.html). + - **Doxygen**. It is used to generate the documentation. To see how to build, check [here](https://www.doxygen.nl/manual/install.html). - **Graphviz**. It is used to generate the documentation with graphs. To see how to build, check [here](https://graphviz.org/download/). @@ -48,14 +50,11 @@ sudo apt install cmake ninja-build # install clang-format sudo apt install clang-format -# install and build GTest library under /usr/lib and softlink to /usr/local/lib +# install GTest library sudo apt-get install libgtest-dev -cd /usr/src/gtest -sudo cmake CMakeLists.txt -sudo make -sudo cp lib/*.a /usr/lib -sudo ln -s /usr/lib/libgtest.a /usr/local/lib/libgtest.a -sudo ln -s /usr/lib/libgtest_main.a /usr/local/lib/libgtest_main.a + +# install Boost library +sudo apt install libboost-all-dev # install Doxygen and Graphviz sudo apt install doxygen graphviz @@ -76,6 +75,9 @@ brew install clang-format # install GTest library brew install googletest +# install Boost library +brew install boost + # install Doxygen and Graphviz brew install doxygen graphviz ``` @@ -89,7 +91,7 @@ mkdir build cd build # Add -DCMAKE_EXPORT_COMPILE_COMMANDS=1 to generate compile_commands.json for clangd -cmake -DCMAKE_BUILD_TYPE=Debug -GNinja .. +cmake -GNinja .. ninja ``` diff --git a/bindings/cpp/Cargo.toml b/bindings/cpp/Cargo.toml index 55fcce62f..21f890f0c 100644 --- a/bindings/cpp/Cargo.toml +++ b/bindings/cpp/Cargo.toml @@ -34,6 +34,7 @@ crate-type = ["staticlib"] opendal.workspace = true cxx = "1.0" anyhow = "1.0" +chrono = "0.4" [build-dependencies] cxx-build = "1.0" diff --git a/bindings/cpp/include/opendal.hpp b/bindings/cpp/include/opendal.hpp index 22efc5046..44516ba92 100644 --- a/bindings/cpp/include/opendal.hpp +++ b/bindings/cpp/include/opendal.hpp @@ -20,6 +20,7 @@ #pragma once #include "lib.rs.h" +#include <boost/date_time/posix_time/posix_time.hpp> #include <memory> #include <optional> #include <string> @@ -28,11 +29,48 @@ namespace opendal { +/** + * @enum EntryMode + * @brief The mode of the entry + */ +enum EntryMode { + FILE, + DIR, + UNKNOWN, +}; + +/** + * @struct Metadata + * @brief The metadata of a file or directory + */ +struct Metadata { + EntryMode type; + std::uint64_t content_length; + std::optional<std::string> cache_control; + std::optional<std::string> content_disposition; + std::optional<std::string> content_md5; + std::optional<std::string> content_type; + std::optional<std::string> etag; + std::optional<boost::posix_time::ptime> last_modified; + + Metadata(ffi::Metadata &&); +}; + +/** + * @struct Entry + * @brief The entry of a file or directory + */ +struct Entry { + std::string path; + + Entry(ffi::Entry &&); +}; + /** * @class Operator * @brief Operator is the entry for all public APIs. */ -class Operator : std::enable_shared_from_this<Operator> { +class Operator { public: Operator() = default; @@ -63,6 +101,8 @@ public: /** * @brief Read data from the operator + * @note The operation will make unneccessary copy. So we recommend to use the + * `reader` method. * * @param path The path of the data * @return The data read from the operator @@ -77,6 +117,61 @@ public: */ void write(std::string_view path, const std::vector<uint8_t> &data); + /** + * @brief Check if the path exists + * + * @param path The path to check + * @return true if the path exists, false otherwise + */ + bool is_exist(std::string_view path); + + /** + * @brief Create a directory + * + * @param path The path of the directory + */ + void create_dir(std::string_view path); + + /** + * @brief Copy a file from src to dst. + * + * @param src The source path + * @param dst The destination path + */ + void copy(std::string_view src, std::string_view dst); + + /** + * @brief Rename a file from src to dst. + * + * @param src The source path + * @param dst The destination path + */ + void rename(std::string_view src, std::string_view dst); + + /** + * @brief Remove a file or directory + * + * @param path The path of the file or directory + */ + void remove(std::string_view path); + + /** + * @brief Get the metadata of a file or directory + * + * @param path The path of the file or directory + * @return The metadata of the file or directory + */ + Metadata stat(std::string_view path); + + /** + * @brief List the entries of a directory + * @note The returned entries are sorted by name. + * + * @param path The path of the directory + * @return The entries of the directory + */ + std::vector<Entry> list(std::string_view path); + private: std::optional<rust::Box<opendal::ffi::Operator>> operator_; }; diff --git a/bindings/cpp/src/lib.rs b/bindings/cpp/src/lib.rs index ff9f68e12..6b4cc4af4 100644 --- a/bindings/cpp/src/lib.rs +++ b/bindings/cpp/src/lib.rs @@ -17,7 +17,6 @@ use anyhow::Result; use opendal as od; -use std::collections::HashMap; use std::str::FromStr; #[cxx::bridge(namespace = "opendal::ffi")] @@ -27,12 +26,40 @@ mod ffi { value: String, } + struct Metadata { + // tag layout: (8 bits flagset) + // 0-1: mode, 2: has_cache_control, 3: has_content_disposition, 4: has_content_md5, + // 5: has_content_type, 6: has_etag, 7: has_last_modified + // + // mode enum: (2 bits) + // 1: file, 2: dir, 0,3: unknown + tag: u8, + content_length: u64, + cache_control: String, + content_disposition: String, + content_md5: String, + content_type: String, + etag: String, + last_modified: String, + } + + struct Entry { + path: String, + } + extern "Rust" { type Operator; fn new_operator(scheme: &str, configs: Vec<HashMapValue>) -> Result<Box<Operator>>; - fn read(&self, path: &str) -> Result<Vec<u8>>; - fn write(&self, path: &str, bs: &[u8]) -> Result<()>; + fn read(self: &Operator, path: &str) -> Result<Vec<u8>>; + fn write(self: &Operator, path: &str, bs: &'static [u8]) -> Result<()>; + fn is_exist(self: &Operator, path: &str) -> Result<bool>; + fn create_dir(self: &Operator, path: &str) -> Result<()>; + fn copy(self: &Operator, src: &str, dst: &str) -> Result<()>; + fn rename(self: &Operator, src: &str, dst: &str) -> Result<()>; + fn remove(self: &Operator, path: &str) -> Result<()>; + fn stat(self: &Operator, path: &str) -> Result<Metadata>; + fn list(self: &Operator, path: &str) -> Result<Vec<Entry>>; } } @@ -44,7 +71,7 @@ fn new_operator(scheme: &str, configs: Vec<ffi::HashMapValue>) -> Result<Box<Ope let map = configs .into_iter() .map(|value| (value.key, value.value)) - .collect::<HashMap<_, _>>(); + .collect(); let op = Box::new(Operator(od::Operator::via_map(scheme, map)?.blocking())); @@ -56,7 +83,122 @@ impl Operator { Ok(self.0.read(path)?) } - fn write(&self, path: &str, bs: &[u8]) -> Result<()> { - Ok(self.0.write(path, bs.to_owned())?) + // To avoid copying the bytes, we use &'static [u8] here. + // + // Safety: The bytes created from bs will be droped after the function call. + // So it's safe to decalre its lifetime as 'static. + fn write(&self, path: &str, bs: &'static [u8]) -> Result<()> { + Ok(self.0.write(path, bs)?) + } + + fn is_exist(&self, path: &str) -> Result<bool> { + Ok(self.0.is_exist(path)?) + } + + fn create_dir(&self, path: &str) -> Result<()> { + Ok(self.0.create_dir(path)?) + } + + fn copy(&self, src: &str, dst: &str) -> Result<()> { + Ok(self.0.copy(src, dst)?) + } + + fn rename(&self, src: &str, dst: &str) -> Result<()> { + Ok(self.0.rename(src, dst)?) + } + + // We can't name it to delete because it's a keyword in C++ + fn remove(&self, path: &str) -> Result<()> { + Ok(self.0.delete(path)?) + } + + fn stat(&self, path: &str) -> Result<ffi::Metadata> { + Ok(self.0.stat(path)?.into()) + } + + fn list(&self, path: &str) -> Result<Vec<ffi::Entry>> { + Ok(self.0.list(path)?.into_iter().map(Into::into).collect()) + } +} + +impl From<od::Metadata> for ffi::Metadata { + fn from(meta: od::Metadata) -> Self { + let mut tag = 0u8; + + match meta.mode() { + od::EntryMode::FILE => tag |= 0b0000_0001, + od::EntryMode::DIR => tag |= 0b0000_0010, + _ => {} + } + + let content_length = meta.content_length(); + + let cache_control = match meta.cache_control() { + Some(v) => { + tag |= 0b0000_0100; + v.to_owned() + } + None => String::new(), + }; + + let content_disposition = match meta.content_disposition() { + Some(v) => { + tag |= 0b0000_1000; + v.to_owned() + } + None => String::new(), + }; + + let content_md5 = match meta.content_md5() { + Some(v) => { + tag |= 0b0001_0000; + v.to_owned() + } + None => String::new(), + }; + + let content_type = match meta.content_type() { + Some(v) => { + tag |= 0b0010_0000; + v.to_owned() + } + None => String::new(), + }; + + let etag = match meta.etag() { + Some(v) => { + tag |= 0b0100_0000; + v.to_owned() + } + None => String::new(), + }; + + let last_modified = match meta.last_modified() { + Some(v) => { + tag |= 0b1000_0000; + v.to_rfc3339_opts(chrono::SecondsFormat::Nanos, false) + } + None => String::new(), + }; + + Self { + tag, + content_length, + cache_control, + content_disposition, + content_md5, + content_type, + etag, + last_modified, + } + } +} + +impl From<od::Entry> for ffi::Entry { + fn from(entry: od::Entry) -> Self { + let (path, _) = entry.into_parts(); + Self { + path, + } } } diff --git a/bindings/cpp/src/opendal.cpp b/bindings/cpp/src/opendal.cpp index 49a1fa227..7b12588b8 100644 --- a/bindings/cpp/src/opendal.cpp +++ b/bindings/cpp/src/opendal.cpp @@ -21,37 +21,105 @@ using namespace opendal; +#define RUST_STR(s) rust::Str(s.data(), s.size()) +#define RUST_STRING(s) rust::String(s.data(), s.size()) + Operator::Operator(std::string_view scheme, const std::unordered_map<std::string, std::string> &config) { auto rust_map = rust::Vec<ffi::HashMapValue>(); rust_map.reserve(config.size()); - for (const auto &[k, v] : config) { - rust_map.push_back(ffi::HashMapValue{ - rust::String(k.data()), - rust::String(v.data()), - }); + for (auto &[k, v] : config) { + rust_map.push_back({RUST_STRING(k), RUST_STRING(v)}); } - operator_ = opendal::ffi::new_operator(rust::Str(scheme.data()), rust_map); + operator_ = opendal::ffi::new_operator(RUST_STR(scheme), rust_map); } bool Operator::available() const { return operator_.has_value(); } +// We can't avoid copy, because std::vector hides the internal structure. +// std::vector doesn't support init from a pointer without copy. std::vector<uint8_t> Operator::read(std::string_view path) { - auto rust_vec = operator_.value()->read(rust::Str(path.data())); - - // Convert rust::Vec<uint8_t> to std::vector<uint8_t> - // This cannot use rust vector pointer to init std::vector because - // rust::Vec owns the memory and will free it when it goes out of scope. - std::vector<uint8_t> res; - res.reserve(rust_vec.size()); - std::copy(rust_vec.cbegin(), rust_vec.cend(), std::back_inserter(res)); + auto rust_vec = operator_.value()->read(RUST_STR(path)); - return res; + return {rust_vec.data(), rust_vec.data() + rust_vec.size()}; } void Operator::write(std::string_view path, const std::vector<uint8_t> &data) { operator_.value()->write( - rust::Str(path.data()), - rust::Slice<const uint8_t>(data.data(), data.size())); -} \ No newline at end of file + RUST_STR(path), rust::Slice<const uint8_t>(data.data(), data.size())); +} + +bool Operator::is_exist(std::string_view path) { + return operator_.value()->is_exist(RUST_STR(path)); +} + +void Operator::create_dir(std::string_view path) { + operator_.value()->create_dir(RUST_STR(path)); +} + +void Operator::copy(std::string_view src, std::string_view dst) { + operator_.value()->copy(RUST_STR(src), RUST_STR(dst)); +} + +void Operator::rename(std::string_view src, std::string_view dst) { + operator_.value()->rename(RUST_STR(src), RUST_STR(dst)); +} + +void Operator::remove(std::string_view path) { + operator_.value()->remove(RUST_STR(path)); +} + +Metadata Operator::stat(std::string_view path) { + return {operator_.value()->stat(RUST_STR(path))}; +} + +std::vector<Entry> Operator::list(std::string_view path) { + auto rust_vec = operator_.value()->list(RUST_STR(path)); + + std::vector<Entry> entries; + entries.reserve(rust_vec.size()); + for (auto &&entry : rust_vec) { + entries.push_back(std::move(entry)); + } + return entries; +} + +Metadata::Metadata(ffi::Metadata &&other) { + if (other.tag & 1) { + type = EntryMode::FILE; + } else if (other.tag & 0b10) { + type = EntryMode::DIR; + } else { + type = EntryMode::UNKNOWN; + } + + content_length = other.content_length; + + if (other.tag & 0b100) { + cache_control = std::string(std::move(other.cache_control)); + } + + if (other.tag & 0b1000) { + content_disposition = std::string(std::move(other.content_disposition)); + } + + if (other.tag & 0b10000) { + content_md5 = std::string(std::move(other.content_md5)); + } + + if (other.tag & 0b100000) { + content_type = std::string(std::move(other.content_type)); + } + + if (other.tag & 0b1000000) { + etag = std::string(std::move(other.etag)); + } + + if (other.tag & 0b10000000) { + last_modified = boost::posix_time::from_iso_string( + std::string(std::move(other.last_modified))); + } +} + +Entry::Entry(ffi::Entry &&other) : path(std::move(other.path)) {} \ No newline at end of file diff --git a/bindings/cpp/tests/basic_test.cpp b/bindings/cpp/tests/basic_test.cpp index 32811b75f..d86cb4fd7 100644 --- a/bindings/cpp/tests/basic_test.cpp +++ b/bindings/cpp/tests/basic_test.cpp @@ -19,6 +19,7 @@ #include "opendal.hpp" #include "gtest/gtest.h" +#include <optional> #include <string> #include <unordered_map> @@ -30,22 +31,59 @@ protected: std::unordered_map<std::string, std::string> config; void SetUp() override { - this->scheme = "memory"; - op = opendal::Operator(this->scheme, this->config); + scheme = "memory"; + op = opendal::Operator(scheme, config); - EXPECT_TRUE(this->op.available()); + EXPECT_TRUE(op.available()); } }; // Scenario: OpenDAL Blocking Operations TEST_F(OpendalTest, BasicTest) { - std::string path = "test"; + std::string file_path = "test"; + std::string file_path_copied = "test_copied"; + std::string file_path_renamed = "test_renamed"; + std::string dir_path = "test_dir/"; std::vector<uint8_t> data = {1, 2, 3, 4, 5}; - op.write("test", data); + // write + op.write(file_path, data); - auto res = op.read("test"); + // read + auto res = op.read(file_path); EXPECT_EQ(res, data); + + // is_exist + EXPECT_TRUE(op.is_exist(file_path)); + + // create_dir + op.create_dir(dir_path); + EXPECT_TRUE(op.is_exist(dir_path)); + + // copy + op.copy(file_path, file_path_copied); + EXPECT_TRUE(op.is_exist(file_path_copied)); + + // rename + op.rename(file_path_copied, file_path_renamed); + EXPECT_TRUE(op.is_exist(file_path_renamed)); + + // stat + auto metadata = op.stat(file_path); + EXPECT_EQ(metadata.type, opendal::EntryMode::FILE); + EXPECT_EQ(metadata.content_length, data.size()); + + // list + auto list_file_path = dir_path + file_path; + op.write(list_file_path, data); + auto entries = op.list(dir_path); + EXPECT_EQ(entries.size(), 1); + EXPECT_EQ(entries[0].path, list_file_path); + + // remove + op.remove(file_path_renamed); + op.remove(dir_path); + EXPECT_FALSE(op.is_exist(file_path_renamed)); } int main(int argc, char **argv) {
