This is an automated email from the ASF dual-hosted git repository.
weibin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-graphar.git
The following commit(s) were added to refs/heads/main by this push:
new e6b95c8f fix(c++): Add FinalizeS3 function to release S3 resource
(#571)
e6b95c8f is described below
commit e6b95c8fc4cbcabd751a53731bd7cf7aa773763f
Author: Weibin Zeng <[email protected]>
AuthorDate: Mon Aug 5 17:56:04 2024 +0800
fix(c++): Add FinalizeS3 function to release S3 resource (#571)
Signed-off-by: acezen <[email protected]>
---
cpp/src/graphar/filesystem.cc | 11 +++++++++++
cpp/src/graphar/filesystem.h | 23 +++++++++++++++++++++++
cpp/test/test_info.cc | 7 ++++++-
docs/libraries/cpp/getting-started.md | 12 ++++++++++++
4 files changed, 52 insertions(+), 1 deletion(-)
diff --git a/cpp/src/graphar/filesystem.cc b/cpp/src/graphar/filesystem.cc
index eb526d76..0ccbc5a9 100644
--- a/cpp/src/graphar/filesystem.cc
+++ b/cpp/src/graphar/filesystem.cc
@@ -313,6 +313,17 @@ Result<std::shared_ptr<FileSystem>>
FileSystemFromUriOrPath(
return std::make_shared<FileSystem>(arrow_fs);
}
+Status InitializeS3() {  // Bring up Arrow's S3 layer with its default options.
+ const auto default_options = arrow::fs::S3GlobalOptions::Defaults();
+ RETURN_NOT_ARROW_OK(arrow::fs::InitializeS3(default_options));
+ return Status::OK();  // Reached only if the macro above did not return.
+}
+
+Status FinalizeS3() {  // Counterpart of InitializeS3(); call before exit.
+ RETURN_NOT_ARROW_OK(arrow::fs::FinalizeS3());  // Delegates to Arrow.
+ return Status::OK();  // Reached only if the macro above did not return.
+}
+
/// template specialization for std::string
template Result<IdType> FileSystem::ReadFileToValue<IdType>(
const std::string&) const noexcept;
diff --git a/cpp/src/graphar/filesystem.h b/cpp/src/graphar/filesystem.h
index ec11216a..309bb564 100644
--- a/cpp/src/graphar/filesystem.h
+++ b/cpp/src/graphar/filesystem.h
@@ -153,4 +153,27 @@ class FileSystem {
Result<std::shared_ptr<FileSystem>> FileSystemFromUriOrPath(
const std::string& uri, std::string* out_path = nullptr);
+/**
+ * @brief Initialize the S3 APIs.
+ *
+ * It is required to call this function at least once before using S3
+ * FileSystem. Once this function is called you MUST call FinalizeS3 before the
+ * end of the application in order to avoid a segmentation fault at shutdown.
+ *
+ * This function calls arrow::fs::InitializeS3() internally.
+ *
+ */
+Status InitializeS3();
+
+/**
+ * @brief Shutdown the S3 APIs.
+ *
+ * This function should be called before the program exits to ensure that
+ * all S3 resources are properly released.
+ *
+ * This function calls arrow::fs::FinalizeS3() internally.
+ *
+ */
+Status FinalizeS3();
+
} // namespace graphar
diff --git a/cpp/test/test_info.cc b/cpp/test/test_info.cc
index 3216b8ed..2799d11e 100644
--- a/cpp/test/test_info.cc
+++ b/cpp/test/test_info.cc
@@ -774,7 +774,10 @@ extra_info:
}
}
-TEST_CASE_METHOD(GlobalFixture, "LoadFromS3", "[.hidden]") {
+TEST_CASE_METHOD(GlobalFixture, "LoadFromS3") {
+ // explicitly call InitializeS3 to initialize S3 APIs before using
+ // S3 file system.
+ InitializeS3();
std::string path =
"s3://graphar/ldbc/ldbc.graph.yml"
"?endpoint_override=graphscope.oss-cn-beijing.aliyuncs.com";
@@ -787,5 +790,7 @@ TEST_CASE_METHOD(GlobalFixture, "LoadFromS3", "[.hidden]") {
const auto& edge_infos = graph_info->GetEdgeInfos();
REQUIRE(vertex_infos.size() == 8);
REQUIRE(edge_infos.size() == 23);
+ // explicitly call FinalizeS3 to avoid memory leak
+ FinalizeS3();
}
} // namespace graphar
diff --git a/docs/libraries/cpp/getting-started.md
b/docs/libraries/cpp/getting-started.md
index 80f285c4..7265026d 100644
--- a/docs/libraries/cpp/getting-started.md
+++ b/docs/libraries/cpp/getting-started.md
@@ -276,3 +276,15 @@ is used to write the results to new generated data chunks.
Please refer to [more examples](examples/out-of-core.md) to learn
about the other available case studies utilizing GraphAr.
+
+### Working with Cloud Storage (S3, OSS)
+
+GraphAr supports reading and writing data from and to cloud storage, including
+AWS S3 and Alibaba Cloud OSS.
+
+To read data from cloud storage, you can specify the path of the data files
+with a URI scheme, e.g., "s3://bucket-name/path/to/data" or
"s3://\[access-key:secret-key\]@bucket-name/path/to/data".
+
+[Code
example](https://github.com/apache/incubator-graphar/blob/main/cpp/test/test_info.cc#L777-L792)
demonstrates how to read data from S3.
+
+Note that once you use cloud storage, you need to call `graphar::InitializeS3()`
to initialize S3 APIs before starting the work and call `graphar::FinalizeS3()`
to shut down the APIs after the work finishes.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]