This is an automated email from the ASF dual-hosted git repository.

weibin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-graphar.git


The following commit(s) were added to refs/heads/main by this push:
     new e6b95c8f fix(c++): Add FinalizeS3 function to release S3 resource 
(#571)
e6b95c8f is described below

commit e6b95c8fc4cbcabd751a53731bd7cf7aa773763f
Author: Weibin Zeng <[email protected]>
AuthorDate: Mon Aug 5 17:56:04 2024 +0800

    fix(c++): Add FinalizeS3 function to release S3 resource (#571)
    
    
    Signed-off-by: acezen <[email protected]>
---
 cpp/src/graphar/filesystem.cc         | 11 +++++++++++
 cpp/src/graphar/filesystem.h          | 23 +++++++++++++++++++++++
 cpp/test/test_info.cc                 |  7 ++++++-
 docs/libraries/cpp/getting-started.md | 12 ++++++++++++
 4 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/cpp/src/graphar/filesystem.cc b/cpp/src/graphar/filesystem.cc
index eb526d76..0ccbc5a9 100644
--- a/cpp/src/graphar/filesystem.cc
+++ b/cpp/src/graphar/filesystem.cc
@@ -313,6 +313,17 @@ Result<std::shared_ptr<FileSystem>> 
FileSystemFromUriOrPath(
   return std::make_shared<FileSystem>(arrow_fs);
 }
 
+Status InitializeS3() {
+  RETURN_NOT_ARROW_OK(
+      arrow::fs::InitializeS3(arrow::fs::S3GlobalOptions::Defaults()));
+  return Status::OK();
+}
+
+Status FinalizeS3() {
+  RETURN_NOT_ARROW_OK(arrow::fs::FinalizeS3());
+  return Status::OK();
+}
+
 /// template specialization for std::string
 template Result<IdType> FileSystem::ReadFileToValue<IdType>(
     const std::string&) const noexcept;
diff --git a/cpp/src/graphar/filesystem.h b/cpp/src/graphar/filesystem.h
index ec11216a..309bb564 100644
--- a/cpp/src/graphar/filesystem.h
+++ b/cpp/src/graphar/filesystem.h
@@ -153,4 +153,27 @@ class FileSystem {
 Result<std::shared_ptr<FileSystem>> FileSystemFromUriOrPath(
     const std::string& uri, std::string* out_path = nullptr);
 
+/**
+ * @brief Initialize the S3 APIs.
+ *
+ * It is required to call this function at least once before using S3
+ * FileSystem. Once this function is called you MUST call FinalizeS3 before the
+ * end of the application in order to avoid a segmentation fault at shutdown.
+ *
+ * This function calls arrow::fs::InitializeS3() internally.
+ *
+ */
+Status InitializeS3();
+
+/**
+ * @brief Shutdown the S3 APIs.
+ *
+ * This function should be called before the program exits to ensure that
+ * all S3 resources are properly released.
+ *
+ * This function calls arrow::fs::FinalizeS3() internally.
+ *
+ */
+Status FinalizeS3();
+
 }  // namespace graphar
diff --git a/cpp/test/test_info.cc b/cpp/test/test_info.cc
index 3216b8ed..2799d11e 100644
--- a/cpp/test/test_info.cc
+++ b/cpp/test/test_info.cc
@@ -774,7 +774,10 @@ extra_info:
   }
 }
 
-TEST_CASE_METHOD(GlobalFixture, "LoadFromS3", "[.hidden]") {
+TEST_CASE_METHOD(GlobalFixture, "LoadFromS3") {
+  // explicitly call InitializeS3 to initialize S3 APIs before using
+  // S3 file system.
+  InitializeS3();
   std::string path =
       "s3://graphar/ldbc/ldbc.graph.yml"
       "?endpoint_override=graphscope.oss-cn-beijing.aliyuncs.com";
@@ -787,5 +790,7 @@ TEST_CASE_METHOD(GlobalFixture, "LoadFromS3", "[.hidden]") {
   const auto& edge_infos = graph_info->GetEdgeInfos();
   REQUIRE(vertex_infos.size() == 8);
   REQUIRE(edge_infos.size() == 23);
+  // explicitly call FinalizeS3 to avoid memory leak
+  FinalizeS3();
 }
 }  // namespace graphar
diff --git a/docs/libraries/cpp/getting-started.md 
b/docs/libraries/cpp/getting-started.md
index 80f285c4..7265026d 100644
--- a/docs/libraries/cpp/getting-started.md
+++ b/docs/libraries/cpp/getting-started.md
@@ -276,3 +276,15 @@ is used to write the results to new generated data chunks.
 
 Please refer to [more examples](examples/out-of-core.md) to learn
 about the other available case studies utilizing GraphAr.
+
+### Working with Cloud Storage (S3, OSS)
+
+GraphAr supports reading and writing data from and to cloud storage, including
+AWS S3 and Alibaba Cloud OSS.
+
+To read data from cloud storage, you can specify the path of the data files
+with a URI scheme, e.g., "s3://bucket-name/path/to/data" or 
"s3://\[access-key:secret-key\]@bucket-name/path/to/data".
+
+[Code 
example](https://github.com/apache/incubator-graphar/blob/main/cpp/test/test_info.cc#L777-L792)
 demonstrates how to read data from S3.
+
+Note that once you use cloud storage, you need to call `graphar::InitializeS3()` 
to initialize S3 APIs before starting the work and call `graphar::FinalizeS3()` 
to shut down the APIs after the work finishes.


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to