This is an automated email from the ASF dual-hosted git repository.

lixueclaire pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-graphar.git


The following commit(s) were added to refs/heads/main by this push:
     new 883d7eb0 feat(c++): support read and write multi-property (#719)
883d7eb0 is described below

commit 883d7eb05b71e7b349c8f735c911bc9079bbe313
Author: Xiaokang Yang <81174897+yang...@users.noreply.github.com>
AuthorDate: Mon Jul 21 13:41:06 2025 +0800

    feat(c++): support read and write multi-property (#719)
---
 cpp/CMakeLists.txt                          |   1 +
 cpp/src/graphar/arrow/chunk_reader.cc       |   7 +-
 cpp/src/graphar/arrow/chunk_writer.cc       |  13 +--
 cpp/src/graphar/fwd.h                       |   2 +
 cpp/src/graphar/graph_info.cc               |  43 +++++++-
 cpp/src/graphar/graph_info.h                |  10 +-
 cpp/src/graphar/types.h                     |  39 +++++++
 cpp/test/test_info.cc                       | 116 ++++++++++++++++----
 cpp/test/test_multi_label.cc                |  20 ++--
 cpp/test/test_multi_property.cc             | 161 ++++++++++++++++++++++++++++
 docs/specification/implementation-status.md |   2 +
 11 files changed, 368 insertions(+), 46 deletions(-)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 9d9f8736..5fb6c142 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -566,6 +566,7 @@ if (BUILD_TESTS)
     add_test(test_arrow_chunk_reader SRCS test/test_arrow_chunk_reader.cc)
     add_test(test_graph SRCS test/test_graph.cc)
     add_test(test_multi_label SRCS test/test_multi_label.cc)
+    add_test(test_multi_property SRCS test/test_multi_property.cc)
 
     # enable_testing()
 endif()
diff --git a/cpp/src/graphar/arrow/chunk_reader.cc b/cpp/src/graphar/arrow/chunk_reader.cc
index a5c77cd2..1b5200fc 100644
--- a/cpp/src/graphar/arrow/chunk_reader.cc
+++ b/cpp/src/graphar/arrow/chunk_reader.cc
@@ -49,8 +49,11 @@ Result<std::shared_ptr<arrow::Schema>> PropertyGroupToSchema(
         GeneralParams::kVertexIndexCol, arrow::int64()));
   }
   for (const auto& prop : pg->GetProperties()) {
-    fields.push_back(std::make_shared<arrow::Field>(
-        prop.name, DataType::DataTypeToArrowDataType(prop.type)));
+    auto dataType = DataType::DataTypeToArrowDataType(prop.type);
+    if (prop.cardinality != Cardinality::SINGLE) {
+      dataType = arrow::list(dataType);
+    }
+    fields.push_back(std::make_shared<arrow::Field>(prop.name, dataType));
   }
   return arrow::schema(fields);
 }
diff --git a/cpp/src/graphar/arrow/chunk_writer.cc b/cpp/src/graphar/arrow/chunk_writer.cc
index 234bbfb3..c8612383 100644
--- a/cpp/src/graphar/arrow/chunk_writer.cc
+++ b/cpp/src/graphar/arrow/chunk_writer.cc
@@ -321,17 +321,6 @@ Status VertexPropertyWriter::WriteTable(
   return Status::OK();
 }
 
-// Helper function to split a string by a delimiter
-std::vector<std::string> SplitString(const std::string& str, char delimiter) {
-  std::vector<std::string> tokens;
-  std::string token;
-  std::istringstream tokenStream(str);
-  while (std::getline(tokenStream, token, delimiter)) {
-    tokens.push_back(token);
-  }
-  return tokens;
-}
-
 Status VertexPropertyWriter::WriteLabelTable(
     const std::shared_ptr<arrow::Table>& input_table, IdType start_chunk_index,
     FileType file_type, ValidateLevel validate_level) const {
@@ -379,6 +368,8 @@ Result<std::shared_ptr<arrow::Table>> VertexPropertyWriter::GetLabelTable(
     auto label_column = std::static_pointer_cast<arrow::StringArray>(chunk);
 
     // Populate the matrix based on :LABEL column values
+    // TODO(@yangxk):  store array in the label_column, split the string when
+    // reading file
     for (int64_t row = 0; row < label_column->length(); ++row) {
       if (label_column->IsValid(row)) {
         std::string labels_string = label_column->GetString(row);
diff --git a/cpp/src/graphar/fwd.h b/cpp/src/graphar/fwd.h
index f7b5d34a..e7feefd0 100644
--- a/cpp/src/graphar/fwd.h
+++ b/cpp/src/graphar/fwd.h
@@ -72,6 +72,8 @@ class FileSystem;
 using IdType = int64_t;
 enum class Type;
 class DataType;
+/** Defines how multiple values are handled for a given property key */
+enum Cardinality { SINGLE, LIST, SET };
 /** Type of file format */
 enum FileType { CSV = 0, PARQUET = 1, ORC = 2, JSON = 3 };
 enum SelectType { PROPERTIES = 0, LABELS = 1 };
diff --git a/cpp/src/graphar/graph_info.cc b/cpp/src/graphar/graph_info.cc
index 81d1bba7..9959f194 100644
--- a/cpp/src/graphar/graph_info.cc
+++ b/cpp/src/graphar/graph_info.cc
@@ -20,6 +20,7 @@
 #include <unordered_set>
 #include <utility>
 
+#include "graphar/status.h"
 #include "mini-yaml/yaml/Yaml.hpp"
 
 #include "graphar/filesystem.h"
@@ -86,7 +87,8 @@ std::string BuildPath(const std::vector<std::string>& paths) {
 bool operator==(const Property& lhs, const Property& rhs) {
   return (lhs.name == rhs.name) && (lhs.type == rhs.type) &&
          (lhs.is_primary == rhs.is_primary) &&
-         (lhs.is_nullable == rhs.is_nullable);
+         (lhs.is_nullable == rhs.is_nullable) &&
+         (lhs.cardinality == rhs.cardinality);
 }
 
 PropertyGroup::PropertyGroup(const std::vector<Property>& properties,
@@ -138,6 +140,11 @@ bool PropertyGroup::IsValidated() const {
       // list type is not supported in csv file
       return false;
     }
+    // TODO(@yangxk): support cardinality in csv file
+    if (p.cardinality != Cardinality::SINGLE && file_type_ == FileType::CSV) {
+      // list/set cardinality is not supported in csv file
+      return false;
+    }
   }
   return true;
 }
@@ -212,6 +219,7 @@ class VertexInfo::Impl {
         property_name_to_primary_.emplace(p.name, p.is_primary);
         property_name_to_nullable_.emplace(p.name, p.is_nullable);
         property_name_to_type_.emplace(p.name, p.type);
+        property_name_to_cardinality_.emplace(p.name, p.cardinality);
       }
     }
   }
@@ -251,6 +259,7 @@ class VertexInfo::Impl {
   std::unordered_map<std::string, bool> property_name_to_nullable_;
   std::unordered_map<std::string, std::shared_ptr<DataType>>
       property_name_to_type_;
+  std::unordered_map<std::string, Cardinality> property_name_to_cardinality_;
 };
 
 VertexInfo::VertexInfo(const std::string& type, IdType chunk_size,
@@ -363,6 +372,15 @@ Result<std::shared_ptr<DataType>> VertexInfo::GetPropertyType(
   return it->second;
 }
 
+Result<Cardinality> VertexInfo::GetPropertyCardinality(
+    const std::string& property_name) const {
+  auto it = impl_->property_name_to_cardinality_.find(property_name);
+  if (it == impl_->property_name_to_cardinality_.end()) {
+    return Status::Invalid("property name not found: ", property_name);
+  }
+  return it->second;
+}
+
 Result<std::shared_ptr<VertexInfo>> VertexInfo::AddPropertyGroup(
     std::shared_ptr<PropertyGroup> property_group) const {
   if (property_group == nullptr) {
@@ -440,8 +458,13 @@ Result<std::shared_ptr<VertexInfo>> VertexInfo::Load(
         bool is_primary = p_node["is_primary"].As<bool>();
         bool is_nullable =
             p_node["is_nullable"].IsNone() || p_node["is_nullable"].As<bool>();
+        Cardinality cardinality = Cardinality::SINGLE;
+        if (!p_node["cardinality"].IsNone()) {
+          cardinality =
+              StringToCardinality(p_node["cardinality"].As<std::string>());
+        }
         property_vec.emplace_back(property_name, property_type, is_primary,
-                                  is_nullable);
+                                  is_nullable, cardinality);
       }
       property_groups.push_back(
           std::make_shared<PropertyGroup>(property_vec, file_type, pg_prefix));
@@ -485,6 +508,9 @@ Result<std::string> VertexInfo::Dump() const noexcept {
         p_node["data_type"] = p.type->ToTypeName();
         p_node["is_primary"] = p.is_primary ? "true" : "false";
         p_node["is_nullable"] = p.is_nullable ? "true" : "false";
+        if (p.cardinality != Cardinality::SINGLE) {
+          p_node["cardinality"] = CardinalityToString(p.cardinality);
+        }
         pg_node["properties"].PushBack();
         pg_node["properties"][pg_node["properties"].Size() - 1] = p_node;
       }
@@ -574,6 +600,13 @@ class EdgeInfo::Impl {
       }
       // check if property name is unique in all property groups
       for (const auto& p : pg->GetProperties()) {
+        if (p.cardinality != Cardinality::SINGLE) {
+          // edge property only supports single cardinality
+          std::cout
+              << "Edge property only supports single cardinality, but got: "
+              << CardinalityToString(p.cardinality) << std::endl;
+          return false;
+        }
         if (check_property_unique_set.find(p.name) !=
             check_property_unique_set.end()) {
           return false;
@@ -910,6 +943,12 @@ Result<std::shared_ptr<EdgeInfo>> EdgeInfo::Load(std::shared_ptr<Yaml> yaml) {
         auto property_name = p_node["name"].As<std::string>();
         auto property_type =
             DataType::TypeNameToDataType(p_node["data_type"].As<std::string>());
+        if (!p_node["cardinality"].IsNone() &&
+            StringToCardinality(p_node["cardinality"].As<std::string>()) !=
+                Cardinality::SINGLE) {
+          return Status::YamlError(
+              "Unsupported set cardinality for edge property.");
+        }
         bool is_primary = p_node["is_primary"].As<bool>();
         bool is_nullable =
             p_node["is_nullable"].IsNone() || p_node["is_nullable"].As<bool>();
diff --git a/cpp/src/graphar/graph_info.h b/cpp/src/graphar/graph_info.h
index c067810c..27a487d4 100644
--- a/cpp/src/graphar/graph_info.h
+++ b/cpp/src/graphar/graph_info.h
@@ -37,16 +37,20 @@ class Property {
   std::shared_ptr<DataType> type;  // property data type
   bool is_primary;                 // primary key tag
   bool is_nullable;                // nullable tag for non-primary key
+  Cardinality
+      cardinality;  // cardinality of the property, only used in vertex info
 
   Property() = default;
 
   explicit Property(const std::string& name,
                     const std::shared_ptr<DataType>& type = nullptr,
-                    bool is_primary = false, bool is_nullable = true)
+                    bool is_primary = false, bool is_nullable = true,
+                    Cardinality cardinality = Cardinality::SINGLE)
       : name(name),
         type(type),
         is_primary(is_primary),
-        is_nullable(!is_primary && is_nullable) {}
+        is_nullable(!is_primary && is_nullable),
+        cardinality(cardinality) {}
 };
 
 bool operator==(const Property& lhs, const Property& rhs);
@@ -276,6 +280,8 @@ class VertexInfo {
   Result<std::shared_ptr<DataType>> GetPropertyType(
       const std::string& property_name) const;
 
+  Result<Cardinality> GetPropertyCardinality(
+      const std::string& property_name) const;
   /**
    * Get whether the vertex info contains the specified property.
    *
diff --git a/cpp/src/graphar/types.h b/cpp/src/graphar/types.h
index cb468a20..ac318ca4 100644
--- a/cpp/src/graphar/types.h
+++ b/cpp/src/graphar/types.h
@@ -23,6 +23,7 @@
 #include <memory>
 #include <string>
 #include <utility>
+#include <vector>
 
 #include "graphar/fwd.h"
 #include "graphar/macros.h"
@@ -242,4 +243,42 @@ static inline const char* FileTypeToString(FileType file_type) {
   return file_type2string.at(file_type);
 }
 
+static inline Cardinality StringToCardinality(const std::string& str) {
+  static const std::map<std::string, Cardinality> str2cardinality{
+      {"single", Cardinality::SINGLE},
+      {"list", Cardinality::LIST},
+      {"set", Cardinality::SET},
+  };
+  try {
+    return str2cardinality.at(str.c_str());
+  } catch (const std::exception& e) {
+    throw std::runtime_error("KeyError: " + str);
+  }
+}
+
+static inline const char* CardinalityToString(Cardinality cardinality) {
+  static const std::map<Cardinality, const char*> cardinality2string{
+      {Cardinality::SINGLE, "single"},
+      {Cardinality::LIST, "list"},
+      {Cardinality::SET, "set"},
+  };
+  try {
+    return cardinality2string.at(cardinality);
+  } catch (const std::exception& e) {
+    throw std::runtime_error("KeyError: " +
+                             std::to_string(static_cast<int>(cardinality)));
+  }
+}
+
+// Helper function to split a string by a delimiter
+inline std::vector<std::string> SplitString(const std::string& str,
+                                            char delimiter) {
+  std::vector<std::string> tokens;
+  std::string token;
+  std::istringstream tokenStream(str);
+  while (std::getline(tokenStream, token, delimiter)) {
+    tokens.push_back(token);
+  }
+  return tokens;
+}
 }  // namespace graphar
diff --git a/cpp/test/test_info.cc b/cpp/test/test_info.cc
index 82d839e3..9901ea47 100644
--- a/cpp/test/test_info.cc
+++ b/cpp/test/test_info.cc
@@ -25,6 +25,8 @@
 #include "./util.h"
 
 #include "graphar/api/info.h"
+#include "graphar/fwd.h"
+#include "graphar/status.h"
 
 #include <catch2/catch_test_macros.hpp>
 
@@ -75,9 +77,9 @@ TEST_CASE_METHOD(GlobalFixture, "Property") {
 
 TEST_CASE_METHOD(GlobalFixture, "PropertyGroup") {
   Property p0("p0", int32(), true);
-  Property p1("p1", int32(), false);
-  Property p2("p2", string(), false);
-  Property p3("p3", float32(), false);
+  Property p1("p1", int32(), false, true, Cardinality::SINGLE);
+  Property p2("p2", string(), false, true, Cardinality::LIST);
+  Property p3("p3", float32(), false, true, Cardinality::SET);
   Property p4("p4", float64(), false);
 
   PropertyGroup pg0({p0, p1}, FileType::CSV, "p0_and_p1/");
@@ -94,6 +96,11 @@ TEST_CASE_METHOD(GlobalFixture, "PropertyGroup") {
     REQUIRE(p.type->ToTypeName() == int32()->ToTypeName());
     REQUIRE(p.is_primary == true);
     REQUIRE(p.is_nullable == false);
+    // cardinality
+    REQUIRE(p0.cardinality == Cardinality::SINGLE);
+    REQUIRE(p1.cardinality == Cardinality::SINGLE);
+    REQUIRE(p2.cardinality == Cardinality::LIST);
+    REQUIRE(p3.cardinality == Cardinality::SET);
   }
 
   SECTION("FileType") {
@@ -180,8 +187,10 @@ TEST_CASE_METHOD(GlobalFixture, "VertexInfo") {
   int chunk_size = 100;
   auto version = std::make_shared<InfoVersion>(1);
   auto pg = CreatePropertyGroup(
-      {Property("p0", int32(), true), Property("p1", string(), false)},
-      FileType::CSV, "p0_p1/");
+      {Property("p0", int32(), true),
+       Property("p1", string(), false, true, Cardinality::LIST),
+       Property("p2", string(), false, true, Cardinality::SET)},
+      FileType::PARQUET, "p0_p1/");
   auto vertex_info =
       CreateVertexInfo(type, chunk_size, {pg}, {}, "test_vertex", version);
 
@@ -206,6 +215,12 @@ TEST_CASE_METHOD(GlobalFixture, "VertexInfo") {
     REQUIRE(vertex_info->IsPrimaryKey("p1") == false);
     REQUIRE(vertex_info->IsNullableKey("p0") == false);
     REQUIRE(vertex_info->IsNullableKey("p1") == true);
+    REQUIRE(vertex_info->GetPropertyCardinality("p0").value() ==
+            Cardinality::SINGLE);
+    REQUIRE(vertex_info->GetPropertyCardinality("p1").value() ==
+            Cardinality::LIST);
+    REQUIRE(vertex_info->GetPropertyCardinality("p2").value() ==
+            Cardinality::SET);
     REQUIRE(vertex_info->HasProperty("not_exist") == false);
     REQUIRE(vertex_info->IsPrimaryKey("not_exist") == false);
     REQUIRE(vertex_info->HasPropertyGroup(nullptr) == false);
@@ -221,11 +236,17 @@ TEST_CASE_METHOD(GlobalFixture, "VertexInfo") {
 
   SECTION("IsValidate") {
     REQUIRE(vertex_info->IsValidated() == true);
-    auto invalid_pg =
-        CreatePropertyGroup({Property("p0", nullptr, true)}, FileType::CSV);
+    auto invalid_pg = CreatePropertyGroup(
+        {Property("p0", list(string()), true, false, Cardinality::SET)},
+        FileType::CSV);
     auto invalid_vertex_info0 = CreateVertexInfo(type, chunk_size, {invalid_pg},
                                                  {}, "test_vertex/", version);
     REQUIRE(invalid_vertex_info0->IsValidated() == false);
+    invalid_pg =
+        CreatePropertyGroup({Property("p0", nullptr, true)}, FileType::CSV);
+    invalid_vertex_info0 = CreateVertexInfo(type, chunk_size, {invalid_pg}, {},
+                                            "test_vertex/", version);
+    REQUIRE(invalid_vertex_info0->IsValidated() == false);
     VertexInfo invalid_vertex_info1("", chunk_size, {pg}, {}, "test_vertex/",
                                     version);
     REQUIRE(invalid_vertex_info1.IsValidated() == false);
@@ -252,17 +273,23 @@ TEST_CASE_METHOD(GlobalFixture, "VertexInfo") {
     std::string expected = R"(chunk_size: 100
 prefix: test_vertex
 property_groups: 
-  - file_type: csv
+  - file_type: parquet
     prefix: p0_p1/
     properties: 
       - data_type: int32
         is_nullable: false
         is_primary: true
         name: p0
-      - data_type: string
+      - cardinality: list
+        data_type: string
         is_nullable: true
         is_primary: false
         name: p1
+      - cardinality: set
+        data_type: string
+        is_nullable: true
+        is_primary: false
+        name: p2
 type: test_vertex
 version: gar/v1
 )";
@@ -280,22 +307,23 @@ version: gar/v1
   }
 
   SECTION("AddPropertyGroup") {
-    auto pg2 = CreatePropertyGroup({Property("p2", int32(), false)},
-                                   FileType::CSV, "p2/");
-    auto maybe_extend_info = vertex_info->AddPropertyGroup(pg2);
+    auto pg3 = CreatePropertyGroup({Property("p3", int32(), false)},
+                                   FileType::CSV, "p3/");
+    auto maybe_extend_info = vertex_info->AddPropertyGroup(pg3);
     REQUIRE(maybe_extend_info.status().ok());
     auto extend_info = maybe_extend_info.value();
     REQUIRE(extend_info->PropertyGroupNum() == 2);
     REQUIRE(extend_info->HasProperty("p2") == true);
-    REQUIRE(extend_info->HasPropertyGroup(pg2) == true);
+    REQUIRE(extend_info->HasPropertyGroup(pg3) == true);
     REQUIRE(extend_info->GetPropertyGroups().size() == 2);
-    REQUIRE(*(extend_info->GetPropertyGroups()[1]) == *pg2);
-    REQUIRE(extend_info->GetPropertyType("p2").value()->ToTypeName() ==
+    REQUIRE(*(extend_info->GetPropertyGroups()[1]) == *pg3);
+    REQUIRE(extend_info->GetPropertyType("p3").value()->ToTypeName() ==
             int32()->ToTypeName());
-    REQUIRE(extend_info->IsPrimaryKey("p2") == false);
-    REQUIRE(extend_info->IsNullableKey("p2") == true);
-    auto extend_info2 = extend_info->AddPropertyGroup(pg2);
-    REQUIRE(!extend_info2.status().ok());
+    REQUIRE(extend_info->IsPrimaryKey("p3") == false);
+    REQUIRE(extend_info->IsNullableKey("p3") == true);
+    REQUIRE(extend_info->GetPropertyCardinality("p3") == Cardinality::SINGLE);
+    auto extend_info3 = extend_info->AddPropertyGroup(pg3);
+    REQUIRE(!extend_info3.status().ok());
   }
 }
 
@@ -388,6 +416,15 @@ TEST_CASE_METHOD(GlobalFixture, "EdgeInfo") {
                        src_chunk_size, dst_chunk_size, directed, {adj_list},
                        {invalid_pg}, "test_edge/", version);
     REQUIRE(invalid_edge_info0->IsValidated() == false);
+    // edgeInfo does not support list/set cardinality
+    invalid_pg = CreatePropertyGroup(
+        {Property("p_cardinality", string(), false, true, Cardinality::LIST)},
+        FileType::PARQUET);
+    auto cardinality_invalid_edge_info0 =
+        CreateEdgeInfo(src_type, edge_type, dst_type, chunk_size,
+                       src_chunk_size, dst_chunk_size, directed, {adj_list},
+                       {invalid_pg}, "test_edge/", version);
+    REQUIRE(invalid_edge_info0->IsValidated() == false);
     for (int i = 0; i < 3; i++) {
       std::vector<std::string> types = {src_type, edge_type, dst_type};
       types[i] = "";
@@ -691,6 +728,13 @@ property_groups:
         is_primary: true
         is_nullable: false
     file_type: parquet
+  - properties:
+      - name: email
+        data_type: string
+        is_primary: false
+        is_nullable: true
+        cardinality: list
+    file_type: parquet
   - properties:
       - name: firstName
         data_type: string
@@ -750,6 +794,10 @@ extra_info:
     REQUIRE(vertex_info->GetChunkSize() == 100);
     REQUIRE(vertex_info->GetPrefix() == "vertex/person/");
     REQUIRE(vertex_info->version()->ToString() == "gar/v1");
+    REQUIRE(vertex_info->GetPropertyCardinality("id").value() ==
+            Cardinality::SINGLE);
+    REQUIRE(vertex_info->GetPropertyCardinality("email").value() ==
+            Cardinality::LIST);
   }
 
   SECTION("EdgeInfo::Load") {
@@ -761,6 +809,36 @@ extra_info:
     REQUIRE(edge_info->GetDstType() == "person");
   }
 
+  edge_info_yaml = R"(src_type: person
+edge_type: knows
+dst_type: person
+chunk_size: 1024
+src_chunk_size: 100
+dst_chunk_size: 100
+directed: false
+prefix: edge/person_knows_person/
+adj_lists:
+  - ordered: false
+    aligned_by: src
+    file_type: parquet
+property_groups:
+  - file_type: parquet
+    properties:
+      - name: creationDate
+        data_type: string
+        is_primary: false
+        is_nullable: true
+        cardinality: list
+version: gar/v1
+)";
+
+  SECTION("EdgeInfo::Load with cardinality") {
+    auto maybe_edge_info = EdgeInfo::Load(edge_info_yaml);
+    REQUIRE(maybe_edge_info.has_error());
+    REQUIRE(maybe_edge_info.status().code() == StatusCode::kYamlError);
+    std::cout << maybe_edge_info.status().message() << std::endl;
+  }
+
   SECTION("GraphInfo::Load") {
     auto maybe_graph_info = GraphInfo::Load(graph_info_yaml, "/");
     std::cout << maybe_graph_info.status().message() << std::endl;
diff --git a/cpp/test/test_multi_label.cc b/cpp/test/test_multi_label.cc
index a77b9ff0..1795b211 100644
--- a/cpp/test/test_multi_label.cc
+++ b/cpp/test/test_multi_label.cc
@@ -17,19 +17,19 @@
  * under the License.
  */
 
-#include <fstream>
+#include <arrow/compute/api.h>
+#include <cstddef>
 #include <iostream>
-#include <unordered_map>
+#include <memory>
+#include <ostream>
+#include <string>
 #include "arrow/api.h"
-#include "arrow/csv/api.h"
-#include "arrow/filesystem/api.h"
-#include "arrow/io/api.h"
-#include "graphar/fwd.h"
+#include "graphar/arrow/chunk_reader.h"
+#include "graphar/arrow/chunk_writer.h"
+#include "graphar/graph_info.h"
 #include "parquet/arrow/writer.h"
 
 #include "./util.h"
-#include "graphar/api/arrow_reader.h"
-#include "graphar/api/high_level_writer.h"
 
 #include <catch2/catch_test_macros.hpp>
 
@@ -79,7 +79,7 @@ TEST_CASE_METHOD(GlobalFixture, "test_multi_label_builder") {
 
   // write arrow table as parquet chunk
   auto maybe_writer =
-      VertexPropertyWriter::Make(vertex_info, test_data_dir + "/ldbc/parquet/");
+      VertexPropertyWriter::Make(vertex_info, "/tmp/ldbc/parquet/");
   REQUIRE(!maybe_writer.has_error());
   auto writer = maybe_writer.value();
   REQUIRE(writer->WriteTable(table, 0).ok());
@@ -87,7 +87,7 @@ TEST_CASE_METHOD(GlobalFixture, "test_multi_label_builder") {
 
   // read label chunk as arrow table
   auto maybe_reader = VertexPropertyArrowChunkReader::Make(
-      graph_info, "organisation", labels, SelectType::LABELS);
+      vertex_info, labels, "/tmp/ldbc/parquet/");
   assert(maybe_reader.status().ok());
   auto reader = maybe_reader.value();
   assert(reader->seek(0).ok());
diff --git a/cpp/test/test_multi_property.cc b/cpp/test/test_multi_property.cc
new file mode 100644
index 00000000..f82fc608
--- /dev/null
+++ b/cpp/test/test_multi_property.cc
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <arrow/compute/api.h>
+#include <cstddef>
+#include <iostream>
+#include <memory>
+#include <ostream>
+#include <string>
+#include "arrow/api.h"
+#include "examples/config.h"
+#include "graphar/arrow/chunk_reader.h"
+#include "graphar/arrow/chunk_writer.h"
+#include "graphar/graph_info.h"
+#include "graphar/types.h"
+#include "parquet/arrow/writer.h"
+
+#include "./util.h"
+
+#include <catch2/catch_test_macros.hpp>
+
+std::shared_ptr<arrow::Table> read_csv_to_table(const std::string& filename) {
+  arrow::csv::ReadOptions read_options{};
+  arrow::csv::ParseOptions parse_options{};
+  arrow::csv::ConvertOptions convert_options{};
+
+  parse_options.delimiter = '|';
+
+  auto input =
+      arrow::io::ReadableFile::Open(filename, arrow::default_memory_pool())
+          .ValueOrDie();
+
+  auto reader = arrow::csv::TableReader::Make(arrow::io::default_io_context(),
+                                              input, read_options,
+                                              parse_options, convert_options)
+                    .ValueOrDie();
+
+  std::shared_ptr<arrow::Table> table;
+  table = reader->Read().ValueOrDie();
+
+  return table;
+}
+
+namespace graphar {
+TEST_CASE_METHOD(GlobalFixture, "read from csv file") {
+  // read the person csv file as an arrow table
+  auto person_table = read_csv_to_table(test_data_dir + "/ldbc/person_0_0.csv");
+  auto seed = static_cast<unsigned int>(time(NULL));
+  int expected_row = rand_r(&seed) % person_table->num_rows();
+  auto person_schema = person_table->schema();
+  arrow::MemoryPool* pool = arrow::default_memory_pool();
+  auto value_builder = std::make_shared<arrow::StringBuilder>();
+  arrow::ListBuilder builder(pool, value_builder);
+  auto email_col_idx = person_table->schema()->GetFieldIndex("emails");
+  std::string expected_emails =
+      std::static_pointer_cast<arrow::StringArray>(
+          person_table->column(email_col_idx)->chunk(0))
+          ->GetString(expected_row);
+  for (int64_t chunk_idx = 0;
+       chunk_idx < person_table->column(email_col_idx)->num_chunks();
+       ++chunk_idx) {
+    auto chunk = person_table->column(email_col_idx)->chunk(chunk_idx);
+    auto email_column = std::static_pointer_cast<arrow::StringArray>(chunk);
+    for (int64_t row = 0; row < email_column->length(); ++row) {
+      auto result = builder.Append();
+      ASSERT(result.ok());
+      if (email_column->IsValid(row)) {
+        std::string emails_string = email_column->GetString(row);
+        auto row_emails = SplitString(emails_string, ';');
+        for (const auto& email : row_emails) {
+          ASSERT(value_builder->Append(email).ok());
+        }
+      }
+    }
+  }
+  std::shared_ptr<arrow::Array> array;
+  builder.Finish(&array);
+  auto person_emails_chunked_array = std::make_shared<arrow::ChunkedArray>(
+      std::vector<std::shared_ptr<arrow::Array>>{array});
+  int emailFieldIndex = person_schema->GetFieldIndex("emails");
+  person_table = person_table->RemoveColumn(emailFieldIndex).ValueOrDie();
+  person_schema = person_schema->RemoveField(emailFieldIndex).ValueOrDie();
+  person_schema =
+      person_schema
+          ->AddField(person_schema->num_fields(),
+                     arrow::field("emails", arrow::list(arrow::utf8())))
+          .ValueOrDie();
+  person_table = person_table
+                     ->AddColumn(person_table->num_columns(),
+                                 person_schema->fields().back(),
+                                 person_emails_chunked_array)
+                     .ValueOrDie();
+  auto index = person_schema->GetFieldIndex("emails");
+  auto emails_col = person_table->column(index)->chunk(0);
+  auto result = std::static_pointer_cast<arrow::ListArray>(
+      emails_col->View(arrow::list(arrow::utf8())).ValueOrDie());
+  auto values = std::static_pointer_cast<arrow::StringArray>(result->values());
+  int64_t start = result->value_offset(expected_row);
+  int64_t end = result->value_offset(expected_row + 1);
+  std::string emails = "";
+  for (int64_t i = start; i < end; ++i) {
+    emails += values->GetString(i);
+    if (i < end - 1)
+      emails += ";";
+  }
+  std::cout << "random row: " << expected_row << std::endl;
+  ASSERT(expected_emails == emails);
+  // write to parquet file
+  std::string path = test_data_dir + "/ldbc/parquet/" + "ldbc.graph.yml";
+  auto graph_info = graphar::GraphInfo::Load(path).value();
+  auto vertex_info = graph_info->GetVertexInfo("person");
+  auto maybe_writer =
+      VertexPropertyWriter::Make(vertex_info, "/tmp/ldbc/parquet/");
+  REQUIRE(!maybe_writer.has_error());
+  auto writer = maybe_writer.value();
+  REQUIRE(writer->WriteTable(person_table, 0).ok());
+  REQUIRE(writer->WriteVerticesNum(person_table->num_rows()).ok());
+
+  auto maybe_reader = VertexPropertyArrowChunkReader::Make(
+      vertex_info, vertex_info->GetPropertyGroup("emails"),
+      "/tmp/ldbc/parquet/");
+  assert(maybe_reader.status().ok());
+  auto reader = maybe_reader.value();
+  assert(reader->seek(expected_row).ok());
+  auto table_result = reader->GetChunk();
+  ASSERT(table_result.status().ok());
+  auto table = table_result.value();
+  index = table->schema()->GetFieldIndex("emails");
+  emails_col = table->column(index)->chunk(0);
+  result = std::static_pointer_cast<arrow::ListArray>(
+      emails_col->View(arrow::list(arrow::large_utf8())).ValueOrDie());
+  expected_row = expected_row % vertex_info->GetChunkSize();
+  auto email_result =
+      std::static_pointer_cast<arrow::LargeStringArray>(result->value_slice(0));
+  emails = "";
+  end = email_result->length();
+  for (int64_t i = 0; i < end; ++i) {
+    emails += email_result->GetString(i);
+    if (i < end - 1)
+      emails += ";";
+  }
+  std::cout << emails << std::endl;
+  ASSERT(expected_emails == emails);
+}
+}  // namespace graphar
diff --git a/docs/specification/implementation-status.md b/docs/specification/implementation-status.md
index 6e634714..6d205ae7 100644
--- a/docs/specification/implementation-status.md
+++ b/docs/specification/implementation-status.md
@@ -117,6 +117,8 @@ Vertex features:
 | tag               |       |       |       |            |
 | chunk based       | ✓     | ✓     | ✓     | ✓          |
 | property group    | ✓     | ✓     | ✓     | ✓          |
+| multi-label       | ✓     |       | ✓     |            |
+| multi-property    | ✓     |       |       |            |
 
 :::note
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@graphar.apache.org
For additional commands, e-mail: commits-h...@graphar.apache.org

Reply via email to