This is an automated email from the ASF dual-hosted git repository.
xiaokang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-graphar.git
The following commit(s) were added to refs/heads/main by this push:
new 8d33604b feat(python): support high level api in python sdk (#801)
8d33604b is described below
commit 8d33604b632fe44adcdb74cd3c3453eca4bd73b4
Author: Xiaokang Yang <[email protected]>
AuthorDate: Thu Nov 6 17:14:03 2025 +0800
feat(python): support high level api in python sdk (#801)
* init python library
* break out the API
* rename cli to python
* try to fix
* try to fix
* architecture build
* bind graphInfo
* add version info and graphInfo test
* update workflow yaml
* try run on macos
* try to use python 3.13
* cancel import test in macos
* architecture build
* format
* format
* format
* build
* add version command
* format
* update workflow yaml
* add license header
* add high level api binding
* static make method
* fix writer
* fix
* fix reader
* format
* add python component
* rename client to cli
* add example
---
python/CMakeLists.txt | 5 +-
python/example/graph_info_example.py | 126 ++++++++++
python/example/high_level_example.py | 127 ++++++++++
python/src/bindings/_core_module.cc | 4 +-
python/src/bindings/high_level_binding.cc | 321 +++++++++++++++++++++++++
python/src/bindings/types_binding.cc | 10 +
python/src/graphar/graph_info.py | 2 -
python/src/graphar/{types.py => high_level.py} | 13 +-
python/src/graphar/types.py | 2 +-
python/test/test_high_level_api.py | 207 ++++++++++++++++
10 files changed, 810 insertions(+), 7 deletions(-)
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 5bb7ccd5..7938fea0 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -45,10 +45,11 @@ else()
endif()
# Add a library using FindPython's tooling (pybind11 also provides a helper
like
# this)
-python_add_library(_core MODULE src/bindings/_core_module.cc
+python_add_library(_core MODULE src/bindings/types_binding.cc
src/bindings/cli_binding.cc
src/bindings/graph_info_binding.cc
- src/bindings/types_binding.cc WITH_SOABI)
+ src/bindings/high_level_binding.cc
+ src/bindings/_core_module.cc WITH_SOABI)
target_link_libraries(_core PRIVATE pybind11::headers graphar
Arrow::arrow_shared
Parquet::parquet_shared
diff --git a/python/example/graph_info_example.py
b/python/example/graph_info_example.py
new file mode 100644
index 00000000..7dd44519
--- /dev/null
+++ b/python/example/graph_info_example.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import sys
+import graphar
+import graphar.types as types
+
+
+def load_graph_info():
+ """
+ Demonstrates how to load graph information from YAML files.
+ """
+ # Get the testing data path
+ test_data_dir = os.environ.get("GAR_TEST_DATA")
+ if not test_data_dir:
+ print("Test data path not set. Please set GAR_TEST_DATA environment
variable.")
+ return
+
+ # Path to the graph YAML file
+ graph_yaml_path = os.path.join(test_data_dir,
"ldbc_sample/parquet/ldbc_sample.graph.yml")
+
+ # Load graph info from YAML
+ try:
+ graph_info = graphar.GraphInfo.load(graph_yaml_path)
+ print(f"Graph name: {graph_info.get_name()}")
+ print(f"Graph prefix: {graph_info.get_prefix()}")
+ return graph_info
+ except Exception as e:
+ print(f"Error loading graph info: {e}")
+ return None
+
+
+def access_vertex_info(graph_info):
+ """
+ Demonstrates how to access vertex information from graph info.
+ """
+ print("\n--- Vertex Info ---")
+ # Get vertex info for "person" type
+ vertex_info = graph_info.get_vertex_info("person")
+ if vertex_info:
+ print(f"Vertex type: {vertex_info.get_type()}")
+ print(f"Chunk size: {vertex_info.get_chunk_size()}")
+ print(f"Vertex prefix: {vertex_info.get_prefix()}")
+
+ # List all properties
+ property_groups = vertex_info.get_property_groups()
+ print("Properties:")
+ for i, pg in enumerate(property_groups):
+ print(f" Group {i}:")
+ for prop in pg.get_properties():
+ print(f" - {prop.name}: {prop.type} (primary:
{prop.is_primary})")
+
+
+def access_edge_info(graph_info):
+ """
+ Demonstrates how to access edge information from graph info.
+ """
+ print("\n--- Edge Info ---")
+ # Get edge info for "person_knows_person" type
+ edge_info = graph_info.get_edge_info("person", "knows", "person")
+ if edge_info:
+ print(f"Edge type: {edge_info.get_edge_type()}")
+ print(f"Source type: {edge_info.get_src_type()}")
+ print(f"Destination type: {edge_info.get_dst_type()}")
+ print(f"Chunk size: {edge_info.get_chunk_size()}")
+ print(f"Directed: {edge_info.is_directed()}")
+
+ # List adjacency list types - we need to check each type individually
+ print("Adjacency list types:")
+ adj_list_types = [
+ types.AdjListType.unordered_by_source,
+ types.AdjListType.ordered_by_source,
+ types.AdjListType.unordered_by_dest,
+ types.AdjListType.ordered_by_dest,
+ ]
+
+ for adj_list_type in adj_list_types:
+ if edge_info.has_adjacent_list_type(adj_list_type):
+ adj_list = edge_info.get_adjacent_list(adj_list_type)
+ print(f" - {adj_list.get_type()}: {adj_list.get_file_type()}")
+
+ # List all properties
+ property_groups = edge_info.get_property_groups()
+ print("Properties:")
+ for i, pg in enumerate(property_groups):
+ print(f" Group {i}:")
+ for prop in pg.get_properties():
+ print(f" - {prop.name}: {prop.type}")
+
+
+def main():
+ print("GraphAr GraphInfo Example")
+ print("=========================")
+
+ # Load graph info
+ graph_info = load_graph_info()
+ if not graph_info:
+ return
+
+ # Access vertex information
+ access_vertex_info(graph_info)
+
+ # Access edge information
+ access_edge_info(graph_info)
+
+ print("\nExample completed successfully!")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/python/example/high_level_example.py
b/python/example/high_level_example.py
new file mode 100644
index 00000000..4e9c36f5
--- /dev/null
+++ b/python/example/high_level_example.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import sys
+import graphar
+import graphar.high_level as gar_api
+import graphar.types as types
+
+
+def load_graph_info():
+ """
+ Load graph information from YAML files.
+ """
+ # Get the testing data path
+ test_data_dir = os.environ.get("GAR_TEST_DATA")
+ if not test_data_dir:
+ print("Test data path not set. Please set GAR_TEST_DATA environment
variable.")
+ return None
+
+ # Path to the graph YAML file
+ graph_yaml_path = os.path.join(test_data_dir,
"ldbc_sample/parquet/ldbc_sample.graph.yml")
+
+ # Load graph info from YAML
+ try:
+ graph_info = graphar.GraphInfo.load(graph_yaml_path)
+ return graph_info
+ except Exception as e:
+ print(f"Error loading graph info: {e}")
+ return None
+
+
+def demonstrate_vertices_collection(graph_info):
+ """
+ Demonstrates how to use VerticesCollection to read vertex data.
+ """
+ print("\n--- Vertices Collection ---")
+
+ # Create vertices collection for "person" type
+ try:
+ vertices = gar_api.VerticesCollection.Make(graph_info, "person")
+ print(f"Total number of vertices: {vertices.size()}")
+
+ # Iterate through vertices and print first 5
+ count = 0
+ for vertex in vertices:
+ if count >= 5:
+ break
+ vertex_id = vertex.id()
+ first_name = vertex.property("firstName")
+ last_name = vertex.property("lastName")
+ print(f"Vertex ID: {vertex_id}, Name: {first_name} {last_name}")
+ count += 1
+
+ if count == 0:
+ print("No vertices found")
+
+ except Exception as e:
+ print(f"Error working with vertices collection: {e}")
+
+
+def demonstrate_edges_collection(graph_info):
+ """
+ Demonstrates how to use EdgesCollection to read edge data.
+ """
+ print("\n--- Edges Collection ---")
+
+ # Create edges collection for "person_knows_person" with ordered_by_source
+ try:
+ edges = gar_api.EdgesCollection.Make(
+ graph_info, "person", "knows", "person",
types.AdjListType.ordered_by_source
+ )
+ print(f"Total number of edges: {edges.size()}")
+
+ # Iterate through edges and print first 5
+ count = 0
+ for edge in edges:
+ if count >= 5:
+ break
+ source_id = edge.source()
+ destination_id = edge.destination()
+ creation_date = edge.property("creationDate")
+ print(f"Edge: {source_id} -> {destination_id}, Creation Date:
{creation_date}")
+ count += 1
+
+ if count == 0:
+ print("No edges found")
+
+ except Exception as e:
+ print(f"Error working with edges collection: {e}")
+
+
+def main():
+ print("GraphAr High-Level API Example")
+ print("==============================")
+
+ # Load graph info
+ graph_info = load_graph_info()
+ if not graph_info:
+ return
+
+ # Demonstrate vertices collection
+ demonstrate_vertices_collection(graph_info)
+
+ # Demonstrate edges collection
+ demonstrate_edges_collection(graph_info)
+
+ print("\nExample completed successfully!")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/python/src/bindings/_core_module.cc
b/python/src/bindings/_core_module.cc
index 50452e24..70d86296 100644
--- a/python/src/bindings/_core_module.cc
+++ b/python/src/bindings/_core_module.cc
@@ -20,8 +20,9 @@
#include <pybind11/pybind11.h>
// Declare binding functions
-extern "C" void bind_graph_info(pybind11::module_& m);
extern "C" void bind_types(pybind11::module_& m);
+extern "C" void bind_graph_info(pybind11::module_& m);
+extern "C" void bind_high_level_api(pybind11::module_& m);
extern "C" void bind_cli(pybind11::module_& m);
PYBIND11_MODULE(_core, m) {
@@ -29,5 +30,6 @@ PYBIND11_MODULE(_core, m) {
bind_types(m);
bind_graph_info(m);
+ bind_high_level_api(m);
bind_cli(m);
}
\ No newline at end of file
diff --git a/python/src/bindings/high_level_binding.cc
b/python/src/bindings/high_level_binding.cc
new file mode 100644
index 00000000..7369c46c
--- /dev/null
+++ b/python/src/bindings/high_level_binding.cc
@@ -0,0 +1,321 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "pybind11/pybind11.h"
+#include "pybind11/stl.h"
+#include "utils/pybind_util.h"
+
+#include "graphar/api/high_level_reader.h"
+#include "graphar/api/high_level_writer.h"
+#include "graphar/graph_info.h"
+#include "graphar/types.h"
+#include "graphar/version_parser.h"
+
+#define STRINGIFY(x) #x
+#define MACRO_STRINGIFY(x) STRINGIFY(x)
+
+namespace py = pybind11;
+
+// Changed from PYBIND11_MODULE to a regular function
+extern "C" void bind_high_level_api(pybind11::module_& m) {
+ // Bind Vertex class
+ auto vertex = py::class_<graphar::Vertex,
std::shared_ptr<graphar::Vertex>>(m, "Vertex");
+ vertex.def("id", &graphar::Vertex::id)
+ .def("property", [](const graphar::Vertex& self, const std::string&
property) {
+ // We need to handle different property types
+ // For now, let's support common types used in examples
+ try {
+ return py::cast(self.property<int64_t>(property).value());
+ } catch (...) {
+ try {
+ return
py::cast(self.property<std::string>(property).value());
+ } catch (...) {
+ throw std::runtime_error("Unsupported property type or
property not found");
+ }
+ }
+ })
+ .def("IsValid", &graphar::Vertex::IsValid);
+
+ // Bind Edge class
+ auto edge = py::class_<graphar::Edge, std::shared_ptr<graphar::Edge>>(m,
"Edge");
+ edge.def("source", &graphar::Edge::source)
+ .def("destination", &graphar::Edge::destination)
+ .def("property", [](const graphar::Edge& self, const std::string&
property) {
+ // We need to handle different property types
+ // For now, let's support common types used in examples
+ try {
+ return py::cast(self.property<std::string>(property).value());
+ } catch (...) {
+ try {
+ return py::cast(self.property<int64_t>(property).value());
+ } catch (...) {
+ throw std::runtime_error("Unsupported property type or
property not found");
+ }
+ }
+ })
+ .def("IsValid", &graphar::Edge::IsValid);
+
+ // Bind VertexIter class
+ auto vertex_iter = py::class_<graphar::VertexIter,
std::shared_ptr<graphar::VertexIter>>(m, "VertexIter");
+ vertex_iter.def("__iter__", [](graphar::VertexIter& it) ->
graphar::VertexIter& { return it; })
+ .def("__next__", [](graphar::VertexIter& it) {
+ // TODO: Implement proper end checking
+ auto vertex = *it;
+ ++it;
+ return vertex;
+ })
+ .def("id", &graphar::VertexIter::id)
+ .def("property", [](graphar::VertexIter& self, const std::string&
property) {
+ // We need to handle different property types
+ // For now, let's support common types used in examples
+ try {
+ return py::cast(self.property<int64_t>(property).value());
+ } catch (...) {
+ try {
+ return
py::cast(self.property<std::string>(property).value());
+ } catch (...) {
+ throw std::runtime_error("Unsupported property type or
property not found");
+ }
+ }
+ });
+
+ // Bind VerticesCollection class
+ auto vertices_collection = py::class_<graphar::VerticesCollection,
+
std::shared_ptr<graphar::VerticesCollection>>(m, "VerticesCollection");
+ vertices_collection.def("__iter__", [](graphar::VerticesCollection& self) {
+ return py::make_iterator(self.begin(), self.end());
+ }, py::keep_alive<0, 1>()) // Keep collection alive while iterator is
used
+ .def("begin", &graphar::VerticesCollection::begin)
+ .def("end", &graphar::VerticesCollection::end)
+ .def("find", &graphar::VerticesCollection::find)
+ .def("size", &graphar::VerticesCollection::size)
+ .def_static("Make", [](const std::shared_ptr<graphar::GraphInfo>&
graph_info, const std::string& type) {
+ auto result = graphar::VerticesCollection::Make(graph_info, type);
+ return ThrowOrReturn(result);
+ });
+
+ // Bind EdgeIter class
+ auto edge_iter = py::class_<graphar::EdgeIter,
std::shared_ptr<graphar::EdgeIter>>(m, "EdgeIter");
+ edge_iter.def("__iter__", [](graphar::EdgeIter& it) -> graphar::EdgeIter&
{ return it; })
+ .def("__next__", [](graphar::EdgeIter& it) {
+ // TODO: Implement proper end checking
+ auto edge = *it;
+ ++it;
+ return edge;
+ })
+ .def("source", &graphar::EdgeIter::source)
+ .def("destination", &graphar::EdgeIter::destination)
+ .def("property", [](graphar::EdgeIter& self, const std::string&
property) {
+ // We need to handle different property types
+ // For now, let's support common types used in examples
+ try {
+ return py::cast(self.property<std::string>(property).value());
+ } catch (...) {
+ try {
+ return py::cast(self.property<int64_t>(property).value());
+ } catch (...) {
+ throw std::runtime_error("Unsupported property type or
property not found");
+ }
+ }
+ });
+
+ // Bind EdgesCollection class
+ auto edges_collection = py::class_<graphar::EdgesCollection,
+
std::shared_ptr<graphar::EdgesCollection>>(m, "EdgesCollection");
+ edges_collection.def("__iter__", [](graphar::EdgesCollection& self) {
+ return py::make_iterator(self.begin(), self.end());
+ }, py::keep_alive<0, 1>()) // Keep collection alive while iterator is
used
+ .def("begin", &graphar::EdgesCollection::begin)
+ .def("end", &graphar::EdgesCollection::end)
+ .def("size", &graphar::EdgesCollection::size)
+ .def("find_src", &graphar::EdgesCollection::find_src)
+ .def("find_dst", &graphar::EdgesCollection::find_dst)
+ .def_static("Make", [](const std::shared_ptr<graphar::GraphInfo>&
graph_info,
+ const std::string& src_type,
+ const std::string& edge_type,
+ const std::string& dst_type,
+ graphar::AdjListType adj_list_type) {
+ auto result = graphar::EdgesCollection::Make(graph_info, src_type,
edge_type, dst_type, adj_list_type);
+ return ThrowOrReturn(result);
+ });
+
+ // Bind builder::Vertex class
+ auto builder_vertex = py::class_<graphar::builder::Vertex,
std::shared_ptr<graphar::builder::Vertex>>(m, "BuilderVertex");
+ builder_vertex.def(py::init<>())
+ .def(py::init<graphar::IdType>())
+ .def("GetId", &graphar::builder::Vertex::GetId)
+ .def("SetId", &graphar::builder::Vertex::SetId)
+ .def("Empty", &graphar::builder::Vertex::Empty)
+ .def("AddProperty", [](graphar::builder::Vertex& self, const
std::string& name, const py::object& val) {
+ // Convert Python object to std::any
+ if (py::isinstance<py::int_>(val)) {
+ self.AddProperty(name, py::cast<int64_t>(val));
+ } else if (py::isinstance<py::str>(val)) {
+ self.AddProperty(name, py::cast<std::string>(val));
+ } else if (py::isinstance<py::list>(val)) {
+ // Handle list properties
+ py::list py_list = val.cast<py::list>();
+ std::vector<std::string> string_list;
+ for (auto item : py_list) {
+ string_list.push_back(py::str(item).cast<std::string>());
+ }
+ self.AddProperty(graphar::Cardinality::LIST, name,
string_list);
+ } else {
+ throw std::runtime_error("Unsupported property type");
+ }
+ })
+ .def("GetProperty", [](const graphar::builder::Vertex& self, const
std::string& property) {
+ const auto& prop = self.GetProperty(property);
+ // Try to cast to common types
+ try {
+ return py::cast(std::any_cast<int64_t>(prop));
+ } catch (...) {
+ try {
+ return py::cast(std::any_cast<std::string>(prop));
+ } catch (...) {
+ throw std::runtime_error("Unsupported property type");
+ }
+ }
+ })
+ .def("GetProperties", &graphar::builder::Vertex::GetProperties)
+ .def("ContainProperty", &graphar::builder::Vertex::ContainProperty);
+
+ //WRITER
+ // Bind WriterOptions class
+ //TODO add csv_option_builder parquet_option_builder orc_option_builder
+ auto writer_options = py::class_<graphar::WriterOptions,
std::shared_ptr<graphar::WriterOptions>>(m, "WriterOptions");
+ // Bind builder::VerticesBuilder class
+ auto vertices_builder = py::class_<graphar::builder::VerticesBuilder,
std::shared_ptr<graphar::builder::VerticesBuilder>>(m, "VerticesBuilder");
+ vertices_builder.def("Clear", &graphar::builder::VerticesBuilder::Clear)
+ .def("SetWriterOptions",
&graphar::builder::VerticesBuilder::SetWriterOptions)
+ .def("GetWriterOptions",
&graphar::builder::VerticesBuilder::GetWriterOptions)
+ .def("SetValidateLevel",
&graphar::builder::VerticesBuilder::SetValidateLevel)
+ .def("GetValidateLevel",
&graphar::builder::VerticesBuilder::GetValidateLevel)
+ .def("AddVertex", [](graphar::builder::VerticesBuilder& self,
graphar::builder::Vertex& v,
+ graphar::IdType index, const
graphar::ValidateLevel& validate_level) {
+ return CheckStatus(self.AddVertex(v, index, validate_level));
+ }, py::arg("v"), py::arg("index") = -1, py::arg("validate_level") =
graphar::ValidateLevel::default_validate)
+ .def("GetNum", &graphar::builder::VerticesBuilder::GetNum)
+ .def("Dump", [](graphar::builder::VerticesBuilder& self) {
+ return CheckStatus(self.Dump());
+ });
+
+ // Static factory methods for VerticesBuilder
+ vertices_builder.def_static("Make", [](const
std::shared_ptr<graphar::VertexInfo>& vertex_info,
+ const std::string& prefix,
+
std::shared_ptr<graphar::WriterOptions> writer_options,
+ graphar::IdType start_vertex_index,
+ const graphar::ValidateLevel&
validate_level) {
+ auto result = graphar::builder::VerticesBuilder::Make(vertex_info,
prefix, writer_options, start_vertex_index, validate_level);
+ return ThrowOrReturn(result);
+ }, py::arg("vertex_info"), py::arg("prefix"), py::arg("writer_options") =
nullptr,
+ py::arg("start_vertex_index") = 0, py::arg("validate_level") =
graphar::ValidateLevel::no_validate);
+
+ vertices_builder.def_static("Make", [](const
std::shared_ptr<graphar::GraphInfo>& graph_info,
+ const std::string& type,
+
std::shared_ptr<graphar::WriterOptions> writer_options,
+ graphar::IdType start_vertex_index,
+ const graphar::ValidateLevel&
validate_level) {
+ auto result = graphar::builder::VerticesBuilder::Make(graph_info,
type, writer_options, start_vertex_index, validate_level);
+ return ThrowOrReturn(result);
+ }, py::arg("graph_info"), py::arg("type"), py::arg("writer_options") =
nullptr,
+ py::arg("start_vertex_index") = 0, py::arg("validate_level") =
graphar::ValidateLevel::no_validate);
+ vertices_builder.def_static("Make", [](const
std::shared_ptr<graphar::VertexInfo>& vertex_info,
+ const std::string& prefix,
+ graphar::IdType start_vertex_index)
{
+ auto result = graphar::builder::VerticesBuilder::Make(vertex_info,
prefix, start_vertex_index);
+ return ThrowOrReturn(result);
+ }, py::arg("vertex_info"), py::arg("prefix"),
py::arg("start_vertex_index") = 0);
+
+ // Bind builder::Edge class
+ auto builder_edge = py::class_<graphar::builder::Edge,
std::shared_ptr<graphar::builder::Edge>>(m, "BuilderEdge");
+ builder_edge.def(py::init<graphar::IdType, graphar::IdType>())
+ .def("Empty", &graphar::builder::Edge::Empty)
+ .def("GetSource", &graphar::builder::Edge::GetSource)
+ .def("GetDestination", &graphar::builder::Edge::GetDestination)
+ .def("AddProperty", [](graphar::builder::Edge& self, const
std::string& name, const py::object& val) {
+ // Convert Python object to std::any
+ if (py::isinstance<py::int_>(val)) {
+ self.AddProperty(name, py::cast<int64_t>(val));
+ } else if (py::isinstance<py::str>(val)) {
+ self.AddProperty(name, py::cast<std::string>(val));
+ } else {
+ throw std::runtime_error("Unsupported property type");
+ }
+ })
+ .def("GetProperty", [](const graphar::builder::Edge& self, const
std::string& property) {
+ const auto& prop = self.GetProperty(property);
+ // Try to cast to common types
+ try {
+ return py::cast(std::any_cast<std::string>(prop));
+ } catch (...) {
+ try {
+ return py::cast(std::any_cast<int64_t>(prop));
+ } catch (...) {
+ throw std::runtime_error("Unsupported property type");
+ }
+ }
+ })
+ .def("GetProperties", &graphar::builder::Edge::GetProperties)
+ .def("ContainProperty", &graphar::builder::Edge::ContainProperty);
+
+ // Bind builder::EdgesBuilder class
+ auto edges_builder = py::class_<graphar::builder::EdgesBuilder,
std::shared_ptr<graphar::builder::EdgesBuilder>>(m, "EdgesBuilder");
+ edges_builder
+ .def("SetValidateLevel",
&graphar::builder::EdgesBuilder::SetValidateLevel)
+ .def("SetWriterOptions",
&graphar::builder::EdgesBuilder::SetWriterOptions)
+ .def("GetWriterOptions",
&graphar::builder::EdgesBuilder::GetWriterOptions)
+ .def("GetValidateLevel",
&graphar::builder::EdgesBuilder::GetValidateLevel)
+ .def("Clear", &graphar::builder::EdgesBuilder::Clear)
+ .def("AddEdge", [](graphar::builder::EdgesBuilder& self,
+ const graphar::builder::Edge& e,
+ const graphar::ValidateLevel& validate_level) {
+ return CheckStatus(self.AddEdge(e, validate_level));
+ }, py::arg("e"), py::arg("validate_level") =
graphar::ValidateLevel::default_validate)
+ .def("GetNum", &graphar::builder::EdgesBuilder::GetNum)
+ .def("Dump", [](graphar::builder::EdgesBuilder& self) {
+ return CheckStatus(self.Dump());
+ });
+
+ // Static factory methods for EdgesBuilder
+ edges_builder.def_static("Make", [](const
std::shared_ptr<graphar::EdgeInfo>& edge_info,
+ const std::string& prefix,
+ graphar::AdjListType adj_list_type,
+ graphar::IdType num_vertices,
+
std::shared_ptr<graphar::WriterOptions> writer_options,
+ const graphar::ValidateLevel&
validate_level) {
+ auto result = graphar::builder::EdgesBuilder::Make(edge_info, prefix,
adj_list_type, num_vertices, writer_options, validate_level);
+ return ThrowOrReturn(result);
+ }, py::arg("edge_info"), py::arg("prefix"), py::arg("adj_list_type"),
py::arg("num_vertices"),
+ py::arg("writer_options") = nullptr, py::arg("validate_level") =
graphar::ValidateLevel::no_validate);
+
+ edges_builder.def_static("Make", [](const
std::shared_ptr<graphar::GraphInfo>& graph_info,
+ const std::string& src_type,
+ const std::string& edge_type,
+ const std::string& dst_type,
+ const graphar::AdjListType&
adj_list_type,
+ graphar::IdType num_vertices,
+
std::shared_ptr<graphar::WriterOptions> writer_options,
+ const graphar::ValidateLevel&
validate_level) {
+ auto result = graphar::builder::EdgesBuilder::Make(graph_info,
src_type, edge_type, dst_type, adj_list_type, num_vertices, writer_options,
validate_level);
+ return ThrowOrReturn(result);
+ }, py::arg("graph_info"), py::arg("src_type"), py::arg("edge_type"),
py::arg("dst_type"),
+ py::arg("adj_list_type"), py::arg("num_vertices"),
py::arg("writer_options") = nullptr,
+ py::arg("validate_level") = graphar::ValidateLevel::no_validate);
+} // bind_high_level_api
\ No newline at end of file
diff --git a/python/src/bindings/types_binding.cc
b/python/src/bindings/types_binding.cc
index 9d39e7f2..8e4aefa5 100644
--- a/python/src/bindings/types_binding.cc
+++ b/python/src/bindings/types_binding.cc
@@ -17,6 +17,7 @@
* under the License.
*/
+#include "graphar/writer_util.h"
#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
@@ -65,4 +66,13 @@ extern "C" void bind_types(pybind11::module_& m) {
.value("LIST", graphar::Cardinality::LIST)
.value("SET", graphar::Cardinality::SET)
.export_values();
+
+ // Bind ValidateLevel enum
+ py::enum_<graphar::ValidateLevel>(m, "ValidateLevel")
+ .value("default_validate", graphar::ValidateLevel::default_validate)
+ .value("no_validate", graphar::ValidateLevel::no_validate)
+ .value("weak_validate", graphar::ValidateLevel::weak_validate)
+ .value("strong_validate", graphar::ValidateLevel::strong_validate)
+ .export_values();
+
} // namespace graphar
\ No newline at end of file
diff --git a/python/src/graphar/graph_info.py b/python/src/graphar/graph_info.py
index 13f8cb1c..bbf750ba 100644
--- a/python/src/graphar/graph_info.py
+++ b/python/src/graphar/graph_info.py
@@ -15,6 +15,4 @@
# specific language governing permissions and limitations
# under the License.
-from .types import Type
-
from ._core import DataType, Property, PropertyGroup, AdjacentList,
VertexInfo, EdgeInfo, GraphInfo
diff --git a/python/src/graphar/types.py b/python/src/graphar/high_level.py
similarity index 80%
copy from python/src/graphar/types.py
copy to python/src/graphar/high_level.py
index 7a1c1b24..06db1a74 100644
--- a/python/src/graphar/types.py
+++ b/python/src/graphar/high_level.py
@@ -15,4 +15,15 @@
# specific language governing permissions and limitations
# under the License.
-from ._core import Type, FileType, AdjListType, Cardinality
+from ._core import (
+ Vertex,
+ Edge,
+ VertexIter,
+ VerticesCollection,
+ EdgeIter,
+ EdgesCollection,
+ BuilderVertex,
+ VerticesBuilder,
+ BuilderEdge,
+ EdgesBuilder,
+)
diff --git a/python/src/graphar/types.py b/python/src/graphar/types.py
index 7a1c1b24..b2c74fda 100644
--- a/python/src/graphar/types.py
+++ b/python/src/graphar/types.py
@@ -15,4 +15,4 @@
# specific language governing permissions and limitations
# under the License.
-from ._core import Type, FileType, AdjListType, Cardinality
+from ._core import ValidateLevel, Type, FileType, AdjListType, Cardinality
diff --git a/python/test/test_high_level_api.py
b/python/test/test_high_level_api.py
new file mode 100644
index 00000000..8ab2bcbb
--- /dev/null
+++ b/python/test/test_high_level_api.py
@@ -0,0 +1,207 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pytest
+import tempfile
+
+from graphar import GraphInfo
+from graphar.types import AdjListType, ValidateLevel
+from graphar.high_level import (
+ VerticesCollection,
+ EdgesCollection,
+ BuilderVertex,
+ VerticesBuilder,
+ BuilderEdge,
+ EdgesBuilder,
+)
+
+
[email protected]
+def ldbc_sample_graph(test_data_root):
+ return test_data_root + "/ldbc_sample/parquet/ldbc_sample.graph.yml"
+
+
[email protected]
+def sample_graph_info(ldbc_sample_graph):
+ return GraphInfo.load(ldbc_sample_graph)
+
+
[email protected]
+def sample_graph_vertex(sample_graph_info):
+ return sample_graph_info.get_vertex_info("person")
+
+
[email protected]
+def sample_graph_edge(sample_graph_info):
+ return sample_graph_info.get_edge_info("person", "knows", "person")
+
+
+def test_vertices_collection(sample_graph_info):
+ """Test vertices collection reading functionality."""
+ # Construct vertices collection
+ type_name = "person"
+ vertices = VerticesCollection.Make(sample_graph_info, type_name)
+ # Use vertices collection
+ count = 0
+ # Iterate through vertices collection
+ for vertex in vertices:
+ count += 1
+ # Access data through vertex
+ assert vertex.id() >= 0
+ # Try to access properties
+ try:
+ vertex.property("id")
+ vertex.property("firstName")
+ except Exception:
+ pass # Properties might not exist in all test data
+
+ # Test size
+ assert count == vertices.size()
+
+
+def test_edges_collection(sample_graph_info):
+ """Test edges collection reading functionality."""
+ # Construct edges collection
+ src_type = "person"
+ edge_type = "knows"
+ dst_type = "person"
+ adj_list_type = AdjListType.ordered_by_source
+
+ edges = EdgesCollection.Make(sample_graph_info, src_type, edge_type,
dst_type, adj_list_type)
+
+ # Use edges collection
+ count = 0
+ # Iterate through edges collection
+ for edge in edges:
+ count += 1
+ # Access data through edge
+ assert edge.source() >= 0
+ assert edge.destination() >= 0
+ # Try to access properties
+ try:
+ edge.property("creationDate")
+ except Exception:
+ pass # Properties might not exist in all test data
+
+ # Test size
+ assert count == edges.size()
+
+
+def test_vertices_builder(sample_graph_vertex):
+ """Test vertices builder functionality."""
+ with tempfile.TemporaryDirectory() as temp_dir:
+ # Construct vertices builder
+ start_index = 0
+ builder = VerticesBuilder.Make(sample_graph_vertex, temp_dir,
start_index)
+
+ # Set validate level
+ builder.SetValidateLevel(ValidateLevel.strong_validate)
+
+ # Prepare vertex data
+ vertex_count = 3
+ property_names = ["id", "firstName", "lastName", "gender"]
+ id_values = [0, 1, 2]
+ firstName_values = ["John", "Jane", "Alice"]
+ lastName_values = ["Smith", "Doe", "Wonderland"]
+ gender_values = ["male", "female", "female"]
+
+ # Add vertices
+ for i in range(vertex_count):
+ v = BuilderVertex()
+ v.AddProperty(property_names[0], id_values[i])
+ v.AddProperty(property_names[1], firstName_values[i])
+ v.AddProperty(property_names[2], lastName_values[i])
+ v.AddProperty(property_names[3], gender_values[i])
+ builder.AddVertex(v)
+
+ # Test vertex count
+ assert builder.GetNum() == vertex_count
+
+ # Dump
+ builder.Dump()
+
+ # Clear vertices
+ builder.Clear()
+ assert builder.GetNum() == 0
+
+
+def test_edges_builder(sample_graph_edge):
+ """Test edges builder functionality."""
+ with tempfile.TemporaryDirectory() as temp_dir:
+ # Construct edges builder
+ vertex_count = 3
+ adj_list_type = AdjListType.ordered_by_dest
+ builder = EdgesBuilder.Make(sample_graph_edge, temp_dir,
adj_list_type, vertex_count)
+
+ # Set validate level
+ builder.SetValidateLevel(ValidateLevel.strong_validate)
+
+ # Prepare edge data
+ edge_count = 4
+ src_values = [1, 0, 0, 2]
+ dst_values = [0, 1, 2, 1]
+ creationDate_values = ["2010-01-01", "2011-01-01", "2012-01-01",
"2013-01-01"]
+
+ # Add edges
+ for i in range(edge_count):
+ e = BuilderEdge(src_values[i], dst_values[i])
+ e.AddProperty("creationDate", creationDate_values[i])
+ builder.AddEdge(e)
+
+ # Test edge count
+ assert builder.GetNum() == edge_count
+
+ # Dump
+ builder.Dump()
+
+ # Clear edges
+ builder.Clear()
+ assert builder.GetNum() == 0
+
+
+def test_vertex_iter_operations(sample_graph_info):
+ """Test vertex iterator operations."""
+ # Construct vertices collection
+ type_name = "person"
+ vertices = VerticesCollection.Make(sample_graph_info, type_name)
+
+ # Test iterator operations
+ it = vertices.begin()
+ if it != vertices.end():
+ # Test accessing vertex id through iterator
+ vertex_id = it.id()
+ assert vertex_id >= 0
+
+
+def test_edge_iter_operations(sample_graph_info):
+ """Test edge iterator operations."""
+ # Construct edges collection
+ src_type = "person"
+ edge_type = "knows"
+ dst_type = "person"
+ adj_list_type = AdjListType.ordered_by_source
+
+ edges = EdgesCollection.Make(sample_graph_info, src_type, edge_type,
dst_type, adj_list_type)
+
+ # Test iterator operations
+ it = edges.begin()
+ if it != edges.end():
+ # Test accessing edge source and destination through iterator
+ source = it.source()
+ destination = it.destination()
+ assert source >= 0
+ assert destination >= 0
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]