This is an automated email from the ASF dual-hosted git repository.
xiaokang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-graphar.git
The following commit(s) were added to refs/heads/main by this push:
new 03b8a322 feat(python): refactor python module(from cli module) (#799)
03b8a322 is described below
commit 03b8a322736dfc56877d0b3c6225c015446f64b3
Author: Xiaokang Yang <[email protected]>
AuthorDate: Thu Nov 6 16:56:42 2025 +0800
feat(python): refactor python module(from cli module) (#799)
* init python library
* break out the API
* rename cli to python
* try to fix
* try to fix
* architecture build
* bind graphInfo
* add version info and graphInfo test
* update workflow yaml
* try run on macos
* try to use python 3.13
* cancel import test in macos
* architecture build
* foramt
* foramt
* format
* build
* add version command
* format
* update workflow yaml
* add license header
* add python component
* rename client to cli
---
.github/ISSUE_TEMPLATE/feature_request.yml | 1 +
.github/workflows/{cli.yml => python.yml} | 69 ++--
.pre-commit-config.yaml | 20 ++
cli/test/merge.py | 103 ------
{cli => python}/.clang-format | 0
{cli => python}/.gitignore | 0
{cli => python}/CMakeLists.txt | 23 +-
python/README.md | 94 +++++
{cli => python}/pyproject.toml | 31 +-
python/src/bindings/_core_module.cc | 33 ++
.../main.cc => python/src/bindings/cli_binding.cc | 8 +-
python/src/bindings/graph_info_binding.cc | 400 +++++++++++++++++++++
{cli/src => python/src/bindings}/importer.h | 2 +-
python/src/bindings/types_binding.cc | 68 ++++
.../src/bindings/utils/import_util.h | 0
python/src/bindings/utils/pybind_util.h | 41 +++
{cli => python/src/cli}/README.md | 6 +-
.../src/graphar_cli => python/src/cli}/__init__.py | 12 +-
.../graphar_cli => python/src/cli}/graphar_cli.py | 85 ++---
.../graphar_cli => python/src/graphar}/__init__.py | 4 +-
.../src/graphar/graph_info.py | 8 +-
.../src/graphar/importer/__init__.py | 8 -
.../src/graphar/importer}/config.py | 0
python/src/graphar/importer/data_import.py | 71 ++++
.../src/graphar/importer}/importer.py | 0
.../graphar_cli => python/src/graphar}/logging.py | 0
.../__init__.py => python/src/graphar/types.py | 6 +-
cli/test/test_basic.py => python/test/conftest.py | 11 +-
python/test/test_graph_info.py | 160 +++++++++
python/test/test_graphar_cli.py | 80 +++++
30 files changed, 1116 insertions(+), 228 deletions(-)
diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml
b/.github/ISSUE_TEMPLATE/feature_request.yml
index 891902dc..59c888a9 100644
--- a/.github/ISSUE_TEMPLATE/feature_request.yml
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -41,6 +41,7 @@ body:
- Java
- Spark
- PySpark
+ - Python
- Continuous Integration
- Developer Tools
- Documentation
diff --git a/.github/workflows/cli.yml b/.github/workflows/python.yml
similarity index 71%
rename from .github/workflows/cli.yml
rename to .github/workflows/python.yml
index a2670767..05c213d1 100644
--- a/.github/workflows/cli.yml
+++ b/.github/workflows/python.yml
@@ -15,36 +15,38 @@
# specific language governing permissions and limitations
# under the License.
-name: GraphAr CLI CI
+name: GraphAr Python CI
on:
# Trigger the workflow on push or pull request,
# but only for the main branch
push:
branches:
- - main
+ - "main"
paths:
- 'cpp/**'
- - 'cli/**'
+ - 'python/**'
- '.github/workflows/ci.yml'
- - '.github/workflows/cli.yml'
+ - '.github/workflows/python.yml'
pull_request:
branches:
- - main
+ - "main"
paths:
- 'cpp/**'
- - 'cli/**'
+ - 'python/**'
- '.github/workflows/ci.yml'
- - '.github/workflows/cli.yml'
+ - '.github/workflows/python.yml'
concurrency:
group: ${{ github.repository }}-${{ github.event.number || github.head_ref
|| github.sha }}-${{ github.workflow }}
cancel-in-progress: true
jobs:
ubuntu:
- name: Ubuntu 22.04 CLI
+ name: Ubuntu 22.04 Python
runs-on: ubuntu-latest
- if: ${{ !contains(github.event.pull_request.title, 'WIP') &&
!github.event.pull_request.draft }}
+ env:
+ GAR_TEST_DATA: ${{ github.workspace }}/graphar-testing/
+
steps:
- uses: actions/checkout@v3
with:
@@ -63,17 +65,25 @@ jobs:
libarrow-acero-dev=17.0.0-1 \
libparquet-dev=17.0.0-1
sudo apt-get install -y ccache libcurl4-openssl-dev
+ git clone https://github.com/apache/incubator-graphar-testing.git
$GAR_TEST_DATA --depth 1
- - name: Install GraphAr CLI and Run Tests
- working-directory: "cli"
+ - name: Install GraphAr Python SDK
+ working-directory: "python"
run: |
pip install ./ -v
+ - name: Run Cli Test
+ working-directory: "python"
+ run: |
graphar --help
graphar check -p ../testing/neo4j/MovieGraph.graph.yml
graphar show -p ../testing/neo4j/MovieGraph.graph.yml -v Person
graphar show -p ../testing/neo4j/MovieGraph.graph.yml -es Person -e
ACTED_IN -ed Movie
graphar import -c ../testing/neo4j/data/import.mini.yml
-# TODO: Add unit tests
+
+ - name: Run pytest
+ working-directory: "python"
+ run: |
+ pytest test -v
- name: Upload coverage reports to Codecov
@@ -82,31 +92,44 @@ jobs:
token: ${{ secrets.CODECOV_TOKEN }}
macos:
- name: macos latest CLI
+ name: macos latest Python
runs-on: macos-latest
- # TODO: Remove this when the macos issue is fixed
- if: false
- strategy:
- fail-fast: false
+ env:
+ GAR_TEST_DATA: ${{ github.workspace }}/graphar-testing/
steps:
- uses: actions/checkout@v3
with:
submodules: true
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.13'
+
- name: Install dependencies
run: |
brew bundle --file=cpp/Brewfile
-
-
- - name: Build GraphAr And Run Tests
- working-directory: "cli"
+
+ git clone https://github.com/apache/incubator-graphar-testing.git
$GAR_TEST_DATA --depth 1
+
+ - name: Install GraphAr Python SDK
+ working-directory: "python"
+ run: |
+ pip install ./ -v
+ - name: Run Cli Test
+ working-directory: "python"
run: |
- pip install ./
graphar --help
+ graphar --version
graphar check -p ../testing/neo4j/MovieGraph.graph.yml
graphar show -p ../testing/neo4j/MovieGraph.graph.yml -v Person
graphar show -p ../testing/neo4j/MovieGraph.graph.yml -es Person -e
ACTED_IN -ed Movie
- graphar import -c ../testing/neo4j/data/import.mini.yml
+
+ - name: Run pytest
+ working-directory: "python"
+ run: |
+ pytest test -v
+
# TODO: Add unit tests
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1a69b525..a5767647 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -22,6 +22,26 @@ repos:
- id: gitleaks
args:
- '--verbose'
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: v0.14.0
+ hooks:
+ - id: ruff-check
+ files: ^python/
+ args: [--output-format, github, --fix]
+ - id: ruff-format
+ - repo: https://github.com/crate-ci/typos
+ rev: v1.38.1
+ hooks:
+ - id: typos
+ files: ^python/
+ args: [--force-exclude]
+ - repo: https://github.com/pre-commit/mirrors-clang-format
+ rev: v21.1.2
+ hooks:
+ - id: clang-format
+ files: ^python/
+ types_or: [c++]
+ args: [--style=file, --verbose]
- repo: local
hooks:
diff --git a/cli/test/merge.py b/cli/test/merge.py
deleted file mode 100644
index 53d7a704..00000000
--- a/cli/test/merge.py
+++ /dev/null
@@ -1,103 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from enum import Enum
-from pathlib import Path
-from typing import List, Optional
-
-import pandas as pd
-import typer
-from typing_extensions import Annotated
-
-app = typer.Typer(no_args_is_help=True, context_settings={"help_option_names":
["-h", "--help"]})
-
-
-support_file_types = {"parquet", "orc", "csv", "json"}
-
-
-class FileType(str, Enum):
- parquet = "parquet"
- csv = "csv"
- orc = "orc"
- json = "json"
-
-
[email protected](
- "merge",
- context_settings={"help_option_names": ["-h", "--help"]},
- help="Merge source files",
- no_args_is_help=True,
-)
-def merge_data(
- files: Annotated[
- List[str], typer.Option("--file", "-f", help="Files to merge",
show_default=False)
- ],
- output_file: Annotated[
- str, typer.Option("--output", "-o", help="Output file",
show_default=False)
- ],
- type: Annotated[
- Optional[FileType], typer.Option("--type", "-t", help="Type of data to
output", show_default=False)
- ] = None,
-):
- if not files:
- typer.echo("No files to merge")
- raise typer.Exit(1)
- if not output_file:
- typer.echo("No output file")
- raise typer.Exit(1)
- data = []
- for file in files:
- path = Path(file)
- if not path.is_file():
- typer.echo(f"File {file} not found")
- raise typer.Exit(1)
- file_type = path.suffix.removeprefix(".")
- if file_type == "":
- typer.echo(f"File {file} has no file type suffix")
- raise typer.Exit(1)
- if file_type not in support_file_types:
- typer.echo(f"File type {file_type} not supported")
- raise typer.Exit(1)
- if file_type == "parquet":
- data.append(pd.read_parquet(file))
- elif file_type == "csv":
- data.append(pd.read_csv(file))
- elif file_type == "orc":
- data.append(pd.read_orc(file))
- elif file_type == "json":
- data.append(pd.read_json(file))
- output_path = Path(output_file)
- if output_path.is_file():
- typer.echo(f"Output file {output_file} already exists")
- if not typer.prompt("Do you want to overwrite it?", default=False):
- raise typer.Exit(1)
- if not type:
- type = output_path.suffix.removeprefix(".")
- result = pd.concat(data, ignore_index=True)
- if type == "parquet":
- result.to_parquet(output_file)
- elif type == "csv":
- result.to_csv(output_file)
- elif type == "orc":
- result.to_orc(output_file)
- elif type == "json":
- result.to_json(output_file, orient="records", lines=True)
- typer.echo(f"Data merged to {output_file}")
-
-
-if __name__ == "__main__":
- app()
diff --git a/cli/.clang-format b/python/.clang-format
similarity index 100%
rename from cli/.clang-format
rename to python/.clang-format
diff --git a/cli/.gitignore b/python/.gitignore
similarity index 100%
rename from cli/.gitignore
rename to python/.gitignore
diff --git a/cli/CMakeLists.txt b/python/CMakeLists.txt
similarity index 75%
rename from cli/CMakeLists.txt
rename to python/CMakeLists.txt
index 21caa61f..5bb7ccd5 100644
--- a/cli/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -45,8 +45,11 @@ else()
endif()
# Add a library using FindPython's tooling (pybind11 also provides a helper
like
# this)
-python_add_library(_core MODULE src/main.cc WITH_SOABI)
-
+python_add_library(_core MODULE src/bindings/_core_module.cc
+ src/bindings/cli_binding.cc
+ src/bindings/graph_info_binding.cc
+ src/bindings/types_binding.cc WITH_SOABI)
+
target_link_libraries(_core PRIVATE pybind11::headers graphar
Arrow::arrow_shared
Parquet::parquet_shared
ArrowDataset::arrow_dataset_shared
@@ -60,6 +63,16 @@ target_include_directories(_core PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/../cpp/thir
target_compile_definitions(_core PRIVATE VERSION_INFO=${PROJECT_VERSION})
# The install directory is the output (wheel) directory
-set_target_properties(_core PROPERTIES INSTALL_RPATH "$ORIGIN")
-install(TARGETS graphar DESTINATION graphar_cli)
-install(TARGETS _core DESTINATION graphar_cli)
+# Use platform-appropriate rpath so the Python extension can find the
+# packaged libgraphar at runtime inside the wheel.
+if(APPLE)
+ # macOS uses @loader_path to find libraries relative to the module
+ set_target_properties(_core PROPERTIES INSTALL_RPATH "@loader_path")
+else()
+ # On Linux and other Unix, use $ORIGIN (escaped so CMake preserves the $)
+ set_target_properties(_core PROPERTIES INSTALL_RPATH "\$ORIGIN")
+endif()
+
+install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src/cli/ DESTINATION cli)
+install(TARGETS graphar DESTINATION graphar)
+install(TARGETS _core DESTINATION graphar)
\ No newline at end of file
diff --git a/python/README.md b/python/README.md
new file mode 100644
index 00000000..78fecbca
--- /dev/null
+++ b/python/README.md
@@ -0,0 +1,94 @@
+# GraphAr Python SDK
+
+The GraphAr Python SDK provides Python bindings for the GraphAr C++ library,
allowing users to work with GraphAr-formatted graph data in Python environments.
It includes both a high-level API for data manipulation and a command-line
interface for common operations.
+
+## Installation
+
+### Prerequisites
+
+- Python >= 3.7
+- pip (latest version recommended)
+- CMake >= 3.15 (for building from source)
+- Apache Arrow >= 12.0 (for building from source)
+
+### Install from Source
+
+Clone the repository and install the Python package:
+
+```bash
+git clone https://github.com/apache/incubator-graphar.git
+cd incubator-graphar
+pip install ./python
+```
+
+For verbose output during installation:
+
+```bash
+pip install -v ./python
+```
+
+### Using Docker (Recommended)
+
+The easiest way to get started is by using our pre-configured Docker
environment:
+
+```bash
+docker run -it ghcr.io/apache/graphar-dev
+```
+
+## Quick Start
+
+### Importing the Package
+
+After installation, you can import the GraphAr Python SDK in your Python
scripts:
+
+```python
+import graphar
+```
+
+### Basic Usage
+
+Loading graph information:
+
+```python
+import graphar
+
+# Load graph info from a YAML file
+graph_info = graphar.graph_info.GraphInfo.load("path/to/graph.yaml")
+
+# Access vertex information
+vertex_info = graph_info.get_vertex_info("person")
+print(f"Vertex type: {vertex_info.get_type()}")
+
+# Access edge information
+edge_info = graph_info.get_edge_info("person", "knows", "person")
+print(f"Edge type: {edge_info.get_edge_type()}")
+```
+
+## Command-Line Interface
+
+GraphAr Python SDK also provides a command-line interface for common
operations such as checking metadata, showing graph information, and importing
data.
+
+For detailed information about the CLI functionality, please see [CLI
Documentation](src/cli/README.md).
+
+## API Documentation
+
+The Python SDK exposes the core GraphAr functionality through several modules:
+
+- `graphar.graph_info`: Main API for working with graph, vertex, and edge
information
+
+## Examples
+> [!NOTE]
+> Python-specific examples are still under development.
+
+You can find various examples in the [examples directory](../cpp/examples/)
which demonstrate usage of the underlying C++ library. These concepts translate
directly to the Python SDK.
+
+## Development
+
+To contribute to the Python SDK, please follow the guidelines in the main
[CONTRIBUTING.md](../CONTRIBUTING.md) file.
+
+## License
+
+**GraphAr** is distributed under [Apache License
+2.0](https://github.com/apache/incubator-graphar/blob/main/LICENSE).
+Please note that third-party libraries may not have the same license as
+GraphAr.
diff --git a/cli/pyproject.toml b/python/pyproject.toml
similarity index 55%
rename from cli/pyproject.toml
rename to python/pyproject.toml
index 636a90b7..fb8b3f38 100644
--- a/cli/pyproject.toml
+++ b/python/pyproject.toml
@@ -1,27 +1,44 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
[build-system]
requires = ["scikit-build-core>=0.3.3", "pybind11", "ninja ~= 1.11"]
build-backend = "scikit_build_core.build"
[project]
-name = "graphar_cli"
-version = "0.0.1"
+name = "graphar"
+version = "0.13.0"
description = "GraphAr command line tool"
readme = "README.md"
authors = [{ name = "GraphAr community", email = "[email protected]" }]
requires-python = ">=3.7"
-dependencies = ["typer ~= 0.1", "pydantic ~= 2.0, < 2.7", "pyyaml ~= 6.0"]
+dependencies = ["typer ~= 0.1", "pydantic ~= 2.0, < 2.12", "pyyaml ~= 6.0",
"pytest ~= 7.2"]
+
[project.optional-dependencies]
test = ["pandas ~= 2.0", "typing_extensions ~= 4.0"]
-[project.scripts]
-graphar = "graphar_cli.graphar_cli:main"
-
-
[tool.scikit-build]
build-dir = "build"
+[project.scripts]
+graphar = "cli.graphar_cli:main"
+
[tool.ruff]
src = ["src"]
line-length = 100
diff --git a/python/src/bindings/_core_module.cc
b/python/src/bindings/_core_module.cc
new file mode 100644
index 00000000..50452e24
--- /dev/null
+++ b/python/src/bindings/_core_module.cc
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <pybind11/pybind11.h>
+
+// Declare binding functions
+extern "C" void bind_graph_info(pybind11::module_& m);
+extern "C" void bind_types(pybind11::module_& m);
+extern "C" void bind_cli(pybind11::module_& m);
+
+PYBIND11_MODULE(_core, m) {
+ m.doc() = "GraphAr core Python bindings";
+
+ bind_types(m);
+ bind_graph_info(m);
+ bind_cli(m);
+}
\ No newline at end of file
diff --git a/cli/src/main.cc b/python/src/bindings/cli_binding.cc
similarity index 97%
rename from cli/src/main.cc
rename to python/src/bindings/cli_binding.cc
index 4a0b0346..0fbc9957 100644
--- a/cli/src/main.cc
+++ b/python/src/bindings/cli_binding.cc
@@ -152,8 +152,10 @@ std::vector<std::vector<std::string>> GetEdgeTypes(const
std::string& path) {
}
namespace py = pybind11;
-PYBIND11_MODULE(_core, m) {
- m.doc() = "GraphAr Python bindings";
+
+// Changed from PYBIND11_MODULE to a regular function
+extern "C" void bind_cli(pybind11::module_& m) {
+ // CLI-level convenience functions
m.def("show_graph", &ShowGraph, "Show the graph info");
m.def("show_vertex", &ShowVertex, "Show the vertex info");
m.def("show_edge", &ShowEdge, "Show the edge info");
@@ -170,4 +172,4 @@ PYBIND11_MODULE(_core, m) {
#else
m.attr("__version__") = "dev";
#endif
-}
+}
\ No newline at end of file
diff --git a/python/src/bindings/graph_info_binding.cc
b/python/src/bindings/graph_info_binding.cc
new file mode 100644
index 00000000..e4475b8f
--- /dev/null
+++ b/python/src/bindings/graph_info_binding.cc
@@ -0,0 +1,400 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "pybind11/pybind11.h"
+#include "pybind11/stl.h"
+#include "utils/pybind_util.h"
+
+#include "graphar/graph_info.h"
+#include "graphar/types.h"
+#include "graphar/version_parser.h"
+
+#define STRINGIFY(x) #x
+#define MACRO_STRINGIFY(x) STRINGIFY(x)
+
+namespace py = pybind11;
+
+// Called from PYBIND11_MODULE(_core, m) to register graph-info classes on m
+extern "C" void bind_graph_info(pybind11::module_& m) {
+ // Minimal binding for DataType so pybind11 recognizes
+ // std::shared_ptr<graphar::DataType> used in Property constructor defaults.
+ py::class_<graphar::DataType, std::shared_ptr<graphar::DataType>>(m,
+ "DataType")
+ .def(py::init<>())
+ .def(py::init<graphar::Type>())
+ .def("id", &graphar::DataType::id)
+ .def("to_type_name", &graphar::DataType::ToTypeName);
+
+ // Bind InfoVersion
+ py::class_<graphar::InfoVersion, std::shared_ptr<graphar::InfoVersion>>(
+ m, "InfoVersion")
+ .def(py::init<>())
+ .def(py::init<int>(), py::arg("version"))
+ .def(py::init<int, const std::vector<std::string>&>(),
py::arg("version"),
+ py::arg("user_define_types"))
+ .def("get_version", &graphar::InfoVersion::version)
+ .def("get_user_define_types", &graphar::InfoVersion::user_define_types,
+ py::return_value_policy::reference_internal)
+ .def("to_string", &graphar::InfoVersion::ToString)
+ .def("check_type", &graphar::InfoVersion::CheckType)
+ .def_static("parse",
+ [](const std::string& str) {
+ return ThrowOrReturn(graphar::InfoVersion::Parse(str));
+ })
+ .def("__eq__",
+ [](const graphar::InfoVersion& self,
+ const graphar::InfoVersion& other) { return self == other; });
+
+ // Bind Property
+ py::class_<graphar::Property>(m, "Property")
+ .def(py::init<>())
+ .def(py::init<const std::string&,
+ const std::shared_ptr<graphar::DataType>&, bool, bool,
+ graphar::Cardinality>(),
+ py::arg("name"), py::arg("type") = nullptr,
+ py::arg("is_primary") = false, py::arg("is_nullable") = true,
+ py::arg("cardinality") = graphar::Cardinality::SINGLE)
+ .def_readwrite("name", &graphar::Property::name)
+ .def_readwrite("type", &graphar::Property::type)
+ .def_readwrite("is_primary", &graphar::Property::is_primary)
+ .def_readwrite("is_nullable", &graphar::Property::is_nullable)
+ .def_readwrite("cardinality", &graphar::Property::cardinality);
+
+ // Bind PropertyGroup
+ py::class_<graphar::PropertyGroup, std::shared_ptr<graphar::PropertyGroup>>(
+ m, "PropertyGroup")
+ .def(py::init<const std::vector<graphar::Property>&, graphar::FileType,
+ const std::string&>(),
+ py::arg("properties"), py::arg("file_type"), py::arg("prefix") = "")
+ .def("get_properties", &graphar::PropertyGroup::GetProperties,
+ py::return_value_policy::reference_internal)
+ .def("has_property", &graphar::PropertyGroup::HasProperty)
+ .def("get_file_type", &graphar::PropertyGroup::GetFileType)
+ .def("get_prefix", &graphar::PropertyGroup::GetPrefix)
+ .def("is_validated", &graphar::PropertyGroup::IsValidated);
+
+ // Bind AdjacentList
+ py::class_<graphar::AdjacentList, std::shared_ptr<graphar::AdjacentList>>(
+ m, "AdjacentList")
+ .def(py::init<graphar::AdjListType, graphar::FileType,
+ const std::string&>(),
+ py::arg("type"), py::arg("file_type"), py::arg("prefix") = "")
+ .def("get_type", &graphar::AdjacentList::GetType)
+ .def("get_file_type", &graphar::AdjacentList::GetFileType)
+ .def("get_prefix", &graphar::AdjacentList::GetPrefix)
+ .def("is_validated", &graphar::AdjacentList::IsValidated);
+
+ // Bind VertexInfo
+ py::class_<graphar::VertexInfo, std::shared_ptr<graphar::VertexInfo>>(
+ m, "VertexInfo")
+ .def(py::init<const std::string&, graphar::IdType,
+ const
std::vector<std::shared_ptr<graphar::PropertyGroup>>&,
+ const std::vector<std::string>&, const std::string&,
+ std::shared_ptr<const graphar::InfoVersion>>(),
+ py::arg("type"), py::arg("chunk_size"), py::arg("property_groups"),
+ py::arg("labels") = std::vector<std::string>(),
+ py::arg("prefix") = "", py::arg("version") = nullptr)
+ .def("add_property_group",
+ [](const graphar::VertexInfo& self,
+ std::shared_ptr<graphar::PropertyGroup> property_group) {
+ return ThrowOrReturn(self.AddPropertyGroup(property_group));
+ })
+ .def("remove_property_group",
+ [](const graphar::VertexInfo& self,
+ std::shared_ptr<graphar::PropertyGroup> property_group) {
+ return ThrowOrReturn(self.RemovePropertyGroup(property_group));
+ })
+ .def("get_type", &graphar::VertexInfo::GetType,
+ py::return_value_policy::reference_internal)
+ .def("get_chunk_size", &graphar::VertexInfo::GetChunkSize)
+ .def("get_prefix", &graphar::VertexInfo::GetPrefix,
+ py::return_value_policy::reference_internal)
+ .def("version", &graphar::VertexInfo::version)
+ .def("get_labels", &graphar::VertexInfo::GetLabels,
+ py::return_value_policy::reference_internal)
+ .def("property_group_num", &graphar::VertexInfo::PropertyGroupNum)
+ .def("get_property_groups", &graphar::VertexInfo::GetPropertyGroups,
+ py::return_value_policy::reference_internal)
+ .def("get_property_group",
+ [](const graphar::VertexInfo& self,
+ const std::string& property_name) {
+ return self.GetPropertyGroup(property_name);
+ })
+ .def("get_property_group_by_index",
+ [](const graphar::VertexInfo& self, int index) {
+ return self.GetPropertyGroupByIndex(index);
+ })
+ .def("get_property_type",
+ [](const graphar::VertexInfo& self,
+ const std::string& property_name) {
+ return ThrowOrReturn(self.GetPropertyType(property_name));
+ })
+ .def("get_property_cardinality",
+ [](const graphar::VertexInfo& self,
+ const std::string& property_name) {
+ return ThrowOrReturn(self.GetPropertyCardinality(property_name));
+ })
+ .def("has_property", &graphar::VertexInfo::HasProperty)
+ .def("save",
+ [](const graphar::VertexInfo& self, const std::string& file_name) {
+ CheckStatus(self.Save(file_name));
+ })
+ .def("dump",
+ [](const graphar::VertexInfo& self) {
+ return ThrowOrReturn(self.Dump());
+ })
+ .def("is_primary_key", &graphar::VertexInfo::IsPrimaryKey)
+ .def("is_nullable_key", &graphar::VertexInfo::IsNullableKey)
+ .def("has_property_group", &graphar::VertexInfo::HasPropertyGroup)
+ .def(
+ "get_file_path",
+ [](const graphar::VertexInfo& self,
+ std::shared_ptr<graphar::PropertyGroup> property_group,
+ graphar::IdType chunk_index) {
+ return ThrowOrReturn(self.GetFilePath(property_group,
chunk_index));
+ })
+ .def("get_path_prefix",
+ [](const graphar::VertexInfo& self,
+ std::shared_ptr<graphar::PropertyGroup> property_group) {
+ return ThrowOrReturn(self.GetPathPrefix(property_group));
+ })
+ .def("get_vertices_num_file_path",
+ [](const graphar::VertexInfo& self) {
+ return ThrowOrReturn(self.GetVerticesNumFilePath());
+ })
+ .def("is_validated", &graphar::VertexInfo::IsValidated);
+
+ // Bind EdgeInfo
+ py::class_<graphar::EdgeInfo, std::shared_ptr<graphar::EdgeInfo>>(m,
+ "EdgeInfo")
+ .def(py::init<const std::string&, const std::string&, const std::string&,
+ graphar::IdType, graphar::IdType, graphar::IdType, bool,
+ const std::vector<std::shared_ptr<graphar::AdjacentList>>&,
+ const
std::vector<std::shared_ptr<graphar::PropertyGroup>>&,
+ const std::string&,
+ std::shared_ptr<const graphar::InfoVersion>>(),
+ py::arg("src_type"), py::arg("edge_type"), py::arg("dst_type"),
+ py::arg("chunk_size"), py::arg("src_chunk_size"),
+ py::arg("dst_chunk_size"), py::arg("directed"),
+ py::arg("adjacent_lists"), py::arg("property_groups"),
+ py::arg("prefix") = "", py::arg("version") = nullptr)
+ .def("add_adjacent_list",
+ [](const graphar::EdgeInfo& self,
+ std::shared_ptr<graphar::AdjacentList> adj_list) {
+ return ThrowOrReturn(self.AddAdjacentList(adj_list));
+ })
+ .def("remove_adjacent_list",
+ [](const graphar::EdgeInfo& self,
+ std::shared_ptr<graphar::AdjacentList> adj_list) {
+ return ThrowOrReturn(self.RemoveAdjacentList(adj_list));
+ })
+ .def("add_property_group",
+ [](const graphar::EdgeInfo& self,
+ std::shared_ptr<graphar::PropertyGroup> property_group) {
+ return ThrowOrReturn(self.AddPropertyGroup(property_group));
+ })
+ .def("remove_property_group",
+ [](const graphar::EdgeInfo& self,
+ std::shared_ptr<graphar::PropertyGroup> property_group) {
+ return ThrowOrReturn(self.RemovePropertyGroup(property_group));
+ })
+ .def("get_src_type", &graphar::EdgeInfo::GetSrcType,
+ py::return_value_policy::reference_internal)
+ .def("get_edge_type", &graphar::EdgeInfo::GetEdgeType,
+ py::return_value_policy::reference_internal)
+ .def("get_dst_type", &graphar::EdgeInfo::GetDstType,
+ py::return_value_policy::reference_internal)
+ .def("get_chunk_size", &graphar::EdgeInfo::GetChunkSize)
+ .def("get_src_chunk_size", &graphar::EdgeInfo::GetSrcChunkSize)
+ .def("get_dst_chunk_size", &graphar::EdgeInfo::GetDstChunkSize)
+ .def("get_prefix", &graphar::EdgeInfo::GetPrefix,
+ py::return_value_policy::reference_internal)
+ .def("is_directed", &graphar::EdgeInfo::IsDirected)
+ .def("version", &graphar::EdgeInfo::version)
+ .def("has_adjacent_list_type", &graphar::EdgeInfo::HasAdjacentListType)
+ .def("has_property", &graphar::EdgeInfo::HasProperty)
+ .def("has_property_group", &graphar::EdgeInfo::HasPropertyGroup)
+ .def("get_adjacent_list", &graphar::EdgeInfo::GetAdjacentList)
+ .def("property_group_num", &graphar::EdgeInfo::PropertyGroupNum)
+ .def("get_property_groups", &graphar::EdgeInfo::GetPropertyGroups,
+ py::return_value_policy::reference_internal)
+ .def("get_property_group",
+ [](const graphar::EdgeInfo& self, const std::string& property) {
+ return self.GetPropertyGroup(property);
+ })
+ .def("get_property_group_by_index",
+ [](const graphar::EdgeInfo& self, int index) {
+ return self.GetPropertyGroupByIndex(index);
+ })
+ .def("get_vertices_num_file_path",
+ [](const graphar::EdgeInfo& self,
+ graphar::AdjListType adj_list_type) {
+ return ThrowOrReturn(self.GetVerticesNumFilePath(adj_list_type));
+ })
+ .def("get_edges_num_file_path",
+ [](const graphar::EdgeInfo& self, graphar::IdType
vertex_chunk_index,
+ graphar::AdjListType adj_list_type) {
+ return ThrowOrReturn(
+ self.GetEdgesNumFilePath(vertex_chunk_index, adj_list_type));
+ })
+ .def("get_adj_list_file_path",
+ [](const graphar::EdgeInfo& self, graphar::IdType
vertex_chunk_index,
+ graphar::IdType edge_chunk_index,
+ graphar::AdjListType adj_list_type) {
+ return ThrowOrReturn(self.GetAdjListFilePath(
+ vertex_chunk_index, edge_chunk_index, adj_list_type));
+ })
+ .def("get_adj_list_path_prefix",
+ [](const graphar::EdgeInfo& self,
+ graphar::AdjListType adj_list_type) {
+ return ThrowOrReturn(self.GetAdjListPathPrefix(adj_list_type));
+ })
+ .def("get_adj_list_offset_file_path",
+ [](const graphar::EdgeInfo& self, graphar::IdType
vertex_chunk_index,
+ graphar::AdjListType adj_list_type) {
+ return ThrowOrReturn(self.GetAdjListOffsetFilePath(
+ vertex_chunk_index, adj_list_type));
+ })
+ .def("get_offset_path_prefix",
+ [](const graphar::EdgeInfo& self,
+ graphar::AdjListType adj_list_type) {
+ return ThrowOrReturn(self.GetOffsetPathPrefix(adj_list_type));
+ })
+ .def("get_property_file_path",
+ [](const graphar::EdgeInfo& self,
+ const std::shared_ptr<graphar::PropertyGroup>& property_group,
+ graphar::AdjListType adj_list_type,
+ graphar::IdType vertex_chunk_index,
+ graphar::IdType edge_chunk_index) {
+ return ThrowOrReturn(self.GetPropertyFilePath(
+ property_group, adj_list_type, vertex_chunk_index,
+ edge_chunk_index));
+ })
+ .def("get_property_group_path_prefix",
+ [](const graphar::EdgeInfo& self,
+ const std::shared_ptr<graphar::PropertyGroup>& property_group,
+ graphar::AdjListType adj_list_type) {
+ return ThrowOrReturn(self.GetPropertyGroupPathPrefix(
+ property_group, adj_list_type));
+ })
+ .def("get_property_type",
+ [](const graphar::EdgeInfo& self, const std::string& property_name)
{
+ return ThrowOrReturn(self.GetPropertyType(property_name));
+ })
+ .def("is_primary_key", &graphar::EdgeInfo::IsPrimaryKey)
+ .def("is_nullable_key", &graphar::EdgeInfo::IsNullableKey)
+ .def("save",
+ [](const graphar::EdgeInfo& self, const std::string& file_name) {
+ CheckStatus(self.Save(file_name));
+ })
+ .def("dump",
+ [](const graphar::EdgeInfo& self) {
+ return ThrowOrReturn(self.Dump());
+ })
+ .def("is_validated", &graphar::EdgeInfo::IsValidated);
+
+ // Bind GraphInfo (held by std::shared_ptr); failed Result/Status values are raised as Python ValueError
+ py::class_<graphar::GraphInfo, std::shared_ptr<graphar::GraphInfo>>(
+ m, "GraphInfo")
+ .def(py::init<const std::string&,
+ const std::vector<std::shared_ptr<graphar::VertexInfo>>&,
+ const std::vector<std::shared_ptr<graphar::EdgeInfo>>&,
+ const std::vector<std::string>&, const std::string&,
+ std::shared_ptr<const graphar::InfoVersion>,
+ const std::unordered_map<std::string, std::string>&>(),
+ py::arg("graph_name"), py::arg("vertex_infos"),
+ py::arg("edge_infos"),  // the keyword arguments after this one carry defaults
+ py::arg("labels") = std::vector<std::string>(),
+ py::arg("prefix") = "./", py::arg("version") = nullptr,
+ py::arg("extra_info") =
+ std::unordered_map<std::string, std::string>())
+ .def_static("load",  // overload 1: a single path argument
+ [](const std::string& path) {
+ return ThrowOrReturn(graphar::GraphInfo::Load(path));
+ })
+ .def_static(
+ "load",  // overload 2: (input, relative_path)
+ [](const std::string& input, const std::string& relative_path) {
+ return ThrowOrReturn(
+ graphar::GraphInfo::Load(input, relative_path));
+ })
+ .def("add_vertex",  // self is const: the AddVertex result is returned to the caller
+ [](const graphar::GraphInfo& self,
+ std::shared_ptr<graphar::VertexInfo> vertex_info) {
+ return ThrowOrReturn(self.AddVertex(vertex_info));
+ })
+ .def("remove_vertex",  // same pattern: returns the unwrapped RemoveVertex result
+ [](const graphar::GraphInfo& self,
+ std::shared_ptr<graphar::VertexInfo> vertex_info) {
+ return ThrowOrReturn(self.RemoveVertex(vertex_info));
+ })
+ .def("add_edge",  // same pattern: returns the unwrapped AddEdge result
+ [](const graphar::GraphInfo& self,
+ std::shared_ptr<graphar::EdgeInfo> edge_info) {
+ return ThrowOrReturn(self.AddEdge(edge_info));
+ })
+ .def("remove_edge",  // same pattern: returns the unwrapped RemoveEdge result
+ [](const graphar::GraphInfo& self,
+ std::shared_ptr<graphar::EdgeInfo> edge_info) {
+ return ThrowOrReturn(self.RemoveEdge(edge_info));
+ })
+ .def("get_name", &graphar::GraphInfo::GetName,
+ py::return_value_policy::reference_internal)
+ .def("get_labels", &graphar::GraphInfo::GetLabels,
+ py::return_value_policy::reference_internal)
+ .def("get_prefix", &graphar::GraphInfo::GetPrefix,
+ py::return_value_policy::reference_internal)
+ .def("version", &graphar::GraphInfo::version)
+ .def("get_extra_info", &graphar::GraphInfo::GetExtraInfo,
+ py::return_value_policy::reference_internal)
+ .def("get_vertex_info",  // lookup by vertex type name; result is returned without ThrowOrReturn
+ [](const graphar::GraphInfo& self, const std::string& type) {
+ return self.GetVertexInfo(type);
+ })
+ .def("get_edge_info",  // lookup by (src_type, edge_type, dst_type); returned without ThrowOrReturn
+ [](const graphar::GraphInfo& self, const std::string& src_type,
+ const std::string& edge_type, const std::string& dst_type) {
+ return self.GetEdgeInfo(src_type, edge_type, dst_type);
+ })
+ .def("get_vertex_info_index", &graphar::GraphInfo::GetVertexInfoIndex)
+ .def("get_edge_info_index", &graphar::GraphInfo::GetEdgeInfoIndex)
+ .def("vertex_info_num", &graphar::GraphInfo::VertexInfoNum)
+ .def("edge_info_num", &graphar::GraphInfo::EdgeInfoNum)
+ .def("get_vertex_info_by_index",  // NOTE(review): index is a plain int; range checking, if any, is in GetVertexInfoByIndex
+ [](const graphar::GraphInfo& self, int index) {
+ return self.GetVertexInfoByIndex(index);
+ })
+ .def("get_edge_info_by_index",  // NOTE(review): index is a plain int; range checking, if any, is in GetEdgeInfoByIndex
+ [](const graphar::GraphInfo& self, int index) {
+ return self.GetEdgeInfoByIndex(index);
+ })
+ .def("get_vertex_infos", &graphar::GraphInfo::GetVertexInfos,
+ py::return_value_policy::reference_internal)
+ .def("get_edge_infos", &graphar::GraphInfo::GetEdgeInfos,
+ py::return_value_policy::reference_internal)
+ .def("save",  // returns None; a non-ok Status from Save raises ValueError
+ [](const graphar::GraphInfo& self, const std::string& path) {
+ CheckStatus(self.Save(path));
+ })
+ .def("dump",  // returns the unwrapped Dump() result
+ [](const graphar::GraphInfo& self) {
+ return ThrowOrReturn(self.Dump());
+ })
+ .def("is_validated", &graphar::GraphInfo::IsValidated);
+} // namespace graphar
\ No newline at end of file
diff --git a/cli/src/importer.h b/python/src/bindings/importer.h
similarity index 99%
rename from cli/src/importer.h
rename to python/src/bindings/importer.h
index 11e4a6bd..dc1abf1a 100644
--- a/cli/src/importer.h
+++ b/python/src/bindings/importer.h
@@ -30,7 +30,7 @@
#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
-#include "util.h"
+#include "utils/import_util.h"
namespace py = pybind11;
namespace fs = std::filesystem;
diff --git a/python/src/bindings/types_binding.cc
b/python/src/bindings/types_binding.cc
new file mode 100644
index 00000000..9d39e7f2
--- /dev/null
+++ b/python/src/bindings/types_binding.cc
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "pybind11/pybind11.h"
+#include "pybind11/stl.h"
+
+#include "graphar/types.h"
+
+#define STRINGIFY(x) #x
+#define MACRO_STRINGIFY(x) STRINGIFY(x)
+
+namespace py = pybind11;
+
+// Regular function (changed from PYBIND11_MODULE) that registers the enum types below on module m
+extern "C" void bind_types(pybind11::module_& m) {
+ // Bind Type enum
+ py::enum_<graphar::Type>(m, "Type")
+ .value("BOOL", graphar::Type::BOOL)
+ .value("INT32", graphar::Type::INT32)
+ .value("INT64", graphar::Type::INT64)
+ .value("FLOAT", graphar::Type::FLOAT)
+ .value("DOUBLE", graphar::Type::DOUBLE)
+ .value("STRING", graphar::Type::STRING)
+ .value("LIST", graphar::Type::LIST)  // NOTE(review): "LIST" is also exported by Cardinality below
+ .value("DATE", graphar::Type::DATE)
+ .value("TIMESTAMP", graphar::Type::TIMESTAMP)
+ .value("USER_DEFINED", graphar::Type::USER_DEFINED)
+ .export_values();  // also publishes each value as an attribute of m itself
+
+ // Bind FileType enum
+ py::enum_<graphar::FileType>(m, "FileType")
+ .value("CSV", graphar::FileType::CSV)
+ .value("PARQUET", graphar::FileType::PARQUET)
+ .value("ORC", graphar::FileType::ORC)
+ .value("JSON", graphar::FileType::JSON)
+ .export_values();
+
+ // Bind AdjListType enum
+ py::enum_<graphar::AdjListType>(m, "AdjListType")
+ .value("unordered_by_source", graphar::AdjListType::unordered_by_source)
+ .value("unordered_by_dest", graphar::AdjListType::unordered_by_dest)
+ .value("ordered_by_source", graphar::AdjListType::ordered_by_source)
+ .value("ordered_by_dest", graphar::AdjListType::ordered_by_dest)
+ .export_values();
+
+ // Bind Cardinality enum
+ py::enum_<graphar::Cardinality>(m, "Cardinality")
+ .value("SINGLE", graphar::Cardinality::SINGLE)
+ .value("LIST", graphar::Cardinality::LIST)  // NOTE(review): same exported name as Type::LIST; module-level LIST presumably ends up as this one - confirm
+ .value("SET", graphar::Cardinality::SET)
+ .export_values();
+} // bind_types
\ No newline at end of file
diff --git a/cli/src/util.h b/python/src/bindings/utils/import_util.h
similarity index 100%
rename from cli/src/util.h
rename to python/src/bindings/utils/import_util.h
diff --git a/python/src/bindings/utils/pybind_util.h
b/python/src/bindings/utils/pybind_util.h
new file mode 100644
index 00000000..6728900e
--- /dev/null
+++ b/python/src/bindings/utils/pybind_util.h
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <pybind11/pybind11.h>
+#include "graphar/fwd.h"
+
+// Raises a Python ValueError carrying the status message when `status` is not
+// ok; returns normally otherwise.
+inline void CheckStatus(const graphar::Status& status) {
+  if (status.ok()) {
+    return;
+  }
+  PyErr_SetString(PyExc_ValueError, status.message().c_str());
+  throw pybind11::error_already_set();
+}
+
+// Unwraps a graphar::Result: returns the held value by value, or raises a
+// Python ValueError with the status message when the result holds an error.
+template <typename T>
+T ThrowOrReturn(const graphar::Result<T>& result) {
+  if (!result.has_error()) {
+    return result.value();
+  }
+  // TODO(yxk) handle different error type
+  PyErr_SetString(PyExc_ValueError, result.status().message().c_str());
+  throw pybind11::error_already_set();
+}
\ No newline at end of file
diff --git a/cli/README.md b/python/src/cli/README.md
similarity index 92%
rename from cli/README.md
rename to python/src/cli/README.md
index 0f2ac54f..22e68b1d 100644
--- a/cli/README.md
+++ b/python/src/cli/README.md
@@ -1,6 +1,6 @@
-# GraphAr Cli
+# GraphAr Python CLI
-GraphAr Cli uses [pybind11][] and [scikit-build-core][] to bind C++ code into
Python and build command line tools through Python. Command line tools
developed using [typer][].
+GraphAr Python CLI uses [pybind11][] and [scikit-build-core][] to bind C++ code into Python and build command line tools through Python. The command line tools are developed using [typer][].
[pybind11]: https://pybind11.readthedocs.io
[scikit-build-core]: https://scikit-build-core.readthedocs.io
@@ -22,7 +22,7 @@ And using Python in conda or venv is a good choice.
## Installation
- Clone this repository
-- `pip install ./cli` or set verbose level `pip install -v ./cli`
+- `pip install ./python` or set verbose level `pip install -v ./python`
## Usage
diff --git a/cli/src/graphar_cli/__init__.py b/python/src/cli/__init__.py
similarity index 77%
copy from cli/src/graphar_cli/__init__.py
copy to python/src/cli/__init__.py
index e8091abd..09f891db 100644
--- a/cli/src/graphar_cli/__init__.py
+++ b/python/src/cli/__init__.py
@@ -15,8 +15,14 @@
# specific language governing permissions and limitations
# under the License.
-from __future__ import annotations
+"""CLI module for GraphAr."""
-from ._core import __doc__, __version__
+try:
+ from importlib import metadata
+except ImportError:
+ import importlib_metadata as metadata
-__all__ = ["__doc__", "__version__"]
+try:
+ __version__ = metadata.version("graphar")
+except Exception:
+ __version__ = "unknown"
diff --git a/cli/src/graphar_cli/graphar_cli.py b/python/src/cli/graphar_cli.py
similarity index 65%
rename from cli/src/graphar_cli/graphar_cli.py
rename to python/src/cli/graphar_cli.py
index 1c5be2e6..c7c1fc62 100644
--- a/cli/src/graphar_cli/graphar_cli.py
+++ b/python/src/cli/graphar_cli.py
@@ -17,16 +17,11 @@
from logging import getLogger
from pathlib import Path
-from typing import List
+from typing import List, Optional
import typer
-import yaml
-from ._core import ( # type: ignore # noqa: PGH003
- check_edge,
- check_graph,
- check_vertex,
- do_import,
+from graphar._core import (
get_edge_count,
get_edge_types,
get_vertex_count,
@@ -35,9 +30,11 @@ from ._core import ( # type: ignore # noqa: PGH003
show_graph,
show_vertex,
)
-from .config import ImportConfig
-from .importer import validate
-from .logging import setup_logging
+from graphar.logging import setup_logging
+
+from graphar.importer import data_import
+
+from . import __version__
app = typer.Typer(
help="GraphAr Cli",
@@ -47,7 +44,21 @@ app = typer.Typer(
)
setup_logging()
-logger = getLogger(__name__)
+logger = getLogger("graphar_cli")
+
+
[email protected](invoke_without_command=True)
+def _callback(
+ ctx: typer.Context,
+ version: Optional[bool] = typer.Option(
+ False, "--version", "-v", help="Show GraphAr version and exit",
is_eager=True
+ ),
+):
+ """Top-level callback to support global options like --version."""
+ if version:
+ # Print version and exit immediately
+ typer.echo(f"GraphAr CLI Version: {__version__}")
+ raise typer.Exit()
@app.command(
@@ -109,33 +120,12 @@ def show(
def check(
path: str = typer.Option(None, "--path", "-p", help="Path to the GraphAr
config file"),
):
- if not Path(path).exists():
- logger.error("File not found: %s", path)
- raise typer.Exit(1)
- path = Path(path).resolve() if Path(path).is_absolute() else
Path(Path.cwd(), path).resolve()
- path = str(path)
- vertex_types = get_vertex_types(path)
- for vertex_type in vertex_types:
- if not check_vertex(path, vertex_type):
- logger.error("Vertex type %s is not valid", vertex_type)
- raise typer.Exit(1)
- edge_types = get_edge_types(path)
- for edge_type in edge_types:
- if edge_type[0] not in vertex_types:
- logger.error("Source vertex type %s not found in the graph",
edge_type[0])
- raise typer.Exit(1)
- if edge_type[2] not in vertex_types:
- logger.error("Destination vertex type %s not found in the graph",
edge_type[2])
- raise typer.Exit(1)
- if not check_edge(path, edge_type[0], edge_type[1], edge_type[2]):
- logger.error(
- "Edge type %s_%s_%s is not valid", edge_type[0], edge_type[1],
edge_type[2]
- )
- raise typer.Exit(1)
- if not check_graph(path):
- logger.error("Graph is not valid")
+ try:
+ result_str = data_import.check(path)
+ except Exception as e:
+ logger.error(e)
raise typer.Exit(1)
- logger.info("Graph is valid")
+ logger.info(result_str)
@app.command(
@@ -147,25 +137,12 @@ def check(
def import_data(
config_file: str = typer.Option(None, "--config", "-c", help="Path of the
GraphAr config file"),
):
- if not Path(config_file).is_file():
- logger.error("File not found: %s", config_file)
- raise typer.Exit(1)
-
try:
- with Path(config_file).open(encoding="utf-8") as file:
- config = yaml.safe_load(file)
- import_config = ImportConfig(**config)
- validate(import_config)
+ result_str = data_import.import_data(config_file)
except Exception as e:
- logger.error("Invalid config: %s", e)
- raise typer.Exit(1) from None
- try:
- logger.info("Starting import")
- res = do_import(import_config.model_dump())
- logger.info(res)
- except Exception as e:
- logger.error("Import failed: %s", e)
- raise typer.Exit(1) from None
+ logger.error(e)
+ raise typer.Exit(1)
+ logger.info(result_str)
def main() -> None:
diff --git a/cli/src/graphar_cli/__init__.py b/python/src/graphar/__init__.py
similarity index 83%
copy from cli/src/graphar_cli/__init__.py
copy to python/src/graphar/__init__.py
index e8091abd..bd438bff 100644
--- a/cli/src/graphar_cli/__init__.py
+++ b/python/src/graphar/__init__.py
@@ -17,6 +17,6 @@
from __future__ import annotations
-from ._core import __doc__, __version__
+from .graph_info import GraphInfo, VertexInfo, EdgeInfo, PropertyGroup,
Property
-__all__ = ["__doc__", "__version__"]
+__all__ = ["GraphInfo", "VertexInfo", "EdgeInfo", "PropertyGroup", "Property"]
diff --git a/cli/test/test_basic.py b/python/src/graphar/graph_info.py
similarity index 86%
copy from cli/test/test_basic.py
copy to python/src/graphar/graph_info.py
index 44afa889..13f8cb1c 100644
--- a/cli/test/test_basic.py
+++ b/python/src/graphar/graph_info.py
@@ -15,10 +15,6 @@
# specific language governing permissions and limitations
# under the License.
-from __future__ import annotations
+from .types import Type
-import graphar_cli as m
-
-
-def test_version():
- assert m.__version__ == "0.0.1"
+from ._core import DataType, Property, PropertyGroup, AdjacentList,
VertexInfo, EdgeInfo, GraphInfo
diff --git a/cli/test/test_basic.py b/python/src/graphar/importer/__init__.py
similarity index 86%
copy from cli/test/test_basic.py
copy to python/src/graphar/importer/__init__.py
index 44afa889..13a83393 100644
--- a/cli/test/test_basic.py
+++ b/python/src/graphar/importer/__init__.py
@@ -14,11 +14,3 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
-from __future__ import annotations
-
-import graphar_cli as m
-
-
-def test_version():
- assert m.__version__ == "0.0.1"
diff --git a/cli/src/graphar_cli/config.py
b/python/src/graphar/importer/config.py
similarity index 100%
rename from cli/src/graphar_cli/config.py
rename to python/src/graphar/importer/config.py
diff --git a/python/src/graphar/importer/data_import.py
b/python/src/graphar/importer/data_import.py
new file mode 100644
index 00000000..d0b1e60e
--- /dev/null
+++ b/python/src/graphar/importer/data_import.py
@@ -0,0 +1,71 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pathlib import Path
+
+import yaml
+
+from .._core import (
+ check_edge,
+ check_graph,
+ check_vertex,
+ do_import,
+ get_edge_types,
+ get_vertex_types,
+)
+from .config import ImportConfig
+from .importer import validate
+
+
+def check(path: str):
+ if not Path(path).exists():
+ raise ValueError(f"File not found: {path}")
+ path = Path(path).resolve() if Path(path).is_absolute() else
Path(Path.cwd(), path).resolve()
+ path = str(path)
+ vertex_types = get_vertex_types(path)
+ for vertex_type in vertex_types:
+ if not check_vertex(path, vertex_type):
+ raise ValueError(f"Vertex type {vertex_type} is not valid")
+ edge_types = get_edge_types(path)
+ for edge_type in edge_types:
+ if edge_type[0] not in vertex_types:
+ raise ValueError(f"Source vertex type {edge_type[0]} not found in
the graph")
+ if edge_type[2] not in vertex_types:
+ raise ValueError(f"Destination vertex type {edge_type[2]} not
found in the graph")
+ if not check_edge(path, edge_type[0], edge_type[1], edge_type[2]):
+ raise ValueError(f"Edge type
{edge_type[0]}_{edge_type[1]}_{edge_type[2]} is not valid")
+ if not check_graph(path):
+ raise ValueError("Graph is not valid")
+ return "Graph is valid"
+
+
+def import_data(config_file: str):
+ if not Path(config_file).is_file():
+ raise ValueError(f"File not found: {config_file}")
+
+ try:
+ with Path(config_file).open(encoding="utf-8") as file:
+ config = yaml.safe_load(file)
+ import_config = ImportConfig(**config)
+ validate(import_config)
+ except Exception as e:
+ raise ValueError(f"Invalid config: {e}")
+ try:
+ res = do_import(import_config.model_dump())
+ except Exception as e:
+ raise ValueError(f"Import failed: {e}")
+ return res
diff --git a/cli/src/graphar_cli/importer.py
b/python/src/graphar/importer/importer.py
similarity index 100%
rename from cli/src/graphar_cli/importer.py
rename to python/src/graphar/importer/importer.py
diff --git a/cli/src/graphar_cli/logging.py b/python/src/graphar/logging.py
similarity index 100%
rename from cli/src/graphar_cli/logging.py
rename to python/src/graphar/logging.py
diff --git a/cli/src/graphar_cli/__init__.py b/python/src/graphar/types.py
similarity index 87%
rename from cli/src/graphar_cli/__init__.py
rename to python/src/graphar/types.py
index e8091abd..7a1c1b24 100644
--- a/cli/src/graphar_cli/__init__.py
+++ b/python/src/graphar/types.py
@@ -15,8 +15,4 @@
# specific language governing permissions and limitations
# under the License.
-from __future__ import annotations
-
-from ._core import __doc__, __version__
-
-__all__ = ["__doc__", "__version__"]
+from ._core import Type, FileType, AdjListType, Cardinality
diff --git a/cli/test/test_basic.py b/python/test/conftest.py
similarity index 83%
rename from cli/test/test_basic.py
rename to python/test/conftest.py
index 44afa889..9b62460a 100644
--- a/cli/test/test_basic.py
+++ b/python/test/conftest.py
@@ -15,10 +15,11 @@
# specific language governing permissions and limitations
# under the License.
-from __future__ import annotations
+import os
+import pytest
-import graphar_cli as m
-
-def test_version():
- assert m.__version__ == "0.0.1"
[email protected]
+def test_data_root():
+ test_data_root = os.environ.get("GAR_TEST_DATA", "../../")
+ return test_data_root
diff --git a/python/test/test_graph_info.py b/python/test/test_graph_info.py
new file mode 100644
index 00000000..095a22ea
--- /dev/null
+++ b/python/test/test_graph_info.py
@@ -0,0 +1,160 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pytest
+import typer
+
+import graphar as gar
+
+
[email protected]
+def sample_graph(test_data_root):
+ return test_data_root + "/ldbc_sample/csv/" + "ldbc_sample.graph.yml"
+
+
[email protected]
+def sample_graph_info(sample_graph):
+ return gar.graph_info.GraphInfo.load(sample_graph)
+
+
[email protected]
+def sample_graph_vertex(sample_graph_info):
+ return sample_graph_info.get_vertex_info("person")
+
+
[email protected]
+def sample_graph_edge(sample_graph_info):
+ return sample_graph_info.get_edge_info("person", "knows", "person")
+
+
+def test_graph_info_basics(sample_graph_info):
+ """Test basic graph info functionality."""
+ assert sample_graph_info is not None
+ assert sample_graph_info.get_name() == "ldbc_sample"
+
+ # Test vertex and edge info counts
+ assert len(sample_graph_info.get_vertex_infos()) == 1
+ assert sample_graph_info.vertex_info_num() == 1
+ assert len(sample_graph_info.get_edge_infos()) == 1
+ assert sample_graph_info.edge_info_num() == 1
+
+ # Test getting specific vertex and edge info
+ person_vertex_info = sample_graph_info.get_vertex_info("person")
+ assert person_vertex_info is not None
+
+ knows_edge_info = sample_graph_info.get_edge_info("person", "knows",
"person")
+ assert knows_edge_info is not None
+
+ # Test version
+ assert sample_graph_info.version().get_version() == 1
+
+
+def test_person_vertex_info_basics(sample_graph_vertex):
+ """Test person vertex info basics."""
+ assert sample_graph_vertex.get_type() == "person"
+ assert sample_graph_vertex.get_chunk_size() == 100
+ assert sample_graph_vertex.get_prefix() == "vertex/person/"
+ assert sample_graph_vertex.property_group_num() == 2
+ assert sample_graph_vertex.version().get_version() == 1
+
+
+def test_person_vertex_property_groups(sample_graph_vertex):
+ """Test person vertex property groups."""
+ # Test first property group (id)
+ id_property_group = sample_graph_vertex.get_property_group_by_index(0)
+ assert id_property_group is not None
+ assert id_property_group.get_prefix() == "id/"
+ assert id_property_group.get_file_type() == gar.types.FileType.CSV
+
+ # Check id property
+ assert sample_graph_vertex.has_property("id")
+ id_property_type = sample_graph_vertex.get_property_type("id")
+ assert id_property_type.to_type_name() == "int64"
+ assert sample_graph_vertex.is_primary_key("id")
+ assert not sample_graph_vertex.is_nullable_key("id")
+
+ # Test second property group (firstName_lastName_gender)
+ name_property_group = sample_graph_vertex.get_property_group_by_index(1)
+ assert name_property_group is not None
+ assert name_property_group.get_prefix() == "firstName_lastName_gender/"
+ assert name_property_group.get_file_type() == gar.types.FileType.CSV
+
+ # Check name properties
+ assert sample_graph_vertex.has_property("firstName")
+ first_name_type = sample_graph_vertex.get_property_type("firstName")
+ assert first_name_type.to_type_name() == "string"
+
+ assert sample_graph_vertex.has_property("lastName")
+ last_name_type = sample_graph_vertex.get_property_type("lastName")
+ assert last_name_type.to_type_name() == "string"
+
+ assert sample_graph_vertex.has_property("gender")
+ gender_type = sample_graph_vertex.get_property_type("gender")
+ assert gender_type.to_type_name() == "string"
+
+
+def test_knows_edge_info_basics(sample_graph_edge):
+ """Test knows edge info basics."""
+ assert sample_graph_edge.get_edge_type() == "knows"
+ assert sample_graph_edge.get_chunk_size() == 1024
+ assert sample_graph_edge.get_src_type() == "person"
+ assert sample_graph_edge.get_src_chunk_size() == 100
+ assert sample_graph_edge.get_dst_type() == "person"
+ assert sample_graph_edge.get_dst_chunk_size() == 100
+ assert not sample_graph_edge.is_directed()
+ assert sample_graph_edge.get_prefix() == "edge/person_knows_person/"
+ assert sample_graph_edge.version().get_version() == 1
+
+
+def test_knows_edge_adjacency_lists(sample_graph_edge):
+ """Test knows edge adjacency lists."""
+ # Check that edge has both ordered_by_source and ordered_by_dest adjacency
lists
+ assert
sample_graph_edge.has_adjacent_list_type(gar.types.AdjListType.ordered_by_source)
+ assert
sample_graph_edge.has_adjacent_list_type(gar.types.AdjListType.ordered_by_dest)
+
+ # Test ordered_by_source adjacency list
+ adj_by_source =
sample_graph_edge.get_adjacent_list(gar.types.AdjListType.ordered_by_source)
+ assert adj_by_source is not None
+ assert adj_by_source.get_file_type() == gar.types.FileType.CSV
+ assert adj_by_source.get_type() == gar.types.AdjListType.ordered_by_source
+ assert adj_by_source.get_prefix() == "ordered_by_source/"
+
+
+def test_knows_edge_property_groups(sample_graph_edge):
+ """Test knows edge property groups."""
+ assert sample_graph_edge.property_group_num() == 1
+
+ # Test property group
+ property_group = sample_graph_edge.get_property_group_by_index(0)
+ assert property_group is not None
+ assert property_group.get_prefix() == "creationDate/"
+ assert property_group.get_file_type() == gar.types.FileType.CSV
+
+ # Check creationDate property
+ assert sample_graph_edge.has_property("creationDate")
+ creation_date_type = sample_graph_edge.get_property_type("creationDate")
+ assert creation_date_type.to_type_name() == "string"
+ assert not sample_graph_edge.is_primary_key("creationDate")
+ assert sample_graph_edge.is_nullable_key("creationDate")
+
+
+def test_graph_validation(sample_graph_info, sample_graph_vertex,
sample_graph_edge):
+ """Test graph validation."""
+ # Test that the sample graph is validated
+ assert sample_graph_info.is_validated()
+ assert sample_graph_vertex.is_validated()
+ assert sample_graph_edge.is_validated()
diff --git a/python/test/test_graphar_cli.py b/python/test/test_graphar_cli.py
new file mode 100644
index 00000000..028ee842
--- /dev/null
+++ b/python/test/test_graphar_cli.py
@@ -0,0 +1,80 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pytest
+import typer
+from pathlib import Path
+
+from cli import graphar_cli
+
+
[email protected]
+def sample_cfg():
+ return (
+ Path(__file__).parent
+ / "../"
+ / ".."
+ / "testing"
+ / "ldbc_sample"
+ / "parquet"
+ / "ldbc_sample.graph.yml"
+ ).resolve()
+
+
+def test_show_file_not_found(tmp_path):
+ """Test show command with a non-existent file path."""
+ # path that does not exist
+ missing = tmp_path / "nope.yaml"
+ with pytest.raises(typer.Exit):
+ graphar_cli.show(path=str(missing))
+
+
+def test_show_edge_not_all_set(sample_cfg):
+ """Test show command with incomplete edge parameters."""
+ cfg = sample_cfg
+ # only provide edge_src, missing others
+ with pytest.raises(typer.Exit):
+ graphar_cli.show(path=str(cfg), edge_src="s")
+
+
+def test_show_graph_default(sample_cfg):
+ """Test show command with default parameters (show entire graph)."""
+ cfg = sample_cfg
+ # This should run without throwing exceptions
+ try:
+ graphar_cli.show(path=str(cfg))
+ except typer.Exit:
+ # typer.Exit is expected when the command completes successfully
+ pass
+
+
+def test_check_success(sample_cfg):
+ """Test check command with a valid graph configuration."""
+ cfg = sample_cfg
+ # This should run without throwing exceptions
+ try:
+ graphar_cli.check(path=str(cfg))
+ except typer.Exit:
+ # typer.Exit is expected when the command completes successfully
+ pass
+
+
+def test_import_data_exception():
+ """Test import_data command with a non-existent config file."""
+ # Using a non-existent config file should raise an exception
+ with pytest.raises(typer.Exit):
+ graphar_cli.import_data(config_file="non_existent_config.yaml")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]