This is an automated email from the ASF dual-hosted git repository.

xiaokang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-graphar.git


The following commit(s) were added to refs/heads/main by this push:
     new 03b8a322 feat(python): refactor python module(from cli module) (#799)
03b8a322 is described below

commit 03b8a322736dfc56877d0b3c6225c015446f64b3
Author: Xiaokang Yang <[email protected]>
AuthorDate: Thu Nov 6 16:56:42 2025 +0800

    feat(python): refactor python module(from cli module) (#799)
    
    * init python library
    
    * break out the API
    
    * rename cli to python
    
    * try to fix
    
    * try to fix
    
    * architecture build
    
    * bind graphInfo
    
    * add version info and graphInfo test
    
    * update workflow yaml
    
    * try run on macos
    
    * try to use python 3.13
    
    * cancel import test in macos
    
    * architecture build
    
    * foramt
    
    * foramt
    
    * format
    
    * build
    
    * add version command
    
    * format
    
    * update workflow yaml
    
    * add license header
    
    * add python component
    
    * rename client to cli
---
 .github/ISSUE_TEMPLATE/feature_request.yml         |   1 +
 .github/workflows/{cli.yml => python.yml}          |  69 ++--
 .pre-commit-config.yaml                            |  20 ++
 cli/test/merge.py                                  | 103 ------
 {cli => python}/.clang-format                      |   0
 {cli => python}/.gitignore                         |   0
 {cli => python}/CMakeLists.txt                     |  23 +-
 python/README.md                                   |  94 +++++
 {cli => python}/pyproject.toml                     |  31 +-
 python/src/bindings/_core_module.cc                |  33 ++
 .../main.cc => python/src/bindings/cli_binding.cc  |   8 +-
 python/src/bindings/graph_info_binding.cc          | 400 +++++++++++++++++++++
 {cli/src => python/src/bindings}/importer.h        |   2 +-
 python/src/bindings/types_binding.cc               |  68 ++++
 .../src/bindings/utils/import_util.h               |   0
 python/src/bindings/utils/pybind_util.h            |  41 +++
 {cli => python/src/cli}/README.md                  |   6 +-
 .../src/graphar_cli => python/src/cli}/__init__.py |  12 +-
 .../graphar_cli => python/src/cli}/graphar_cli.py  |  85 ++---
 .../graphar_cli => python/src/graphar}/__init__.py |   4 +-
 .../src/graphar/graph_info.py                      |   8 +-
 .../src/graphar/importer/__init__.py               |   8 -
 .../src/graphar/importer}/config.py                |   0
 python/src/graphar/importer/data_import.py         |  71 ++++
 .../src/graphar/importer}/importer.py              |   0
 .../graphar_cli => python/src/graphar}/logging.py  |   0
 .../__init__.py => python/src/graphar/types.py     |   6 +-
 cli/test/test_basic.py => python/test/conftest.py  |  11 +-
 python/test/test_graph_info.py                     | 160 +++++++++
 python/test/test_graphar_cli.py                    |  80 +++++
 30 files changed, 1116 insertions(+), 228 deletions(-)

diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml
index 891902dc..59c888a9 100644
--- a/.github/ISSUE_TEMPLATE/feature_request.yml
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -41,6 +41,7 @@ body:
         - Java
         - Spark
         - PySpark
+        - Python
         - Continuous Integration
         - Developer Tools
         - Documentation
diff --git a/.github/workflows/cli.yml b/.github/workflows/python.yml
similarity index 71%
rename from .github/workflows/cli.yml
rename to .github/workflows/python.yml
index a2670767..05c213d1 100644
--- a/.github/workflows/cli.yml
+++ b/.github/workflows/python.yml
@@ -15,36 +15,38 @@
 # specific language governing permissions and limitations
 # under the License.
 
-name: GraphAr CLI CI
+name: GraphAr Python CI
 
 on:
   # Trigger the workflow on push or pull request,
   # but only for the main branch
   push:
     branches:
-      - main
+      - "main"
     paths:
       - 'cpp/**'
-      - 'cli/**'
+      - 'python/**'
       - '.github/workflows/ci.yml'
-      - '.github/workflows/cli.yml'
+      - '.github/workflows/python.yml'
   pull_request:
     branches:
-      - main
+      - "main"
     paths:
       - 'cpp/**'
-      - 'cli/**'
+      - 'python/**'
       - '.github/workflows/ci.yml'
-      - '.github/workflows/cli.yml'
+      - '.github/workflows/python.yml'
 concurrency:
   group: ${{ github.repository }}-${{ github.event.number || github.head_ref || github.sha }}-${{ github.workflow }}
   cancel-in-progress: true
 
 jobs:
   ubuntu:
-    name: Ubuntu 22.04 CLI
+    name: Ubuntu 22.04 Python
     runs-on: ubuntu-latest
-    if: ${{ !contains(github.event.pull_request.title, 'WIP') && !github.event.pull_request.draft }}
+    env:
+      GAR_TEST_DATA: ${{ github.workspace }}/graphar-testing/
+
     steps:
     - uses: actions/checkout@v3
       with:
@@ -63,17 +65,25 @@ jobs:
                             libarrow-acero-dev=17.0.0-1 \
                             libparquet-dev=17.0.0-1
         sudo apt-get install -y ccache libcurl4-openssl-dev
+        git clone https://github.com/apache/incubator-graphar-testing.git $GAR_TEST_DATA --depth 1
 
-    - name: Install GraphAr CLI and Run Tests
-      working-directory: "cli"
+    - name: Install GraphAr Python SDK
+      working-directory: "python"
       run: |
         pip install ./ -v
+    - name: Run Cli Test
+      working-directory: "python"
+      run: |
         graphar --help
         graphar check -p ../testing/neo4j/MovieGraph.graph.yml
         graphar show -p ../testing/neo4j/MovieGraph.graph.yml -v Person
         graphar show -p ../testing/neo4j/MovieGraph.graph.yml -es Person -e ACTED_IN -ed Movie
         graphar import -c ../testing/neo4j/data/import.mini.yml
-# TODO: Add unit tests
+
+    - name: Run pytest
+      working-directory: "python"
+      run: |
+        pytest test -v
 
 
     - name: Upload coverage reports to Codecov
@@ -82,31 +92,44 @@ jobs:
         token: ${{ secrets.CODECOV_TOKEN }}
 
   macos:
-    name: macos latest CLI
+    name: macos latest Python
     runs-on: macos-latest
-    # TODO: Remove this when the macos issue is fixed
-    if: false   
-    strategy:
-      fail-fast: false
+    env:
+      GAR_TEST_DATA: ${{ github.workspace }}/graphar-testing/
 
     steps:
     - uses: actions/checkout@v3
       with:
           submodules: true
 
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.13'
+
     - name: Install dependencies
       run: |
         brew bundle --file=cpp/Brewfile
-        
-    
-    - name: Build GraphAr And Run Tests
-      working-directory: "cli"
+
+        git clone https://github.com/apache/incubator-graphar-testing.git $GAR_TEST_DATA --depth 1
+
+    - name: Install GraphAr Python SDK
+      working-directory: "python"
+      run: |
+        pip install ./ -v
+    - name: Run Cli Test
+      working-directory: "python"
       run: |
-        pip install ./
         graphar --help
+        graphar --version
         graphar check -p ../testing/neo4j/MovieGraph.graph.yml
         graphar show -p ../testing/neo4j/MovieGraph.graph.yml -v Person
         graphar show -p ../testing/neo4j/MovieGraph.graph.yml -es Person -e ACTED_IN -ed Movie
-        graphar import -c ../testing/neo4j/data/import.mini.yml
+
+    - name: Run pytest
+      working-directory: "python"
+      run: |
+        pytest test -v
+
       
 # TODO: Add unit tests
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1a69b525..a5767647 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -22,6 +22,26 @@ repos:
       - id: gitleaks
         args:
         - '--verbose'
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.14.0
+    hooks:
+    - id: ruff-check
+      files: ^python/
+      args: [--output-format, github, --fix]
+    - id: ruff-format
+  - repo: https://github.com/crate-ci/typos
+    rev: v1.38.1
+    hooks:
+    - id: typos
+      files: ^python/
+      args: [--force-exclude]
+  - repo: https://github.com/pre-commit/mirrors-clang-format
+    rev: v21.1.2
+    hooks:
+    - id: clang-format
+      files: ^python/
+      types_or: [c++]
+      args: [--style=file, --verbose]
 
   - repo: local
     hooks:
diff --git a/cli/test/merge.py b/cli/test/merge.py
deleted file mode 100644
index 53d7a704..00000000
--- a/cli/test/merge.py
+++ /dev/null
@@ -1,103 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from enum import Enum
-from pathlib import Path
-from typing import List, Optional
-
-import pandas as pd
-import typer
-from typing_extensions import Annotated
-
-app = typer.Typer(no_args_is_help=True, context_settings={"help_option_names": ["-h", "--help"]})
-
-
-support_file_types = {"parquet", "orc", "csv", "json"}
-
-
-class FileType(str, Enum):
-    parquet = "parquet"
-    csv = "csv"
-    orc = "orc"
-    json = "json"
-
-
-@app.command(
-    "merge",
-    context_settings={"help_option_names": ["-h", "--help"]},
-    help="Merge source files",
-    no_args_is_help=True,
-)
-def merge_data(
-    files: Annotated[
-        List[str], typer.Option("--file", "-f", help="Files to merge", show_default=False)
-    ],
-    output_file: Annotated[
-        str, typer.Option("--output", "-o", help="Output file", show_default=False)
-    ],
-    type: Annotated[
-        Optional[FileType], typer.Option("--type", "-t", help="Type of data to output", show_default=False)
-    ] = None,
-):
-    if not files:
-        typer.echo("No files to merge")
-        raise typer.Exit(1)
-    if not output_file:
-        typer.echo("No output file")
-        raise typer.Exit(1)
-    data = []
-    for file in files:
-        path = Path(file)
-        if not path.is_file():
-            typer.echo(f"File {file} not found")
-            raise typer.Exit(1)
-        file_type = path.suffix.removeprefix(".")
-        if file_type == "":
-            typer.echo(f"File {file} has no file type suffix")
-            raise typer.Exit(1)
-        if file_type not in support_file_types:
-            typer.echo(f"File type {file_type} not supported")
-            raise typer.Exit(1)
-        if file_type == "parquet":
-            data.append(pd.read_parquet(file))
-        elif file_type == "csv":
-            data.append(pd.read_csv(file))
-        elif file_type == "orc":
-            data.append(pd.read_orc(file))
-        elif file_type == "json":
-            data.append(pd.read_json(file))
-    output_path = Path(output_file)
-    if output_path.is_file():
-        typer.echo(f"Output file {output_file} already exists")
-        if not typer.prompt("Do you want to overwrite it?", default=False):
-            raise typer.Exit(1)
-    if not type:
-        type = output_path.suffix.removeprefix(".")
-    result = pd.concat(data, ignore_index=True)
-    if type == "parquet":
-        result.to_parquet(output_file)
-    elif type == "csv":
-        result.to_csv(output_file)
-    elif type == "orc":
-        result.to_orc(output_file)
-    elif type == "json":
-        result.to_json(output_file, orient="records", lines=True)
-    typer.echo(f"Data merged to {output_file}")
-
-
-if __name__ == "__main__":
-    app()
diff --git a/cli/.clang-format b/python/.clang-format
similarity index 100%
rename from cli/.clang-format
rename to python/.clang-format
diff --git a/cli/.gitignore b/python/.gitignore
similarity index 100%
rename from cli/.gitignore
rename to python/.gitignore
diff --git a/cli/CMakeLists.txt b/python/CMakeLists.txt
similarity index 75%
rename from cli/CMakeLists.txt
rename to python/CMakeLists.txt
index 21caa61f..5bb7ccd5 100644
--- a/cli/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -45,8 +45,11 @@ else()
 endif()
 # Add a library using FindPython's tooling (pybind11 also provides a helper like
 # this)
-python_add_library(_core MODULE src/main.cc WITH_SOABI)
-
+python_add_library(_core MODULE src/bindings/_core_module.cc
+                                   src/bindings/cli_binding.cc
+                                   src/bindings/graph_info_binding.cc
+                                   src/bindings/types_binding.cc WITH_SOABI)
+                                   
 target_link_libraries(_core PRIVATE pybind11::headers graphar Arrow::arrow_shared
                                     Parquet::parquet_shared
                                     ArrowDataset::arrow_dataset_shared
@@ -60,6 +63,16 @@ target_include_directories(_core PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../cpp/thir
 target_compile_definitions(_core PRIVATE VERSION_INFO=${PROJECT_VERSION})
 
 # The install directory is the output (wheel) directory
-set_target_properties(_core PROPERTIES INSTALL_RPATH "$ORIGIN")
-install(TARGETS graphar DESTINATION graphar_cli)
-install(TARGETS _core DESTINATION graphar_cli)
+# Use platform-appropriate rpath so the Python extension can find the
+# packaged libgraphar at runtime inside the wheel.
+if(APPLE)
+  # macOS uses @loader_path to find libraries relative to the module
+  set_target_properties(_core PROPERTIES INSTALL_RPATH "@loader_path")
+else()
+  # On Linux and other Unix, use $ORIGIN (escaped so CMake preserves the $)
+  set_target_properties(_core PROPERTIES INSTALL_RPATH "\$ORIGIN")
+endif()
+
+install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src/cli/ DESTINATION cli)
+install(TARGETS graphar DESTINATION graphar)
+install(TARGETS _core DESTINATION graphar)
\ No newline at end of file
diff --git a/python/README.md b/python/README.md
new file mode 100644
index 00000000..78fecbca
--- /dev/null
+++ b/python/README.md
@@ -0,0 +1,94 @@
+# GraphAr Python SDK
+
+GraphAr Python SDK provides Python bindings for the GraphAr C++ library, allowing users to work with GraphAr-formatted graph data in Python environments. It includes both a high-level API for data manipulation and a command-line interface for common operations.
+
+## Installation
+
+### Prerequisites
+
+- Python >= 3.7
+- pip (latest version recommended)
+- CMake >= 3.15 (for building from source)
+- Apache Arrow >= 12.0 (for building from source)
+
+### Install from Source
+
+Clone the repository and install the Python package:
+
+```bash
+git clone https://github.com/apache/incubator-graphar.git
+cd incubator-graphar
+pip install ./python
+```
+
+For verbose output during installation:
+
+```bash
+pip install -v ./python
+```
+
+### Using Docker (Recommended)
+
+The easiest way to get started is by using our pre-configured Docker environment:
+
+```bash
+docker run -it ghcr.io/apache/graphar-dev
+```
+
+## Quick Start
+
+### Importing the Package
+
+After installation, you can import the GraphAr Python SDK in your Python scripts:
+
+```python
+import graphar
+```
+
+### Basic Usage
+
+Loading graph information:
+
+```python
+import graphar
+
+# Load graph info from a YAML file
+graph_info = graphar.graph_info.GraphInfo.load("path/to/graph.yaml")
+
+# Access vertex information
+vertex_info = graph_info.get_vertex_info("person")
+print(f"Vertex type: {vertex_info.get_type()}")
+
+# Access edge information
+edge_info = graph_info.get_edge_info("person", "knows", "person")
+print(f"Edge type: {edge_info.get_edge_type()}")
+```
+
+## Command-Line Interface
+
+GraphAr Python SDK also provides a command-line interface for common operations such as checking metadata, showing graph information, and importing data.
+
+For detailed information about the CLI functionality, please see [CLI Documentation](src/cli/README.md).
+
+## API Documentation
+
+The Python SDK exposes the core GraphAr functionality through several modules:
+
+- `graphar.graph_info`: Main API for working with graph, vertex, and edge information
+
+## Examples
+> [!NOTE]
+> under development.
+
+You can find various examples in the [examples directory](../cpp/examples/), which demonstrate usage of the underlying C++ library. These concepts translate directly to the Python SDK.
+
+## Development
+
+To contribute to the Python SDK, please follow the guidelines in the main [CONTRIBUTING.md](../CONTRIBUTING.md) file.
+
+## License
+
+**GraphAr** is distributed under [Apache License
+2.0](https://github.com/apache/incubator-graphar/blob/main/LICENSE).
+Please note that third-party libraries may not have the same license as
+GraphAr.
diff --git a/cli/pyproject.toml b/python/pyproject.toml
similarity index 55%
rename from cli/pyproject.toml
rename to python/pyproject.toml
index 636a90b7..fb8b3f38 100644
--- a/cli/pyproject.toml
+++ b/python/pyproject.toml
@@ -1,27 +1,44 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 [build-system]
 requires = ["scikit-build-core>=0.3.3", "pybind11", "ninja ~= 1.11"]
 build-backend = "scikit_build_core.build"
 
 
 [project]
-name = "graphar_cli"
-version = "0.0.1"
+name = "graphar"
+version = "0.13.0"
 description = "GraphAr command line tool"
 readme = "README.md"
 authors = [{ name = "GraphAr community", email = "[email protected]" }]
 requires-python = ">=3.7"
-dependencies = ["typer ~= 0.1", "pydantic ~= 2.0, < 2.7", "pyyaml ~= 6.0"]
+dependencies = ["typer ~= 0.1", "pydantic ~= 2.0, < 2.12", "pyyaml ~= 6.0", "pytest ~= 7.2"]
+
 
 [project.optional-dependencies]
 test = ["pandas ~= 2.0", "typing_extensions ~= 4.0"]
 
-[project.scripts]
-graphar = "graphar_cli.graphar_cli:main"
-
-
 [tool.scikit-build]
 build-dir = "build"
 
+[project.scripts]
+graphar = "cli.graphar_cli:main"
+
 [tool.ruff]
 src = ["src"]
 line-length = 100
diff --git a/python/src/bindings/_core_module.cc b/python/src/bindings/_core_module.cc
new file mode 100644
index 00000000..50452e24
--- /dev/null
+++ b/python/src/bindings/_core_module.cc
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <pybind11/pybind11.h>
+
+// Forward declarations of the per-area binding functions called below.
+extern "C" void bind_graph_info(pybind11::module_& m);
+extern "C" void bind_types(pybind11::module_& m);  // presumably defined in types_binding.cc - TODO confirm
+extern "C" void bind_cli(pybind11::module_& m);
+
+PYBIND11_MODULE(_core, m) {  // entry point of the `_core` extension module
+  m.doc() = "GraphAr core Python bindings";
+
+  bind_types(m);  // NOTE(review): likely must precede bind_graph_info - confirm
+  bind_graph_info(m);
+  bind_cli(m);
+}
\ No newline at end of file
diff --git a/cli/src/main.cc b/python/src/bindings/cli_binding.cc
similarity index 97%
rename from cli/src/main.cc
rename to python/src/bindings/cli_binding.cc
index 4a0b0346..0fbc9957 100644
--- a/cli/src/main.cc
+++ b/python/src/bindings/cli_binding.cc
@@ -152,8 +152,10 @@ std::vector<std::vector<std::string>> GetEdgeTypes(const std::string& path) {
 }
 
 namespace py = pybind11;
-PYBIND11_MODULE(_core, m) {
-  m.doc() = "GraphAr Python bindings";
+
+// Changed from PYBIND11_MODULE to a regular function
+extern "C" void bind_cli(pybind11::module_& m) {
+  // CLI-level convenience functions
   m.def("show_graph", &ShowGraph, "Show the graph info");
   m.def("show_vertex", &ShowVertex, "Show the vertex info");
   m.def("show_edge", &ShowEdge, "Show the edge info");
@@ -170,4 +172,4 @@ PYBIND11_MODULE(_core, m) {
 #else
   m.attr("__version__") = "dev";
 #endif
-}
+}
\ No newline at end of file
diff --git a/python/src/bindings/graph_info_binding.cc b/python/src/bindings/graph_info_binding.cc
new file mode 100644
index 00000000..e4475b8f
--- /dev/null
+++ b/python/src/bindings/graph_info_binding.cc
@@ -0,0 +1,400 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "pybind11/pybind11.h"
+#include "pybind11/stl.h"
+#include "utils/pybind_util.h"
+
+#include "graphar/graph_info.h"
+#include "graphar/types.h"
+#include "graphar/version_parser.h"
+
+#define STRINGIFY(x) #x
+#define MACRO_STRINGIFY(x) STRINGIFY(x)
+
+namespace py = pybind11;
+
+// Changed from PYBIND11_MODULE to a regular function
+extern "C" void bind_graph_info(pybind11::module_& m) {
+  // Minimal binding for DataType so pybind11 recognizes
+  // std::shared_ptr<graphar::DataType> used in Property constructor defaults.
+  py::class_<graphar::DataType, std::shared_ptr<graphar::DataType>>(m,
+                                                                    "DataType")
+      .def(py::init<>())
+      .def(py::init<graphar::Type>())
+      .def("id", &graphar::DataType::id)
+      .def("to_type_name", &graphar::DataType::ToTypeName);
+
+  // Bind InfoVersion
+  py::class_<graphar::InfoVersion, std::shared_ptr<graphar::InfoVersion>>(
+      m, "InfoVersion")
+      .def(py::init<>())
+      .def(py::init<int>(), py::arg("version"))
+      .def(py::init<int, const std::vector<std::string>&>(), py::arg("version"),
+           py::arg("user_define_types"))
+      .def("get_version", &graphar::InfoVersion::version)
+      .def("get_user_define_types", &graphar::InfoVersion::user_define_types,
+           py::return_value_policy::reference_internal)
+      .def("to_string", &graphar::InfoVersion::ToString)
+      .def("check_type", &graphar::InfoVersion::CheckType)
+      .def_static("parse",
+                  [](const std::string& str) {
+                    return ThrowOrReturn(graphar::InfoVersion::Parse(str));
+                  })
+      .def("__eq__",
+           [](const graphar::InfoVersion& self,
+              const graphar::InfoVersion& other) { return self == other; });
+
+  // Bind Property
+  py::class_<graphar::Property>(m, "Property")
+      .def(py::init<>())
+      .def(py::init<const std::string&,
+                    const std::shared_ptr<graphar::DataType>&, bool, bool,
+                    graphar::Cardinality>(),
+           py::arg("name"), py::arg("type") = nullptr,
+           py::arg("is_primary") = false, py::arg("is_nullable") = true,
+           py::arg("cardinality") = graphar::Cardinality::SINGLE)
+      .def_readwrite("name", &graphar::Property::name)
+      .def_readwrite("type", &graphar::Property::type)
+      .def_readwrite("is_primary", &graphar::Property::is_primary)
+      .def_readwrite("is_nullable", &graphar::Property::is_nullable)
+      .def_readwrite("cardinality", &graphar::Property::cardinality);
+
+  // Bind PropertyGroup
+  py::class_<graphar::PropertyGroup, std::shared_ptr<graphar::PropertyGroup>>(
+      m, "PropertyGroup")
+      .def(py::init<const std::vector<graphar::Property>&, graphar::FileType,
+                    const std::string&>(),
+           py::arg("properties"), py::arg("file_type"), py::arg("prefix") = "")
+      .def("get_properties", &graphar::PropertyGroup::GetProperties,
+           py::return_value_policy::reference_internal)
+      .def("has_property", &graphar::PropertyGroup::HasProperty)
+      .def("get_file_type", &graphar::PropertyGroup::GetFileType)
+      .def("get_prefix", &graphar::PropertyGroup::GetPrefix)
+      .def("is_validated", &graphar::PropertyGroup::IsValidated);
+
+  // Bind AdjacentList
+  py::class_<graphar::AdjacentList, std::shared_ptr<graphar::AdjacentList>>(
+      m, "AdjacentList")
+      .def(py::init<graphar::AdjListType, graphar::FileType,
+                    const std::string&>(),
+           py::arg("type"), py::arg("file_type"), py::arg("prefix") = "")
+      .def("get_type", &graphar::AdjacentList::GetType)
+      .def("get_file_type", &graphar::AdjacentList::GetFileType)
+      .def("get_prefix", &graphar::AdjacentList::GetPrefix)
+      .def("is_validated", &graphar::AdjacentList::IsValidated);
+
+  // Bind VertexInfo
+  py::class_<graphar::VertexInfo, std::shared_ptr<graphar::VertexInfo>>(
+      m, "VertexInfo")
+      .def(py::init<const std::string&, graphar::IdType,
+                    const std::vector<std::shared_ptr<graphar::PropertyGroup>>&,
+                    const std::vector<std::string>&, const std::string&,
+                    std::shared_ptr<const graphar::InfoVersion>>(),
+           py::arg("type"), py::arg("chunk_size"), py::arg("property_groups"),
+           py::arg("labels") = std::vector<std::string>(),
+           py::arg("prefix") = "", py::arg("version") = nullptr)
+      .def("add_property_group",
+           [](const graphar::VertexInfo& self,
+              std::shared_ptr<graphar::PropertyGroup> property_group) {
+             return ThrowOrReturn(self.AddPropertyGroup(property_group));
+           })
+      .def("remove_property_group",
+           [](const graphar::VertexInfo& self,
+              std::shared_ptr<graphar::PropertyGroup> property_group) {
+             return ThrowOrReturn(self.RemovePropertyGroup(property_group));
+           })
+      .def("get_type", &graphar::VertexInfo::GetType,
+           py::return_value_policy::reference_internal)
+      .def("get_chunk_size", &graphar::VertexInfo::GetChunkSize)
+      .def("get_prefix", &graphar::VertexInfo::GetPrefix,
+           py::return_value_policy::reference_internal)
+      .def("version", &graphar::VertexInfo::version)
+      .def("get_labels", &graphar::VertexInfo::GetLabels,
+           py::return_value_policy::reference_internal)
+      .def("property_group_num", &graphar::VertexInfo::PropertyGroupNum)
+      .def("get_property_groups", &graphar::VertexInfo::GetPropertyGroups,
+           py::return_value_policy::reference_internal)
+      .def("get_property_group",
+           [](const graphar::VertexInfo& self,
+              const std::string& property_name) {
+             return self.GetPropertyGroup(property_name);
+           })
+      .def("get_property_group_by_index",
+           [](const graphar::VertexInfo& self, int index) {
+             return self.GetPropertyGroupByIndex(index);
+           })
+      .def("get_property_type",
+           [](const graphar::VertexInfo& self,
+              const std::string& property_name) {
+             return ThrowOrReturn(self.GetPropertyType(property_name));
+           })
+      .def("get_property_cardinality",
+           [](const graphar::VertexInfo& self,
+              const std::string& property_name) {
+             return ThrowOrReturn(self.GetPropertyCardinality(property_name));
+           })
+      .def("has_property", &graphar::VertexInfo::HasProperty)
+      .def("save",
+           [](const graphar::VertexInfo& self, const std::string& file_name) {
+             CheckStatus(self.Save(file_name));
+           })
+      .def("dump",
+           [](const graphar::VertexInfo& self) {
+             return ThrowOrReturn(self.Dump());
+           })
+      .def("is_primary_key", &graphar::VertexInfo::IsPrimaryKey)
+      .def("is_nullable_key", &graphar::VertexInfo::IsNullableKey)
+      .def("has_property_group", &graphar::VertexInfo::HasPropertyGroup)
+      .def(
+          "get_file_path",
+          [](const graphar::VertexInfo& self,
+             std::shared_ptr<graphar::PropertyGroup> property_group,
+             graphar::IdType chunk_index) {
+            return ThrowOrReturn(self.GetFilePath(property_group, chunk_index));
+          })
+      .def("get_path_prefix",
+           [](const graphar::VertexInfo& self,
+              std::shared_ptr<graphar::PropertyGroup> property_group) {
+             return ThrowOrReturn(self.GetPathPrefix(property_group));
+           })
+      .def("get_vertices_num_file_path",
+           [](const graphar::VertexInfo& self) {
+             return ThrowOrReturn(self.GetVerticesNumFilePath());
+           })
+      .def("is_validated", &graphar::VertexInfo::IsValidated);
+
+  // Bind EdgeInfo
+  py::class_<graphar::EdgeInfo, std::shared_ptr<graphar::EdgeInfo>>(m,
+                                                                    "EdgeInfo")
+      .def(py::init<const std::string&, const std::string&, const std::string&,
+                    graphar::IdType, graphar::IdType, graphar::IdType, bool,
+                    const std::vector<std::shared_ptr<graphar::AdjacentList>>&,
+                    const std::vector<std::shared_ptr<graphar::PropertyGroup>>&,
+                    const std::string&,
+                    std::shared_ptr<const graphar::InfoVersion>>(),
+           py::arg("src_type"), py::arg("edge_type"), py::arg("dst_type"),
+           py::arg("chunk_size"), py::arg("src_chunk_size"),
+           py::arg("dst_chunk_size"), py::arg("directed"),
+           py::arg("adjacent_lists"), py::arg("property_groups"),
+           py::arg("prefix") = "", py::arg("version") = nullptr)
+      .def("add_adjacent_list",
+           [](const graphar::EdgeInfo& self,
+              std::shared_ptr<graphar::AdjacentList> adj_list) {
+             return ThrowOrReturn(self.AddAdjacentList(adj_list));
+           })
+      .def("remove_adjacent_list",
+           [](const graphar::EdgeInfo& self,
+              std::shared_ptr<graphar::AdjacentList> adj_list) {
+             return ThrowOrReturn(self.RemoveAdjacentList(adj_list));
+           })
+      .def("add_property_group",
+           [](const graphar::EdgeInfo& self,
+              std::shared_ptr<graphar::PropertyGroup> property_group) {
+             return ThrowOrReturn(self.AddPropertyGroup(property_group));
+           })
+      .def("remove_property_group",
+           [](const graphar::EdgeInfo& self,
+              std::shared_ptr<graphar::PropertyGroup> property_group) {
+             return ThrowOrReturn(self.RemovePropertyGroup(property_group));
+           })
+      .def("get_src_type", &graphar::EdgeInfo::GetSrcType,
+           py::return_value_policy::reference_internal)
+      .def("get_edge_type", &graphar::EdgeInfo::GetEdgeType,
+           py::return_value_policy::reference_internal)
+      .def("get_dst_type", &graphar::EdgeInfo::GetDstType,
+           py::return_value_policy::reference_internal)
+      .def("get_chunk_size", &graphar::EdgeInfo::GetChunkSize)
+      .def("get_src_chunk_size", &graphar::EdgeInfo::GetSrcChunkSize)
+      .def("get_dst_chunk_size", &graphar::EdgeInfo::GetDstChunkSize)
+      .def("get_prefix", &graphar::EdgeInfo::GetPrefix,
+           py::return_value_policy::reference_internal)
+      .def("is_directed", &graphar::EdgeInfo::IsDirected)
+      .def("version", &graphar::EdgeInfo::version)
+      .def("has_adjacent_list_type", &graphar::EdgeInfo::HasAdjacentListType)
+      .def("has_property", &graphar::EdgeInfo::HasProperty)
+      .def("has_property_group", &graphar::EdgeInfo::HasPropertyGroup)
+      .def("get_adjacent_list", &graphar::EdgeInfo::GetAdjacentList)
+      .def("property_group_num", &graphar::EdgeInfo::PropertyGroupNum)
+      .def("get_property_groups", &graphar::EdgeInfo::GetPropertyGroups,
+           py::return_value_policy::reference_internal)
+      .def("get_property_group",
+           [](const graphar::EdgeInfo& self, const std::string& property) {
+             return self.GetPropertyGroup(property);
+           })
+      .def("get_property_group_by_index",
+           [](const graphar::EdgeInfo& self, int index) {
+             return self.GetPropertyGroupByIndex(index);
+           })
+      .def("get_vertices_num_file_path",
+           [](const graphar::EdgeInfo& self,
+              graphar::AdjListType adj_list_type) {
+             return ThrowOrReturn(self.GetVerticesNumFilePath(adj_list_type));
+           })
+      .def("get_edges_num_file_path",
+           [](const graphar::EdgeInfo& self, graphar::IdType 
vertex_chunk_index,
+              graphar::AdjListType adj_list_type) {
+             return ThrowOrReturn(
+                 self.GetEdgesNumFilePath(vertex_chunk_index, adj_list_type));
+           })
+      .def("get_adj_list_file_path",
+           [](const graphar::EdgeInfo& self, graphar::IdType 
vertex_chunk_index,
+              graphar::IdType edge_chunk_index,
+              graphar::AdjListType adj_list_type) {
+             return ThrowOrReturn(self.GetAdjListFilePath(
+                 vertex_chunk_index, edge_chunk_index, adj_list_type));
+           })
+      .def("get_adj_list_path_prefix",
+           [](const graphar::EdgeInfo& self,
+              graphar::AdjListType adj_list_type) {
+             return ThrowOrReturn(self.GetAdjListPathPrefix(adj_list_type));
+           })
+      .def("get_adj_list_offset_file_path",
+           [](const graphar::EdgeInfo& self, graphar::IdType 
vertex_chunk_index,
+              graphar::AdjListType adj_list_type) {
+             return ThrowOrReturn(self.GetAdjListOffsetFilePath(
+                 vertex_chunk_index, adj_list_type));
+           })
+      .def("get_offset_path_prefix",
+           [](const graphar::EdgeInfo& self,
+              graphar::AdjListType adj_list_type) {
+             return ThrowOrReturn(self.GetOffsetPathPrefix(adj_list_type));
+           })
+      .def("get_property_file_path",
+           [](const graphar::EdgeInfo& self,
+              const std::shared_ptr<graphar::PropertyGroup>& property_group,
+              graphar::AdjListType adj_list_type,
+              graphar::IdType vertex_chunk_index,
+              graphar::IdType edge_chunk_index) {
+             return ThrowOrReturn(self.GetPropertyFilePath(
+                 property_group, adj_list_type, vertex_chunk_index,
+                 edge_chunk_index));
+           })
+      .def("get_property_group_path_prefix",
+           [](const graphar::EdgeInfo& self,
+              const std::shared_ptr<graphar::PropertyGroup>& property_group,
+              graphar::AdjListType adj_list_type) {
+             return ThrowOrReturn(self.GetPropertyGroupPathPrefix(
+                 property_group, adj_list_type));
+           })
+      .def("get_property_type",
+           [](const graphar::EdgeInfo& self, const std::string& property_name) 
{
+             return ThrowOrReturn(self.GetPropertyType(property_name));
+           })
+      .def("is_primary_key", &graphar::EdgeInfo::IsPrimaryKey)
+      .def("is_nullable_key", &graphar::EdgeInfo::IsNullableKey)
+      .def("save",
+           [](const graphar::EdgeInfo& self, const std::string& file_name) {
+             CheckStatus(self.Save(file_name));
+           })
+      .def("dump",
+           [](const graphar::EdgeInfo& self) {
+             return ThrowOrReturn(self.Dump());
+           })
+      .def("is_validated", &graphar::EdgeInfo::IsValidated);
+
+  // Bind GraphInfo
+  py::class_<graphar::GraphInfo, std::shared_ptr<graphar::GraphInfo>>(
+      m, "GraphInfo")
+      .def(py::init<const std::string&,
+                    const std::vector<std::shared_ptr<graphar::VertexInfo>>&,
+                    const std::vector<std::shared_ptr<graphar::EdgeInfo>>&,
+                    const std::vector<std::string>&, const std::string&,
+                    std::shared_ptr<const graphar::InfoVersion>,
+                    const std::unordered_map<std::string, std::string>&>(),
+           py::arg("graph_name"), py::arg("vertex_infos"),
+           py::arg("edge_infos"),
+           py::arg("labels") = std::vector<std::string>(),
+           py::arg("prefix") = "./", py::arg("version") = nullptr,
+           py::arg("extra_info") =
+               std::unordered_map<std::string, std::string>())
+      .def_static("load",
+                  [](const std::string& path) {
+                    return ThrowOrReturn(graphar::GraphInfo::Load(path));
+                  })
+      .def_static(
+          "load",
+          [](const std::string& input, const std::string& relative_path) {
+            return ThrowOrReturn(
+                graphar::GraphInfo::Load(input, relative_path));
+          })
+      .def("add_vertex",
+           [](const graphar::GraphInfo& self,
+              std::shared_ptr<graphar::VertexInfo> vertex_info) {
+             return ThrowOrReturn(self.AddVertex(vertex_info));
+           })
+      .def("remove_vertex",
+           [](const graphar::GraphInfo& self,
+              std::shared_ptr<graphar::VertexInfo> vertex_info) {
+             return ThrowOrReturn(self.RemoveVertex(vertex_info));
+           })
+      .def("add_edge",
+           [](const graphar::GraphInfo& self,
+              std::shared_ptr<graphar::EdgeInfo> edge_info) {
+             return ThrowOrReturn(self.AddEdge(edge_info));
+           })
+      .def("remove_edge",
+           [](const graphar::GraphInfo& self,
+              std::shared_ptr<graphar::EdgeInfo> edge_info) {
+             return ThrowOrReturn(self.RemoveEdge(edge_info));
+           })
+      .def("get_name", &graphar::GraphInfo::GetName,
+           py::return_value_policy::reference_internal)
+      .def("get_labels", &graphar::GraphInfo::GetLabels,
+           py::return_value_policy::reference_internal)
+      .def("get_prefix", &graphar::GraphInfo::GetPrefix,
+           py::return_value_policy::reference_internal)
+      .def("version", &graphar::GraphInfo::version)
+      .def("get_extra_info", &graphar::GraphInfo::GetExtraInfo,
+           py::return_value_policy::reference_internal)
+      .def("get_vertex_info",
+           [](const graphar::GraphInfo& self, const std::string& type) {
+             return self.GetVertexInfo(type);
+           })
+      .def("get_edge_info",
+           [](const graphar::GraphInfo& self, const std::string& src_type,
+              const std::string& edge_type, const std::string& dst_type) {
+             return self.GetEdgeInfo(src_type, edge_type, dst_type);
+           })
+      .def("get_vertex_info_index", &graphar::GraphInfo::GetVertexInfoIndex)
+      .def("get_edge_info_index", &graphar::GraphInfo::GetEdgeInfoIndex)
+      .def("vertex_info_num", &graphar::GraphInfo::VertexInfoNum)
+      .def("edge_info_num", &graphar::GraphInfo::EdgeInfoNum)
+      .def("get_vertex_info_by_index",
+           [](const graphar::GraphInfo& self, int index) {
+             return self.GetVertexInfoByIndex(index);
+           })
+      .def("get_edge_info_by_index",
+           [](const graphar::GraphInfo& self, int index) {
+             return self.GetEdgeInfoByIndex(index);
+           })
+      .def("get_vertex_infos", &graphar::GraphInfo::GetVertexInfos,
+           py::return_value_policy::reference_internal)
+      .def("get_edge_infos", &graphar::GraphInfo::GetEdgeInfos,
+           py::return_value_policy::reference_internal)
+      .def("save",
+           [](const graphar::GraphInfo& self, const std::string& path) {
+             CheckStatus(self.Save(path));
+           })
+      .def("dump",
+           [](const graphar::GraphInfo& self) {
+             return ThrowOrReturn(self.Dump());
+           })
+      .def("is_validated", &graphar::GraphInfo::IsValidated);
+}  // namespace graphar
\ No newline at end of file
diff --git a/cli/src/importer.h b/python/src/bindings/importer.h
similarity index 99%
rename from cli/src/importer.h
rename to python/src/bindings/importer.h
index 11e4a6bd..dc1abf1a 100644
--- a/cli/src/importer.h
+++ b/python/src/bindings/importer.h
@@ -30,7 +30,7 @@
 #include "pybind11/pybind11.h"
 #include "pybind11/stl.h"
 
-#include "util.h"
+#include "utils/import_util.h"
 
 namespace py = pybind11;
 namespace fs = std::filesystem;
diff --git a/python/src/bindings/types_binding.cc 
b/python/src/bindings/types_binding.cc
new file mode 100644
index 00000000..9d39e7f2
--- /dev/null
+++ b/python/src/bindings/types_binding.cc
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "pybind11/pybind11.h"
+#include "pybind11/stl.h"
+
+#include "graphar/types.h"
+
+#define STRINGIFY(x) #x
+#define MACRO_STRINGIFY(x) STRINGIFY(x)
+
+namespace py = pybind11;
+
+// Registers the GraphAr enum types (Type, FileType, AdjListType, Cardinality)
+// on the given pybind11 module `m`.
+//
+// Every enum below calls export_values(), which additionally publishes each
+// enumerator as an attribute of `m` itself.
+// NOTE(review): Type and Cardinality both declare a value named "LIST", so
+// the module-level `LIST` attribute presumably ends up bound to whichever
+// enum is exported last (Cardinality here) -- confirm, and prefer the
+// qualified form (Type.LIST / Cardinality.LIST) from Python.
+// Changed from PYBIND11_MODULE to a regular function
+extern "C" void bind_types(pybind11::module_& m) {
+  // Bind Type enum
+  py::enum_<graphar::Type>(m, "Type")
+      .value("BOOL", graphar::Type::BOOL)
+      .value("INT32", graphar::Type::INT32)
+      .value("INT64", graphar::Type::INT64)
+      .value("FLOAT", graphar::Type::FLOAT)
+      .value("DOUBLE", graphar::Type::DOUBLE)
+      .value("STRING", graphar::Type::STRING)
+      .value("LIST", graphar::Type::LIST)
+      .value("DATE", graphar::Type::DATE)
+      .value("TIMESTAMP", graphar::Type::TIMESTAMP)
+      .value("USER_DEFINED", graphar::Type::USER_DEFINED)
+      .export_values();
+
+  // Bind FileType enum
+  py::enum_<graphar::FileType>(m, "FileType")
+      .value("CSV", graphar::FileType::CSV)
+      .value("PARQUET", graphar::FileType::PARQUET)
+      .value("ORC", graphar::FileType::ORC)
+      .value("JSON", graphar::FileType::JSON)
+      .export_values();
+
+  // Bind AdjListType enum
+  py::enum_<graphar::AdjListType>(m, "AdjListType")
+      .value("unordered_by_source", graphar::AdjListType::unordered_by_source)
+      .value("unordered_by_dest", graphar::AdjListType::unordered_by_dest)
+      .value("ordered_by_source", graphar::AdjListType::ordered_by_source)
+      .value("ordered_by_dest", graphar::AdjListType::ordered_by_dest)
+      .export_values();
+
+  // Bind Cardinality enum
+  py::enum_<graphar::Cardinality>(m, "Cardinality")
+      .value("SINGLE", graphar::Cardinality::SINGLE)
+      .value("LIST", graphar::Cardinality::LIST)
+      .value("SET", graphar::Cardinality::SET)
+      .export_values();
+}  // bind_types
\ No newline at end of file
diff --git a/cli/src/util.h b/python/src/bindings/utils/import_util.h
similarity index 100%
rename from cli/src/util.h
rename to python/src/bindings/utils/import_util.h
diff --git a/python/src/bindings/utils/pybind_util.h 
b/python/src/bindings/utils/pybind_util.h
new file mode 100644
index 00000000..6728900e
--- /dev/null
+++ b/python/src/bindings/utils/pybind_util.h
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <pybind11/pybind11.h>
+#include "graphar/fwd.h"
+
+// Converts a failed graphar::Status into a Python exception: raises a Python
+// ValueError carrying status.message() when `status` is not OK, and returns
+// normally otherwise.
+inline void CheckStatus(const graphar::Status& status) {
+  if (!status.ok()) {
+    // pybind11 translates value_error into a Python ValueError at the binding
+    // boundary, so the CPython error indicator need not be set by hand here.
+    throw pybind11::value_error(status.message());
+  }
+}
+
+// Unwraps a graphar::Result<T>: returns the held value by value, or raises a
+// Python ValueError carrying the status message when the result is an error.
+template <typename T>
+T ThrowOrReturn(const graphar::Result<T>& result) {
+  if (result.has_error()) {
+    // TODO(yxk) handle different error type
+    // pybind11 translates value_error into a Python ValueError at the binding
+    // boundary, so the CPython error indicator need not be set by hand here.
+    throw pybind11::value_error(result.status().message());
+  }
+  return result.value();
+}
\ No newline at end of file
diff --git a/cli/README.md b/python/src/cli/README.md
similarity index 92%
rename from cli/README.md
rename to python/src/cli/README.md
index 0f2ac54f..22e68b1d 100644
--- a/cli/README.md
+++ b/python/src/cli/README.md
@@ -1,6 +1,6 @@
-# GraphAr Cli
+# GraphAr Python CLI
 
-GraphAr Cli uses [pybind11][] and [scikit-build-core][] to bind C++ code into 
Python and build command line tools through Python. Command line tools 
developed using [typer][].
+The GraphAr Python CLI uses [pybind11][] and [scikit-build-core][] to bind C++ 
code into Python and build command line tools through Python. The command line 
tools are developed using [typer][].
 
 [pybind11]: https://pybind11.readthedocs.io
 [scikit-build-core]: https://scikit-build-core.readthedocs.io
@@ -22,7 +22,7 @@ And using Python in conda or venv is a good choice.
 ## Installation
 
 - Clone this repository
-- `pip install ./cli` or set verbose level `pip install -v ./cli`
+- `pip install ./python` or set verbose level `pip install -v ./python`
 
 ## Usage
 
diff --git a/cli/src/graphar_cli/__init__.py b/python/src/cli/__init__.py
similarity index 77%
copy from cli/src/graphar_cli/__init__.py
copy to python/src/cli/__init__.py
index e8091abd..09f891db 100644
--- a/cli/src/graphar_cli/__init__.py
+++ b/python/src/cli/__init__.py
@@ -15,8 +15,14 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from __future__ import annotations
+"""CLI module for GraphAr."""
 
-from ._core import __doc__, __version__
+# importlib.metadata is part of the standard library from Python 3.8 on;
+# otherwise fall back to the importlib_metadata backport package.
+try:
+    from importlib import metadata
+except ImportError:
+    import importlib_metadata as metadata
 
-__all__ = ["__doc__", "__version__"]
+# Best effort: report the version of the installed "graphar" distribution,
+# or "unknown" when the lookup fails for any reason.
+try:
+    __version__ = metadata.version("graphar")
+except Exception:
+    __version__ = "unknown"
diff --git a/cli/src/graphar_cli/graphar_cli.py b/python/src/cli/graphar_cli.py
similarity index 65%
rename from cli/src/graphar_cli/graphar_cli.py
rename to python/src/cli/graphar_cli.py
index 1c5be2e6..c7c1fc62 100644
--- a/cli/src/graphar_cli/graphar_cli.py
+++ b/python/src/cli/graphar_cli.py
@@ -17,16 +17,11 @@
 
 from logging import getLogger
 from pathlib import Path
-from typing import List
+from typing import List, Optional
 
 import typer
-import yaml
 
-from ._core import (  # type: ignore  # noqa: PGH003
-    check_edge,
-    check_graph,
-    check_vertex,
-    do_import,
+from graphar._core import (
     get_edge_count,
     get_edge_types,
     get_vertex_count,
@@ -35,9 +30,11 @@ from ._core import (  # type: ignore  # noqa: PGH003
     show_graph,
     show_vertex,
 )
-from .config import ImportConfig
-from .importer import validate
-from .logging import setup_logging
+from graphar.logging import setup_logging
+
+from graphar.importer import data_import
+
+from . import __version__
 
 app = typer.Typer(
     help="GraphAr Cli",
@@ -47,7 +44,21 @@ app = typer.Typer(
 )
 
 setup_logging()
-logger = getLogger(__name__)
+logger = getLogger("graphar_cli")
+
+
+@app.callback(invoke_without_command=True)
+def _callback(
+    ctx: typer.Context,
+    version: Optional[bool] = typer.Option(
+        False,
+        "--version",
+        "-v",
+        help="Show GraphAr version and exit",
+        is_eager=True,
+    ),
+):
+    """Top-level callback to support global options like --version.
+
+    Registered with ``invoke_without_command=True`` so that it also runs when
+    no sub-command is given. When ``--version`` / ``-v`` is passed, the
+    package version is echoed and the program exits; otherwise the callback
+    does nothing.
+    """
+    if version:
+        # Print version and exit immediately
+        typer.echo(f"GraphAr CLI Version: {__version__}")
+        raise typer.Exit()
 
 
 @app.command(
@@ -109,33 +120,12 @@ def show(
 def check(
     path: str = typer.Option(None, "--path", "-p", help="Path to the GraphAr 
config file"),
 ):
-    if not Path(path).exists():
-        logger.error("File not found: %s", path)
-        raise typer.Exit(1)
-    path = Path(path).resolve() if Path(path).is_absolute() else 
Path(Path.cwd(), path).resolve()
-    path = str(path)
-    vertex_types = get_vertex_types(path)
-    for vertex_type in vertex_types:
-        if not check_vertex(path, vertex_type):
-            logger.error("Vertex type %s is not valid", vertex_type)
-            raise typer.Exit(1)
-    edge_types = get_edge_types(path)
-    for edge_type in edge_types:
-        if edge_type[0] not in vertex_types:
-            logger.error("Source vertex type %s not found in the graph", 
edge_type[0])
-            raise typer.Exit(1)
-        if edge_type[2] not in vertex_types:
-            logger.error("Destination vertex type %s not found in the graph", 
edge_type[2])
-            raise typer.Exit(1)
-        if not check_edge(path, edge_type[0], edge_type[1], edge_type[2]):
-            logger.error(
-                "Edge type %s_%s_%s is not valid", edge_type[0], edge_type[1], 
edge_type[2]
-            )
-            raise typer.Exit(1)
-    if not check_graph(path):
-        logger.error("Graph is not valid")
+    try:
+        result_str = data_import.check(path)
+    except Exception as e:
+        logger.error(e)
         raise typer.Exit(1)
-    logger.info("Graph is valid")
+    logger.info(result_str)
 
 
 @app.command(
@@ -147,25 +137,12 @@ def check(
 def import_data(
     config_file: str = typer.Option(None, "--config", "-c", help="Path of the 
GraphAr config file"),
 ):
-    if not Path(config_file).is_file():
-        logger.error("File not found: %s", config_file)
-        raise typer.Exit(1)
-
     try:
-        with Path(config_file).open(encoding="utf-8") as file:
-            config = yaml.safe_load(file)
-        import_config = ImportConfig(**config)
-        validate(import_config)
+        result_str = data_import.import_data(config_file)
     except Exception as e:
-        logger.error("Invalid config: %s", e)
-        raise typer.Exit(1) from None
-    try:
-        logger.info("Starting import")
-        res = do_import(import_config.model_dump())
-        logger.info(res)
-    except Exception as e:
-        logger.error("Import failed: %s", e)
-        raise typer.Exit(1) from None
+        logger.error(e)
+        raise typer.Exit(1)
+    logger.info(result_str)
 
 
 def main() -> None:
diff --git a/cli/src/graphar_cli/__init__.py b/python/src/graphar/__init__.py
similarity index 83%
copy from cli/src/graphar_cli/__init__.py
copy to python/src/graphar/__init__.py
index e8091abd..bd438bff 100644
--- a/cli/src/graphar_cli/__init__.py
+++ b/python/src/graphar/__init__.py
@@ -17,6 +17,6 @@
 
 from __future__ import annotations
 
-from ._core import __doc__, __version__
+from .graph_info import GraphInfo, VertexInfo, EdgeInfo, PropertyGroup, 
Property
 
-__all__ = ["__doc__", "__version__"]
+__all__ = ["GraphInfo", "VertexInfo", "EdgeInfo", "PropertyGroup", "Property"]
diff --git a/cli/test/test_basic.py b/python/src/graphar/graph_info.py
similarity index 86%
copy from cli/test/test_basic.py
copy to python/src/graphar/graph_info.py
index 44afa889..13f8cb1c 100644
--- a/cli/test/test_basic.py
+++ b/python/src/graphar/graph_info.py
@@ -15,10 +15,6 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from __future__ import annotations
+from .types import Type
 
-import graphar_cli as m
-
-
-def test_version():
-    assert m.__version__ == "0.0.1"
+from ._core import DataType, Property, PropertyGroup, AdjacentList, 
VertexInfo, EdgeInfo, GraphInfo
diff --git a/cli/test/test_basic.py b/python/src/graphar/importer/__init__.py
similarity index 86%
copy from cli/test/test_basic.py
copy to python/src/graphar/importer/__init__.py
index 44afa889..13a83393 100644
--- a/cli/test/test_basic.py
+++ b/python/src/graphar/importer/__init__.py
@@ -14,11 +14,3 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-
-from __future__ import annotations
-
-import graphar_cli as m
-
-
-def test_version():
-    assert m.__version__ == "0.0.1"
diff --git a/cli/src/graphar_cli/config.py 
b/python/src/graphar/importer/config.py
similarity index 100%
rename from cli/src/graphar_cli/config.py
rename to python/src/graphar/importer/config.py
diff --git a/python/src/graphar/importer/data_import.py 
b/python/src/graphar/importer/data_import.py
new file mode 100644
index 00000000..d0b1e60e
--- /dev/null
+++ b/python/src/graphar/importer/data_import.py
@@ -0,0 +1,71 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pathlib import Path
+
+import yaml
+
+from .._core import (
+    check_edge,
+    check_graph,
+    check_vertex,
+    do_import,
+    get_edge_types,
+    get_vertex_types,
+)
+from .config import ImportConfig
+from .importer import validate
+
+
+def check(path: str):
+    """Validate the GraphAr graph described by the info file at ``path``.
+
+    Each vertex type and each edge type reported for the graph is checked,
+    every edge's source and destination vertex types must be among the
+    graph's vertex types, and finally the graph as a whole is checked.
+
+    Returns:
+        The string ``"Graph is valid"`` when every check passes.
+
+    Raises:
+        ValueError: if ``path`` does not exist or any check fails.
+    """
+    target = Path(path)
+    if not target.exists():
+        raise ValueError(f"File not found: {path}")
+    # The core functions are handed an absolute, resolved path string.
+    if not target.is_absolute():
+        target = Path.cwd() / target
+    graph_path = str(target.resolve())
+
+    vertex_types = get_vertex_types(graph_path)
+    for vertex_type in vertex_types:
+        if not check_vertex(graph_path, vertex_type):
+            raise ValueError(f"Vertex type {vertex_type} is not valid")
+
+    # Each edge type is indexed as (source type, edge type, destination type).
+    for edge_type in get_edge_types(graph_path):
+        if edge_type[0] not in vertex_types:
+            raise ValueError(
+                f"Source vertex type {edge_type[0]} not found in the graph"
+            )
+        if edge_type[2] not in vertex_types:
+            raise ValueError(
+                f"Destination vertex type {edge_type[2]} not found in the graph"
+            )
+        if not check_edge(graph_path, edge_type[0], edge_type[1], edge_type[2]):
+            raise ValueError(
+                f"Edge type {edge_type[0]}_{edge_type[1]}_{edge_type[2]} is not valid"
+            )
+
+    if not check_graph(graph_path):
+        raise ValueError("Graph is not valid")
+    return "Graph is valid"
+
+
+def import_data(config_file: str):
+    """Run a GraphAr import driven by the YAML config at ``config_file``.
+
+    The file is parsed with ``yaml.safe_load``, turned into an
+    ``ImportConfig``, validated, and then passed to ``do_import``.
+
+    Returns:
+        Whatever ``do_import`` returns.
+
+    Raises:
+        ValueError: if ``config_file`` is not a file ("File not found"), if
+            loading or validating the config fails ("Invalid config"), or if
+            the import itself fails ("Import failed"). The underlying
+            exception is attached as ``__cause__``.
+    """
+    config_path = Path(config_file)
+    if not config_path.is_file():
+        raise ValueError(f"File not found: {config_file}")
+
+    try:
+        with config_path.open(encoding="utf-8") as file:
+            config = yaml.safe_load(file)
+        import_config = ImportConfig(**config)
+        validate(import_config)
+    except Exception as e:
+        # Chain explicitly so the traceback reports the original failure as
+        # the direct cause of this error.
+        raise ValueError(f"Invalid config: {e}") from e
+    try:
+        res = do_import(import_config.model_dump())
+    except Exception as e:
+        raise ValueError(f"Import failed: {e}") from e
+    return res
diff --git a/cli/src/graphar_cli/importer.py 
b/python/src/graphar/importer/importer.py
similarity index 100%
rename from cli/src/graphar_cli/importer.py
rename to python/src/graphar/importer/importer.py
diff --git a/cli/src/graphar_cli/logging.py b/python/src/graphar/logging.py
similarity index 100%
rename from cli/src/graphar_cli/logging.py
rename to python/src/graphar/logging.py
diff --git a/cli/src/graphar_cli/__init__.py b/python/src/graphar/types.py
similarity index 87%
rename from cli/src/graphar_cli/__init__.py
rename to python/src/graphar/types.py
index e8091abd..7a1c1b24 100644
--- a/cli/src/graphar_cli/__init__.py
+++ b/python/src/graphar/types.py
@@ -15,8 +15,4 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from __future__ import annotations
-
-from ._core import __doc__, __version__
-
-__all__ = ["__doc__", "__version__"]
+from ._core import Type, FileType, AdjListType, Cardinality
diff --git a/cli/test/test_basic.py b/python/test/conftest.py
similarity index 83%
rename from cli/test/test_basic.py
rename to python/test/conftest.py
index 44afa889..9b62460a 100644
--- a/cli/test/test_basic.py
+++ b/python/test/conftest.py
@@ -15,10 +15,11 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from __future__ import annotations
+import os
+import pytest
 
-import graphar_cli as m
 
-
-def test_version():
-    assert m.__version__ == "0.0.1"
+@pytest.fixture
+def test_data_root():
+    """Return the test-data root: ``$GAR_TEST_DATA`` if set, else "../../"."""
+    return os.environ.get("GAR_TEST_DATA", "../../")
diff --git a/python/test/test_graph_info.py b/python/test/test_graph_info.py
new file mode 100644
index 00000000..095a22ea
--- /dev/null
+++ b/python/test/test_graph_info.py
@@ -0,0 +1,160 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pytest
+import typer
+
+import graphar as gar
+
+
+@pytest.fixture
+def sample_graph(test_data_root):
+    """Path to the ldbc_sample CSV graph info file under the data root."""
+    return test_data_root + "/ldbc_sample/csv/" + "ldbc_sample.graph.yml"
+
+
+@pytest.fixture
+def sample_graph_info(sample_graph):
+    """GraphInfo loaded from the sample graph info file."""
+    return gar.graph_info.GraphInfo.load(sample_graph)
+
+
+@pytest.fixture
+def sample_graph_vertex(sample_graph_info):
+    """VertexInfo of the "person" vertex type in the sample graph."""
+    return sample_graph_info.get_vertex_info("person")
+
+
+@pytest.fixture
+def sample_graph_edge(sample_graph_info):
+    """EdgeInfo of the person-knows-person edge in the sample graph."""
+    return sample_graph_info.get_edge_info("person", "knows", "person")
+
+
+def test_graph_info_basics(sample_graph_info):
+    """The sample graph loads with the expected name, counts and version."""
+    info = sample_graph_info
+    assert info is not None
+    assert info.get_name() == "ldbc_sample"
+
+    # One vertex type and one edge type, via both the list and count APIs.
+    assert len(info.get_vertex_infos()) == 1
+    assert info.vertex_info_num() == 1
+    assert len(info.get_edge_infos()) == 1
+    assert info.edge_info_num() == 1
+
+    # Lookups by type name return an object.
+    assert info.get_vertex_info("person") is not None
+    assert info.get_edge_info("person", "knows", "person") is not None
+
+    # Info version.
+    assert info.version().get_version() == 1
+
+
+def test_person_vertex_info_basics(sample_graph_vertex):
+    """The person vertex info exposes the expected scalar attributes."""
+    vertex = sample_graph_vertex
+    assert vertex.get_type() == "person"
+    assert vertex.get_chunk_size() == 100
+    assert vertex.get_prefix() == "vertex/person/"
+    assert vertex.property_group_num() == 2
+    assert vertex.version().get_version() == 1
+
+
+def test_person_vertex_property_groups(sample_graph_vertex):
+    """The person vertex has an id group and a name/gender group."""
+    vertex = sample_graph_vertex
+    csv = gar.types.FileType.CSV
+
+    # Group 0: the id property group.
+    id_group = vertex.get_property_group_by_index(0)
+    assert id_group is not None
+    assert id_group.get_prefix() == "id/"
+    assert id_group.get_file_type() == csv
+
+    # "id" is an int64, non-nullable primary key.
+    assert vertex.has_property("id")
+    assert vertex.get_property_type("id").to_type_name() == "int64"
+    assert vertex.is_primary_key("id")
+    assert not vertex.is_nullable_key("id")
+
+    # Group 1: firstName_lastName_gender.
+    name_group = vertex.get_property_group_by_index(1)
+    assert name_group is not None
+    assert name_group.get_prefix() == "firstName_lastName_gender/"
+    assert name_group.get_file_type() == csv
+
+    # All three properties of that group are strings.
+    for name in ("firstName", "lastName", "gender"):
+        assert vertex.has_property(name)
+        assert vertex.get_property_type(name).to_type_name() == "string"
+
+
+def test_knows_edge_info_basics(sample_graph_edge):
+    """The knows edge info exposes the expected scalar attributes."""
+    edge = sample_graph_edge
+    assert edge.get_edge_type() == "knows"
+    assert edge.get_chunk_size() == 1024
+    assert edge.get_src_type() == "person"
+    assert edge.get_src_chunk_size() == 100
+    assert edge.get_dst_type() == "person"
+    assert edge.get_dst_chunk_size() == 100
+    assert not edge.is_directed()
+    assert edge.get_prefix() == "edge/person_knows_person/"
+    assert edge.version().get_version() == 1
+
+
+def test_knows_edge_adjacency_lists(sample_graph_edge):
+    """The knows edge has ordered_by_source and ordered_by_dest adj lists."""
+    by_source = gar.types.AdjListType.ordered_by_source
+    by_dest = gar.types.AdjListType.ordered_by_dest
+
+    # Both adjacency list types are present on the edge.
+    assert sample_graph_edge.has_adjacent_list_type(by_source)
+    assert sample_graph_edge.has_adjacent_list_type(by_dest)
+
+    # Inspect the ordered_by_source adjacency list.
+    adj_list = sample_graph_edge.get_adjacent_list(by_source)
+    assert adj_list is not None
+    assert adj_list.get_file_type() == gar.types.FileType.CSV
+    assert adj_list.get_type() == by_source
+    assert adj_list.get_prefix() == "ordered_by_source/"
+
+
+def test_knows_edge_property_groups(sample_graph_edge):
+    """The knows edge has a single creationDate property group."""
+    edge = sample_graph_edge
+    assert edge.property_group_num() == 1
+
+    # The only property group.
+    group = edge.get_property_group_by_index(0)
+    assert group is not None
+    assert group.get_prefix() == "creationDate/"
+    assert group.get_file_type() == gar.types.FileType.CSV
+
+    # "creationDate" is a nullable string that is not a primary key.
+    assert edge.has_property("creationDate")
+    assert edge.get_property_type("creationDate").to_type_name() == "string"
+    assert not edge.is_primary_key("creationDate")
+    assert edge.is_nullable_key("creationDate")
+
+
+def test_graph_validation(
+    sample_graph_info, sample_graph_vertex, sample_graph_edge
+):
+    """The sample graph and its vertex and edge infos all report validated."""
+    for info in (sample_graph_info, sample_graph_vertex, sample_graph_edge):
+        assert info.is_validated()
diff --git a/python/test/test_graphar_cli.py b/python/test/test_graphar_cli.py
new file mode 100644
index 00000000..028ee842
--- /dev/null
+++ b/python/test/test_graphar_cli.py
@@ -0,0 +1,80 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pytest
+import typer
+from pathlib import Path
+
+from cli import graphar_cli
+
+
+@pytest.fixture
+def sample_cfg():
+    """Return the resolved path of the LDBC sample parquet graph YAML.
+
+    The path is built relative to this test file: two directories up, then
+    ``testing/ldbc_sample/parquet/ldbc_sample.graph.yml``.
+    """
+    return (
+        Path(__file__).parent
+        / ".."
+        / ".."
+        / "testing"
+        / "ldbc_sample"
+        / "parquet"
+        / "ldbc_sample.graph.yml"
+    ).resolve()
+
+
+def test_show_file_not_found(tmp_path):
+    """show must raise typer.Exit when the given path does not exist."""
+    # Nothing is ever written to this path, so the file is absent.
+    missing_file = str(tmp_path / "nope.yaml")
+    with pytest.raises(typer.Exit):
+        graphar_cli.show(path=missing_file)
+
+
+def test_show_edge_not_all_set(sample_cfg):
+    """show must raise typer.Exit when the edge parameters are incomplete."""
+    config_path = str(sample_cfg)
+    # Only edge_src is passed; the remaining edge parameters are omitted.
+    with pytest.raises(typer.Exit):
+        graphar_cli.show(path=config_path, edge_src="s")
+
+
+def test_show_graph_default(sample_cfg):
+    """Test show command with default parameters (show entire graph).
+
+    The command may return normally or finish by raising typer.Exit. In the
+    latter case the exit code must be 0, so that a failing command is not
+    mistaken for a successful one.
+    """
+    cfg = sample_cfg
+    try:
+        graphar_cli.show(path=str(cfg))
+    except typer.Exit as exc:
+        # Only a zero exit code counts as success; anything else means the
+        # command reported an error and the test must fail.
+        assert exc.exit_code == 0
+
+
+def test_check_success(sample_cfg):
+    """Test check command with a valid graph configuration.
+
+    The command may return normally or finish by raising typer.Exit. In the
+    latter case the exit code must be 0, so that a failed check is not
+    mistaken for a successful one.
+    """
+    cfg = sample_cfg
+    try:
+        graphar_cli.check(path=str(cfg))
+    except typer.Exit as exc:
+        # Only a zero exit code counts as success; anything else means the
+        # check reported an error and the test must fail.
+        assert exc.exit_code == 0
+
+
+def test_import_data_exception():
+    """import_data must raise typer.Exit for a non-existent config file."""
+    # The test never creates this file, so it is expected to be absent.
+    bogus_config = "non_existent_config.yaml"
+    with pytest.raises(typer.Exit):
+        graphar_cli.import_data(config_file=bogus_config)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to