This is an automated email from the ASF dual-hosted git repository.
xushiyan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hudi-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 567ed43 feat: add python binding (#21)
567ed43 is described below
commit 567ed43be723222a9d6b6bbd8ab2fab02083030c
Author: Shiyan Xu <[email protected]>
AuthorDate: Wed Jun 12 15:12:24 2024 -0500
feat: add python binding (#21)
---
.cargo/config.toml | 22 ++++++++++
.github/workflows/ci.yml | 3 +-
Cargo.toml | 1 +
crates/core/src/lib.rs | 4 ++
python/.cargo/config.toml | 28 ++++++++++++
python/.gitignore | 89 ++++++++++++++++++++++++++++++++++++++
python/Cargo.toml | 40 +++++++++++++++++
python/Makefile | 42 ++++++++++++++++++
python/README.md | 23 ++++++++++
python/hudi/__init__.py | 16 +++++++
python/hudi/_internal.pyi | 20 +++++++++
python/hudi/py.typed | 16 +++++++
python/pyproject.toml | 44 +++++++++++++++++++
{crates/core => python}/src/lib.rs | 19 ++++----
14 files changed, 356 insertions(+), 11 deletions(-)
diff --git a/.cargo/config.toml b/.cargo/config.toml
new file mode 100644
index 0000000..0b24f30
--- /dev/null
+++ b/.cargo/config.toml
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[target.x86_64-apple-darwin]
+rustflags = [
+ "-C", "link-arg=-undefined",
+ "-C", "link-arg=dynamic_lookup",
+]
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b1be13a..e8182c7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -61,7 +61,8 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Unit test with code coverage
- run: cargo tarpaulin --verbose --no-fail-fast --all-features --workspace --out xml
+ # https://github.com/xd009642/tarpaulin/issues/1092#issuecomment-1407739176
+ run: cargo tarpaulin --engine llvm --no-dead-code --no-fail-fast --all-features --workspace --out xml
- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v4
with:
diff --git a/Cargo.toml b/Cargo.toml
index 8c9a163..e24d132 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,6 +18,7 @@
[workspace]
members = [
"crates/*",
+ "python",
]
resolver = "2"
diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs
index c36d538..abf9a66 100644
--- a/crates/core/src/lib.rs
+++ b/crates/core/src/lib.rs
@@ -30,3 +30,7 @@ pub const BASE_FILE_EXTENSIONS: [&str; 1] = ["parquet"];
pub fn is_base_file_format_supported(ext: &str) -> bool {
BASE_FILE_EXTENSIONS.contains(&ext)
}
+
+pub fn crate_version() -> &'static str {
+ env!("CARGO_PKG_VERSION")
+}
diff --git a/python/.cargo/config.toml b/python/.cargo/config.toml
new file mode 100644
index 0000000..406230e
--- /dev/null
+++ b/python/.cargo/config.toml
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[target.x86_64-apple-darwin]
+rustflags = [
+ "-C", "link-arg=-undefined",
+ "-C", "link-arg=dynamic_lookup",
+]
+
+[target.aarch64-apple-darwin]
+rustflags = [
+ "-C", "link-arg=-undefined",
+ "-C", "link-arg=dynamic_lookup",
+]
diff --git a/python/.gitignore b/python/.gitignore
new file mode 100644
index 0000000..db2736b
--- /dev/null
+++ b/python/.gitignore
@@ -0,0 +1,89 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+/target
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+.pytest_cache/
+*.py[cod]
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+.venv/
+env/
+bin/
+build/
+develop-eggs/
+dist/
+eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+include/
+man/
+venv/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+pip-selfcheck.json
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.cache
+nosetests.xml
+coverage.xml
+
+# Translations
+*.mo
+
+# Mr Developer
+.mr.developer.cfg
+.project
+.pydevproject
+
+# Rope
+.ropeproject
+
+# Django stuff:
+*.log
+*.pot
+
+.DS_Store
+
+# Sphinx documentation
+docs/_build/
+
+# PyCharm
+.idea/
+
+# VSCode
+.vscode/
+
+# Pyenv
+.python-version
diff --git a/python/Cargo.toml b/python/Cargo.toml
new file mode 100644
index 0000000..0a6c666
--- /dev/null
+++ b/python/Cargo.toml
@@ -0,0 +1,40 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "hudi-python"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+[lib]
+name = "hudi"
+crate-type = ["cdylib"]
+doc = false
+
+[dependencies]
+# runtime
+futures = { workspace = true }
+num_cpus = { workspace = true }
+tokio = { workspace = true, features = ["rt-multi-thread"] }
+
+[dependencies.pyo3]
+version = "0.21.2"
+features = ["extension-module", "abi3", "abi3-py38", "gil-refs"]
+
+[dependencies.hudi]
+path = "../crates/hudi"
diff --git a/python/Makefile b/python/Makefile
new file mode 100644
index 0000000..c73bacc
--- /dev/null
+++ b/python/Makefile
@@ -0,0 +1,42 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+.DEFAULT_GOAL := help
+
+VENV := .venv
+MATURIN_VERSION := $(shell grep 'requires =' pyproject.toml | cut -d= -f2- | tr -d '[ "]')
+PACKAGE_VERSION := $(shell grep version Cargo.toml | head -n 1 | awk '{print $$3}' | tr -d '"' )
+
+.PHONY: setup-venv
+setup-venv: ## Setup the virtualenv
+ $(info --- Setup virtualenv ---)
+ python -m venv $(VENV)
+
+.PHONY: setup
+setup: ## Setup the requirements
+ $(info --- Setup dependencies ---)
+ pip install "$(MATURIN_VERSION)"
+
+.PHONY: build
+build: setup ## Build Python binding of hudi-rs
+ $(info --- Build Python binding ---)
+ maturin build $(MATURIN_EXTRA_ARGS)
+
+.PHONY: develop
+develop: setup ## Install Python binding of hudi-rs
+ $(info --- Develop with Python binding ---)
+ maturin develop --extras=devel,pandas $(MATURIN_EXTRA_ARGS)
diff --git a/python/README.md b/python/README.md
new file mode 100644
index 0000000..4f2576e
--- /dev/null
+++ b/python/README.md
@@ -0,0 +1,23 @@
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied. See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+-->
+
+# hudi-python
+
+Native [Apache Hudi](https://github.com/apache/hudi) Python binding based
+on [hudi-rs](https://github.com/apache/hudi-rs).
diff --git a/python/hudi/__init__.py b/python/hudi/__init__.py
new file mode 100644
index 0000000..a67d5ea
--- /dev/null
+++ b/python/hudi/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/python/hudi/_internal.pyi b/python/hudi/_internal.pyi
new file mode 100644
index 0000000..75aaa86
--- /dev/null
+++ b/python/hudi/_internal.pyi
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+__version__: str
+
+def rust_core_version() -> str: ...
diff --git a/python/hudi/py.typed b/python/hudi/py.typed
new file mode 100644
index 0000000..13a8339
--- /dev/null
+++ b/python/hudi/py.typed
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/python/pyproject.toml b/python/pyproject.toml
new file mode 100644
index 0000000..1abc6c1
--- /dev/null
+++ b/python/pyproject.toml
@@ -0,0 +1,44 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[build-system]
+requires = ["maturin>=1.5,<2.0"]
+build-backend = "maturin"
+
+[project]
+name = "hudi"
+description = "Native Hudi Python binding based on hudi-rs"
+readme = "README.md"
+requires-python = ">=3.8"
+license = "Apache License 2.0"
+classifiers = [
+ "License :: OSI Approved :: Apache Software License",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12"
+]
+dependencies = [
+ "pyarrow>=8"
+]
+
+dynamic = ["version"]
+
+[tool.maturin]
+module-name = "hudi._internal"
+features = ["pyo3/extension-module"]
diff --git a/crates/core/src/lib.rs b/python/src/lib.rs
similarity index 75%
copy from crates/core/src/lib.rs
copy to python/src/lib.rs
index c36d538..ad03f49 100644
--- a/crates/core/src/lib.rs
+++ b/python/src/lib.rs
@@ -17,16 +17,15 @@
* under the License.
*/
-use crate::table::Table;
+use pyo3::prelude::*;
-mod error;
-mod file_group;
-pub mod table;
-pub type HudiTable = Table;
-mod timeline;
-
-pub const BASE_FILE_EXTENSIONS: [&str; 1] = ["parquet"];
+#[pyfunction]
+fn rust_core_version() -> &'static str {
+ hudi::crate_version()
+}
-pub fn is_base_file_format_supported(ext: &str) -> bool {
- BASE_FILE_EXTENSIONS.contains(&ext)
+#[pymodule]
+fn _internal(_py: Python, m: &PyModule) -> PyResult<()> {
+ m.add_function(wrap_pyfunction!(rust_core_version, m)?)?;
+ Ok(())
}