This is an automated email from the ASF dual-hosted git repository.

xushiyan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hudi-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 2bb004b  build: add info for rust and python artifacts (#60)
2bb004b is described below

commit 2bb004b48efb5624813671e38c890c6abff01712
Author: Shiyan Xu <[email protected]>
AuthorDate: Wed Jul 10 03:05:36 2024 -0500

    build: add info for rust and python artifacts (#60)
    
    - Make `datafusion` a feature to hudi crate
    - Add `__version__` to python package
    - Add more info for package repositories
---
 Cargo.toml                   |  7 +++++--
 crates/datafusion/Cargo.toml |  2 --
 crates/datafusion/src/lib.rs |  4 ++--
 crates/hudi/Cargo.toml       |  6 ++++++
 crates/hudi/src/lib.rs       |  3 +++
 python/README.md             | 23 +++++++++++++++++++++++
 python/hudi/__init__.py      |  1 +
 python/hudi/_internal.pyi    |  2 ++
 python/pyproject.toml        |  1 +
 python/src/lib.rs            |  2 ++
 10 files changed, 45 insertions(+), 6 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 24412e8..8259243 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -27,6 +27,11 @@ version = "0.1.0"
 edition = "2021"
 license = "Apache-2.0"
 rust-version = "1.75.0"
+keywords = ["apachehudi", "hudi", "datalake", "arrow"]
+readme = "README.md"
+description = "A native Rust library for Apache Hudi"
+homepage = "https://github.com/apache/hudi-rs"
+repository = "https://github.com/apache/hudi-rs"
 
 [workspace.dependencies]
 # arrow
@@ -48,8 +53,6 @@ parquet = { version = "52.0.0", features = ["async", "object_store"] }
 datafusion = { version = "39.0.0" }
 datafusion-expr = { version = "39.0.0" }
 datafusion-common = { version = "39.0.0" }
-datafusion-proto = { version = "39.0.0" }
-datafusion-sql = { version = "39.0.0" }
 datafusion-physical-expr = { version = "39.0.0" }
 
 # serde
diff --git a/crates/datafusion/Cargo.toml b/crates/datafusion/Cargo.toml
index 94e1ae7..de7f2a1 100644
--- a/crates/datafusion/Cargo.toml
+++ b/crates/datafusion/Cargo.toml
@@ -31,8 +31,6 @@ arrow-schema = { workspace = true }
 datafusion = { workspace = true }
 datafusion-expr = { workspace = true }
 datafusion-common = { workspace = true }
-datafusion-proto = { workspace = true }
-datafusion-sql = { workspace = true }
 datafusion-physical-expr = { workspace = true }
 
 # runtime / async
diff --git a/crates/datafusion/src/lib.rs b/crates/datafusion/src/lib.rs
index 5f46b23..766d3d0 100644
--- a/crates/datafusion/src/lib.rs
+++ b/crates/datafusion/src/lib.rs
@@ -31,11 +31,11 @@ use datafusion::datasource::physical_plan::FileScanConfig;
 use datafusion::datasource::TableProvider;
 use datafusion::execution::context::SessionState;
 use datafusion::physical_plan::ExecutionPlan;
+use datafusion_common::DFSchema;
+use datafusion_common::DataFusionError::Execution;
 use datafusion_common::Result;
-use datafusion_common::{DFSchema, DataFusionError};
 use datafusion_expr::{Expr, TableType};
 use datafusion_physical_expr::create_physical_expr;
-use DataFusionError::Execution;
 
 use hudi_core::config::read::HudiReadConfig::InputPartitions;
 use hudi_core::storage::utils::{empty_options, get_scheme_authority, parse_uri};
diff --git a/crates/hudi/Cargo.toml b/crates/hudi/Cargo.toml
index b6a08a8..b30bcb5 100644
--- a/crates/hudi/Cargo.toml
+++ b/crates/hudi/Cargo.toml
@@ -24,3 +24,9 @@ rust-version.workspace = true
 
 [dependencies]
 hudi-core = { path = "../core" }
+hudi-datafusion = { path = "../datafusion", optional = true }
+
+[features]
+datafusion = [
+    "hudi-datafusion",
+]
diff --git a/crates/hudi/src/lib.rs b/crates/hudi/src/lib.rs
index 84459f4..2fa1096 100644
--- a/crates/hudi/src/lib.rs
+++ b/crates/hudi/src/lib.rs
@@ -18,3 +18,6 @@
  */
 
 pub use hudi_core::*;
+
+#[cfg(feature = "datafusion")]
+pub use hudi_datafusion::*;
diff --git a/python/README.md b/python/README.md
index 4f2576e..1ff25e2 100644
--- a/python/README.md
+++ b/python/README.md
@@ -21,3 +21,26 @@
 
 Native [Apache Hudi](https://github.com/apache/hudi) Python binding based
 on [hudi-rs](https://github.com/apache/hudi-rs).
+
+## Installation
+
+```shell
+pip install hudi
+```
+
+## Example
+
+```python
+from hudi import HudiTable
+
+hudi_table = HudiTable("/tmp/trips_table")
+records = hudi_table.read_snapshot()
+
+import pyarrow as pa
+
+arrow_table = pa.Table.from_batches(records)
+result = arrow_table.select(
+    ["rider", "ts", "fare"]).filter(
+    pa.compute.field("fare") > 20.0)
+print(result)
+```
diff --git a/python/hudi/__init__.py b/python/hudi/__init__.py
index 1dee57b..09a9339 100644
--- a/python/hudi/__init__.py
+++ b/python/hudi/__init__.py
@@ -15,5 +15,6 @@
 #  specific language governing permissions and limitations
 #  under the License.
 
+from ._internal import __version__ as __version__
 from ._internal import HudiFileSlice as HudiFileSlice
 from ._internal import HudiTable as HudiTable
diff --git a/python/hudi/_internal.pyi b/python/hudi/_internal.pyi
index 421a80c..fd97cc3 100644
--- a/python/hudi/_internal.pyi
+++ b/python/hudi/_internal.pyi
@@ -19,6 +19,8 @@ from typing import Optional, Dict, List
 
 import pyarrow
 
+__version__: str
+
 
 @dataclass(init=False)
 class HudiFileSlice:
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 175773c..13c1c8b 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -25,6 +25,7 @@ description = "Native Hudi Python binding based on hudi-rs"
 urls = { repository = "https://github.com/apache/hudi-rs/tree/main/python/" }
 readme = "README.md"
 requires-python = ">=3.8"
+keywords = ["apachehudi", "hudi", "datalake", "arrow"]
 license = "Apache License 2.0"
 classifiers = [
     "License :: OSI Approved :: Apache Software License",
diff --git a/python/src/lib.rs b/python/src/lib.rs
index 99b7ef9..ad96dc6 100644
--- a/python/src/lib.rs
+++ b/python/src/lib.rs
@@ -23,6 +23,8 @@ mod internal;
 #[cfg(not(tarpaulin))]
 #[pymodule]
 fn _internal(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
+    m.add("__version__", env!("CARGO_PKG_VERSION"))?;
+
     use internal::{HudiFileSlice, HudiTable};
     m.add_class::<HudiFileSlice>()?;
     m.add_class::<HudiTable>()?;

Reply via email to