This is an automated email from the ASF dual-hosted git repository.
xushiyan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hudi-rs.git
The following commit(s) were added to refs/heads/main by this push:
new f75f12d style: enforce Python code style (#101)
f75f12d is described below
commit f75f12d8c436ef64daca1b23147007f578de1bb2
Author: muyihao <[email protected]>
AuthorDate: Wed Aug 7 09:50:31 2024 +0800
style: enforce Python code style (#101)
- Add ruff and mypy configurations to pyproject.toml
- Integrate Python code check in GitHub Actions
- Format code with Ruff and Ruff Format
---------
Co-authored-by: yanghao14 <[email protected]>
Co-authored-by: Shiyan Xu <[email protected]>
---
.github/workflows/compliance.yml | 19 +++++-
python/Makefile | 9 +++
python/hudi/__init__.py | 2 +-
python/hudi/_internal.pyi | 21 ++-----
python/pyproject.toml | 18 ++++++
python/tests/test_table_read.py | 130 +++++++++++++++++++++++++++++----------
6 files changed, 149 insertions(+), 50 deletions(-)
diff --git a/.github/workflows/compliance.yml b/.github/workflows/compliance.yml
index 6f0e4db..dcc4973 100644
--- a/.github/workflows/compliance.yml
+++ b/.github/workflows/compliance.yml
@@ -47,5 +47,22 @@ jobs:
- uses: actions/checkout@v4
- name: Check license header
uses: apache/skywalking-eyes/[email protected]
- - name: Check code style
+ - name: Check rust code style
run: cd python && make check-rust
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+ cache: pip
+ cache-dependency-path: pyproject.toml
+ - name: Install python linter dependencies
+ working-directory: ./python
+ run: |
+ make setup-env
+ source venv/bin/activate
+ pip install ruff mypy
+ - name: Run python linter
+ working-directory: ./python
+ run: |
+ source venv/bin/activate
+ make check-python
diff --git a/python/Makefile b/python/Makefile
index 4badaca..a60205d 100644
--- a/python/Makefile
+++ b/python/Makefile
@@ -48,6 +48,15 @@ check-rust: ## Run check on Rust
$(info --- Check Rust format ---)
cargo fmt --all -- --check
+.PHONY: check-python
+check-python: ## Run check on Python
+ $(info --- Check Python format ---)
+ ruff format --check --diff .
+ $(info --- Check Python linting ---)
+ ruff check .
+ $(info --- Check Python typing ---)
+ mypy .
+
.PHONY: test-rust
test-rust: ## Run tests on Rust
$(info --- Run Rust tests ---)
diff --git a/python/hudi/__init__.py b/python/hudi/__init__.py
index 09a9339..b0a792e 100644
--- a/python/hudi/__init__.py
+++ b/python/hudi/__init__.py
@@ -15,6 +15,6 @@
# specific language governing permissions and limitations
# under the License.
-from ._internal import __version__ as __version__
from ._internal import HudiFileSlice as HudiFileSlice
from ._internal import HudiTable as HudiTable
+from ._internal import __version__ as __version__
diff --git a/python/hudi/_internal.pyi b/python/hudi/_internal.pyi
index fd97cc3..0f83aee 100644
--- a/python/hudi/_internal.pyi
+++ b/python/hudi/_internal.pyi
@@ -15,13 +15,12 @@
# specific language governing permissions and limitations
# under the License.
from dataclasses import dataclass
-from typing import Optional, Dict, List
+from typing import Dict, List, Optional
-import pyarrow
+import pyarrow # type: ignore
__version__: str
-
@dataclass(init=False)
class HudiFileSlice:
file_group_id: str
@@ -33,24 +32,16 @@ class HudiFileSlice:
def base_file_relative_path(self) -> str: ...
-
@dataclass(init=False)
class HudiTable:
-
def __init__(
- self,
- table_uri: str,
- options: Optional[Dict[str, str]] = None,
+ self,
+ table_uri: str,
+ options: Optional[Dict[str, str]] = None,
): ...
-
def get_schema(self) -> "pyarrow.Schema": ...
-
def split_file_slices(self, n: int) -> List[List[HudiFileSlice]]: ...
-
def get_file_slices(self) -> List[HudiFileSlice]: ...
-
- def read_file_slice(self, base_file_relative_path) -> pyarrow.RecordBatch: ...
-
+ def read_file_slice(self, base_file_relative_path: str) -> pyarrow.RecordBatch: ...
def read_snapshot(self) -> List["pyarrow.RecordBatch"]: ...
-
def read_snapshot_as_of(self, timestamp: str) -> List["pyarrow.RecordBatch"]: ...
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 36f350e..367cf46 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -42,6 +42,8 @@ dependencies = [
optional-dependencies = { devel = [
"pytest",
"coverage",
+ "ruff==0.5.2",
+ "mypy==1.10.1",
] }
dynamic = ["version"]
@@ -49,9 +51,25 @@ dynamic = ["version"]
[tool.maturin]
module-name = "hudi._internal"
+[tool.ruff]
+target-version = 'py38'
+# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
+lint.select = [
+ "E4",
+ "E7",
+ "E9",
+ "F",
+ # isort
+ "I",
+]
+# don't ignore any rule unless it becomes imperative
+lint.ignore = []
+lint.isort.known-first-party = ["hudi"]
+
[tool.mypy]
files = "hudi/*.py"
exclude = "^tests"
+strict = true
[tool.pytest.ini_options]
testpaths = [
diff --git a/python/tests/test_table_read.py b/python/tests/test_table_read.py
index e56463c..c3c84c9 100644
--- a/python/tests/test_table_read.py
+++ b/python/tests/test_table_read.py
@@ -20,28 +20,49 @@ import pytest
from hudi import HudiTable
-PYARROW_LE_8_0_0 = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (8, 0, 0)
-pytestmark = pytest.mark.skipif(PYARROW_LE_8_0_0, reason="hudi only supported if pyarrow >= 8.0.0")
+PYARROW_LE_8_0_0 = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (
+ 8,
+ 0,
+ 0,
+)
+pytestmark = pytest.mark.skipif(
+ PYARROW_LE_8_0_0, reason="hudi only supported if pyarrow >= 8.0.0"
+)
def test_sample_table(get_sample_table):
table_path = get_sample_table
table = HudiTable(table_path)
- assert table.get_schema().names == ['_hoodie_commit_time', '_hoodie_commit_seqno', '_hoodie_record_key',
- '_hoodie_partition_path', '_hoodie_file_name', 'ts', 'uuid', 'rider', 'driver',
- 'fare', 'city']
+ assert table.get_schema().names == [
+ "_hoodie_commit_time",
+ "_hoodie_commit_seqno",
+ "_hoodie_record_key",
+ "_hoodie_partition_path",
+ "_hoodie_file_name",
+ "ts",
+ "uuid",
+ "rider",
+ "driver",
+ "fare",
+ "city",
+ ]
file_slices = table.get_file_slices()
assert len(file_slices) == 5
- assert set(f.commit_time for f in file_slices) == {'20240402123035233', '20240402144910683'}
+ assert set(f.commit_time for f in file_slices) == {
+ "20240402123035233",
+ "20240402144910683",
+ }
assert all(f.num_records == 1 for f in file_slices)
file_slice_paths = [f.base_file_relative_path() for f in file_slices]
- assert set(file_slice_paths) == {'chennai/68d3c349-f621-4cd8-9e8b-c6dd8eb20d08-0_4-12-0_20240402123035233.parquet',
- 'san_francisco/d9082ffd-2eb1-4394-aefc-deb4a61ecc57-0_1-9-0_20240402123035233.parquet',
- 'san_francisco/780b8586-3ad0-48ef-a6a1-d2217845ce4a-0_0-8-0_20240402123035233.parquet',
- 'san_francisco/5a226868-2934-4f84-a16f-55124630c68d-0_0-7-24_20240402144910683.parquet',
- 'sao_paulo/ee915c68-d7f8-44f6-9759-e691add290d8-0_3-11-0_20240402123035233.parquet'}
+ assert set(file_slice_paths) == {
+ "chennai/68d3c349-f621-4cd8-9e8b-c6dd8eb20d08-0_4-12-0_20240402123035233.parquet",
+ "san_francisco/d9082ffd-2eb1-4394-aefc-deb4a61ecc57-0_1-9-0_20240402123035233.parquet",
+ "san_francisco/780b8586-3ad0-48ef-a6a1-d2217845ce4a-0_0-8-0_20240402123035233.parquet",
+ "san_francisco/5a226868-2934-4f84-a16f-55124630c68d-0_0-7-24_20240402144910683.parquet",
+ "sao_paulo/ee915c68-d7f8-44f6-9759-e691add290d8-0_3-11-0_20240402123035233.parquet",
+ }
batch = table.read_file_slice(file_slice_paths[0])
t = pa.Table.from_batches([batch])
@@ -54,28 +75,71 @@ def test_sample_table(get_sample_table):
batches = table.read_snapshot()
t = pa.Table.from_batches(batches).select([0, 5, 6, 9]).sort_by("ts")
- assert t.to_pylist() == [{'_hoodie_commit_time': '20240402144910683', 'ts': 1695046462179,
- 'uuid': '9909a8b1-2d15-4d3d-8ec9-efc48c536a00', 'fare': 339.0},
- {'_hoodie_commit_time': '20240402123035233', 'ts': 1695091554788,
- 'uuid': 'e96c4396-3fad-413a-a942-4cb36106d721', 'fare': 27.7},
- {'_hoodie_commit_time': '20240402123035233', 'ts': 1695115999911,
- 'uuid': 'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa', 'fare': 17.85},
- {'_hoodie_commit_time': '20240402123035233', 'ts': 1695159649087,
- 'uuid': '334e26e9-8355-45cc-97c6-c31daf0df330', 'fare': 19.1},
- {'_hoodie_commit_time': '20240402123035233', 'ts': 1695516137016,
- 'uuid': 'e3cf430c-889d-4015-bc98-59bdce1e530c', 'fare': 34.15}]
+ assert t.to_pylist() == [
+ {
+ "_hoodie_commit_time": "20240402144910683",
+ "ts": 1695046462179,
+ "uuid": "9909a8b1-2d15-4d3d-8ec9-efc48c536a00",
+ "fare": 339.0,
+ },
+ {
+ "_hoodie_commit_time": "20240402123035233",
+ "ts": 1695091554788,
+ "uuid": "e96c4396-3fad-413a-a942-4cb36106d721",
+ "fare": 27.7,
+ },
+ {
+ "_hoodie_commit_time": "20240402123035233",
+ "ts": 1695115999911,
+ "uuid": "c8abbe79-8d89-47ea-b4ce-4d224bae5bfa",
+ "fare": 17.85,
+ },
+ {
+ "_hoodie_commit_time": "20240402123035233",
+ "ts": 1695159649087,
+ "uuid": "334e26e9-8355-45cc-97c6-c31daf0df330",
+ "fare": 19.1,
+ },
+ {
+ "_hoodie_commit_time": "20240402123035233",
+ "ts": 1695516137016,
+ "uuid": "e3cf430c-889d-4015-bc98-59bdce1e530c",
+ "fare": 34.15,
+ },
+ ]
- table = HudiTable(table_path, {
- "hoodie.read.as.of.timestamp": "20240402123035233"})
+ table = HudiTable(table_path, {"hoodie.read.as.of.timestamp": "20240402123035233"})
batches = table.read_snapshot()
t = pa.Table.from_batches(batches).select([0, 5, 6, 9]).sort_by("ts")
- assert t.to_pylist() == [{'_hoodie_commit_time': '20240402123035233', 'ts': 1695046462179,
- 'uuid': '9909a8b1-2d15-4d3d-8ec9-efc48c536a00', 'fare': 33.9},
- {'_hoodie_commit_time': '20240402123035233', 'ts': 1695091554788,
- 'uuid': 'e96c4396-3fad-413a-a942-4cb36106d721', 'fare': 27.7},
- {'_hoodie_commit_time': '20240402123035233', 'ts': 1695115999911,
- 'uuid': 'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa', 'fare': 17.85},
- {'_hoodie_commit_time': '20240402123035233', 'ts': 1695159649087,
- 'uuid': '334e26e9-8355-45cc-97c6-c31daf0df330', 'fare': 19.1},
- {'_hoodie_commit_time': '20240402123035233', 'ts': 1695516137016,
- 'uuid': 'e3cf430c-889d-4015-bc98-59bdce1e530c', 'fare': 34.15}]
+ assert t.to_pylist() == [
+ {
+ "_hoodie_commit_time": "20240402123035233",
+ "ts": 1695046462179,
+ "uuid": "9909a8b1-2d15-4d3d-8ec9-efc48c536a00",
+ "fare": 33.9,
+ },
+ {
+ "_hoodie_commit_time": "20240402123035233",
+ "ts": 1695091554788,
+ "uuid": "e96c4396-3fad-413a-a942-4cb36106d721",
+ "fare": 27.7,
+ },
+ {
+ "_hoodie_commit_time": "20240402123035233",
+ "ts": 1695115999911,
+ "uuid": "c8abbe79-8d89-47ea-b4ce-4d224bae5bfa",
+ "fare": 17.85,
+ },
+ {
+ "_hoodie_commit_time": "20240402123035233",
+ "ts": 1695159649087,
+ "uuid": "334e26e9-8355-45cc-97c6-c31daf0df330",
+ "fare": 19.1,
+ },
+ {
+ "_hoodie_commit_time": "20240402123035233",
+ "ts": 1695516137016,
+ "uuid": "e3cf430c-889d-4015-bc98-59bdce1e530c",
+ "fare": 34.15,
+ },
+ ]