This is an automated email from the ASF dual-hosted git repository.
jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 346ab71565 [#5199] feat(client-python): add single field partitioning
DTOs (#8129)
346ab71565 is described below
commit 346ab715653e5af7b1ceababda6722bee913d499
Author: George T. C. Lai <[email protected]>
AuthorDate: Tue Aug 19 12:00:30 2025 +0800
[#5199] feat(client-python): add single field partitioning DTOs (#8129)
### What changes were proposed in this pull request?
This PR implements the following Python classes, mirroring their
counterparts in the Java client:
- IdentityPartitioningDTO
- YearPartitioningDTO
- MonthPartitioningDTO
- DayPartitioningDTO
- HourPartitioningDTO
### Why are the changes needed?
We need to support table partitioning, bucketing, sort ordering, and
indexes.
#5199
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Unit tests
---------
Signed-off-by: George T. C. Lai <[email protected]>
---
.../dto/rel/partitioning/day_partitioning_dto.py | 36 ++++++++++
.../dto/rel/partitioning/hour_partitioning_dto.py | 36 ++++++++++
.../rel/partitioning/identity_partitioning_dto.py | 36 ++++++++++
.../dto/rel/partitioning/month_partitioning_dto.py | 36 ++++++++++
.../dto/rel/partitioning/year_partitioning_dto.py | 36 ++++++++++
.../dto/rel/test_single_field_partitioning_dto.py | 80 ++++++++++++++++++++++
6 files changed, 260 insertions(+)
diff --git a/clients/client-python/gravitino/dto/rel/partitioning/day_partitioning_dto.py b/clients/client-python/gravitino/dto/rel/partitioning/day_partitioning_dto.py
new file mode 100644
index 0000000000..b7e9f34889
--- /dev/null
+++ b/clients/client-python/gravitino/dto/rel/partitioning/day_partitioning_dto.py
@@ -0,0 +1,36 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import final
+
+from gravitino.dto.rel.partitioning.partitioning import SingleFieldPartitioning
+
+
+@final
+class DayPartitioningDTO(SingleFieldPartitioning):
+ """Represents a DayPartitioning Data Transfer Object (DTO)."""
+
+ def __init__(self, /, *field_name: str):
+ super().__init__(list(field_name))  # collect the varargs into a list for the base class
+
+ def strategy(self) -> SingleFieldPartitioning.Strategy:
+ """Returns the strategy of the partitioning.
+
+ Returns:
+ Strategy: Always `SingleFieldPartitioning.Strategy.DAY` for this DTO.
+ """
+ return SingleFieldPartitioning.Strategy.DAY
diff --git a/clients/client-python/gravitino/dto/rel/partitioning/hour_partitioning_dto.py b/clients/client-python/gravitino/dto/rel/partitioning/hour_partitioning_dto.py
new file mode 100644
index 0000000000..88eb73fc40
--- /dev/null
+++ b/clients/client-python/gravitino/dto/rel/partitioning/hour_partitioning_dto.py
@@ -0,0 +1,36 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import final
+
+from gravitino.dto.rel.partitioning.partitioning import SingleFieldPartitioning
+
+
+@final
+class HourPartitioningDTO(SingleFieldPartitioning):
+ """Represents an HourPartitioning Data Transfer Object (DTO)."""
+
+ def __init__(self, /, *field_name: str):
+ super().__init__(list(field_name))  # collect the varargs into a list for the base class
+
+ def strategy(self) -> SingleFieldPartitioning.Strategy:
+ """Returns the strategy of the partitioning.
+
+ Returns:
+ Strategy: Always `SingleFieldPartitioning.Strategy.HOUR` for this DTO.
+ """
+ return SingleFieldPartitioning.Strategy.HOUR
diff --git a/clients/client-python/gravitino/dto/rel/partitioning/identity_partitioning_dto.py b/clients/client-python/gravitino/dto/rel/partitioning/identity_partitioning_dto.py
new file mode 100644
index 0000000000..8efbc4bb28
--- /dev/null
+++ b/clients/client-python/gravitino/dto/rel/partitioning/identity_partitioning_dto.py
@@ -0,0 +1,36 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import final
+
+from gravitino.dto.rel.partitioning.partitioning import SingleFieldPartitioning
+
+
+@final
+class IdentityPartitioningDTO(SingleFieldPartitioning):
+ """Represents an IdentityPartitioning Data Transfer Object (DTO)."""
+
+ def __init__(self, /, *field_name: str):
+ super().__init__(list(field_name))  # collect the varargs into a list for the base class
+
+ def strategy(self) -> SingleFieldPartitioning.Strategy:
+ """Returns the strategy of the partitioning.
+
+ Returns:
+ Strategy: Always `SingleFieldPartitioning.Strategy.IDENTITY` for this DTO.
+ """
+ return SingleFieldPartitioning.Strategy.IDENTITY
diff --git a/clients/client-python/gravitino/dto/rel/partitioning/month_partitioning_dto.py b/clients/client-python/gravitino/dto/rel/partitioning/month_partitioning_dto.py
new file mode 100644
index 0000000000..ba633281cc
--- /dev/null
+++ b/clients/client-python/gravitino/dto/rel/partitioning/month_partitioning_dto.py
@@ -0,0 +1,36 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import final
+
+from gravitino.dto.rel.partitioning.partitioning import SingleFieldPartitioning
+
+
+@final
+class MonthPartitioningDTO(SingleFieldPartitioning):
+ """Represents a MonthPartitioning Data Transfer Object (DTO)."""
+
+ def __init__(self, /, *field_name: str):
+ super().__init__(list(field_name))  # collect the varargs into a list for the base class
+
+ def strategy(self) -> SingleFieldPartitioning.Strategy:
+ """Returns the strategy of the partitioning.
+
+ Returns:
+ Strategy: Always `SingleFieldPartitioning.Strategy.MONTH` for this DTO.
+ """
+ return SingleFieldPartitioning.Strategy.MONTH
diff --git a/clients/client-python/gravitino/dto/rel/partitioning/year_partitioning_dto.py b/clients/client-python/gravitino/dto/rel/partitioning/year_partitioning_dto.py
new file mode 100644
index 0000000000..9415e9a7f0
--- /dev/null
+++ b/clients/client-python/gravitino/dto/rel/partitioning/year_partitioning_dto.py
@@ -0,0 +1,36 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import final
+
+from gravitino.dto.rel.partitioning.partitioning import SingleFieldPartitioning
+
+
+@final
+class YearPartitioningDTO(SingleFieldPartitioning):
+ """Represents a YearPartitioning Data Transfer Object (DTO)."""
+
+ def __init__(self, /, *field_name: str):
+ super().__init__(list(field_name))  # collect the varargs into a list for the base class
+
+ def strategy(self) -> SingleFieldPartitioning.Strategy:
+ """Returns the strategy of the partitioning.
+
+ Returns:
+ Strategy: Always `SingleFieldPartitioning.Strategy.YEAR` for this DTO.
+ """
+ return SingleFieldPartitioning.Strategy.YEAR
diff --git a/clients/client-python/tests/unittests/dto/rel/test_single_field_partitioning_dto.py b/clients/client-python/tests/unittests/dto/rel/test_single_field_partitioning_dto.py
new file mode 100644
index 0000000000..1efb860541
--- /dev/null
+++ b/clients/client-python/tests/unittests/dto/rel/test_single_field_partitioning_dto.py
@@ -0,0 +1,80 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import unittest
+
+from gravitino.api.expressions.named_reference import NamedReference
+from gravitino.api.types.types import Types
+from gravitino.dto.rel.column_dto import ColumnDTO
+from gravitino.dto.rel.partitioning.day_partitioning_dto import DayPartitioningDTO
+from gravitino.dto.rel.partitioning.hour_partitioning_dto import HourPartitioningDTO
+from gravitino.dto.rel.partitioning.identity_partitioning_dto import (
+ IdentityPartitioningDTO,
+)
+from gravitino.dto.rel.partitioning.month_partitioning_dto import MonthPartitioningDTO
+from gravitino.dto.rel.partitioning.partitioning import Partitioning
+from gravitino.dto.rel.partitioning.year_partitioning_dto import YearPartitioningDTO
+from gravitino.exceptions.base import IllegalArgumentException
+
+
+class TestSingleFieldPartitioningDTO(unittest.TestCase):  # covers the five single-field partitioning DTOs
+ @classmethod
+ def setUpClass(cls) -> None:
+ cls.field_name = [f"dummy_field_{i}" for i in range(3)]  # dummy_field_0 .. dummy_field_2
+ cls.partitioning_dto = {  # expected strategy -> DTO class under test
+ Partitioning.Strategy.IDENTITY: IdentityPartitioningDTO,
+ Partitioning.Strategy.YEAR: YearPartitioningDTO,
+ Partitioning.Strategy.MONTH: MonthPartitioningDTO,
+ Partitioning.Strategy.DAY: DayPartitioningDTO,
+ Partitioning.Strategy.HOUR: HourPartitioningDTO,
+ }
+
+ def test_single_field_partitioning_dto_empty_field_name(self):
+ for partition_dto in self.partitioning_dto.values():
+ self.assertRaisesRegex(
+ IllegalArgumentException,
+ "field_name cannot be null or empty",
+ partition_dto,  # invoked with no arguments, i.e. without any field name
+ )
+
+ def test_single_field_partitioning_dto(self):
+ arguments = [NamedReference.field(self.field_name)]  # expected value of dto.arguments()
+ column_dtos = [
+ ColumnDTO.builder()
+ .with_name(name=f"dummy_field_{i}")
+ .with_data_type(Types.StringType.get())
+ .build()
+ for i in range(3)
+ ]
+ not_existing_column_dto = (
+ ColumnDTO.builder()
+ .with_name(name="not_exist_field")
+ .with_data_type(Types.StringType.get())
+ .build()
+ )
+ for strategy, dto_class in self.partitioning_dto.items():
+ dto = dto_class(*self.field_name)
+ dto.validate(columns=column_dtos)  # columns dummy_field_0..2 are offered; expected not to raise
+ with self.assertRaisesRegex(
+ IllegalArgumentException,
+ "not found in table",
+ ):
+ dto.validate(columns=[not_existing_column_dto])  # only "not_exist_field" is offered
+ self.assertEqual(strategy, dto.strategy())
+ self.assertEqual(strategy.value, dto.name())
+ self.assertListEqual(self.field_name, dto.field_name())
+ self.assertListEqual(arguments, dto.arguments())