This is an automated email from the ASF dual-hosted git repository.
liuxun pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new e9d8ee7bc [#5203] feat(client-python): porting partitions from java
client (#5964)
e9d8ee7bc is described below
commit e9d8ee7bc05d3226c5f0ce0b492b2c207018ed73
Author: Eric Chang <[email protected]>
AuthorDate: Thu Jan 9 11:32:48 2025 +0800
[#5203] feat(client-python): porting partitions from java client (#5964)
### What changes were proposed in this pull request?
Porting `interface Partition`, `interface IdentityPartition`,
`interface ListPartition`, `interface RangePartition`, and `class
Partitions` from Java to Python.
Fix: #5203
### Does this PR introduce _any_ user-facing change?
Yes.
### How was this patch tested?
Unit tests.
---
.../expressions/partitions/identity_partition.py | 54 +++++
.../api/expressions/partitions/list_partition.py | 47 +++++
.../api/expressions/partitions/partition.py | 44 ++++
.../api/expressions/partitions/partitions.py | 231 +++++++++++++++++++++
.../api/expressions/partitions/range_partition.py | 52 +++++
.../tests/unittests/rel/test_partitions.py | 108 ++++++++++
6 files changed, 536 insertions(+)
diff --git
a/clients/client-python/gravitino/api/expressions/partitions/identity_partition.py
b/clients/client-python/gravitino/api/expressions/partitions/identity_partition.py
new file mode 100644
index 000000000..e4b660c09
--- /dev/null
+++
b/clients/client-python/gravitino/api/expressions/partitions/identity_partition.py
@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from abc import abstractmethod
+from typing import List, Any
+
+from .partition import Partition
+from ..literals.literal import Literal
+
+
class IdentityPartition(Partition):
    """A partition produced by identity partitioning.

    Taking the Hive partition

    ```
    PARTITION (dt='2008-08-08',country='us')
    ```

    as an example: the partition name is "dt=2008-08-08/country=us", the
    field names are [["dt"], ["country"]] and the values are
    ["2008-08-08", "us"].

    This API is still evolving towards becoming a stable API, and can change
    from one feature release to another (0.5.0 to 0.6.0).
    """

    @abstractmethod
    def field_names(self) -> List[List[str]]:
        """Get the field names of the identity partition.

        Returns:
            List[List[str]]: The field names, each one given as a list of
            strings.
        """

    @abstractmethod
    def values(self) -> List[Literal[Any]]:
        """Get the values of the identity partition.

        Returns:
            List[Literal[Any]]: The values of the identity partition.
        """
diff --git
a/clients/client-python/gravitino/api/expressions/partitions/list_partition.py
b/clients/client-python/gravitino/api/expressions/partitions/list_partition.py
new file mode 100644
index 000000000..8316e4daa
--- /dev/null
+++
b/clients/client-python/gravitino/api/expressions/partitions/list_partition.py
@@ -0,0 +1,47 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from abc import abstractmethod
+from typing import List, Any
+
+from gravitino.api.expressions.literals.literal import Literal
+from gravitino.api.expressions.partitions.partition import Partition
+
+
class ListPartition(Partition):
    """A partition produced by list partitioning.

    Taking the list partition

    ```
    PARTITION p202204_California VALUES IN (
      ("2022-04-01", "Los Angeles"),
      ("2022-04-01", "San Francisco")
    )
    ```

    as an example: the partition name is "p202204_California" and the lists
    are [["2022-04-01", "Los Angeles"], ["2022-04-01", "San Francisco"]].

    This API is still evolving towards becoming a stable API, and can change
    from one feature release to another (0.5.0 to 0.6.0).
    """

    @abstractmethod
    def lists(self) -> List[List[Literal[Any]]]:
        """Get the values of the list partition.

        Returns:
            List[List[Literal[Any]]]: The values of the list partition.
        """
diff --git
a/clients/client-python/gravitino/api/expressions/partitions/partition.py
b/clients/client-python/gravitino/api/expressions/partitions/partition.py
new file mode 100644
index 000000000..7f9a0b873
--- /dev/null
+++ b/clients/client-python/gravitino/api/expressions/partitions/partition.py
@@ -0,0 +1,44 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from abc import ABC, abstractmethod
+from typing import Dict
+
+
class Partition(ABC):
    """The result of partitioning a table.

    A partition is one of `IdentityPartition`, `ListPartition`, or
    `RangePartition`; which one depends on the `Table.partitioning()`.

    This API is still evolving towards becoming a stable API, and can change
    from one feature release to another (0.5.0 to 0.6.0).
    """

    @abstractmethod
    def name(self) -> str:
        """Get the name of the partition.

        Returns:
            str: The name of the partition.
        """

    @abstractmethod
    def properties(self) -> Dict[str, str]:
        """Get the properties of the partition.

        Returns:
            Dict[str, str]: The properties of the partition, such as
            statistics, location, etc.
        """
diff --git
a/clients/client-python/gravitino/api/expressions/partitions/partitions.py
b/clients/client-python/gravitino/api/expressions/partitions/partitions.py
new file mode 100644
index 000000000..6cb4b4a47
--- /dev/null
+++ b/clients/client-python/gravitino/api/expressions/partitions/partitions.py
@@ -0,0 +1,231 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import List, Dict, Any, Optional
+
+from gravitino.api.expressions.literals.literal import Literal
+from gravitino.api.expressions.partitions.identity_partition import
IdentityPartition
+from gravitino.api.expressions.partitions.list_partition import ListPartition
+from gravitino.api.expressions.partitions.partition import Partition
+from gravitino.api.expressions.partitions.range_partition import RangePartition
+
+
class Partitions:
    """The helper class for partition expressions.

    Each static factory builds the concrete implementation of one kind of
    partition and returns it typed as the matching interface. A missing
    (``None``) or empty `properties` argument is always replaced by a new
    empty dict, so `properties()` on the returned partition never yields
    ``None``.
    """

    # An empty list of partitions. This single list object is shared by
    # everything that reads the attribute, so it must not be mutated in place.
    EMPTY_PARTITIONS: List[Partition] = []

    @staticmethod
    def range(
        name: str,
        upper: Literal[Any],
        lower: Literal[Any],
        properties: Optional[Dict[str, str]] = None,
    ) -> RangePartition:
        """
        Creates a range partition.

        Args:
            name: The name of the partition.
            upper: The upper bound of the partition.
            lower: The lower bound of the partition.
            properties: The properties of the partition. Defaults to no
                properties.

        Returns:
            The created partition.
        """
        # Normalize like list() and identity() do: without this, a None value
        # reaches RangePartitionImpl, whose __hash__ calls .items() on it.
        return RangePartitionImpl(name, upper, lower, properties or {})

    @staticmethod
    def list(
        name: str,
        lists: List[List[Literal[Any]]],
        properties: Optional[Dict[str, str]] = None,
    ) -> ListPartition:
        """
        Creates a list partition.

        Args:
            name: The name of the partition.
            lists: The values of the list partition.
            properties: The properties of the partition. Defaults to no
                properties.

        Returns:
            The created partition.
        """
        return ListPartitionImpl(name, lists, properties or {})

    @staticmethod
    def identity(
        name: Optional[str],
        field_names: List[List[str]],
        values: List[Literal[Any]],
        properties: Optional[Dict[str, str]] = None,
    ) -> IdentityPartition:
        """
        Creates an identity partition.

        The `values` must correspond to the `field_names`.

        Args:
            name: The name of the partition.
            field_names: The field names of the identity partition.
            values: The values of the identity partition.
            properties: The properties of the partition. Defaults to no
                properties.

        Returns:
            The created partition.
        """
        return IdentityPartitionImpl(name, field_names, values, properties or {})
+
+
class RangePartitionImpl(RangePartition):
    """
    Represents a result of range partitioning.

    Two instances compare equal when their name, upper bound, lower bound and
    properties are all equal; the hash is derived from the same four values.
    """

    def __init__(
        self,
        name: str,
        upper: Literal[Any],
        lower: Literal[Any],
        properties: Optional[Dict[str, str]] = None,
    ):
        """
        Args:
            name: The name of the partition.
            upper: The upper bound of the partition.
            lower: The lower bound of the partition.
            properties: The properties of the partition; ``None`` is stored
                as an empty dict.
        """
        self._name = name
        self._upper = upper
        self._lower = lower
        # Store a dict even when None is passed, so that properties() honors
        # its Dict[str, str] return type and __hash__ can call .items().
        self._properties = {} if properties is None else properties

    def upper(self) -> Literal[Any]:
        """Returns the upper bound of the partition."""
        return self._upper

    def lower(self) -> Literal[Any]:
        """Returns the lower bound of the partition."""
        return self._lower

    def name(self) -> str:
        """Returns the name of the partition."""
        return self._name

    def properties(self) -> Dict[str, str]:
        """Returns the properties of the partition (never ``None``)."""
        return self._properties

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, RangePartitionImpl):
            return False
        return (
            self._name == other._name
            and self._upper == other._upper
            and self._lower == other._lower
            and self._properties == other._properties
        )

    def __hash__(self) -> int:
        # A dict is unhashable; a frozenset of its items is an
        # order-independent stand-in that agrees with dict equality.
        return hash(
            (
                self._name,
                self._upper,
                self._lower,
                frozenset(self._properties.items()),
            )
        )
+
+
class ListPartitionImpl(ListPartition):
    """
    Represents a result of list partitioning.

    Two instances compare equal when their name, lists and properties are all
    equal; the hash is derived from the same three values.
    """

    def __init__(
        self,
        name: str,
        lists: List[List[Literal[Any]]],
        properties: Optional[Dict[str, str]] = None,
    ):
        """
        Args:
            name: The name of the partition.
            lists: The values of the list partition.
            properties: The properties of the partition; ``None`` is stored
                as an empty dict.
        """
        self._name = name
        self._lists = lists
        # Store a dict even when None is passed, so that properties() honors
        # its Dict[str, str] return type and __hash__ can call .items().
        self._properties = {} if properties is None else properties

    def lists(self) -> List[List[Literal[Any]]]:
        """Returns the values of the list partition."""
        return self._lists

    def name(self) -> str:
        """Returns the name of the partition."""
        return self._name

    def properties(self) -> Dict[str, str]:
        """Returns the properties of the partition (never ``None``)."""
        return self._properties

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, ListPartitionImpl):
            return False
        return (
            self._name == other._name
            and self._lists == other._lists
            and self._properties == other._properties
        )

    def __hash__(self) -> int:
        # Lists and dicts are unhashable, so hash tuple / frozenset
        # snapshots of them instead.
        return hash(
            (
                self._name,
                tuple(tuple(values) for values in self._lists),
                frozenset(self._properties.items()),
            )
        )
+
+
class IdentityPartitionImpl(IdentityPartition):
    """
    Represents a result of identity partitioning.

    Two instances compare equal when their name, field names, values and
    properties are all equal; the hash is derived from the same four values.
    """

    def __init__(
        self,
        name: Optional[str],
        field_names: List[List[str]],
        values: List[Literal[Any]],
        properties: Optional[Dict[str, str]] = None,
    ):
        """
        Args:
            name: The name of the partition. May be ``None``:
                `Partitions.identity` declares it optional and forwards it
                unchanged.
            field_names: The field names of the identity partition.
            values: The values of the identity partition.
            properties: The properties of the partition; ``None`` is stored
                as an empty dict.
        """
        self._name = name
        self._field_names = field_names
        self._values = values
        # Store a dict even when None is passed, so that properties() honors
        # its Dict[str, str] return type and __hash__ can call .items().
        self._properties = {} if properties is None else properties

    def field_names(self) -> List[List[str]]:
        """Returns the field names of the identity partition."""
        return self._field_names

    def values(self) -> List[Literal[Any]]:
        """Returns the values of the identity partition."""
        return self._values

    def name(self) -> str:
        """Returns the name of the partition."""
        return self._name

    def properties(self) -> Dict[str, str]:
        """Returns the properties of the partition (never ``None``)."""
        return self._properties

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, IdentityPartitionImpl):
            return False
        return (
            self._name == other._name
            and self._field_names == other._field_names
            and self._values == other._values
            and self._properties == other._properties
        )

    def __hash__(self) -> int:
        # Lists and dicts are unhashable, so hash tuple / frozenset
        # snapshots of them instead.
        return hash(
            (
                self._name,
                tuple(tuple(parts) for parts in self._field_names),
                tuple(self._values),
                frozenset(self._properties.items()),
            )
        )
diff --git
a/clients/client-python/gravitino/api/expressions/partitions/range_partition.py
b/clients/client-python/gravitino/api/expressions/partitions/range_partition.py
new file mode 100644
index 000000000..7155c033c
--- /dev/null
+++
b/clients/client-python/gravitino/api/expressions/partitions/range_partition.py
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from abc import abstractmethod
+from typing import Any
+
+from gravitino.api.expressions.literals.literal import Literal
+from gravitino.api.expressions.partitions.partition import Partition
+
+
class RangePartition(Partition):
    """A partition produced by range partitioning.

    Taking the range partition

    ```
    PARTITION p20200321 VALUES LESS THAN ("2020-03-22")
    ```

    as an example: the upper bound is "2020-03-22" and the lower bound is
    null.

    This API is still evolving towards becoming a stable API, and can change
    from one feature release to another (0.5.0 to 0.6.0).
    """

    @abstractmethod
    def upper(self) -> Literal[Any]:
        """Get the upper bound of the partition.

        Returns:
            Literal[Any]: The upper bound of the partition.
        """

    @abstractmethod
    def lower(self) -> Literal[Any]:
        """Get the lower bound of the partition.

        Returns:
            Literal[Any]: The lower bound of the partition.
        """
diff --git a/clients/client-python/tests/unittests/rel/test_partitions.py
b/clients/client-python/tests/unittests/rel/test_partitions.py
new file mode 100644
index 000000000..a14eb079d
--- /dev/null
+++ b/clients/client-python/tests/unittests/rel/test_partitions.py
@@ -0,0 +1,108 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import unittest
+from datetime import date
+
+from gravitino.api.expressions.literals.literals import Literals
+from gravitino.api.expressions.partitions.partitions import Partitions
+
+
class TestPartitions(unittest.TestCase):
    """Unit tests for the partitions created through `Partitions`."""

    def test_partitions(self):
        """Each factory returns a partition exposing the values it was given."""
        # Range partition
        range_partition = Partitions.range(
            "p0", Literals.NULL, Literals.integer_literal(6), {}
        )
        self.assertEqual("p0", range_partition.name())
        self.assertEqual({}, range_partition.properties())
        self.assertEqual(Literals.NULL, range_partition.upper())
        self.assertEqual(Literals.integer_literal(6), range_partition.lower())

        # List partition
        list_partition = Partitions.list(
            "p202204_California",
            [
                [
                    Literals.date_literal(date(2022, 4, 1)),
                    Literals.string_literal("Los Angeles"),
                ],
                [
                    Literals.date_literal(date(2022, 4, 1)),
                    Literals.string_literal("San Francisco"),
                ],
            ],
            {},
        )
        self.assertEqual("p202204_California", list_partition.name())
        self.assertEqual({}, list_partition.properties())
        # Every row holds the same date followed by its own city.
        for row_index, city in enumerate(("Los Angeles", "San Francisco")):
            self.assertEqual(
                Literals.date_literal(date(2022, 4, 1)),
                list_partition.lists()[row_index][0],
            )
            self.assertEqual(
                Literals.string_literal(city),
                list_partition.lists()[row_index][1],
            )

        # Identity partition
        warehouse_path = "/user/hive/warehouse/tpch_flat_orc_2.db/orders"
        identity_partition = Partitions.identity(
            "dt=2008-08-08/country=us",
            [["dt"], ["country"]],
            [
                Literals.date_literal(date(2008, 8, 8)),
                Literals.string_literal("us"),
            ],
            {"location": warehouse_path},
        )
        self.assertEqual("dt=2008-08-08/country=us", identity_partition.name())
        self.assertEqual(
            {"location": warehouse_path},
            identity_partition.properties(),
        )
        self.assertEqual(["dt"], identity_partition.field_names()[0])
        self.assertEqual(["country"], identity_partition.field_names()[1])
        self.assertEqual(
            Literals.date_literal(date(2008, 8, 8)),
            identity_partition.values()[0],
        )
        self.assertEqual(
            Literals.string_literal("us"), identity_partition.values()[1]
        )

    def test_eq(self):
        """
        Test the correctness of the __eq__ method.
        """

        def make_range(name, lower_value):
            # Range partition with a NULL upper bound and no properties.
            return Partitions.range(
                name, Literals.NULL, Literals.integer_literal(lower_value), {}
            )

        baseline = make_range("p1", 6)
        twin = make_range("p1", 6)
        unrelated = make_range("p2", 10)

        # Separately built partitions with equal contents compare equal
        self.assertEqual(baseline, twin)
        # A different name and lower bound make partitions unequal
        self.assertNotEqual(baseline, unrelated)

        # Same name but a different lower bound is still unequal
        same_name_other_bound = make_range("p1", 10)
        self.assertNotEqual(baseline, same_name_other_bound)

        # Comparison with values of other types
        self.assertNotEqual(baseline, "not_a_partition")
        self.assertNotEqual(baseline, None)