This is an automated email from the ASF dual-hosted git repository.
mchades pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 671ae2da8f [#5202] feat(client-python): Support Gravitino Type Serdes
- deserializer (#7241)
671ae2da8f is described below
commit 671ae2da8fa6e1e5fd464f7cccdc0e3a4eb99e56
Author: George T. C. Lai <[email protected]>
AuthorDate: Sun Jun 1 16:24:52 2025 +0800
[#5202] feat(client-python): Support Gravitino Type Serdes - deserializer
(#7241)
### What changes were proposed in this pull request?
This is the second part (of 4 planned) of the implementation, ported from
Java, to support Column and its default value. It includes:
- JsonUtils: `readDataType()` and its related methods to deserialize
various Gravitino Types.
- TypeDeserializer
The `TypeDeserializer` will be used in the incoming `ColumnDTO`
implementation to deserialize `data_type` field.
### Why are the changes needed?
We need to support Column and its default value in python client.
#5202
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Unit tests.
---------
Signed-off-by: George T. C. Lai <[email protected]>
---
.../api/types/json_serdes/_helper/serdes_utils.py | 158 +++++++++++++++++++
.../gravitino/api/types/json_serdes/type_serdes.py | 4 +-
.../unittests/json_serdes/test_type_serdes.py | 175 +++++++++++++++++++++
3 files changed, 334 insertions(+), 3 deletions(-)
diff --git
a/clients/client-python/gravitino/api/types/json_serdes/_helper/serdes_utils.py
b/clients/client-python/gravitino/api/types/json_serdes/_helper/serdes_utils.py
index 3d6a3d7f29..0bff743414 100644
---
a/clients/client-python/gravitino/api/types/json_serdes/_helper/serdes_utils.py
+++
b/clients/client-python/gravitino/api/types/json_serdes/_helper/serdes_utils.py
@@ -15,12 +15,16 @@
# specific language governing permissions and limitations
# under the License.
+import json
import re
from types import MappingProxyType
from typing import Any, ClassVar, Dict, Mapping, Pattern, Set, Union, overload
+from dataclasses_json.core import Json
+
from gravitino.api.types.type import Name, Type
from gravitino.api.types.types import Types
+from gravitino.utils.precondition import Precondition
class SerdesUtils:
@@ -209,3 +213,157 @@ class SerdesUtils:
else unparsed_type
),
}
+
+ @classmethod
+ def read_data_type(cls, type_data: Json) -> Type:
+ """Read Gravitino Type from JSON data. Used for Gravitino Type JSON
Deserialization.
+
+ Args:
+ type_data (Json): The serialized data.
+
+ Returns:
+ Type: The Gravitino Type.
+ """
+ if type_data is None or isinstance(type_data, (dict, str)):
+ Precondition.check_argument(
+ type_data is not None and len(type_data) > 0,
+ f"Cannot parse type from invalid JSON: {type_data}",
+ )
+
+ if isinstance(type_data, str):
+ return cls.from_primitive_type_string(type_data.lower())
+
+ if isinstance(type_data, dict) and type_data.get(cls.TYPE):
+ type_str = type_data[cls.TYPE]
+ if cls.STRUCT == type_str:
+ return cls.read_struct_type(type_data)
+ if cls.LIST == type_str:
+ return cls.read_list_type(type_data)
+ if cls.MAP == type_str:
+ return cls.read_map_type(type_data)
+ if cls.UNION == type_str:
+ return cls.read_union_type(type_data)
+ if cls.UNPARSED == type_str:
+ return cls.read_unparsed_type(type_data)
+ if cls.EXTERNAL == type_str:
+ return cls.read_external_type(type_data)
+
+ return Types.UnparsedType.of(unparsed_type=json.dumps(type_data))
+
+ @classmethod
+ def from_primitive_type_string(cls, type_string: str) -> Type:
+ type_instance = cls.TYPES.get(type_string)
+ if type_instance is not None:
+ return type_instance
+
+ decimal_matched = cls.DECIMAL_PATTERN.match(type_string)
+ if decimal_matched:
+ return Types.DecimalType.of(
+ precision=int(decimal_matched.group(1)),
+ scale=int(decimal_matched.group(2)),
+ )
+
+ fixed_matched = cls.FIXED_PATTERN.match(type_string)
+ if fixed_matched:
+ return Types.FixedType.of(length=int(fixed_matched.group(1)))
+
+ fixedchar_matched = cls.FIXEDCHAR_PATTERN.match(type_string)
+ if fixedchar_matched:
+ return
Types.FixedCharType.of(length=int(fixedchar_matched.group(1)))
+
+ varchar_matched = cls.VARCHAR_PATTERN.match(type_string)
+ if varchar_matched:
+ return Types.VarCharType.of(length=int(varchar_matched.group(1)))
+
+ return Types.UnparsedType.of(type_string)
+
+ @classmethod
+ def read_struct_type(cls, struct_data: Dict[str, Any]) -> Types.StructType:
+ fields = struct_data.get(cls.FIELDS)
+ Precondition.check_argument(
+ fields is not None and isinstance(fields, list),
+ f"Cannot parse struct fields from non-array: {fields}",
+ )
+ field_list = [cls.read_struct_field(field) for field in
struct_data[cls.FIELDS]]
+ return Types.StructType.of(*field_list)
+
+ @classmethod
+ def read_struct_field(cls, field_data: Dict[str, Any]) ->
Types.StructType.Field:
+ Precondition.check_argument(
+ field_data is not None and isinstance(field_data, dict),
+ f"Cannot parse struct field from invalid JSON: {field_data}",
+ )
+ Precondition.check_argument(
+ field_data.get(cls.STRUCT_FIELD_NAME) is not None,
+ f"Cannot parse struct field from missing name: {field_data}",
+ )
+ Precondition.check_argument(
+ field_data.get(cls.TYPE) is not None,
+ f"Cannot parse struct field from missing type: {field_data}",
+ )
+
+ name = field_data[cls.STRUCT_FIELD_NAME]
+ field_type = cls.read_data_type(field_data[cls.TYPE])
+ nullable = field_data.get(cls.STRUCT_FIELD_NULLABLE, True)
+ comment = field_data.get(cls.STRUCT_FIELD_COMMENT, "")
+
+ return Types.StructType.Field(
+ name=name, field_type=field_type, nullable=nullable,
comment=comment
+ )
+
+ @classmethod
+ def read_list_type(cls, list_data: Dict[str, Any]) -> Types.ListType:
+ Precondition.check_argument(
+ list_data.get(cls.LIST_ELEMENT_TYPE) is not None,
+ f"Cannot parse list type from missing element type: {list_data}",
+ )
+ element_type = cls.read_data_type(list_data[cls.LIST_ELEMENT_TYPE])
+ nullable = list_data.get(cls.LIST_ELEMENT_NULLABLE, True)
+ return Types.ListType.of(element_type=element_type,
element_nullable=nullable)
+
+ @classmethod
+ def read_map_type(cls, map_data: Dict[str, Any]) -> Types.MapType:
+ Precondition.check_argument(
+ map_data.get(cls.MAP_KEY_TYPE) is not None,
+ f"Cannot parse map type from missing key type: {map_data}",
+ )
+ Precondition.check_argument(
+ map_data.get(cls.MAP_VALUE_TYPE) is not None,
+ f"Cannot parse map type from missing value type: {map_data}",
+ )
+ key_type = cls.read_data_type(map_data[cls.MAP_KEY_TYPE])
+ value_type = cls.read_data_type(map_data[cls.MAP_VALUE_TYPE])
+ nullable = map_data.get(cls.MAP_VALUE_NULLABLE, True)
+ return Types.MapType.of(key_type, value_type, nullable)
+
+ @classmethod
+ def read_union_type(cls, union_data: Dict[str, Any]) -> Types.UnionType:
+ Precondition.check_argument(
+ union_data.get(cls.UNION_TYPES) is not None,
+ f"Cannot parse union type from missing types: {union_data}",
+ )
+ types = union_data.get(cls.UNION_TYPES)
+ Precondition.check_argument(
+ types is not None and isinstance(types, list),
+ f"Cannot parse union types from non-array: {types}",
+ )
+
+ union_types = [cls.read_data_type(type_data) for type_data in types]
+ return Types.UnionType.of(*union_types)
+
+ @classmethod
+ def read_unparsed_type(cls, data: Dict[str, Any]) -> Types.UnparsedType:
+ Precondition.check_argument(
+ data.get(cls.UNPARSED_TYPE) is not None,
+ f"Cannot parse unparsed type from missing unparsed type: {data}",
+ )
+
+ return Types.UnparsedType.of(data[cls.UNPARSED_TYPE])
+
+ @classmethod
+ def read_external_type(cls, external_data: Dict[str, Any]) ->
Types.ExternalType:
+ Precondition.check_argument(
+ external_data.get(cls.CATALOG_STRING) is not None,
+ f"Cannot parse external type from missing catalogString:
{external_data}",
+ )
+ return Types.ExternalType.of(external_data[cls.CATALOG_STRING])
diff --git
a/clients/client-python/gravitino/api/types/json_serdes/type_serdes.py
b/clients/client-python/gravitino/api/types/json_serdes/type_serdes.py
index ab86cea789..4577987486 100644
--- a/clients/client-python/gravitino/api/types/json_serdes/type_serdes.py
+++ b/clients/client-python/gravitino/api/types/json_serdes/type_serdes.py
@@ -50,6 +50,4 @@ class TypeSerdes(JsonSerializable[Type]):
Type: The deserialized Gravitino Type.
"""
- # TODO: We shall implement the deserialization logic here.
- # For now, it's just a placeholder.
- pass
+ return SerdesUtils.read_data_type(data)
diff --git
a/clients/client-python/tests/unittests/json_serdes/test_type_serdes.py
b/clients/client-python/tests/unittests/json_serdes/test_type_serdes.py
index 8a77a889bb..8d71ab9d45 100644
--- a/clients/client-python/tests/unittests/json_serdes/test_type_serdes.py
+++ b/clients/client-python/tests/unittests/json_serdes/test_type_serdes.py
@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
+import random
import unittest
from itertools import combinations, product
@@ -22,6 +23,7 @@ from gravitino.api.types.json_serdes import TypeSerdes
from gravitino.api.types.json_serdes._helper.serdes_utils import SerdesUtils
from gravitino.api.types.type import PrimitiveType
from gravitino.api.types.types import Types
+from gravitino.exceptions.base import IllegalArgumentException
class MockType(PrimitiveType):
@@ -148,3 +150,176 @@ class TestTypeSerdes(unittest.TestCase):
self.assertEqual(
result.get(SerdesUtils.UNPARSED_TYPE), mock_type.simple_string()
)
+
+ def test_deserialize_primitive_and_none_type(self):
+ for simple_string, type_ in self._primitive_and_none_types.items():
+ self.assertEqual(TypeSerdes.deserialize(data=simple_string), type_)
+
+ def test_deserialize_invalid_primitive_and_non_type(self):
+ invalid_data = ["", {}, None]
+ for data in invalid_data:
+ self.assertRaises(
+ IllegalArgumentException,
+ TypeSerdes.deserialize,
+ data=data,
+ )
+
+ def test_deserialize_primitive_and_non_type_unparsed(self):
+ unparsed_data = [
+ int(random.random() * 10),
+ random.random(),
+ "invalid_type",
+ {"invalid_key": "value"},
+ list(range(10)),
+ True,
+ ]
+ for data in unparsed_data:
+ result = TypeSerdes.deserialize(data=data)
+ self.assertIsInstance(result, Types.UnparsedType)
+
+ def test_deserialize_struct_type(self):
+ types = self._primitive_and_none_types.values()
+ fields = [
+ Types.StructType.Field.not_null_field(
+ name=f"field_{field_idx}",
+ field_type=type_,
+ comment=f"comment {field_idx}" if field_idx % 2 == 0 else "",
+ )
+ for type_, field_idx in zip(types, range(len(types)))
+ ]
+
+ struct_type = Types.StructType.of(*fields)
+ serialized_result = TypeSerdes.serialize(struct_type)
+ deserialized_result = TypeSerdes.deserialize(data=serialized_result)
+ self.assertEqual(deserialized_result, struct_type)
+
+ def test_deserialize_struct_type_invalid_fields(self):
+ message_prefix = "Cannot parse struct fields? from"
+ field_data = {"type": SerdesUtils.STRUCT}
+ invalid_data = (
+ {**field_data, **{"fields": "non-array-fields"}},
+ {**field_data, **{"fields": ["invalid_field"]}},
+ {**field_data, **{"fields": [{"invalid_name": "value"}]}},
+ {
+ **field_data,
+ **{"fields": [{"name": "valid_field_name", "invalid_type":
"value"}]},
+ },
+ )
+ messages = (
+ f"{message_prefix} non-array",
+ f"{message_prefix} invalid JSON",
+ f"{message_prefix} missing name",
+ f"{message_prefix} missing type",
+ )
+ for data, message in zip(invalid_data, messages):
+ self.assertRaisesRegex(
+ IllegalArgumentException,
+ message,
+ TypeSerdes.deserialize,
+ data=data,
+ )
+
+ def test_deserialize_list_type(self):
+ types = self._primitive_and_none_types.values()
+ for type_ in types:
+ list_type = Types.ListType.of(element_type=type_,
element_nullable=False)
+ serialized_result = TypeSerdes.serialize(list_type)
+ deserialized_result =
TypeSerdes.deserialize(data=serialized_result)
+ self.assertEqual(
+ list_type.simple_string(), deserialized_result.simple_string()
+ )
+
+ def test_deserialize_list_type_invalid_data(self):
+ list_data = {"type": "list", "invalid_element_type": "value"}
+ self.assertRaisesRegex(
+ IllegalArgumentException,
+ "Cannot parse list type from missing element type",
+ TypeSerdes.deserialize,
+ data=list_data,
+ )
+
+ def test_deserialize_map_type(self):
+ types = self._primitive_and_none_types.values()
+ for key_type, value_type in product(types, types):
+ map_type = Types.MapType.of(
+ key_type=key_type, value_type=value_type, value_nullable=False
+ )
+ serialized_result = TypeSerdes.serialize(map_type)
+ deserialized_result =
TypeSerdes.deserialize(data=serialized_result)
+ self.assertEqual(
+ map_type.simple_string(), deserialized_result.simple_string()
+ )
+
+ def test_deserialize_map_type_invalid_data(self):
+ invalid_map_data = (
+ {"type": "map", "invalid_key_type": "value"},
+ {
+ "type": "map",
+ "keyType": "valid_key",
+ "invalid_value_type": "invalid_value",
+ },
+ )
+ for data in invalid_map_data:
+ self.assertRaisesRegex(
+ IllegalArgumentException,
+ "Cannot parse map type from missing (key|value) type",
+ TypeSerdes.deserialize,
+ data=data,
+ )
+
+ def test_deserialize_union_type(self):
+ types = self._primitive_and_none_types.values()
+ for types in combinations(types, 2):
+ union_type = Types.UnionType.of(*types)
+ serialized_result = TypeSerdes.serialize(union_type)
+ deserialized_result =
TypeSerdes.deserialize(data=serialized_result)
+ self.assertEqual(
+ union_type.simple_string(), deserialized_result.simple_string()
+ )
+
+ def test_deserialize_union_type_invalid_data(self):
+ invalid_union_data = (
+ {"type": "union", "invalid_types": "invalid_types"},
+ {"type": "union", "types": "invalid_types_value"},
+ )
+ for data in invalid_union_data:
+ self.assertRaisesRegex(
+ IllegalArgumentException,
+ "Cannot parse union types? from (?:non-array|missing types)",
+ TypeSerdes.deserialize,
+ data=data,
+ )
+
+ def test_deserialize_unparsed_type(self):
+ unparsed_type = Types.UnparsedType.of(unparsed_type="unparsed_type")
+ serialized_result = TypeSerdes.serialize(unparsed_type)
+ deserialized_result = TypeSerdes.deserialize(data=serialized_result)
+ self.assertEqual(
+ unparsed_type.simple_string(), deserialized_result.simple_string()
+ )
+
+ def test_deserialize_unparsed_type_invalid_data(self):
+ invalid_data = {"type": "unparsed"}
+ self.assertRaisesRegex(
+ IllegalArgumentException,
+ "Cannot parse unparsed type from missing unparsed type",
+ TypeSerdes.deserialize,
+ data=invalid_data,
+ )
+
+ def test_deserialize_external_type(self):
+ external_type = Types.ExternalType.of(catalog_string="catalog_string")
+ serialized_result = TypeSerdes.serialize(external_type)
+ deserialized_result = TypeSerdes.deserialize(data=serialized_result)
+ self.assertEqual(
+ external_type.simple_string(), deserialized_result.simple_string()
+ )
+
+ def test_deserialize_external_type_invalid_data(self):
+ invalid_data = {"type": "external"}
+ self.assertRaisesRegex(
+ IllegalArgumentException,
+ "Cannot parse external type from missing catalogString",
+ TypeSerdes.deserialize,
+ data=invalid_data,
+ )