This is an automated email from the ASF dual-hosted git repository.

mchades pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
     new 671ae2da8f [#5202] feat(client-python): Support Gravitino Type Serdes 
- deserializer (#7241)
671ae2da8f is described below

commit 671ae2da8fa6e1e5fd464f7cccdc0e3a4eb99e56
Author: George T. C. Lai <[email protected]>
AuthorDate: Sun Jun 1 16:24:52 2025 +0800

    [#5202] feat(client-python): Support Gravitino Type Serdes - deserializer 
(#7241)
    
    ### What changes were proposed in this pull request?
    
    This is the second part (of 4 planned) of the implementation of the
    following classes from Java, to support Column and its default value,
    including:
    
    - JsonUtils: `readDataType()` and its related methods to deserialize
    various Gravitino Types.
    - TypeDeserializer
    
    The `TypeDeserializer` will be used in the upcoming `ColumnDTO`
    implementation to deserialize the `data_type` field.
    
    ### Why are the changes needed?
    
    We need to support Column and its default value in the Python client.
    
    #5202
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
    Unit tests.
    
    ---------
    
    Signed-off-by: George T. C. Lai <[email protected]>
---
 .../api/types/json_serdes/_helper/serdes_utils.py  | 158 +++++++++++++++++++
 .../gravitino/api/types/json_serdes/type_serdes.py |   4 +-
 .../unittests/json_serdes/test_type_serdes.py      | 175 +++++++++++++++++++++
 3 files changed, 334 insertions(+), 3 deletions(-)

diff --git 
a/clients/client-python/gravitino/api/types/json_serdes/_helper/serdes_utils.py 
b/clients/client-python/gravitino/api/types/json_serdes/_helper/serdes_utils.py
index 3d6a3d7f29..0bff743414 100644
--- 
a/clients/client-python/gravitino/api/types/json_serdes/_helper/serdes_utils.py
+++ 
b/clients/client-python/gravitino/api/types/json_serdes/_helper/serdes_utils.py
@@ -15,12 +15,16 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import json
 import re
 from types import MappingProxyType
 from typing import Any, ClassVar, Dict, Mapping, Pattern, Set, Union, overload
 
+from dataclasses_json.core import Json
+
 from gravitino.api.types.type import Name, Type
 from gravitino.api.types.types import Types
+from gravitino.utils.precondition import Precondition
 
 
 class SerdesUtils:
@@ -209,3 +213,157 @@ class SerdesUtils:
                 else unparsed_type
             ),
         }
+
+    @classmethod
+    def read_data_type(cls, type_data: Json) -> Type:
+        """Read Gravitino Type from JSON data. Used for Gravitino Type JSON 
Deserialization.
+
+        Args:
+            type_data (Json): The serialized data.
+
+        Returns:
+            Type: The Gravitino Type.
+        """
+        if type_data is None or isinstance(type_data, (dict, str)):
+            Precondition.check_argument(
+                type_data is not None and len(type_data) > 0,
+                f"Cannot parse type from invalid JSON: {type_data}",
+            )
+
+        if isinstance(type_data, str):
+            return cls.from_primitive_type_string(type_data.lower())
+
+        if isinstance(type_data, dict) and type_data.get(cls.TYPE):
+            type_str = type_data[cls.TYPE]
+            if cls.STRUCT == type_str:
+                return cls.read_struct_type(type_data)
+            if cls.LIST == type_str:
+                return cls.read_list_type(type_data)
+            if cls.MAP == type_str:
+                return cls.read_map_type(type_data)
+            if cls.UNION == type_str:
+                return cls.read_union_type(type_data)
+            if cls.UNPARSED == type_str:
+                return cls.read_unparsed_type(type_data)
+            if cls.EXTERNAL == type_str:
+                return cls.read_external_type(type_data)
+
+        return Types.UnparsedType.of(unparsed_type=json.dumps(type_data))
+
+    @classmethod
+    def from_primitive_type_string(cls, type_string: str) -> Type:
+        type_instance = cls.TYPES.get(type_string)
+        if type_instance is not None:
+            return type_instance
+
+        decimal_matched = cls.DECIMAL_PATTERN.match(type_string)
+        if decimal_matched:
+            return Types.DecimalType.of(
+                precision=int(decimal_matched.group(1)),
+                scale=int(decimal_matched.group(2)),
+            )
+
+        fixed_matched = cls.FIXED_PATTERN.match(type_string)
+        if fixed_matched:
+            return Types.FixedType.of(length=int(fixed_matched.group(1)))
+
+        fixedchar_matched = cls.FIXEDCHAR_PATTERN.match(type_string)
+        if fixedchar_matched:
+            return 
Types.FixedCharType.of(length=int(fixedchar_matched.group(1)))
+
+        varchar_matched = cls.VARCHAR_PATTERN.match(type_string)
+        if varchar_matched:
+            return Types.VarCharType.of(length=int(varchar_matched.group(1)))
+
+        return Types.UnparsedType.of(type_string)
+
+    @classmethod
+    def read_struct_type(cls, struct_data: Dict[str, Any]) -> Types.StructType:
+        fields = struct_data.get(cls.FIELDS)
+        Precondition.check_argument(
+            fields is not None and isinstance(fields, list),
+            f"Cannot parse struct fields from non-array: {fields}",
+        )
+        field_list = [cls.read_struct_field(field) for field in 
struct_data[cls.FIELDS]]
+        return Types.StructType.of(*field_list)
+
+    @classmethod
+    def read_struct_field(cls, field_data: Dict[str, Any]) -> 
Types.StructType.Field:
+        Precondition.check_argument(
+            field_data is not None and isinstance(field_data, dict),
+            f"Cannot parse struct field from invalid JSON: {field_data}",
+        )
+        Precondition.check_argument(
+            field_data.get(cls.STRUCT_FIELD_NAME) is not None,
+            f"Cannot parse struct field from missing name: {field_data}",
+        )
+        Precondition.check_argument(
+            field_data.get(cls.TYPE) is not None,
+            f"Cannot parse struct field from missing type: {field_data}",
+        )
+
+        name = field_data[cls.STRUCT_FIELD_NAME]
+        field_type = cls.read_data_type(field_data[cls.TYPE])
+        nullable = field_data.get(cls.STRUCT_FIELD_NULLABLE, True)
+        comment = field_data.get(cls.STRUCT_FIELD_COMMENT, "")
+
+        return Types.StructType.Field(
+            name=name, field_type=field_type, nullable=nullable, 
comment=comment
+        )
+
+    @classmethod
+    def read_list_type(cls, list_data: Dict[str, Any]) -> Types.ListType:
+        Precondition.check_argument(
+            list_data.get(cls.LIST_ELEMENT_TYPE) is not None,
+            f"Cannot parse list type from missing element type: {list_data}",
+        )
+        element_type = cls.read_data_type(list_data[cls.LIST_ELEMENT_TYPE])
+        nullable = list_data.get(cls.LIST_ELEMENT_NULLABLE, True)
+        return Types.ListType.of(element_type=element_type, 
element_nullable=nullable)
+
+    @classmethod
+    def read_map_type(cls, map_data: Dict[str, Any]) -> Types.MapType:
+        Precondition.check_argument(
+            map_data.get(cls.MAP_KEY_TYPE) is not None,
+            f"Cannot parse map type from missing key type: {map_data}",
+        )
+        Precondition.check_argument(
+            map_data.get(cls.MAP_VALUE_TYPE) is not None,
+            f"Cannot parse map type from missing value type: {map_data}",
+        )
+        key_type = cls.read_data_type(map_data[cls.MAP_KEY_TYPE])
+        value_type = cls.read_data_type(map_data[cls.MAP_VALUE_TYPE])
+        nullable = map_data.get(cls.MAP_VALUE_NULLABLE, True)
+        return Types.MapType.of(key_type, value_type, nullable)
+
+    @classmethod
+    def read_union_type(cls, union_data: Dict[str, Any]) -> Types.UnionType:
+        Precondition.check_argument(
+            union_data.get(cls.UNION_TYPES) is not None,
+            f"Cannot parse union type from missing types: {union_data}",
+        )
+        types = union_data.get(cls.UNION_TYPES)
+        Precondition.check_argument(
+            types is not None and isinstance(types, list),
+            f"Cannot parse union types from non-array: {types}",
+        )
+
+        union_types = [cls.read_data_type(type_data) for type_data in types]
+        return Types.UnionType.of(*union_types)
+
+    @classmethod
+    def read_unparsed_type(cls, data: Dict[str, Any]) -> Types.UnparsedType:
+        Precondition.check_argument(
+            data.get(cls.UNPARSED_TYPE) is not None,
+            f"Cannot parse unparsed type from missing unparsed type: {data}",
+        )
+
+        return Types.UnparsedType.of(data[cls.UNPARSED_TYPE])
+
+    @classmethod
+    def read_external_type(cls, external_data: Dict[str, Any]) -> 
Types.ExternalType:
+        Precondition.check_argument(
+            external_data.get(cls.CATALOG_STRING) is not None,
+            f"Cannot parse external type from missing catalogString: 
{external_data}",
+        )
+        return Types.ExternalType.of(external_data[cls.CATALOG_STRING])
diff --git 
a/clients/client-python/gravitino/api/types/json_serdes/type_serdes.py 
b/clients/client-python/gravitino/api/types/json_serdes/type_serdes.py
index ab86cea789..4577987486 100644
--- a/clients/client-python/gravitino/api/types/json_serdes/type_serdes.py
+++ b/clients/client-python/gravitino/api/types/json_serdes/type_serdes.py
@@ -50,6 +50,4 @@ class TypeSerdes(JsonSerializable[Type]):
             Type: The deserialized Gravitino Type.
         """
 
-        # TODO: We shall implement the deserialization logic here.
-        # For now, it's just a placeholder.
-        pass
+        return SerdesUtils.read_data_type(data)
diff --git 
a/clients/client-python/tests/unittests/json_serdes/test_type_serdes.py 
b/clients/client-python/tests/unittests/json_serdes/test_type_serdes.py
index 8a77a889bb..8d71ab9d45 100644
--- a/clients/client-python/tests/unittests/json_serdes/test_type_serdes.py
+++ b/clients/client-python/tests/unittests/json_serdes/test_type_serdes.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import random
 import unittest
 from itertools import combinations, product
 
@@ -22,6 +23,7 @@ from gravitino.api.types.json_serdes import TypeSerdes
 from gravitino.api.types.json_serdes._helper.serdes_utils import SerdesUtils
 from gravitino.api.types.type import PrimitiveType
 from gravitino.api.types.types import Types
+from gravitino.exceptions.base import IllegalArgumentException
 
 
 class MockType(PrimitiveType):
@@ -148,3 +150,176 @@ class TestTypeSerdes(unittest.TestCase):
         self.assertEqual(
             result.get(SerdesUtils.UNPARSED_TYPE), mock_type.simple_string()
         )
+
+    def test_deserialize_primitive_and_none_type(self):
+        for simple_string, type_ in self._primitive_and_none_types.items():
+            self.assertEqual(TypeSerdes.deserialize(data=simple_string), type_)
+
+    def test_deserialize_invalid_primitive_and_non_type(self):
+        invalid_data = ["", {}, None]
+        for data in invalid_data:
+            self.assertRaises(
+                IllegalArgumentException,
+                TypeSerdes.deserialize,
+                data=data,
+            )
+
+    def test_deserialize_primitive_and_non_type_unparsed(self):
+        unparsed_data = [
+            int(random.random() * 10),
+            random.random(),
+            "invalid_type",
+            {"invalid_key": "value"},
+            list(range(10)),
+            True,
+        ]
+        for data in unparsed_data:
+            result = TypeSerdes.deserialize(data=data)
+            self.assertIsInstance(result, Types.UnparsedType)
+
+    def test_deserialize_struct_type(self):
+        types = self._primitive_and_none_types.values()
+        fields = [
+            Types.StructType.Field.not_null_field(
+                name=f"field_{field_idx}",
+                field_type=type_,
+                comment=f"comment {field_idx}" if field_idx % 2 == 0 else "",
+            )
+            for type_, field_idx in zip(types, range(len(types)))
+        ]
+
+        struct_type = Types.StructType.of(*fields)
+        serialized_result = TypeSerdes.serialize(struct_type)
+        deserialized_result = TypeSerdes.deserialize(data=serialized_result)
+        self.assertEqual(deserialized_result, struct_type)
+
+    def test_deserialize_struct_type_invalid_fields(self):
+        message_prefix = "Cannot parse struct fields? from"
+        field_data = {"type": SerdesUtils.STRUCT}
+        invalid_data = (
+            {**field_data, **{"fields": "non-array-fields"}},
+            {**field_data, **{"fields": ["invalid_field"]}},
+            {**field_data, **{"fields": [{"invalid_name": "value"}]}},
+            {
+                **field_data,
+                **{"fields": [{"name": "valid_field_name", "invalid_type": 
"value"}]},
+            },
+        )
+        messages = (
+            f"{message_prefix} non-array",
+            f"{message_prefix} invalid JSON",
+            f"{message_prefix} missing name",
+            f"{message_prefix} missing type",
+        )
+        for data, message in zip(invalid_data, messages):
+            self.assertRaisesRegex(
+                IllegalArgumentException,
+                message,
+                TypeSerdes.deserialize,
+                data=data,
+            )
+
+    def test_deserialize_list_type(self):
+        types = self._primitive_and_none_types.values()
+        for type_ in types:
+            list_type = Types.ListType.of(element_type=type_, 
element_nullable=False)
+            serialized_result = TypeSerdes.serialize(list_type)
+            deserialized_result = 
TypeSerdes.deserialize(data=serialized_result)
+            self.assertEqual(
+                list_type.simple_string(), deserialized_result.simple_string()
+            )
+
+    def test_deserialize_list_type_invalid_data(self):
+        list_data = {"type": "list", "invalid_element_type": "value"}
+        self.assertRaisesRegex(
+            IllegalArgumentException,
+            "Cannot parse list type from missing element type",
+            TypeSerdes.deserialize,
+            data=list_data,
+        )
+
+    def test_deserialize_map_type(self):
+        types = self._primitive_and_none_types.values()
+        for key_type, value_type in product(types, types):
+            map_type = Types.MapType.of(
+                key_type=key_type, value_type=value_type, value_nullable=False
+            )
+            serialized_result = TypeSerdes.serialize(map_type)
+            deserialized_result = 
TypeSerdes.deserialize(data=serialized_result)
+            self.assertEqual(
+                map_type.simple_string(), deserialized_result.simple_string()
+            )
+
+    def test_deserialize_map_type_invalid_data(self):
+        invalid_map_data = (
+            {"type": "map", "invalid_key_type": "value"},
+            {
+                "type": "map",
+                "keyType": "valid_key",
+                "invalid_value_type": "invalid_value",
+            },
+        )
+        for data in invalid_map_data:
+            self.assertRaisesRegex(
+                IllegalArgumentException,
+                "Cannot parse map type from missing (key|value) type",
+                TypeSerdes.deserialize,
+                data=data,
+            )
+
+    def test_deserialize_union_type(self):
+        types = self._primitive_and_none_types.values()
+        for types in combinations(types, 2):
+            union_type = Types.UnionType.of(*types)
+            serialized_result = TypeSerdes.serialize(union_type)
+            deserialized_result = 
TypeSerdes.deserialize(data=serialized_result)
+            self.assertEqual(
+                union_type.simple_string(), deserialized_result.simple_string()
+            )
+
+    def test_deserialize_union_type_invalid_data(self):
+        invalid_union_data = (
+            {"type": "union", "invalid_types": "invalid_types"},
+            {"type": "union", "types": "invalid_types_value"},
+        )
+        for data in invalid_union_data:
+            self.assertRaisesRegex(
+                IllegalArgumentException,
+                "Cannot parse union types? from (?:non-array|missing types)",
+                TypeSerdes.deserialize,
+                data=data,
+            )
+
+    def test_deserialize_unparsed_type(self):
+        unparsed_type = Types.UnparsedType.of(unparsed_type="unparsed_type")
+        serialized_result = TypeSerdes.serialize(unparsed_type)
+        deserialized_result = TypeSerdes.deserialize(data=serialized_result)
+        self.assertEqual(
+            unparsed_type.simple_string(), deserialized_result.simple_string()
+        )
+
+    def test_deserialize_unparsed_type_invalid_data(self):
+        invalid_data = {"type": "unparsed"}
+        self.assertRaisesRegex(
+            IllegalArgumentException,
+            "Cannot parse unparsed type from missing unparsed type",
+            TypeSerdes.deserialize,
+            data=invalid_data,
+        )
+
+    def test_deserialize_external_type(self):
+        external_type = Types.ExternalType.of(catalog_string="catalog_string")
+        serialized_result = TypeSerdes.serialize(external_type)
+        deserialized_result = TypeSerdes.deserialize(data=serialized_result)
+        self.assertEqual(
+            external_type.simple_string(), deserialized_result.simple_string()
+        )
+
+    def test_deserialize_external_type_invalid_data(self):
+        invalid_data = {"type": "external"}
+        self.assertRaisesRegex(
+            IllegalArgumentException,
+            "Cannot parse external type from missing catalogString",
+            TypeSerdes.deserialize,
+            data=invalid_data,
+        )

Reply via email to