This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
     new cd7875380a [#5202] feat(client-python): Support Gravitino Type Serdes 
- serializer (#6903)
cd7875380a is described below

commit cd7875380af77911132631d563ada7041f6766fb
Author: George T. C. Lai <[email protected]>
AuthorDate: Fri May 23 11:09:37 2025 +0800

    [#5202] feat(client-python): Support Gravitino Type Serdes - serializer 
(#6903)
    
    ### What changes were proposed in this pull request?
    
    This is the second of four planned parts that port the following
    Java classes to the Python client in order to support Column and
    its default value:
    
    - JsonUtils
    - TypeSerializer
    
    The `TypeSerializer` will be used in the upcoming `ColumnDTO`
    implementation to serialize the `data_type` field.
    
    ### Why are the changes needed?
    
    We need to support Column and its default value in the Python client.
    
    #5202
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
    Unit tests
    
    ---------
    
    Signed-off-by: George T. C. Lai <[email protected]>
---
 .../gravitino/api/types/json_serdes/__init__.py    |  21 ++
 .../api/types/json_serdes/_helper/serdes_utils.py  | 211 +++++++++++++++++++++
 .../gravitino/api/types/json_serdes/base.py        |  55 ++++++
 .../gravitino/api/types/json_serdes/type_serdes.py |  55 ++++++
 .../unittests/json_serdes/test_type_serdes.py      | 150 +++++++++++++++
 5 files changed, 492 insertions(+)

diff --git a/clients/client-python/gravitino/api/types/json_serdes/__init__.py 
b/clients/client-python/gravitino/api/types/json_serdes/__init__.py
new file mode 100644
index 0000000000..fb9e11264c
--- /dev/null
+++ b/clients/client-python/gravitino/api/types/json_serdes/__init__.py
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from gravitino.api.types.json_serdes.base import JsonSerializable
+from gravitino.api.types.json_serdes.type_serdes import TypeSerdes
+
+__all__ = ["JsonSerializable", "TypeSerdes"]
diff --git 
a/clients/client-python/gravitino/api/types/json_serdes/_helper/serdes_utils.py 
b/clients/client-python/gravitino/api/types/json_serdes/_helper/serdes_utils.py
new file mode 100644
index 0000000000..3d6a3d7f29
--- /dev/null
+++ 
b/clients/client-python/gravitino/api/types/json_serdes/_helper/serdes_utils.py
@@ -0,0 +1,211 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import re
+from types import MappingProxyType
+from typing import Any, ClassVar, Dict, Mapping, Pattern, Set, Union, overload
+
+from gravitino.api.types.type import Name, Type
+from gravitino.api.types.types import Types
+
+
+class SerdesUtils:
+    EXPRESSION_TYPE: ClassVar[str] = "type"
+    DATA_TYPE: ClassVar[str] = "dataType"
+    LITERAL_VALUE: ClassVar[str] = "value"
+    FIELD_NAME: ClassVar[str] = "fieldName"
+    FUNCTION_NAME: ClassVar[str] = "funcName"
+    FUNCTION_ARGS: ClassVar[str] = "funcArgs"
+    UNPARSED_EXPRESSION: ClassVar[str] = "unparsedExpression"
+    TYPE: ClassVar[str] = "type"
+    STRUCT: ClassVar[str] = "struct"
+    FIELDS: ClassVar[str] = "fields"
+    STRUCT_FIELD_NAME: ClassVar[str] = "name"
+    STRUCT_FIELD_NULLABLE: ClassVar[str] = "nullable"
+    STRUCT_FIELD_COMMENT: ClassVar[str] = "comment"
+    LIST: ClassVar[str] = "list"
+    LIST_ELEMENT_TYPE: ClassVar[str] = "elementType"
+    LIST_ELEMENT_NULLABLE: ClassVar[str] = "containsNull"
+    MAP: ClassVar[str] = "map"
+    MAP_KEY_TYPE: ClassVar[str] = "keyType"
+    MAP_VALUE_TYPE: ClassVar[str] = "valueType"
+    MAP_VALUE_NULLABLE: ClassVar[str] = "valueContainsNull"
+    UNION: ClassVar[str] = "union"
+    UNION_TYPES: ClassVar[str] = "types"
+    UNPARSED: ClassVar[str] = "unparsed"
+    UNPARSED_TYPE: ClassVar[str] = "unparsedType"
+    EXTERNAL: ClassVar[str] = "external"
+    CATALOG_STRING: ClassVar[str] = "catalogString"
+
+    NON_PRIMITIVE_TYPES: ClassVar[Set[Name]] = {
+        Name.STRUCT,
+        Name.LIST,
+        Name.MAP,
+        Name.UNION,
+        Name.UNPARSED,
+        Name.EXTERNAL,
+    }
+    PRIMITIVE_AND_NULL_TYPES: ClassVar[Set[Name]] = (
+        set(list(Name)) - NON_PRIMITIVE_TYPES
+    )
+
+    DECIMAL_PATTERN: ClassVar[Pattern[str]] = re.compile(
+        r"decimal\(\s*(\d+)\s*,\s*(\d+)\s*\)"
+    )
+    FIXED_PATTERN: ClassVar[Pattern[str]] = re.compile(r"fixed\(\s*(\d+)\s*\)")
+    FIXEDCHAR_PATTERN: ClassVar[Pattern[str]] = 
re.compile(r"char\(\s*(\d+)\s*\)")
+    VARCHAR_PATTERN: ClassVar[Pattern[str]] = 
re.compile(r"varchar\(\s*(\d+)\s*\)")
+    TYPES: ClassVar[Mapping] = MappingProxyType(
+        {
+            type_instance.simple_string(): type_instance
+            for type_instance in {
+                Types.NullType.get(),
+                Types.BooleanType.get(),
+                Types.ByteType.get(),
+                Types.ByteType.unsigned(),
+                Types.IntegerType.get(),
+                Types.IntegerType.unsigned(),
+                Types.ShortType.get(),
+                Types.ShortType.unsigned(),
+                Types.LongType.get(),
+                Types.LongType.unsigned(),
+                Types.FloatType.get(),
+                Types.DoubleType.get(),
+                Types.DateType.get(),
+                Types.TimeType.get(),
+                Types.TimestampType.with_time_zone(),
+                Types.TimestampType.without_time_zone(),
+                Types.IntervalYearType.get(),
+                Types.IntervalDayType.get(),
+                Types.StringType.get(),
+                Types.UUIDType.get(),
+            }
+        }
+    )
+
+    @classmethod
+    def write_data_type(cls, data_type: Type) -> Union[str, Dict[str, Any]]:
+        """Write Gravitino Type to JSON data. Used for Gravitino Type JSON 
Serialization.
+
+        Args:
+            data_type (Type): The Gravitino Type.
+
+        Returns:
+            Union[str, Dict[str, Any]]: The serialized data.
+        """
+
+        type_name = data_type.name()
+        if type_name in cls.PRIMITIVE_AND_NULL_TYPES:
+            return data_type.simple_string()
+
+        if type_name is Name.STRUCT:
+            return cls.write_struct_type(data_type)
+
+        if type_name is Name.LIST:
+            return cls.write_list_type(data_type)
+
+        if type_name is Name.MAP:
+            return cls.write_map_type(data_type)
+
+        if type_name is Name.UNION:
+            return cls.write_union_type(data_type)
+
+        if type_name is Name.EXTERNAL:
+            return cls.write_external_type(data_type)
+
+        if type_name is Name.UNPARSED:
+            return cls.write_unparsed_type(data_type)
+
+        return cls.write_unparsed_type(data_type.simple_string())
+
+    @classmethod
+    def write_struct_type(cls, struct_type: Types.StructType) -> Dict[str, 
Any]:
+        struct_data = {
+            cls.TYPE: cls.STRUCT,
+            cls.FIELDS: [
+                cls.write_struct_field(field) for field in struct_type.fields()
+            ],
+        }
+        return struct_data
+
+    @classmethod
+    def write_struct_field(cls, struct_field: Types.StructType.Field) -> 
Dict[str, Any]:
+        struct_field_data = {
+            cls.STRUCT_FIELD_NAME: struct_field.name(),
+            cls.TYPE: cls.write_data_type(struct_field.type()),
+            cls.STRUCT_FIELD_NULLABLE: struct_field.nullable(),
+        }
+        comment = struct_field.comment()
+        if comment is not None:
+            struct_field_data.update({cls.STRUCT_FIELD_COMMENT: comment})
+        return struct_field_data
+
+    @classmethod
+    def write_list_type(cls, list_type: Types.ListType) -> Dict[str, Any]:
+        list_data = {
+            cls.TYPE: cls.LIST,
+            cls.LIST_ELEMENT_TYPE: 
cls.write_data_type(list_type.element_type()),
+            cls.LIST_ELEMENT_NULLABLE: list_type.element_nullable(),
+        }
+        return list_data
+
+    @classmethod
+    def write_map_type(cls, map_type: Types.MapType) -> Dict[str, Any]:
+        map_data = {
+            cls.TYPE: cls.MAP,
+            cls.MAP_VALUE_NULLABLE: map_type.is_value_nullable(),
+            cls.MAP_KEY_TYPE: 
cls.write_data_type(data_type=map_type.key_type()),
+            cls.MAP_VALUE_TYPE: 
cls.write_data_type(data_type=map_type.value_type()),
+        }
+        return map_data
+
+    @classmethod
+    def write_union_type(cls, union_type: Types.UnionType) -> Dict[str, Any]:
+        union_data = {
+            cls.TYPE: cls.UNION,
+            cls.UNION_TYPES: [
+                cls.write_data_type(data_type=type_) for type_ in 
union_type.types()
+            ],
+        }
+        return union_data
+
+    @classmethod
+    def write_external_type(cls, external_type: Types.ExternalType) -> 
Dict[str, str]:
+        external_data = {
+            cls.TYPE: cls.EXTERNAL,
+            cls.CATALOG_STRING: external_type.catalog_string(),
+        }
+        return external_data
+
+    @classmethod
+    @overload
+    def write_unparsed_type(cls, unparsed_type: str) -> Dict[str, str]: ...
+
+    @classmethod
+    @overload
+    def write_unparsed_type(cls, unparsed_type: Type) -> Dict[str, str]: ...
+
+    @classmethod
+    def write_unparsed_type(cls, unparsed_type) -> Dict[str, str]:
+        return {
+            cls.TYPE: cls.UNPARSED,
+            cls.UNPARSED_TYPE: (
+                unparsed_type.unparsed_type()
+                if isinstance(unparsed_type, Types.UnparsedType)
+                else unparsed_type
+            ),
+        }
diff --git a/clients/client-python/gravitino/api/types/json_serdes/base.py 
b/clients/client-python/gravitino/api/types/json_serdes/base.py
new file mode 100644
index 0000000000..27c838478b
--- /dev/null
+++ b/clients/client-python/gravitino/api/types/json_serdes/base.py
@@ -0,0 +1,55 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from abc import ABC, abstractmethod
+from typing import Generic, TypeVar
+
+from dataclasses_json.core import Json
+
+from gravitino.api.types.types import Type
+
+GravitinoTypeT = TypeVar("GravitinoTypeT", bound=Type)
+
+
+class JsonSerializable(ABC, Generic[GravitinoTypeT]):
+    """Customized generic serializer/deserializer interface for DataClassJson."""
+
+    @classmethod
+    @abstractmethod
+    def serialize(cls, data_type: GravitinoTypeT) -> Json:
+        """To serialize the given `data_type`.
+
+        Args:
+            data_type (GravitinoTypeT): The data to be serialized.
+
+        Returns:
+            Json: The serialized data.
+        """
+        pass
+
+    @classmethod
+    @abstractmethod
+    def deserialize(cls, data: Json) -> GravitinoTypeT:
+        """To deserialize the given `data`.
+
+        Args:
+            data (Json): The data to be deserialized.
+
+        Returns:
+            GravitinoTypeT: The deserialized data.
+        """
+        pass
diff --git 
a/clients/client-python/gravitino/api/types/json_serdes/type_serdes.py 
b/clients/client-python/gravitino/api/types/json_serdes/type_serdes.py
new file mode 100644
index 0000000000..ab86cea789
--- /dev/null
+++ b/clients/client-python/gravitino/api/types/json_serdes/type_serdes.py
@@ -0,0 +1,55 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+from dataclasses_json.core import Json
+
+from gravitino.api.types.json_serdes._helper.serdes_utils import SerdesUtils
+from gravitino.api.types.json_serdes.base import JsonSerializable
+from gravitino.api.types.type import Type
+
+
+class TypeSerdes(JsonSerializable[Type]):
+    """Custom JSON serializer for Gravitino Type objects."""
+
+    @classmethod
+    def serialize(cls, data_type: Type) -> Json:
+        """Serialize the given Gravitino Type.
+
+        Args:
+            data_type (Type): The Gravitino Type to be serialized.
+
+        Returns:
+            Json: The serialized data corresponding to the given Gravitino 
Type.
+        """
+
+        return SerdesUtils.write_data_type(data_type)
+
+    @classmethod
+    def deserialize(cls, data: Json) -> Type:
+        """Deserialize the given data to a Gravitino Type.
+
+        Args:
+            data (Json): The data to be deserialized.
+
+        Returns:
+            Type: The deserialized Gravitino Type.
+        """
+
+        # TODO: We shall implement the deserialization logic here.
+        # For now, it's just a placeholder.
+        pass
diff --git 
a/clients/client-python/tests/unittests/json_serdes/test_type_serdes.py 
b/clients/client-python/tests/unittests/json_serdes/test_type_serdes.py
new file mode 100644
index 0000000000..8a77a889bb
--- /dev/null
+++ b/clients/client-python/tests/unittests/json_serdes/test_type_serdes.py
@@ -0,0 +1,150 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import unittest
+from itertools import combinations, product
+
+from gravitino.api.types.json_serdes import TypeSerdes
+from gravitino.api.types.json_serdes._helper.serdes_utils import SerdesUtils
+from gravitino.api.types.type import PrimitiveType
+from gravitino.api.types.types import Types
+
+
+class MockType(PrimitiveType):
+    def __init__(self, name: str):
+        self._name = name
+
+    def name(self) -> str:
+        return self._name
+
+    def simple_string(self):
+        return "mock_type"
+
+
+class TestTypeSerdes(unittest.TestCase):
+    def setUp(self):
+        self._primitive_and_none_types = {
+            **SerdesUtils.TYPES,
+            **{
+                "decimal(10,2)": Types.DecimalType.of(10, 2),
+                "fixed(10)": Types.FixedType.of(10),
+                "char(10)": Types.FixedCharType.of(10),
+                "varchar(10)": Types.VarCharType.of(10),
+            },
+        }
+
+    def test_serialize_primitive_and_none_type(self):
+        for simple_string, type_ in self._primitive_and_none_types.items():
+            self.assertEqual(TypeSerdes.serialize(data_type=type_), 
simple_string)
+
+    def test_serialize_struct_type_of_primitive_and_none_types(self):
+        types = self._primitive_and_none_types.values()
+        fields = [
+            Types.StructType.Field.not_null_field(
+                name=f"field_{field_idx}",
+                field_type=type_,
+                comment=f"comment {field_idx}" if field_idx % 2 == 0 else None,
+            )
+            for type_, field_idx in zip(types, range(len(types)))
+        ]
+
+        struct_type = Types.StructType.of(*fields)
+        serialized_result = TypeSerdes.serialize(struct_type)
+        serialized_fields = serialized_result.get(SerdesUtils.FIELDS)
+
+        self.assertEqual(serialized_result.get(SerdesUtils.TYPE), 
SerdesUtils.STRUCT)
+        for field, serialized_field in zip(fields, serialized_fields):
+            self.assertEqual(
+                serialized_field.get(SerdesUtils.STRUCT_FIELD_NAME), 
field.name()
+            )
+            self.assertEqual(
+                serialized_field.get(SerdesUtils.TYPE),
+                SerdesUtils.write_data_type(field.type()),
+            )
+            self.assertEqual(
+                serialized_field.get(SerdesUtils.STRUCT_FIELD_NULLABLE),
+                field.nullable(),
+            )
+            self.assertEqual(
+                serialized_field.get(SerdesUtils.STRUCT_FIELD_COMMENT), 
field.comment()
+            )
+
+    def test_serialize_list_type_of_primitive_and_none_types(self):
+        for simple_string, type_ in self._primitive_and_none_types.items():
+            list_type = Types.ListType.of(element_type=type_, 
element_nullable=False)
+            serialized_result = TypeSerdes.serialize(list_type)
+            self.assertEqual(serialized_result.get(SerdesUtils.TYPE), 
SerdesUtils.LIST)
+            self.assertEqual(
+                serialized_result.get(SerdesUtils.LIST_ELEMENT_TYPE), 
simple_string
+            )
+            self.assertEqual(
+                serialized_result.get(SerdesUtils.LIST_ELEMENT_NULLABLE), False
+            )
+
+    def test_serialize_map_type_of_primitive_and_none_types(self):
+        types = self._primitive_and_none_types.values()
+        for key_type, value_type in product(types, types):
+            map_type = Types.MapType.of(
+                key_type=key_type, value_type=value_type, value_nullable=False
+            )
+            serialized_result = TypeSerdes.serialize(map_type)
+            self.assertEqual(serialized_result.get(SerdesUtils.TYPE), 
SerdesUtils.MAP)
+            self.assertEqual(
+                serialized_result.get(SerdesUtils.MAP_KEY_TYPE),
+                key_type.simple_string(),
+            )
+            self.assertEqual(
+                serialized_result.get(SerdesUtils.MAP_VALUE_TYPE),
+                value_type.simple_string(),
+            )
+            self.assertEqual(
+                serialized_result.get(SerdesUtils.MAP_VALUE_NULLABLE), False
+            )
+
+    def test_serialize_union_type_of_primitive_and_none_types(self):
+        types = self._primitive_and_none_types.values()
+        for types in combinations(types, 2):
+            union_type = Types.UnionType.of(*types)
+            serialized_result = TypeSerdes.serialize(union_type)
+            self.assertEqual(serialized_result.get(SerdesUtils.TYPE), 
SerdesUtils.UNION)
+            self.assertListEqual(
+                serialized_result.get(SerdesUtils.UNION_TYPES),
+                [type_.simple_string() for type_ in types],
+            )
+
+    def test_serialize_external_type(self):
+        external_type = Types.ExternalType.of(catalog_string="catalog_string")
+        serialized_result = TypeSerdes.serialize(external_type)
+        self.assertEqual(serialized_result.get(SerdesUtils.TYPE), 
SerdesUtils.EXTERNAL)
+        self.assertEqual(
+            serialized_result.get(SerdesUtils.CATALOG_STRING), "catalog_string"
+        )
+
+    def test_write_unparsed_type(self):
+        unparsed_type = Types.UnparsedType.of(unparsed_type="unparsed_type")
+        serialized_result = TypeSerdes.serialize(unparsed_type)
+        self.assertEqual(serialized_result.get(SerdesUtils.TYPE), 
SerdesUtils.UNPARSED)
+        self.assertEqual(
+            serialized_result.get(SerdesUtils.UNPARSED_TYPE), "unparsed_type"
+        )
+
+        mock_type = MockType(name="mock")
+        result = TypeSerdes.serialize(mock_type)
+        self.assertEqual(result.get(SerdesUtils.TYPE), SerdesUtils.UNPARSED)
+        self.assertEqual(
+            result.get(SerdesUtils.UNPARSED_TYPE), mock_type.simple_string()
+        )

Reply via email to