This is an automated email from the ASF dual-hosted git repository.
jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new cd7875380a [#5202] feat(client-python): Support Gravitino Type Serdes
- serializer (#6903)
cd7875380a is described below
commit cd7875380af77911132631d563ada7041f6766fb
Author: George T. C. Lai <[email protected]>
AuthorDate: Fri May 23 11:09:37 2025 +0800
[#5202] feat(client-python): Support Gravitino Type Serdes - serializer
(#6903)
### What changes were proposed in this pull request?
This is the second part (of four planned) of the implementation that
ports the following classes from Java to support Column and its default
value, including:
- JsonUtils
- TypeSerializer
The `TypeSerializer` will be used in the upcoming `ColumnDTO`
implementation to serialize the `data_type` field.
### Why are the changes needed?
We need to support Column and its default value in the Python client.
#5202
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Unit tests
---------
Signed-off-by: George T. C. Lai <[email protected]>
---
.../gravitino/api/types/json_serdes/__init__.py | 21 ++
.../api/types/json_serdes/_helper/serdes_utils.py | 211 +++++++++++++++++++++
.../gravitino/api/types/json_serdes/base.py | 55 ++++++
.../gravitino/api/types/json_serdes/type_serdes.py | 55 ++++++
.../unittests/json_serdes/test_type_serdes.py | 150 +++++++++++++++
5 files changed, 492 insertions(+)
diff --git a/clients/client-python/gravitino/api/types/json_serdes/__init__.py
b/clients/client-python/gravitino/api/types/json_serdes/__init__.py
new file mode 100644
index 0000000000..fb9e11264c
--- /dev/null
+++ b/clients/client-python/gravitino/api/types/json_serdes/__init__.py
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from gravitino.api.types.json_serdes.base import JsonSerializable
+from gravitino.api.types.json_serdes.type_serdes import TypeSerdes
+
+# Names re-exported as the public API of the ``json_serdes`` package.
+__all__ = ["JsonSerializable", "TypeSerdes"]
diff --git
a/clients/client-python/gravitino/api/types/json_serdes/_helper/serdes_utils.py
b/clients/client-python/gravitino/api/types/json_serdes/_helper/serdes_utils.py
new file mode 100644
index 0000000000..3d6a3d7f29
--- /dev/null
+++
b/clients/client-python/gravitino/api/types/json_serdes/_helper/serdes_utils.py
@@ -0,0 +1,211 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import re
+from types import MappingProxyType
+from typing import Any, ClassVar, Dict, Mapping, Pattern, Set, Union, overload
+
+from gravitino.api.types.type import Name, Type
+from gravitino.api.types.types import Types
+
+
+class SerdesUtils:
+    """Helpers for writing Gravitino types as JSON-compatible data.
+
+    The string constants are JSON field names and type tags. Only some of
+    them are used by the ``write_*`` methods defined in this class.
+    """
+
+    EXPRESSION_TYPE: ClassVar[str] = "type"
+    DATA_TYPE: ClassVar[str] = "dataType"
+    LITERAL_VALUE: ClassVar[str] = "value"
+    FIELD_NAME: ClassVar[str] = "fieldName"
+    FUNCTION_NAME: ClassVar[str] = "funcName"
+    FUNCTION_ARGS: ClassVar[str] = "funcArgs"
+    UNPARSED_EXPRESSION: ClassVar[str] = "unparsedExpression"
+    TYPE: ClassVar[str] = "type"
+    STRUCT: ClassVar[str] = "struct"
+    FIELDS: ClassVar[str] = "fields"
+    STRUCT_FIELD_NAME: ClassVar[str] = "name"
+    STRUCT_FIELD_NULLABLE: ClassVar[str] = "nullable"
+    STRUCT_FIELD_COMMENT: ClassVar[str] = "comment"
+    LIST: ClassVar[str] = "list"
+    LIST_ELEMENT_TYPE: ClassVar[str] = "elementType"
+    LIST_ELEMENT_NULLABLE: ClassVar[str] = "containsNull"
+    MAP: ClassVar[str] = "map"
+    MAP_KEY_TYPE: ClassVar[str] = "keyType"
+    MAP_VALUE_TYPE: ClassVar[str] = "valueType"
+    MAP_VALUE_NULLABLE: ClassVar[str] = "valueContainsNull"
+    UNION: ClassVar[str] = "union"
+    UNION_TYPES: ClassVar[str] = "types"
+    UNPARSED: ClassVar[str] = "unparsed"
+    UNPARSED_TYPE: ClassVar[str] = "unparsedType"
+    EXTERNAL: ClassVar[str] = "external"
+    CATALOG_STRING: ClassVar[str] = "catalogString"
+
+    # Type names that are serialized as JSON objects by a dedicated writer.
+    NON_PRIMITIVE_TYPES: ClassVar[Set[Name]] = {
+        Name.STRUCT,
+        Name.LIST,
+        Name.MAP,
+        Name.UNION,
+        Name.UNPARSED,
+        Name.EXTERNAL,
+    }
+    # Every remaining ``Name`` member; these are serialized as their simple
+    # string.
+    PRIMITIVE_AND_NULL_TYPES: ClassVar[Set[Name]] = set(Name) - NON_PRIMITIVE_TYPES
+
+    DECIMAL_PATTERN: ClassVar[Pattern[str]] = re.compile(
+        r"decimal\(\s*(\d+)\s*,\s*(\d+)\s*\)"
+    )
+    FIXED_PATTERN: ClassVar[Pattern[str]] = re.compile(r"fixed\(\s*(\d+)\s*\)")
+    FIXEDCHAR_PATTERN: ClassVar[Pattern[str]] = re.compile(r"char\(\s*(\d+)\s*\)")
+    VARCHAR_PATTERN: ClassVar[Pattern[str]] = re.compile(r"varchar\(\s*(\d+)\s*\)")
+
+    # Read-only lookup from simple string to type instance. A tuple (rather
+    # than a set) keeps the mapping's iteration order fixed and does not rely
+    # on the type objects being hashable.
+    TYPES: ClassVar[Mapping] = MappingProxyType(
+        {
+            type_instance.simple_string(): type_instance
+            for type_instance in (
+                Types.NullType.get(),
+                Types.BooleanType.get(),
+                Types.ByteType.get(),
+                Types.ByteType.unsigned(),
+                Types.IntegerType.get(),
+                Types.IntegerType.unsigned(),
+                Types.ShortType.get(),
+                Types.ShortType.unsigned(),
+                Types.LongType.get(),
+                Types.LongType.unsigned(),
+                Types.FloatType.get(),
+                Types.DoubleType.get(),
+                Types.DateType.get(),
+                Types.TimeType.get(),
+                Types.TimestampType.with_time_zone(),
+                Types.TimestampType.without_time_zone(),
+                Types.IntervalYearType.get(),
+                Types.IntervalDayType.get(),
+                Types.StringType.get(),
+                Types.UUIDType.get(),
+            )
+        }
+    )
+
+    @classmethod
+    def write_data_type(cls, data_type: Type) -> Union[str, Dict[str, Any]]:
+        """Write a Gravitino Type to JSON data.
+
+        Used for Gravitino Type JSON serialization. Types whose name is in
+        ``PRIMITIVE_AND_NULL_TYPES`` are written as their simple string; the
+        other known names are delegated to the matching ``write_*`` method.
+        A type whose name matches none of them is written as an unparsed
+        type carrying its simple string.
+
+        Args:
+            data_type (Type): The Gravitino Type.
+
+        Returns:
+            Union[str, Dict[str, Any]]: The serialized data.
+        """
+
+        type_name = data_type.name()
+        if type_name in cls.PRIMITIVE_AND_NULL_TYPES:
+            return data_type.simple_string()
+
+        if type_name is Name.STRUCT:
+            return cls.write_struct_type(data_type)
+
+        if type_name is Name.LIST:
+            return cls.write_list_type(data_type)
+
+        if type_name is Name.MAP:
+            return cls.write_map_type(data_type)
+
+        if type_name is Name.UNION:
+            return cls.write_union_type(data_type)
+
+        if type_name is Name.EXTERNAL:
+            return cls.write_external_type(data_type)
+
+        if type_name is Name.UNPARSED:
+            return cls.write_unparsed_type(data_type)
+
+        # Unknown type name: fall back to an unparsed entry built from the
+        # type's simple string.
+        return cls.write_unparsed_type(data_type.simple_string())
+
+    @classmethod
+    def write_struct_type(cls, struct_type: Types.StructType) -> Dict[str, Any]:
+        """Write a struct type as a dict holding its serialized fields.
+
+        Args:
+            struct_type (Types.StructType): The struct type.
+
+        Returns:
+            Dict[str, Any]: The struct tag plus the list of serialized fields.
+        """
+
+        return {
+            cls.TYPE: cls.STRUCT,
+            cls.FIELDS: [
+                cls.write_struct_field(field) for field in struct_type.fields()
+            ],
+        }
+
+    @classmethod
+    def write_struct_field(cls, struct_field: Types.StructType.Field) -> Dict[str, Any]:
+        """Write a single struct field.
+
+        Args:
+            struct_field (Types.StructType.Field): The struct field.
+
+        Returns:
+            Dict[str, Any]: The field's name, serialized type and nullability.
+                The comment key is present only when the comment is not None.
+        """
+
+        struct_field_data = {
+            cls.STRUCT_FIELD_NAME: struct_field.name(),
+            cls.TYPE: cls.write_data_type(struct_field.type()),
+            cls.STRUCT_FIELD_NULLABLE: struct_field.nullable(),
+        }
+        comment = struct_field.comment()
+        if comment is not None:
+            struct_field_data[cls.STRUCT_FIELD_COMMENT] = comment
+        return struct_field_data
+
+    @classmethod
+    def write_list_type(cls, list_type: Types.ListType) -> Dict[str, Any]:
+        """Write a list type with its element type and element nullability.
+
+        Args:
+            list_type (Types.ListType): The list type.
+
+        Returns:
+            Dict[str, Any]: The serialized list type.
+        """
+
+        return {
+            cls.TYPE: cls.LIST,
+            cls.LIST_ELEMENT_TYPE: cls.write_data_type(list_type.element_type()),
+            cls.LIST_ELEMENT_NULLABLE: list_type.element_nullable(),
+        }
+
+    @classmethod
+    def write_map_type(cls, map_type: Types.MapType) -> Dict[str, Any]:
+        """Write a map type with its key type, value type and value nullability.
+
+        Args:
+            map_type (Types.MapType): The map type.
+
+        Returns:
+            Dict[str, Any]: The serialized map type.
+        """
+
+        return {
+            cls.TYPE: cls.MAP,
+            cls.MAP_VALUE_NULLABLE: map_type.is_value_nullable(),
+            cls.MAP_KEY_TYPE: cls.write_data_type(data_type=map_type.key_type()),
+            cls.MAP_VALUE_TYPE: cls.write_data_type(data_type=map_type.value_type()),
+        }
+
+    @classmethod
+    def write_union_type(cls, union_type: Types.UnionType) -> Dict[str, Any]:
+        """Write a union type as the list of its serialized member types.
+
+        Args:
+            union_type (Types.UnionType): The union type.
+
+        Returns:
+            Dict[str, Any]: The serialized union type.
+        """
+
+        return {
+            cls.TYPE: cls.UNION,
+            cls.UNION_TYPES: [
+                cls.write_data_type(data_type=type_) for type_ in union_type.types()
+            ],
+        }
+
+    @classmethod
+    def write_external_type(cls, external_type: Types.ExternalType) -> Dict[str, str]:
+        """Write an external type with its catalog string.
+
+        Args:
+            external_type (Types.ExternalType): The external type.
+
+        Returns:
+            Dict[str, str]: The serialized external type.
+        """
+
+        return {
+            cls.TYPE: cls.EXTERNAL,
+            cls.CATALOG_STRING: external_type.catalog_string(),
+        }
+
+    @classmethod
+    @overload
+    def write_unparsed_type(cls, unparsed_type: str) -> Dict[str, str]: ...
+
+    @classmethod
+    @overload
+    def write_unparsed_type(cls, unparsed_type: Type) -> Dict[str, str]: ...
+
+    @classmethod
+    def write_unparsed_type(cls, unparsed_type) -> Dict[str, str]:
+        """Write an unparsed type.
+
+        Args:
+            unparsed_type: Either a ``Types.UnparsedType``, whose
+                ``unparsed_type()`` value is written, or any other value,
+                which is written as given.
+
+        Returns:
+            Dict[str, str]: The serialized unparsed type.
+        """
+
+        return {
+            cls.TYPE: cls.UNPARSED,
+            cls.UNPARSED_TYPE: (
+                unparsed_type.unparsed_type()
+                if isinstance(unparsed_type, Types.UnparsedType)
+                else unparsed_type
+            ),
+        }
diff --git a/clients/client-python/gravitino/api/types/json_serdes/base.py
b/clients/client-python/gravitino/api/types/json_serdes/base.py
new file mode 100644
index 0000000000..27c838478b
--- /dev/null
+++ b/clients/client-python/gravitino/api/types/json_serdes/base.py
@@ -0,0 +1,55 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from abc import ABC, abstractmethod
+from typing import Generic, TypeVar
+
+from dataclasses_json.core import Json
+
+from gravitino.api.types.types import Type
+
+# Type variable for the object a serializer handles; bounded by ``Type``.
+GravitinoTypeT = TypeVar("GravitinoTypeT", bound=Type)
+
+
+class JsonSerializable(ABC, Generic[GravitinoTypeT]):
+    """Customized generic Serializer for DataClassJson.
+
+    Subclasses implement both ``serialize`` and ``deserialize`` as class
+    methods for one concrete ``GravitinoTypeT``.
+    """
+
+    @classmethod
+    @abstractmethod
+    def serialize(cls, data_type: GravitinoTypeT) -> Json:
+        """Serialize the given ``data_type``.
+
+        Args:
+            data_type (GravitinoTypeT): The object to be serialized.
+
+        Returns:
+            Json: The serialized data.
+        """
+
+    @classmethod
+    @abstractmethod
+    def deserialize(cls, data: Json) -> GravitinoTypeT:
+        """Deserialize the given ``data``.
+
+        Args:
+            data (Json): The data to be deserialized.
+
+        Returns:
+            GravitinoTypeT: The deserialized object.
+        """
diff --git
a/clients/client-python/gravitino/api/types/json_serdes/type_serdes.py
b/clients/client-python/gravitino/api/types/json_serdes/type_serdes.py
new file mode 100644
index 0000000000..ab86cea789
--- /dev/null
+++ b/clients/client-python/gravitino/api/types/json_serdes/type_serdes.py
@@ -0,0 +1,55 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+from dataclasses_json.core import Json
+
+from gravitino.api.types.json_serdes._helper.serdes_utils import SerdesUtils
+from gravitino.api.types.json_serdes.base import JsonSerializable
+from gravitino.api.types.type import Type
+
+
+class TypeSerdes(JsonSerializable[Type]):
+    """Custom JSON serializer for Gravitino Type objects."""
+
+    @classmethod
+    def serialize(cls, data_type: Type) -> Json:
+        """Serialize the given Gravitino Type.
+
+        Args:
+            data_type (Type): The Gravitino Type to be serialized.
+
+        Returns:
+            Json: The serialized data corresponding to the given Gravitino Type.
+        """
+
+        return SerdesUtils.write_data_type(data_type)
+
+    @classmethod
+    def deserialize(cls, data: Json) -> Type:
+        """Deserialize the given data to a Gravitino Type.
+
+        Args:
+            data (Json): The data to be deserialized.
+
+        Returns:
+            Type: The deserialized Gravitino Type.
+
+        Raises:
+            NotImplementedError: Always, until deserialization is implemented.
+        """
+
+        # TODO: We shall implement the deserialization logic here.
+        # Fail loudly instead of silently returning None, which would violate
+        # the declared return type and only surface later at the caller.
+        raise NotImplementedError(
+            "Deserialization of Gravitino Type is not implemented yet."
+        )
diff --git
a/clients/client-python/tests/unittests/json_serdes/test_type_serdes.py
b/clients/client-python/tests/unittests/json_serdes/test_type_serdes.py
new file mode 100644
index 0000000000..8a77a889bb
--- /dev/null
+++ b/clients/client-python/tests/unittests/json_serdes/test_type_serdes.py
@@ -0,0 +1,150 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import unittest
+from itertools import combinations, product
+
+from gravitino.api.types.json_serdes import TypeSerdes
+from gravitino.api.types.json_serdes._helper.serdes_utils import SerdesUtils
+from gravitino.api.types.type import PrimitiveType
+from gravitino.api.types.types import Types
+
+
+class MockType(PrimitiveType):
+    """Test-only primitive type whose name is supplied by the caller."""
+
+    def __init__(self, name: str) -> None:
+        self._name = name
+
+    def name(self) -> str:
+        """Return the name given at construction time."""
+        return self._name
+
+    def simple_string(self) -> str:
+        """Return the fixed simple string used by the tests."""
+        return "mock_type"
+
+
+class TestTypeSerdes(unittest.TestCase):
+    """Unit tests for ``TypeSerdes.serialize``."""
+
+    def setUp(self):
+        # Maps each expected simple string to the type instance that should
+        # serialize to it: every entry of ``SerdesUtils.TYPES`` plus one
+        # sample of each parameterized type.
+        self._primitive_and_none_types = {
+            **SerdesUtils.TYPES,
+            "decimal(10,2)": Types.DecimalType.of(10, 2),
+            "fixed(10)": Types.FixedType.of(10),
+            "char(10)": Types.FixedCharType.of(10),
+            "varchar(10)": Types.VarCharType.of(10),
+        }
+
+    def test_serialize_primitive_and_none_type(self):
+        for simple_string, type_ in self._primitive_and_none_types.items():
+            with self.subTest(simple_string=simple_string):
+                self.assertEqual(TypeSerdes.serialize(data_type=type_), simple_string)
+
+    def test_serialize_struct_type_of_primitive_and_none_types(self):
+        # Every other field carries a comment so that both the "comment
+        # present" and "comment absent" paths are exercised.
+        fields = [
+            Types.StructType.Field.not_null_field(
+                name=f"field_{field_idx}",
+                field_type=type_,
+                comment=f"comment {field_idx}" if field_idx % 2 == 0 else None,
+            )
+            for field_idx, type_ in enumerate(
+                self._primitive_and_none_types.values()
+            )
+        ]
+
+        struct_type = Types.StructType.of(*fields)
+        serialized_result = TypeSerdes.serialize(struct_type)
+        serialized_fields = serialized_result.get(SerdesUtils.FIELDS)
+
+        self.assertEqual(serialized_result.get(SerdesUtils.TYPE), SerdesUtils.STRUCT)
+        # zip() stops at the shorter input, so check the lengths explicitly;
+        # otherwise missing serialized fields would go unnoticed.
+        self.assertEqual(len(serialized_fields), len(fields))
+        for field, serialized_field in zip(fields, serialized_fields):
+            with self.subTest(field=field.name()):
+                self.assertEqual(
+                    serialized_field.get(SerdesUtils.STRUCT_FIELD_NAME), field.name()
+                )
+                self.assertEqual(
+                    serialized_field.get(SerdesUtils.TYPE),
+                    SerdesUtils.write_data_type(field.type()),
+                )
+                self.assertEqual(
+                    serialized_field.get(SerdesUtils.STRUCT_FIELD_NULLABLE),
+                    field.nullable(),
+                )
+                self.assertEqual(
+                    serialized_field.get(SerdesUtils.STRUCT_FIELD_COMMENT),
+                    field.comment(),
+                )
+
+    def test_serialize_list_type_of_primitive_and_none_types(self):
+        for simple_string, type_ in self._primitive_and_none_types.items():
+            with self.subTest(element_type=simple_string):
+                list_type = Types.ListType.of(
+                    element_type=type_, element_nullable=False
+                )
+                serialized_result = TypeSerdes.serialize(list_type)
+                self.assertEqual(
+                    serialized_result.get(SerdesUtils.TYPE), SerdesUtils.LIST
+                )
+                self.assertEqual(
+                    serialized_result.get(SerdesUtils.LIST_ELEMENT_TYPE),
+                    simple_string,
+                )
+                self.assertEqual(
+                    serialized_result.get(SerdesUtils.LIST_ELEMENT_NULLABLE), False
+                )
+
+    def test_serialize_map_type_of_primitive_and_none_types(self):
+        types = self._primitive_and_none_types.values()
+        for key_type, value_type in product(types, types):
+            with self.subTest(
+                key_type=key_type.simple_string(),
+                value_type=value_type.simple_string(),
+            ):
+                map_type = Types.MapType.of(
+                    key_type=key_type, value_type=value_type, value_nullable=False
+                )
+                serialized_result = TypeSerdes.serialize(map_type)
+                self.assertEqual(
+                    serialized_result.get(SerdesUtils.TYPE), SerdesUtils.MAP
+                )
+                self.assertEqual(
+                    serialized_result.get(SerdesUtils.MAP_KEY_TYPE),
+                    key_type.simple_string(),
+                )
+                self.assertEqual(
+                    serialized_result.get(SerdesUtils.MAP_VALUE_TYPE),
+                    value_type.simple_string(),
+                )
+                self.assertEqual(
+                    serialized_result.get(SerdesUtils.MAP_VALUE_NULLABLE), False
+                )
+
+    def test_serialize_union_type_of_primitive_and_none_types(self):
+        types = self._primitive_and_none_types.values()
+        # Use a distinct loop variable so the iterable being combined is not
+        # rebound while it is iterated.
+        for type_pair in combinations(types, 2):
+            expected = [type_.simple_string() for type_ in type_pair]
+            with self.subTest(types=expected):
+                union_type = Types.UnionType.of(*type_pair)
+                serialized_result = TypeSerdes.serialize(union_type)
+                self.assertEqual(
+                    serialized_result.get(SerdesUtils.TYPE), SerdesUtils.UNION
+                )
+                self.assertListEqual(
+                    serialized_result.get(SerdesUtils.UNION_TYPES), expected
+                )
+
+    def test_serialize_external_type(self):
+        external_type = Types.ExternalType.of(catalog_string="catalog_string")
+        serialized_result = TypeSerdes.serialize(external_type)
+        self.assertEqual(serialized_result.get(SerdesUtils.TYPE), SerdesUtils.EXTERNAL)
+        self.assertEqual(
+            serialized_result.get(SerdesUtils.CATALOG_STRING), "catalog_string"
+        )
+
+    def test_write_unparsed_type(self):
+        unparsed_type = Types.UnparsedType.of(unparsed_type="unparsed_type")
+        serialized_result = TypeSerdes.serialize(unparsed_type)
+        self.assertEqual(serialized_result.get(SerdesUtils.TYPE), SerdesUtils.UNPARSED)
+        self.assertEqual(
+            serialized_result.get(SerdesUtils.UNPARSED_TYPE), "unparsed_type"
+        )
+
+        # A type whose name matches no known ``Name`` member is expected to
+        # be written as an unparsed type carrying its simple string.
+        mock_type = MockType(name="mock")
+        result = TypeSerdes.serialize(mock_type)
+        self.assertEqual(result.get(SerdesUtils.TYPE), SerdesUtils.UNPARSED)
+        self.assertEqual(
+            result.get(SerdesUtils.UNPARSED_TYPE), mock_type.simple_string()
+        )