This is an automated email from the ASF dual-hosted git repository.
jshao pushed a commit to branch branch-1.0
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/branch-1.0 by this push:
new b9ec19d9e8 [#5199] feat(client-python): add index serdes (#8464)
b9ec19d9e8 is described below
commit b9ec19d9e8d3e2fe574f3db05f0920f3f13dfc73
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Sep 8 18:23:45 2025 +0800
[#5199] feat(client-python): add index serdes (#8464)
### What changes were proposed in this pull request?
This PR is aimed at implementing the following classes corresponding to
the Java client.
JsonUtils.java
- IndexSerializer
- IndexDeserializer
### Why are the changes needed?
We need to support table partitioning, bucketing, sort ordering, and
indexes.
#5199
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Unit tests
Signed-off-by: George T. C. Lai <[email protected]>
Co-authored-by: George T. C. Lai <[email protected]>
---
.../gravitino/dto/rel/indexes/index_dto.py | 2 +-
.../dto/rel/indexes/json_serdes/__init__.py | 16 +++
.../dto/rel/indexes/json_serdes/index_serdes.py | 55 ++++++++++
.../tests/unittests/dto/rel/test_index_serdes.py | 118 +++++++++++++++++++++
4 files changed, 190 insertions(+), 1 deletion(-)
diff --git a/clients/client-python/gravitino/dto/rel/indexes/index_dto.py b/clients/client-python/gravitino/dto/rel/indexes/index_dto.py
index d78843ab22..539c660bf5 100644
--- a/clients/client-python/gravitino/dto/rel/indexes/index_dto.py
+++ b/clients/client-python/gravitino/dto/rel/indexes/index_dto.py
@@ -61,7 +61,7 @@ class IndexDTO(Index):
return False
return (
self._index_type is other.type()
- and self._name is other.name()
+ and self._name == other.name()
and self._field_names == other.field_names()
)
diff --git a/clients/client-python/gravitino/dto/rel/indexes/json_serdes/__init__.py b/clients/client-python/gravitino/dto/rel/indexes/json_serdes/__init__.py
new file mode 100644
index 0000000000..13a83393a9
--- /dev/null
+++ b/clients/client-python/gravitino/dto/rel/indexes/json_serdes/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/clients/client-python/gravitino/dto/rel/indexes/json_serdes/index_serdes.py b/clients/client-python/gravitino/dto/rel/indexes/json_serdes/index_serdes.py
new file mode 100644
index 0000000000..a1eab07e15
--- /dev/null
+++ b/clients/client-python/gravitino/dto/rel/indexes/json_serdes/index_serdes.py
@@ -0,0 +1,55 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import Any
+
+from gravitino.api.expressions.indexes.index import Index
+from gravitino.api.types.json_serdes import JsonSerializable
+from gravitino.dto.rel.indexes.index_dto import IndexDTO
+from gravitino.utils.precondition import Precondition
+from gravitino.utils.serdes import SerdesUtilsBase
+
+
+class IndexSerdes(SerdesUtilsBase, JsonSerializable[Index]):
+ @classmethod
+ def serialize(cls, data_type: Index) -> dict[str, Any]:
+ result: dict[str, Any] = {cls.INDEX_TYPE: data_type.type().name.upper()}
+ if data_type.name() is not None:
+ result[cls.INDEX_NAME] = data_type.name()
+ result[cls.INDEX_FIELD_NAMES] = data_type.field_names()
+
+ return result
+
+ @classmethod
+ def deserialize(cls, data: dict[str, Any]) -> Index:
+ Precondition.check_argument(
+ isinstance(data, dict) and len(data) > 0,
+ f"Index must be a valid JSON object, but found: {data}",
+ )
+ Precondition.check_argument(
+ data.get(cls.INDEX_TYPE) is not None,
+ f"Cannot parse index from missing type: {data}",
+ )
+ Precondition.check_argument(
+ data.get(cls.INDEX_FIELD_NAMES) is not None,
+ f"Cannot parse index from missing field names: {data}",
+ )
+ index_type = Index.IndexType(data[cls.INDEX_TYPE].upper())
+
+ return IndexDTO(
+ index_type, data.get(cls.INDEX_NAME), data[cls.INDEX_FIELD_NAMES]
+ )
diff --git a/clients/client-python/tests/unittests/dto/rel/test_index_serdes.py b/clients/client-python/tests/unittests/dto/rel/test_index_serdes.py
new file mode 100644
index 0000000000..5373193209
--- /dev/null
+++ b/clients/client-python/tests/unittests/dto/rel/test_index_serdes.py
@@ -0,0 +1,118 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import json
+import unittest
+from dataclasses import dataclass, field
+
+from dataclasses_json import DataClassJsonMixin, config
+
+from gravitino.api.expressions.indexes.index import Index
+from gravitino.dto.rel.indexes.index_dto import IndexDTO
+from gravitino.dto.rel.indexes.json_serdes.index_serdes import IndexSerdes
+from gravitino.exceptions.base import IllegalArgumentException
+
+
+@dataclass
+class MockDataClass(DataClassJsonMixin):
+ indexes: list[Index] = field(
+ metadata=config(
+ encoder=lambda idxs: [IndexSerdes.serialize(index) for index in idxs],
+ decoder=lambda idxs: [IndexSerdes.deserialize(index) for index in idxs],
+ )
+ )
+
+
+class TestIndexSerdes(unittest.TestCase):
+ def test_index_serdes_invalid_json(self):
+ invalid_json_string = [
+ '{"indexes": [{}]}',
+ '{"indexes": [""]}',
+ '{"indexes": [null]}',
+ ]
+
+ for json_string in invalid_json_string:
+ with self.assertRaisesRegex(
+ IllegalArgumentException, "Index must be a valid JSON object"
+ ):
+ MockDataClass.from_json(json_string)
+
+ def test_index_serdes_missing_type(self):
+ json_string = """
+ {
+ "indexes": [
+ {
+ "fieldNames": [["id"]]
+ }
+ ]
+ }
+ """
+
+ with self.assertRaisesRegex(
+ IllegalArgumentException, "Cannot parse index from missing type"
+ ):
+ MockDataClass.from_json(json_string)
+
+ def test_index_serdes_missing_field_names(self):
+ json_string = """
+ {
+ "indexes": [
+ {
+ "indexType": "PRIMARY_KEY"
+ }
+ ]
+ }
+ """
+
+ with self.assertRaisesRegex(
+ IllegalArgumentException, "Cannot parse index from missing field names"
+ ):
+ MockDataClass.from_json(json_string)
+
+ def test_index_serdes(self):
+ json_string = """
+ {
+ "indexes": [
+ {
+ "indexType": "PRIMARY_KEY",
+ "name": "PRIMARY",
+ "fieldNames": [["id"]]
+ },
+ {
+ "indexType": "UNIQUE_KEY",
+ "fieldNames": [["name"], ["createTime"]]
+ }
+ ]
+ }
+ """
+
+ mock_data_class = MockDataClass.from_json(json_string)
+ indexes = mock_data_class.indexes
+ self.assertEqual(len(indexes), 2)
+ for index in indexes:
+ self.assertTrue(isinstance(index, IndexDTO))
+ self.assertTrue(
+ indexes[0] == IndexDTO(Index.IndexType.PRIMARY_KEY, "PRIMARY", [["id"]])
+ )
+ self.assertTrue(
+ indexes[1]
+ == IndexDTO(Index.IndexType.UNIQUE_KEY, None, [["name"], ["createTime"]])
+ )
+
+ json_dict = json.loads(json_string)
+ serialized_dict = json.loads(mock_data_class.to_json())
+ self.assertDictEqual(json_dict, serialized_dict)