This is an automated email from the ASF dual-hosted git repository.
jerryshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 1e25943a4e [#8620] feat(client-python): add metadata object statistics
operations (#11021)
1e25943a4e is described below
commit 1e25943a4eddc5fa04cd9327c02dca98938620ee
Author: George T. C. Lai <[email protected]>
AuthorDate: Mon May 18 11:55:12 2026 +0800
[#8620] feat(client-python): add metadata object statistics operations
(#11021)
### What changes were proposed in this pull request?
This PR adds an implementation of MetadataObjectStatisticsOperations
and enables statistics support for relational tables. The following
classes/methods are implemented in the Python client.
- StatisticDTO
- StatisticsDropRequest
- StatisticsUpdateRequest
- StatisticListResponse
- ErrorHandlers.StatisticsErrorHandler
- MetadataObjectStatisticsOperations
- Implement methods list_statistics, update_statistics, and drop_statistics
in class RelationalTable
- Implement method supports_statistics in class Table
- Implement method supports_statistics in class RelationalTable
### Why are the changes needed?
Implement MetadataObjectStatisticsOperations and enable support for
statistics in relational tables.
#8620
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Unit tests, plus an integration test in tests/integration/test_relational_table.py
---------
Signed-off-by: George T. C. Lai <[email protected]>
---
clients/client-python/gravitino/api/rel/table.py | 6 ++
.../metadata_object_statistics_operations.py | 102 +++++++++++++++++++
.../gravitino/client/relational_table.py | 24 ++++-
.../dto/requests/statistics_drop_request.py | 46 +++++++++
.../dto/requests/statistics_update_request.py | 64 ++++++++++++
.../dto/responses/statistic_list_response.py | 45 ++++++++
.../client-python/gravitino/dto/stats/__init__.py | 16 +++
.../gravitino/dto/stats/statistic_dto.py | 99 ++++++++++++++++++
.../handlers/statistics_error_handler.py | 76 ++++++++++++++
.../tests/integration/test_relational_table.py | 38 +++++++
.../test_metadata_object_statistics_operations.py | 113 +++++++++++++++++++++
.../dto/requests/test_statistics_drop_request.py | 60 +++++++++++
.../dto/requests/test_statistics_update_request.py | 98 ++++++++++++++++++
.../dto/responses/test_statistic_list_response.py | 76 ++++++++++++++
.../tests/unittests/dto/stats/__init__.py | 16 +++
.../unittests/dto/stats/test_statistic_dto.py | 70 +++++++++++++
.../tests/unittests/test_relational_table.py | 22 ++++
17 files changed, 970 insertions(+), 1 deletion(-)
diff --git a/clients/client-python/gravitino/api/rel/table.py
b/clients/client-python/gravitino/api/rel/table.py
index 141291d36a..a32f244b20 100644
--- a/clients/client-python/gravitino/api/rel/table.py
+++ b/clients/client-python/gravitino/api/rel/table.py
@@ -27,6 +27,7 @@ from gravitino.api.rel.expressions.transforms.transform
import Transform
from gravitino.api.rel.expressions.transforms.transforms import Transforms
from gravitino.api.rel.indexes.index import Index
from gravitino.api.rel.indexes.indexes import Indexes
+from gravitino.api.stats.supports_statistics import SupportsStatistics
from gravitino.api.tag.supports_tags import SupportsTags
from gravitino.exceptions.base import UnsupportedOperationException
@@ -115,3 +116,8 @@ class Table(Auditable):
def supports_tags(self) -> SupportsTags:
raise UnsupportedOperationException("Table does not support tag
operations.")
+
+ def supports_statistics(self) -> SupportsStatistics:
+ raise UnsupportedOperationException(
+ "Table does not support statistics operations."
+ )
diff --git
a/clients/client-python/gravitino/client/metadata_object_statistics_operations.py
b/clients/client-python/gravitino/client/metadata_object_statistics_operations.py
new file mode 100644
index 0000000000..be94869855
--- /dev/null
+++
b/clients/client-python/gravitino/client/metadata_object_statistics_operations.py
@@ -0,0 +1,102 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import logging
+from typing import Any
+
+from gravitino.api.metadata_object import MetadataObject
+from gravitino.api.stats.statistic import Statistic
+from gravitino.api.stats.statistic_value import StatisticValue
+from gravitino.api.stats.supports_statistics import SupportsStatistics
+from gravitino.dto.requests.statistics_drop_request import
StatisticsDropRequest
+from gravitino.dto.requests.statistics_update_request import
StatisticsUpdateRequest
+from gravitino.dto.responses.base_response import BaseResponse
+from gravitino.dto.responses.drop_response import DropResponse
+from gravitino.dto.responses.statistic_list_response import
StatisticListResponse
+from gravitino.exceptions.handlers.statistics_error_handler import (
+ STATISTICS_ERROR_HANDLER,
+)
+from gravitino.rest.rest_utils import encode_string
+from gravitino.utils import HTTPClient
+from gravitino.utils.precondition import Precondition
+
+logger = logging.getLogger(__name__)
+
+
+class MetadataObjectStatisticsOperations(SupportsStatistics):
+ """The implementation of SupportsStatistics.
+
+ This interface will be composited into table to provide statistics
operations
+ for metadata objects.
+ """
+
+ def __init__(
+ self,
+ metalake_name: str,
+ metadata_object: MetadataObject,
+ rest_client: HTTPClient,
+ ):
+ metadata_object_type = metadata_object.type().value
+ metadata_object_fullname = metadata_object.full_name()
+ self._rest_client = rest_client
+ self.request_path = (
+ f"api/metalakes/{encode_string(metalake_name)}/"
+
f"objects/{metadata_object_type}/{encode_string(metadata_object_fullname)}/"
+ "statistics"
+ )
+
+ def list_statistics(self) -> list[Statistic]:
+ resp = self._rest_client.get(
+ endpoint=self.request_path,
+ error_handler=STATISTICS_ERROR_HANDLER,
+ )
+
+ statistic_resp = StatisticListResponse.from_json(resp.body,
infer_missing=True)
+ statistic_resp.validate()
+
+ return statistic_resp.statistics
+
+ def update_statistics(self, statistics: dict[str, StatisticValue[Any]]) ->
None:
+ Precondition.check_argument(
+ statistics is not None and len(statistics) > 0,
+ "Statistics map must not be null or empty",
+ )
+ req = StatisticsUpdateRequest(_updates=statistics)
+ req.validate()
+ resp = self._rest_client.put(
+ endpoint=self.request_path,
+ json=req,
+ error_handler=STATISTICS_ERROR_HANDLER,
+ )
+ BaseResponse.from_json(resp.body, infer_missing=True).validate()
+
+ def drop_statistics(self, statistics: list[str]) -> bool:
+ Precondition.check_argument(
+ statistics is not None and len(statistics) > 0,
+ "Statistics list must not be null or empty",
+ )
+ req = StatisticsDropRequest(_names=statistics)
+ req.validate()
+ resp = self._rest_client.post(
+ endpoint=self.request_path,
+ json=req,
+ error_handler=STATISTICS_ERROR_HANDLER,
+ )
+ drop_response = DropResponse.from_json(resp.body, infer_missing=True)
+ drop_response.validate()
+
+ return drop_response.dropped()
diff --git a/clients/client-python/gravitino/client/relational_table.py
b/clients/client-python/gravitino/client/relational_table.py
index 5295b1fb84..b35e868969 100644
--- a/clients/client-python/gravitino/client/relational_table.py
+++ b/clients/client-python/gravitino/client/relational_table.py
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-from typing import Optional, cast
+from typing import Any, Optional, cast
from gravitino.api.audit import Audit
from gravitino.api.metadata_object import MetadataObject
@@ -27,9 +27,15 @@ from gravitino.api.rel.expressions.transforms.transform
import Transform
from gravitino.api.rel.indexes.index import Index
from gravitino.api.rel.partitions.partition import Partition
from gravitino.api.rel.table import Table
+from gravitino.api.stats.statistic import Statistic
+from gravitino.api.stats.statistic_value import StatisticValue
+from gravitino.api.stats.supports_statistics import SupportsStatistics
from gravitino.api.tag.supports_tags import SupportsTags
from gravitino.api.tag.tag import Tag
from gravitino.client.generic_column import GenericColumn
+from gravitino.client.metadata_object_statistics_operations import (
+ MetadataObjectStatisticsOperations,
+)
from gravitino.client.metadata_object_tag_operations import
MetadataObjectTagOperations
from gravitino.dto.rel.partitions.partition_dto import PartitionDTO
from gravitino.dto.rel.table_dto import TableDTO
@@ -51,6 +57,7 @@ from gravitino.utils import HTTPClient
class RelationalTable(
Table,
+ SupportsStatistics,
SupportsTags,
):
"""Represents a relational table."""
@@ -68,6 +75,9 @@ class RelationalTable(
self._object_tag_operations = MetadataObjectTagOperations(
namespace.level(0), table_object, rest_client
)
+ self._object_statistics_operations =
MetadataObjectStatisticsOperations(
+ namespace.level(0), table_object, rest_client
+ )
@staticmethod
def table_full_name(table_ns: Namespace, table_name: str) -> str:
@@ -249,3 +259,15 @@ class RelationalTable(
def supports_tags(self) -> SupportsTags:
return self
+
+ def list_statistics(self) -> list[Statistic]:
+ return self._object_statistics_operations.list_statistics()
+
+ def update_statistics(self, statistics: dict[str, StatisticValue[Any]]) ->
None:
+ self._object_statistics_operations.update_statistics(statistics)
+
+ def drop_statistics(self, statistics: list[str]) -> bool:
+ return self._object_statistics_operations.drop_statistics(statistics)
+
+ def supports_statistics(self) -> SupportsStatistics:
+ return self
diff --git
a/clients/client-python/gravitino/dto/requests/statistics_drop_request.py
b/clients/client-python/gravitino/dto/requests/statistics_drop_request.py
new file mode 100644
index 0000000000..b4a7284736
--- /dev/null
+++ b/clients/client-python/gravitino/dto/requests/statistics_drop_request.py
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+
+from dataclasses_json import config
+
+from gravitino.rest.rest_message import RESTRequest
+from gravitino.utils.precondition import Precondition
+
+
+@dataclass
+class StatisticsDropRequest(RESTRequest):
+ """Represents a request to drop statistics for specified names."""
+
+ _names: list[str] = field(metadata=config(field_name="names"))
+
+ @property
+ def names(self) -> list[str]:
+ return self._names
+
+ def validate(self) -> None:
+ Precondition.check_argument(
+ self._names is not None and len(self._names) > 0,
+ '"names" must not be null or empty',
+ )
+ for name in self._names:
+ Precondition.check_string_not_empty(
+ name, "Each name must be a non-empty string"
+ )
diff --git
a/clients/client-python/gravitino/dto/requests/statistics_update_request.py
b/clients/client-python/gravitino/dto/requests/statistics_update_request.py
new file mode 100644
index 0000000000..af593ab902
--- /dev/null
+++ b/clients/client-python/gravitino/dto/requests/statistics_update_request.py
@@ -0,0 +1,64 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any
+
+from dataclasses_json import config
+
+from gravitino.api.stats.json_serdes.statistic_value_serdes import
StatisticValueSerdes
+from gravitino.api.stats.statistic_value import StatisticValue
+from gravitino.rest.rest_message import RESTRequest
+from gravitino.utils.precondition import Precondition
+
+
+@dataclass
+class StatisticsUpdateRequest(RESTRequest):
+ """Represents a request to update statistics."""
+
+ _updates: dict[str, StatisticValue[Any]] = field(
+ metadata=config(
+ field_name="updates",
+ encoder=lambda mapping: {
+ key: StatisticValueSerdes.serialize(value)
+ for key, value in mapping.items()
+ },
+ decoder=lambda mapping: {
+ key: StatisticValueSerdes.deserialize(value)
+ for key, value in mapping.items()
+ },
+ )
+ )
+
+ @property
+ def updates(self) -> dict[str, StatisticValue[Any]]:
+ return self._updates
+
+ def validate(self) -> None:
+ Precondition.check_argument(
+ self._updates is not None and len(self._updates) > 0,
+ '"updates" must not be null or empty',
+ )
+ for name, value in self._updates.items():
+ Precondition.check_string_not_empty(
+ name, 'statistic "name" must not be null or empty'
+ )
+ Precondition.check_argument(
+ value is not None, f"statistic \"value\" for '{name}' must not
be null"
+ )
diff --git
a/clients/client-python/gravitino/dto/responses/statistic_list_response.py
b/clients/client-python/gravitino/dto/responses/statistic_list_response.py
new file mode 100644
index 0000000000..efe8e6afdf
--- /dev/null
+++ b/clients/client-python/gravitino/dto/responses/statistic_list_response.py
@@ -0,0 +1,45 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from dataclasses import dataclass, field
+
+from dataclasses_json import config
+
+from gravitino.dto.responses.base_response import BaseResponse
+from gravitino.dto.stats.statistic_dto import StatisticDTO
+from gravitino.utils.precondition import Precondition
+
+
+@dataclass
+class StatisticListResponse(BaseResponse):
+ """Represents a response containing a list of statistics."""
+
+ _statistics: list[StatisticDTO] =
field(metadata=config(field_name="statistics"))
+
+ @property
+ def statistics(self) -> list[StatisticDTO]:
+ return self._statistics
+
+ def validate(self) -> None:
+ Precondition.check_argument(
+ self._statistics is not None, '"statistics" must not be null'
+ )
+ for statistic in self._statistics:
+ Precondition.check_argument(
+ statistic is not None, '"statistic" must not be null'
+ )
+ statistic.validate()
diff --git a/clients/client-python/gravitino/dto/stats/__init__.py
b/clients/client-python/gravitino/dto/stats/__init__.py
new file mode 100644
index 0000000000..13a83393a9
--- /dev/null
+++ b/clients/client-python/gravitino/dto/stats/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/clients/client-python/gravitino/dto/stats/statistic_dto.py
b/clients/client-python/gravitino/dto/stats/statistic_dto.py
new file mode 100644
index 0000000000..7891e18de3
--- /dev/null
+++ b/clients/client-python/gravitino/dto/stats/statistic_dto.py
@@ -0,0 +1,99 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+from dataclasses import dataclass, field
+from typing import Any
+
+from dataclasses_json import DataClassJsonMixin, config
+
+from gravitino.api.audit import Audit
+from gravitino.api.stats.json_serdes.statistic_value_serdes import
StatisticValueSerdes
+from gravitino.api.stats.statistic import Statistic
+from gravitino.api.stats.statistic_value import StatisticValue
+from gravitino.dto.audit_dto import AuditDTO
+from gravitino.utils.precondition import Precondition
+
+
+@dataclass
+class StatisticDTO(Statistic, DataClassJsonMixin):
+ """Data transfer object representing a statistic."""
+
+ _name: str = field(metadata=config(field_name="name"))
+ _reserved: bool = field(metadata=config(field_name="reserved"))
+ _modifiable: bool = field(metadata=config(field_name="modifiable"))
+ _audit: AuditDTO = field(metadata=config(field_name="audit"))
+ _value: StatisticValue[Any] | None = field(
+ default=None,
+ metadata=config(
+ field_name="value",
+ encoder=StatisticValueSerdes.serialize,
+ decoder=StatisticValueSerdes.deserialize,
+ exclude=lambda v: v is None,
+ ),
+ )
+
+ def name(self) -> str:
+ return self._name
+
+ def reserved(self) -> bool:
+ return self._reserved
+
+ def modifiable(self) -> bool:
+ return self._modifiable
+
+ def value(self) -> StatisticValue[Any] | None:
+ return self._value
+
+ def audit_info(self) -> Audit:
+ return self._audit
+
+ def validate(self) -> None:
+ """Validates the StatisticDTO.
+
+ Raises:
+ IllegalArgumentException: If any of the required fields are
invalid.
+ """
+ Precondition.check_string_not_empty(
+ self._name, '"name" is required and cannot be empty'
+ )
+ Precondition.check_argument(
+ self._audit is not None,
+ '"audit" information is required and cannot be null',
+ )
+
+ def __hash__(self):
+ return hash(
+ (
+ self._name,
+ self._reserved,
+ self._modifiable,
+ hash(self._audit),
+ hash(self._value),
+ )
+ )
+
+ def __eq__(self, other) -> bool:
+ if not isinstance(other, StatisticDTO):
+ return False
+ return (
+ self._name == other._name
+ and self._reserved == other._reserved
+ and self._modifiable == other._modifiable
+ and self._audit == other._audit
+ and self._value == other._value
+ )
diff --git
a/clients/client-python/gravitino/exceptions/handlers/statistics_error_handler.py
b/clients/client-python/gravitino/exceptions/handlers/statistics_error_handler.py
new file mode 100644
index 0000000000..c2c8a51ab3
--- /dev/null
+++
b/clients/client-python/gravitino/exceptions/handlers/statistics_error_handler.py
@@ -0,0 +1,76 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from gravitino.constants.error import ErrorConstants
+from gravitino.dto.responses.error_response import ErrorResponse
+from gravitino.exceptions.base import (
+ CatalogNotInUseException,
+ ForbiddenException,
+ IllegalArgumentException,
+ IllegalStatisticNameException,
+ MetalakeNotInUseException,
+ NoSuchMetadataObjectException,
+ NoSuchSchemaException,
+ NoSuchTableException,
+ NotFoundException,
+ NotInUseException,
+ UnmodifiableStatisticException,
+ UnsupportedOperationException,
+)
+from gravitino.exceptions.handlers.rest_error_handler import RestErrorHandler
+
+
+class StatisticsErrorHandler(RestErrorHandler):
+ def handle(
+ self, error_response: ErrorResponse
+ ): # pylint: disable=too-many-branches
+ error_message = error_response.format_error_message()
+ code = ErrorConstants(error_response.code())
+ exception_type = error_response.type()
+
+ match code:
+ case ErrorConstants.ILLEGAL_ARGUMENTS_CODE:
+ if exception_type == IllegalStatisticNameException.__name__:
+ raise IllegalStatisticNameException(error_message)
+ raise IllegalArgumentException(error_message)
+ case ErrorConstants.NOT_FOUND_CODE:
+ if exception_type == NoSuchSchemaException.__name__:
+ raise NoSuchSchemaException(error_message)
+ if exception_type == NoSuchTableException.__name__:
+ raise NoSuchTableException(error_message)
+ if exception_type == NoSuchMetadataObjectException.__name__:
+ raise NoSuchMetadataObjectException(error_message)
+ raise NotFoundException(error_message)
+ case ErrorConstants.INTERNAL_ERROR_CODE:
+ raise RuntimeError(error_message)
+ case ErrorConstants.UNSUPPORTED_OPERATION_CODE:
+ if exception_type == UnmodifiableStatisticException.__name__:
+ raise UnmodifiableStatisticException(error_message)
+ raise UnsupportedOperationException(error_message)
+ case ErrorConstants.FORBIDDEN_CODE:
+ raise ForbiddenException(error_message)
+ case ErrorConstants.NOT_IN_USE_CODE:
+ if exception_type == CatalogNotInUseException.__name__:
+ raise CatalogNotInUseException(error_message)
+ if exception_type == MetalakeNotInUseException.__name__:
+ raise MetalakeNotInUseException(error_message)
+ raise NotInUseException(error_message)
+ case _:
+ super().handle(error_response)
+
+
+STATISTICS_ERROR_HANDLER = StatisticsErrorHandler()
diff --git a/clients/client-python/tests/integration/test_relational_table.py
b/clients/client-python/tests/integration/test_relational_table.py
index c696cdd374..7edfa138c5 100644
--- a/clients/client-python/tests/integration/test_relational_table.py
+++ b/clients/client-python/tests/integration/test_relational_table.py
@@ -28,6 +28,7 @@ from gravitino import (
from gravitino.api.rel.expressions.literals.literals import Literals
from gravitino.api.rel.partitions.partitions import Partitions
from gravitino.api.rel.types.types import Types
+from gravitino.api.stats.statistic_values import StatisticValues
from gravitino.dto.rel.column_dto import ColumnDTO
from gravitino.dto.rel.partitioning.identity_partitioning_dto import (
IdentityPartitioningDTO,
@@ -148,3 +149,40 @@ class TestRelationalTable(IntegrationTestEnv):
self.assertTrue(result)
partition_names = relational_table.list_partition_names()
self.assertEqual(len(partition_names), 0)
+
+ def test_relational_table_statistics_ops(self):
+ table_stats_ops = self.relational_catalog.load_table(
+ self.TABLE_IDENT
+ ).supports_statistics()
+ stats = {
+ "custom-bool": StatisticValues.boolean_value(True),
+ "custom-long": StatisticValues.long_value(100),
+ "custom-double": StatisticValues.double_value(99.99),
+ "custom-string": StatisticValues.string_value("test_stat"),
+ "custom-list": StatisticValues.list_value(
+ [StatisticValues.string_value("a")]
+ ),
+ "custom-object": StatisticValues.object_value(
+ {"key": StatisticValues.long_value(1)}
+ ),
+ }
+
+ table_stats_ops.update_statistics(stats)
+ listed_stats = table_stats_ops.list_statistics()
+ self.assertEqual(len(listed_stats), len(stats))
+ stat_map = {stat.name(): stat for stat in listed_stats}
+ for expected_key, expected_val in stats.items():
+ self.assertIn(expected_key, stat_map)
+ actual_stat = stat_map[expected_key]
+ self.assertEqual(actual_stat.value().data_type(),
expected_val.data_type())
+ self.assertEqual(actual_stat.value().value(), expected_val.value())
+
+ keys_to_drop = ["custom-bool", "custom-long"]
+ self.assertTrue(table_stats_ops.drop_statistics(keys_to_drop))
+ listed_stats_after_drop = table_stats_ops.list_statistics()
+ self.assertEqual(len(listed_stats_after_drop), len(stats) -
len(keys_to_drop))
+ for key in keys_to_drop:
+ self.assertNotIn(key, [s.name() for s in listed_stats_after_drop])
+ remaining_keys = [k for k in stats if k not in keys_to_drop]
+ self.assertTrue(table_stats_ops.drop_statistics(remaining_keys))
+ self.assertEqual(len(table_stats_ops.list_statistics()), 0)
diff --git
a/clients/client-python/tests/unittests/client/test_metadata_object_statistics_operations.py
b/clients/client-python/tests/unittests/client/test_metadata_object_statistics_operations.py
new file mode 100644
index 0000000000..f854a257a5
--- /dev/null
+++
b/clients/client-python/tests/unittests/client/test_metadata_object_statistics_operations.py
@@ -0,0 +1,113 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import unittest
+from unittest.mock import patch
+
+from gravitino.api.metadata_object import MetadataObject
+from gravitino.api.metadata_objects import MetadataObjects
+from gravitino.api.stats.statistic_values import StatisticValues
+from gravitino.client.metadata_object_statistics_operations import (
+ MetadataObjectStatisticsOperations,
+)
+from gravitino.dto.audit_dto import AuditDTO
+from gravitino.dto.requests.statistics_drop_request import
StatisticsDropRequest
+from gravitino.dto.requests.statistics_update_request import
StatisticsUpdateRequest
+from gravitino.dto.responses.base_response import BaseResponse
+from gravitino.dto.responses.drop_response import DropResponse
+from gravitino.dto.responses.statistic_list_response import
StatisticListResponse
+from gravitino.dto.stats.statistic_dto import StatisticDTO
+from gravitino.exceptions.handlers.statistics_error_handler import (
+ STATISTICS_ERROR_HANDLER,
+)
+from gravitino.utils import HTTPClient
+from tests.unittests import mock_base
+
+
+class TestMetadataObjectStatisticsOperations(unittest.TestCase):
+ REST_CLIENT = HTTPClient("http://localhost:8090")
+ METALAKE_NAME = "demo_metalake"
+
+ def test_list_statistics(self) -> None:
+ stats_op = MetadataObjectStatisticsOperations(
+ self.METALAKE_NAME,
+ MetadataObjects.of(
+ ["catalog", "schema", "table"], MetadataObject.Type.TABLE
+ ),
+ self.REST_CLIENT,
+ )
+ statistic = StatisticDTO(
+ _name="rowCount",
+ _reserved=False,
+ _modifiable=True,
+ _audit=AuditDTO(_creator="test_creator"),
+ _value=StatisticValues.long_value(100),
+ )
+ resp_body = StatisticListResponse(0, [statistic]).to_json()
+ mock_resp = mock_base.mock_http_response(resp_body)
+ with patch(
+ "gravitino.utils.http_client.HTTPClient.get",
+ return_value=mock_resp,
+ ) as mock_get:
+ result = stats_op.list_statistics()
+ self.assertEqual([statistic], result)
+ mock_get.assert_called_once_with(
+ endpoint=stats_op.request_path,
+ error_handler=STATISTICS_ERROR_HANDLER,
+ )
+
+ def test_update_statistics(self) -> None:
+ stats_op = MetadataObjectStatisticsOperations(
+ self.METALAKE_NAME,
+ MetadataObjects.of(["catalog", "schema"],
MetadataObject.Type.SCHEMA),
+ self.REST_CLIENT,
+ )
+ stats_map = {"rowCount": StatisticValues.long_value(200)}
+ mock_resp = mock_base.mock_http_response(BaseResponse(0).to_json())
+ with patch(
+ "gravitino.utils.http_client.HTTPClient.put",
+ return_value=mock_resp,
+ ) as mock_put:
+ stats_op.update_statistics(stats_map)
+ req = StatisticsUpdateRequest(_updates=stats_map)
+ mock_put.assert_called_once_with(
+ endpoint=stats_op.request_path,
+ json=req,
+ error_handler=STATISTICS_ERROR_HANDLER,
+ )
+
+ def test_drop_statistics(self) -> None:
+ stats_op = MetadataObjectStatisticsOperations(
+ self.METALAKE_NAME,
+ MetadataObjects.of(["catalog"], MetadataObject.Type.CATALOG),
+ self.REST_CLIENT,
+ )
+ names = ["rowCount", "size"]
+ resp_body = DropResponse(_code=0, _dropped=True).to_json()
+ mock_resp = mock_base.mock_http_response(resp_body)
+ with patch(
+ "gravitino.utils.http_client.HTTPClient.post",
+ return_value=mock_resp,
+ ) as mock_post:
+ result = stats_op.drop_statistics(names)
+ self.assertTrue(result)
+ req = StatisticsDropRequest(_names=names)
+ mock_post.assert_called_once_with(
+ endpoint=stats_op.request_path,
+ json=req,
+ error_handler=STATISTICS_ERROR_HANDLER,
+ )
diff --git
a/clients/client-python/tests/unittests/dto/requests/test_statistics_drop_request.py
b/clients/client-python/tests/unittests/dto/requests/test_statistics_drop_request.py
new file mode 100644
index 0000000000..2c087c3089
--- /dev/null
+++
b/clients/client-python/tests/unittests/dto/requests/test_statistics_drop_request.py
@@ -0,0 +1,60 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import json
+import unittest
+
+from gravitino.dto.requests.statistics_drop_request import
StatisticsDropRequest
+from gravitino.exceptions.base import IllegalArgumentException
+
+
+class TestStatisticsDropRequest(unittest.TestCase):
+    """Unit tests for ``StatisticsDropRequest`` validation and JSON handling."""
+
+    def test_validate_success(self):
+        """Non-empty names are exposed through ``names`` and pass validation."""
+        names = ["custom-statistic1", "custom-statistic2"]
+        request = StatisticsDropRequest(_names=names)
+        self.assertListEqual(request.names, names)
+        request.validate()
+
+    def test_validate_failure_names(self):
+        """An empty list and ``None`` are both rejected by ``validate``."""
+        for invalid_names in ([], None):
+            # subTest names the failing input and still exercises the other one.
+            with self.subTest(names=invalid_names):
+                request = StatisticsDropRequest(_names=invalid_names)
+                with self.assertRaises(IllegalArgumentException) as context:
+                    request.validate()
+                self.assertIn(
+                    '"names" must not be null or empty', str(context.exception)
+                )
+
+    def test_validate_failure_empty_name_in_list(self):
+        """An empty string among otherwise valid names is rejected."""
+        request = StatisticsDropRequest(_names=["statistic1", "", "statistic2"])
+        with self.assertRaises(IllegalArgumentException) as context:
+            request.validate()
+        self.assertIn("Each name must be a non-empty string", str(context.exception))
+
+    def test_statistics_drop_request_serialize(self):
+        """``to_json`` output equals ``json.dumps`` of a ``names`` mapping."""
+        names = ["custom-statistic1", "custom-statistic2"]
+        request = StatisticsDropRequest(_names=names)
+        expected_json = json.dumps({"names": names})
+        self.assertEqual(expected_json, request.to_json())
+
+    def test_statistics_drop_request_deserialize(self):
+        """``from_json`` restores the list stored under the ``names`` key."""
+        names = ["custom-statistic1", "custom-statistic2"]
+        json_str = json.dumps({"names": names})
+        request = StatisticsDropRequest.from_json(json_str)
+        self.assertListEqual(names, request.names)
diff --git a/clients/client-python/tests/unittests/dto/requests/test_statistics_update_request.py b/clients/client-python/tests/unittests/dto/requests/test_statistics_update_request.py
new file mode 100644
index 0000000000..3260aa48fa
--- /dev/null
+++ b/clients/client-python/tests/unittests/dto/requests/test_statistics_update_request.py
@@ -0,0 +1,98 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import json
+import unittest
+
+from gravitino.api.stats.json_serdes.statistic_value_serdes import
StatisticValueSerdes
+from gravitino.api.stats.statistic_values import StatisticValues
+from gravitino.dto.requests.statistics_update_request import
StatisticsUpdateRequest
+from gravitino.exceptions.base import IllegalArgumentException
+
+
+class TestStatisticsUpdateRequest(unittest.TestCase):
+    """Unit tests for ``StatisticsUpdateRequest`` validation and JSON handling."""
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        """Build one update map covering long, string, list and object values."""
+        cls.test_updates = {
+            "custom-long-statistic": StatisticValues.long_value(100),
+            "custom-str-statistic": StatisticValues.string_value("value"),
+            "custom-list-statistic": StatisticValues.list_value(
+                [StatisticValues.double_value(1.5), StatisticValues.double_value(2.5)]
+            ),
+            "custom-object-statistic": StatisticValues.object_value(
+                {
+                    "key1": StatisticValues.boolean_value(True),
+                    "key2": StatisticValues.long_value(42),
+                }
+            ),
+        }
+
+    def test_validate_success(self):
+        """A populated update map is exposed via ``updates`` and validates."""
+        request = StatisticsUpdateRequest(_updates=self.test_updates)
+        self.assertDictEqual(request.updates, self.test_updates)
+        request.validate()
+
+    def test_validate_failure_empty_name(self):
+        """An empty statistic name is rejected by ``validate``."""
+        updates = {"": StatisticValues.long_value(100)}
+        request = StatisticsUpdateRequest(_updates=updates)
+        with self.assertRaisesRegex(
+            IllegalArgumentException, 'statistic "name" must not be null or empty'
+        ):
+            request.validate()
+
+    def test_validate_failure_null_value(self):
+        """A ``None`` value is rejected with a message naming the statistic."""
+        updates = {"custom-null-value": None}
+        request = StatisticsUpdateRequest(_updates=updates)
+        with self.assertRaisesRegex(
+            IllegalArgumentException,
+            "statistic \"value\" for 'custom-null-value' must not be null",
+        ):
+            request.validate()
+
+    def test_statistics_update_request_serialize(self):
+        """``to_json`` matches the values serialized by ``StatisticValueSerdes``."""
+        request = StatisticsUpdateRequest(_updates=self.test_updates)
+        expected_json = json.dumps(
+            {
+                "updates": {
+                    key: StatisticValueSerdes.serialize(value)
+                    for key, value in self.test_updates.items()
+                }
+            }
+        )
+        self.assertEqual(request.to_json(), expected_json)
+
+    def test_statistics_update_request_deserialize(self):
+        """``from_json`` rebuilds an update map equal to the fixture."""
+        json_str = json.dumps(
+            {
+                "updates": {
+                    key: StatisticValueSerdes.serialize(value)
+                    for key, value in self.test_updates.items()
+                }
+            }
+        )
+        request = StatisticsUpdateRequest.from_json(json_str)
+        self.assertDictEqual(request.updates, self.test_updates)
+
+    def test_validate_failure_null_or_empty_updates(self):
+        """Both ``None`` and an empty map are rejected by ``validate``."""
+        invalid_updates = (None, {})
+        for updates in invalid_updates:
+            # subTest names the failing input and still exercises the other one.
+            with self.subTest(updates=updates):
+                request = StatisticsUpdateRequest(_updates=updates)
+                with self.assertRaisesRegex(
+                    IllegalArgumentException, '"updates" must not be null or empty'
+                ):
+                    request.validate()
diff --git a/clients/client-python/tests/unittests/dto/responses/test_statistic_list_response.py b/clients/client-python/tests/unittests/dto/responses/test_statistic_list_response.py
new file mode 100644
index 0000000000..8c8b458019
--- /dev/null
+++ b/clients/client-python/tests/unittests/dto/responses/test_statistic_list_response.py
@@ -0,0 +1,76 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import random
+import unittest
+
+from gravitino.api.stats.statistic_values import StatisticValues
+from gravitino.dto.audit_dto import AuditDTO
+from gravitino.dto.responses.statistic_list_response import
StatisticListResponse
+from gravitino.dto.stats.statistic_dto import StatisticDTO
+
+
+class TestStatisticListResponse(unittest.TestCase):
+    """Unit tests for ``StatisticListResponse``."""
+
+    # Fixed seed so the generated fixture values are the same on every run,
+    # which keeps any failure reproducible.
+    _FIXTURE_SEED = 8620
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        """Build the string, list and object statistic DTOs shared by the tests."""
+        rng = random.Random(cls._FIXTURE_SEED)
+        cls.str_statistic_dto = StatisticDTO(
+            _name="custom-str-statistic",
+            _reserved=True,
+            _modifiable=False,
+            _audit=AuditDTO(_creator="test_creator"),
+            _value=StatisticValues.string_value("test_value"),
+        )
+        cls.list_statistic_dto = StatisticDTO(
+            _name="custom-list-statistic",
+            _reserved=True,
+            _modifiable=False,
+            _audit=AuditDTO(_creator="test_creator"),
+            _value=StatisticValues.list_value(
+                [StatisticValues.long_value(rng.randint(1, 100)) for _ in range(3)]
+            ),
+        )
+        cls.object_statistic_dto = StatisticDTO(
+            _name="custom-object-statistic",
+            _reserved=True,
+            _modifiable=False,
+            _audit=AuditDTO(_creator="test_creator"),
+            _value=StatisticValues.object_value(
+                {
+                    "custom-double-statistic": StatisticValues.double_value(
+                        rng.uniform(1.0, 100.0)
+                    ),
+                    "custom-boolean-statistic": StatisticValues.boolean_value(
+                        rng.choice([True, False])
+                    ),
+                }
+            ),
+        )
+        cls.statistics = [
+            cls.str_statistic_dto,
+            cls.list_statistic_dto,
+            cls.object_statistic_dto,
+        ]
+
+    def test_statistic_list_response(self):
+        """A response built from the fixtures validates and exposes each DTO."""
+        response = StatisticListResponse(_code=0, _statistics=self.statistics)
+        response.validate()
+        statistics = response.statistics
+        # Derive the expected count from the fixture list instead of a literal.
+        self.assertEqual(len(statistics), len(self.statistics))
+        self.assertIn(self.str_statistic_dto, statistics)
+        self.assertIn(self.list_statistic_dto, statistics)
+        self.assertIn(self.object_statistic_dto, statistics)
diff --git a/clients/client-python/tests/unittests/dto/stats/__init__.py b/clients/client-python/tests/unittests/dto/stats/__init__.py
new file mode 100644
index 0000000000..13a83393a9
--- /dev/null
+++ b/clients/client-python/tests/unittests/dto/stats/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/clients/client-python/tests/unittests/dto/stats/test_statistic_dto.py b/clients/client-python/tests/unittests/dto/stats/test_statistic_dto.py
new file mode 100644
index 0000000000..74e7bebb92
--- /dev/null
+++ b/clients/client-python/tests/unittests/dto/stats/test_statistic_dto.py
@@ -0,0 +1,70 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import json
+import unittest
+
+from gravitino.api.stats.statistic_values import StatisticValues
+from gravitino.dto.audit_dto import AuditDTO
+from gravitino.dto.stats.statistic_dto import StatisticDTO
+
+
+class TestStatisticDTOSerDes(unittest.TestCase):
+    """JSON serialization round-trip tests for ``StatisticDTO``."""
+
+    def _assert_common_fields(self, json_obj, statistic_dto):
+        """Check the name, flags and audit creator found in the serialized JSON."""
+        self.assertEqual(json_obj["name"], statistic_dto.name())
+        self.assertEqual(json_obj["reserved"], statistic_dto.reserved())
+        self.assertEqual(json_obj["modifiable"], statistic_dto.modifiable())
+        self.assertEqual(
+            json_obj["audit"]["creator"], statistic_dto.audit_info().creator()
+        )
+
+    def test_statistic_dto_serdes_without_value(self):
+        """A DTO without a value omits ``value`` and survives a round trip."""
+        original = StatisticDTO(
+            _name="test_statistic",
+            _reserved=True,
+            _modifiable=False,
+            _audit=AuditDTO(_creator="test_creator"),
+        )
+        original.validate()
+        serialized = original.to_json()
+        parsed = json.loads(serialized)
+        self._assert_common_fields(parsed, original)
+        self.assertNotIn("value", parsed)
+
+        restored = StatisticDTO.from_json(serialized)
+        self.assertEqual(restored, original)
+
+    def test_statistic_dto_serdes_with_value(self):
+        """A DTO holding a list of longs serializes it and survives a round trip."""
+        long_values = [StatisticValues.long_value(number) for number in range(5)]
+        original = StatisticDTO(
+            _name="test_statistic_with_value",
+            _reserved=False,
+            _modifiable=True,
+            _audit=AuditDTO(_creator="test_creator"),
+            _value=StatisticValues.list_value(long_values),
+        )
+        serialized = original.to_json()
+        parsed = json.loads(serialized)
+        self._assert_common_fields(parsed, original)
+        self.assertEqual(parsed["value"], [0, 1, 2, 3, 4])
+
+        restored = StatisticDTO.from_json(serialized)
+        self.assertEqual(restored, original)
diff --git a/clients/client-python/tests/unittests/test_relational_table.py b/clients/client-python/tests/unittests/test_relational_table.py
index f4c34da00d..056b1e7c53 100644
--- a/clients/client-python/tests/unittests/test_relational_table.py
+++ b/clients/client-python/tests/unittests/test_relational_table.py
@@ -28,6 +28,7 @@ from gravitino.api.rel.expressions.sorts.sort_direction import SortDirection
from gravitino.api.rel.expressions.transforms.transforms import Transforms
from gravitino.api.rel.indexes.index import Index
from gravitino.api.rel.partitions.partitions import Partitions
+from gravitino.api.stats.supports_statistics import SupportsStatistics
from gravitino.api.tag.supports_tags import SupportsTags
from gravitino.client.generic_column import GenericColumn
from gravitino.client.relational_table import RelationalTable
@@ -348,3 +349,24 @@ class TestRelationalTable(unittest.TestCase):
for method in expected_methods
)
)
+
+    def test_extends_supports_statistics_class(self) -> None:
+        """RelationalTable subclasses SupportsStatistics and has its methods."""
+        table_dto = TableDTO.from_json(TestRelationalTable.TABLE_DTO_JSON_STRING)
+        relational_table = RelationalTable(
+            Namespace.of("metalake_demo", "test_catalog", "test_schema"),
+            table_dto,
+            HTTPClient("http://localhost:8090"),
+        )
+
+        self.assertTrue(issubclass(RelationalTable, SupportsStatistics))
+
+        required_methods = ("list_statistics", "update_statistics", "drop_statistics")
+        # A missing attribute resolves to None, which is not callable.
+        resolved = [
+            getattr(relational_table, method_name, None)
+            for method_name in required_methods
+        ]
+        self.assertTrue(all(map(callable, resolved)))