Copilot commented on code in PR #9943: URL: https://github.com/apache/gravitino/pull/9943#discussion_r2791227776
########## clients/client-python/gravitino/api/function/function_impl.py: ########## @@ -0,0 +1,113 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from abc import ABC +from enum import Enum +from typing import Dict, Optional + +from gravitino.api.function.function_resources import FunctionResources + + +class FunctionImpl(ABC): + """Base class of function implementations. + + A function implementation must declare its language and optional external resources. + Concrete implementations are provided by SQLImpl, JavaImpl, and PythonImpl. + """ + + class Language(Enum): + """Supported implementation languages.""" + + SQL = "SQL" + """SQL implementation.""" + + JAVA = "JAVA" + """Java implementation.""" + + PYTHON = "PYTHON" + """Python implementation.""" + + class RuntimeType(Enum): + """Supported execution runtimes for function implementations.""" + + SPARK = "SPARK" + """Spark runtime.""" + + TRINO = "TRINO" + """Trino runtime.""" + + @classmethod + def from_string(cls, value: str) -> "FunctionImpl.RuntimeType": + """Parse a runtime value from string. + + Args: + value: Runtime name. + + Returns: + Parsed runtime. + + Raises: + ValueError: If the runtime is not supported. + """ + if not value or not value.strip(): + raise ValueError("Function runtime must be set") + return cls[value.strip().upper()] Review Comment: `RuntimeType.from_string()` claims it raises `ValueError` for unsupported runtimes, but `cls[value.strip().upper()]` will raise `KeyError` for unknown values. Please catch `KeyError` and raise a `ValueError` with a clear message so callers get the documented exception type. ```suggestion try: return cls[value.strip().upper()] except KeyError as exc: raise ValueError(f"Unsupported function runtime: {value}") from exc ``` ########## clients/client-python/gravitino/api/function/function_definition.py: ########## @@ -0,0 +1,181 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from abc import ABC, abstractmethod +from typing import List, Optional + +from gravitino.api.function.function_column import FunctionColumn +from gravitino.api.function.function_impl import FunctionImpl +from gravitino.api.function.function_param import FunctionParam +from gravitino.api.rel.types.type import Type + + +class FunctionDefinition(ABC): + """A function definition that pairs a specific parameter list with its implementations. + + A single function can include multiple definitions (overloads), each with distinct + parameters, return types, and implementations. + + For scalar or aggregate functions, use return_type() to specify the return type. + For table-valued functions, use return_columns() to specify the output columns. + """ + + EMPTY_COLUMNS: List[FunctionColumn] = [] + """An empty list of FunctionColumn.""" + + @abstractmethod + def parameters(self) -> List[FunctionParam]: + """Returns the parameters for this definition. + + Returns: + The parameters for this definition. May be an empty list for a no-arg definition. + """ + pass + + def return_type(self) -> Optional[Type]: + """The return type for scalar or aggregate function definitions. + + Returns: + The return type, or None if this is a table-valued function definition. + """ + return None + + def return_columns(self) -> List[FunctionColumn]: + """The output columns for a table-valued function definition. + + A table-valued function is a function that returns a table instead of a + scalar value or an aggregate result. The returned table has a fixed schema + defined by the columns returned from this method. + + Returns: + The output columns that define the schema of the table returned by + this definition, or an empty list if this is a scalar or aggregate + function definition. + """ + return self.EMPTY_COLUMNS + + @abstractmethod + def impls(self) -> List[FunctionImpl]: + """Returns the implementations associated with this definition.""" + pass + + +class FunctionDefinitions: + """Factory class for creating FunctionDefinition instances.""" + + class SimpleFunctionDefinition(FunctionDefinition): + """Simple implementation of FunctionDefinition.""" + + def __init__( + self, + parameters: List[FunctionParam], + return_type: Optional[Type], + return_columns: Optional[List[FunctionColumn]], + impls: List[FunctionImpl], + ): + self._parameters = list(parameters) if parameters else [] + self._return_type = return_type + self._return_columns = ( + list(return_columns) + if return_columns + else FunctionDefinition.EMPTY_COLUMNS + ) + if not impls: + raise ValueError("Impls cannot be null or empty") + self._impls = list(impls) + + def parameters(self) -> List[FunctionParam]: + return list(self._parameters) + + def return_type(self) -> Optional[Type]: + return self._return_type + + def return_columns(self) -> List[FunctionColumn]: + return ( + list(self._return_columns) + if self._return_columns + else FunctionDefinition.EMPTY_COLUMNS + ) + + def impls(self) -> List[FunctionImpl]: + return list(self._impls) + + def __eq__(self, other) -> bool: + if not isinstance(other, FunctionDefinition): + return False + return ( + self._parameters == list(other.parameters()) + and self._return_type == other.return_type() + and self._return_columns == list(other.return_columns()) + and self._impls == list(other.impls()) + ) + + def __hash__(self) -> int: + return hash( + ( + tuple(self._parameters), + self._return_type, + tuple(self._return_columns) if self._return_columns else None, + tuple(self._impls), + ) + ) + + def __repr__(self) -> str: + return ( + f"FunctionDefinition(parameters={self._parameters}, " + f"returnType={self._return_type}, " + f"returnColumns={self._return_columns}, " + f"impls={self._impls})" + ) + + @classmethod + def of( + cls, + parameters: List[FunctionParam], + return_type: Type, + impls: List[FunctionImpl], + ) -> FunctionDefinition: + """Create a FunctionDefinition instance for a scalar or aggregate function. + + Args: + parameters: The parameters for this definition, may be empty. + return_type: The return type for this definition, must not be None. + impls: The implementations for this definition, must not be empty. + + Returns: + A FunctionDefinition instance. + """ Review Comment: `FunctionDefinitions.of()` documents that `return_type` must not be `None`, but there’s no validation enforcing that. As written, callers can create a scalar/aggregate `FunctionDefinition` with `return_type=None`, producing an invalid definition. Please validate `return_type` (and fail fast with a clear exception) before constructing the definition. ```suggestion """ if return_type is None: raise ValueError("return_type must not be None") ``` ########## clients/client-python/gravitino/api/function/function_change.py: ########## @@ -0,0 +1,318 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from abc import ABC +from typing import List + +from gravitino.api.function.function_definition import FunctionDefinition +from gravitino.api.function.function_impl import FunctionImpl +from gravitino.api.function.function_param import FunctionParam + + +class FunctionChange(ABC): + """Represents a change that can be applied to a function.""" + + EMPTY_PARAMS: List[FunctionParam] = [] + """An empty list of parameters.""" + + @staticmethod + def update_comment(new_comment: str) -> "FunctionChange": + """Create a FunctionChange to update the comment of a function. + + Args: + new_comment: The new comment value. + + Returns: + The change instance. + """ + return UpdateComment(new_comment) + + @staticmethod + def add_definition(definition: FunctionDefinition) -> "FunctionChange": + """Create a FunctionChange to add a new definition (overload) to a function. + + Args: + definition: The new definition to add. + + Returns: + The change instance. + """ + return AddDefinition(definition) + + @staticmethod + def remove_definition(parameters: List[FunctionParam]) -> "FunctionChange": + """Create a FunctionChange to remove an existing definition from a function. + + Args: + parameters: The parameters that identify the definition to remove. + + Returns: + The change instance. + """ + return RemoveDefinition(parameters) + + @staticmethod + def add_impl( + parameters: List[FunctionParam], implementation: FunctionImpl + ) -> "FunctionChange": + """Create a FunctionChange to add an implementation to a specific definition. + + Args: + parameters: The parameters that identify the definition to update. + implementation: The implementation to add. + + Returns: + The change instance. + """ + return AddImpl(parameters, implementation) + + @staticmethod + def update_impl( + parameters: List[FunctionParam], + runtime: FunctionImpl.RuntimeType, + implementation: FunctionImpl, + ) -> "FunctionChange": + """Create a FunctionChange to update an implementation for a specific definition. + + Args: + parameters: The parameters that identify the definition to update. + runtime: The runtime that identifies the implementation to replace. + implementation: The new implementation. + + Returns: + The change instance. + """ + return UpdateImpl(parameters, runtime, implementation) + + @staticmethod + def remove_impl( + parameters: List[FunctionParam], runtime: FunctionImpl.RuntimeType + ) -> "FunctionChange": + """Create a FunctionChange to remove an implementation for a specific definition. + + Args: + parameters: The parameters that identify the definition to update. + runtime: The runtime that identifies the implementation to remove. + + Returns: + The change instance. + """ + return RemoveImpl(parameters, runtime) + Review Comment: PR description says `FunctionChange` supports rename, set/remove properties, and add/remove/update definitions *and parameters*, but the implementation here only includes update_comment and definition/impl add/update/remove. Either implement the missing change types or update the PR description to match what’s actually being delivered so reviewers/users aren’t misled. ########## clients/client-python/gravitino/api/function/function_definition.py: ########## @@ -0,0 +1,181 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from abc import ABC, abstractmethod +from typing import List, Optional + +from gravitino.api.function.function_column import FunctionColumn +from gravitino.api.function.function_impl import FunctionImpl +from gravitino.api.function.function_param import FunctionParam +from gravitino.api.rel.types.type import Type + + +class FunctionDefinition(ABC): + """A function definition that pairs a specific parameter list with its implementations. + + A single function can include multiple definitions (overloads), each with distinct + parameters, return types, and implementations. + + For scalar or aggregate functions, use return_type() to specify the return type. + For table-valued functions, use return_columns() to specify the output columns. + """ + + EMPTY_COLUMNS: List[FunctionColumn] = [] + """An empty list of FunctionColumn.""" + + @abstractmethod + def parameters(self) -> List[FunctionParam]: + """Returns the parameters for this definition. + + Returns: + The parameters for this definition. May be an empty list for a no-arg definition. + """ + pass + + def return_type(self) -> Optional[Type]: + """The return type for scalar or aggregate function definitions. + + Returns: + The return type, or None if this is a table-valued function definition. + """ + return None + + def return_columns(self) -> List[FunctionColumn]: + """The output columns for a table-valued function definition. + + A table-valued function is a function that returns a table instead of a + scalar value or an aggregate result. The returned table has a fixed schema + defined by the columns returned from this method. + + Returns: + The output columns that define the schema of the table returned by + this definition, or an empty list if this is a scalar or aggregate + function definition. + """ + return self.EMPTY_COLUMNS + + @abstractmethod + def impls(self) -> List[FunctionImpl]: + """Returns the implementations associated with this definition.""" + pass + + +class FunctionDefinitions: + """Factory class for creating FunctionDefinition instances.""" + + class SimpleFunctionDefinition(FunctionDefinition): + """Simple implementation of FunctionDefinition.""" + + def __init__( + self, + parameters: List[FunctionParam], + return_type: Optional[Type], + return_columns: Optional[List[FunctionColumn]], + impls: List[FunctionImpl], + ): + self._parameters = list(parameters) if parameters else [] + self._return_type = return_type + self._return_columns = ( + list(return_columns) + if return_columns + else FunctionDefinition.EMPTY_COLUMNS + ) + if not impls: + raise ValueError("Impls cannot be null or empty") + self._impls = list(impls) + + def parameters(self) -> List[FunctionParam]: + return list(self._parameters) + + def return_type(self) -> Optional[Type]: + return self._return_type + + def return_columns(self) -> List[FunctionColumn]: + return ( + list(self._return_columns) + if self._return_columns + else FunctionDefinition.EMPTY_COLUMNS + ) + + def impls(self) -> List[FunctionImpl]: + return list(self._impls) + + def __eq__(self, other) -> bool: + if not isinstance(other, FunctionDefinition): + return False + return ( + self._parameters == list(other.parameters()) + and self._return_type == other.return_type() + and self._return_columns == list(other.return_columns()) + and self._impls == list(other.impls()) + ) + + def __hash__(self) -> int: + return hash( + ( + tuple(self._parameters), + self._return_type, + tuple(self._return_columns) if self._return_columns else None, + tuple(self._impls), + ) + ) + + def __repr__(self) -> str: + return ( + f"FunctionDefinition(parameters={self._parameters}, " + f"returnType={self._return_type}, " + f"returnColumns={self._return_columns}, " + f"impls={self._impls})" + ) + + @classmethod + def of( + cls, + parameters: List[FunctionParam], + return_type: Type, + impls: List[FunctionImpl], + ) -> FunctionDefinition: + """Create a FunctionDefinition instance for a scalar or aggregate function. + + Args: + parameters: The parameters for this definition, may be empty. + return_type: The return type for this definition, must not be None. + impls: The implementations for this definition, must not be empty. + + Returns: + A FunctionDefinition instance. + """ + return cls.SimpleFunctionDefinition(parameters, return_type, None, impls) + + @classmethod + def of_table( + cls, + parameters: List[FunctionParam], + return_columns: List[FunctionColumn], + impls: List[FunctionImpl], + ) -> FunctionDefinition: + """Create a FunctionDefinition instance for a table-valued function. + + Args: + parameters: The parameters for this definition, may be empty. + return_columns: The return columns for this definition, must not be empty. + impls: The implementations for this definition, must not be empty. + + Returns: + A FunctionDefinition instance. + """ Review Comment: `FunctionDefinitions.of_table()` documents that `return_columns` must not be empty for table-valued functions, but the implementation doesn’t validate this and will happily create a table function definition with no output schema. Please enforce non-empty `return_columns` (and likely non-`None`) to prevent invalid table function definitions. ```suggestion """ if return_columns is None or not return_columns: raise ValueError( "return_columns must not be None or empty for table-valued function " "definitions" ) if impls is None or not impls: raise ValueError( "impls must not be None or empty for table-valued function definitions" ) ``` ########## clients/client-python/gravitino/api/function/function_definition.py: ########## @@ -0,0 +1,181 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from abc import ABC, abstractmethod +from typing import List, Optional + +from gravitino.api.function.function_column import FunctionColumn +from gravitino.api.function.function_impl import FunctionImpl +from gravitino.api.function.function_param import FunctionParam +from gravitino.api.rel.types.type import Type + + +class FunctionDefinition(ABC): + """A function definition that pairs a specific parameter list with its implementations. + + A single function can include multiple definitions (overloads), each with distinct + parameters, return types, and implementations. + + For scalar or aggregate functions, use return_type() to specify the return type. + For table-valued functions, use return_columns() to specify the output columns. + """ + + EMPTY_COLUMNS: List[FunctionColumn] = [] + """An empty list of FunctionColumn.""" + + @abstractmethod + def parameters(self) -> List[FunctionParam]: + """Returns the parameters for this definition. + + Returns: + The parameters for this definition. May be an empty list for a no-arg definition. + """ + pass + + def return_type(self) -> Optional[Type]: + """The return type for scalar or aggregate function definitions. + + Returns: + The return type, or None if this is a table-valued function definition. + """ + return None + + def return_columns(self) -> List[FunctionColumn]: + """The output columns for a table-valued function definition. + + A table-valued function is a function that returns a table instead of a + scalar value or an aggregate result. The returned table has a fixed schema + defined by the columns returned from this method. + + Returns: + The output columns that define the schema of the table returned by + this definition, or an empty list if this is a scalar or aggregate + function definition. + """ + return self.EMPTY_COLUMNS + + @abstractmethod + def impls(self) -> List[FunctionImpl]: + """Returns the implementations associated with this definition.""" + pass + + +class FunctionDefinitions: + """Factory class for creating FunctionDefinition instances.""" + + class SimpleFunctionDefinition(FunctionDefinition): + """Simple implementation of FunctionDefinition.""" + + def __init__( + self, + parameters: List[FunctionParam], + return_type: Optional[Type], + return_columns: Optional[List[FunctionColumn]], + impls: List[FunctionImpl], + ): + self._parameters = list(parameters) if parameters else [] + self._return_type = return_type + self._return_columns = ( + list(return_columns) + if return_columns + else FunctionDefinition.EMPTY_COLUMNS + ) + if not impls: + raise ValueError("Impls cannot be null or empty") + self._impls = list(impls) Review Comment: `SimpleFunctionDefinition` allows constructing a definition with both `return_type` and `return_columns` set (or neither set), even though the class docstring states scalar/aggregate definitions should use `return_type()` and table-valued ones should use `return_columns()`. Consider validating this invariant in the constructor (exactly one of `return_type` vs non-empty `return_columns`) to prevent creating invalid definitions. ########## clients/client-python/gravitino/api/function/function_param.py: ########## @@ -0,0 +1,121 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from abc import ABC, abstractmethod +from typing import Optional + +from gravitino.api.rel.expressions.expression import Expression +from gravitino.api.rel.types.type import Type + + +class FunctionParam(ABC): + """Represents a function parameter.""" + + @abstractmethod + def name(self) -> str: + """Returns the name of the parameter.""" + pass + + @abstractmethod + def data_type(self) -> Type: + """Returns the data type of the parameter.""" + pass + + def comment(self) -> Optional[str]: + """Returns the optional comment of the parameter, None if not provided.""" + return None + + def default_value(self) -> Optional[Expression]: + """Returns the default value of the parameter if provided, otherwise None.""" + return None + + +class FunctionParams: + """Factory class for creating FunctionParam instances.""" + + class SimpleFunctionParam(FunctionParam): + """Simple implementation of FunctionParam.""" + + def __init__( + self, + name: str, + data_type: Type, + comment: Optional[str] = None, + default_value: Optional[Expression] = None, + ): + if not name or not name.strip(): + raise ValueError("Parameter name cannot be null or empty") + self._name = name + + if data_type is None: + raise ValueError("Parameter data type cannot be null") + self._data_type = data_type + + self._comment = comment + self._default_value = default_value + + def name(self) -> str: + return self._name + + def data_type(self) -> Type: + return self._data_type + + def comment(self) -> Optional[str]: + return self._comment + + def default_value(self) -> Optional[Expression]: + return self._default_value + + def __eq__(self, other) -> bool: + if not isinstance(other, FunctionParam): + return False + return ( + self._name == other.name() + and self._data_type == other.data_type() + and self._comment == other.comment() + and self._default_value == other.default_value() + ) + + def __hash__(self) -> int: + return hash((self._name, self._data_type, self._comment)) Review Comment: `SimpleFunctionParam.__eq__()` compares `default_value`, but `__hash__()` does not include it. This breaks the hash/equals contract and can lead to incorrect behavior when parameters are used as dict keys or placed in sets. Either include `default_value` in the hash (if it’s hashable) or remove/disable `__hash__` so the object is unhashable. ```suggestion return hash( (self._name, self._data_type, self._comment, self._default_value) ) ``` ########## clients/client-python/gravitino/api/function/function_type.py: ########## @@ -0,0 +1,61 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from enum import Enum + + +class FunctionType(Enum): + """Function type supported by Gravitino.""" + + SCALAR = "scalar" + """Scalar function that returns a single value per row.""" + + AGGREGATE = "aggregate" + """Aggregate function that combines multiple rows into a single value.""" + + TABLE = "table" + """Table-valued function that returns a table of rows.""" + + @classmethod + def from_string(cls, type_str: str) -> "FunctionType": + """Parse the function type from a string value. + + Args: + type_str: The string to parse. + + Returns: + The parsed FunctionType. + + Raises: + ValueError: If the value cannot be parsed. + """ + if not type_str: + raise ValueError("Function type cannot be null or empty") + + type_lower = type_str.lower() + if type_lower == "agg": + return cls.AGGREGATE + + for func_type in cls: + if func_type.value == type_lower: + return func_type + + raise ValueError(f"Invalid function type: {type_str}") Review Comment: This PR adds new public function API model types (e.g., `FunctionType.from_string`, `RuntimeType.from_string`, equality/hash behavior) but doesn’t add any unit tests. The repo has extensive Python unit tests already, so these new parsing/validation paths should be covered (including invalid inputs and edge cases) to prevent regressions. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
