Xiao-zhen-Liu commented on code in PR #5603:
URL: https://github.com/apache/texera/pull/5603#discussion_r3406776331
##########
amber/src/main/python/core/models/schema/attribute_type.py:
##########
@@ -78,6 +81,53 @@ class AttributeType(Enum):
}
+FROM_STRING_PARSER_MAPPING = {
+ AttributeType.STRING: str,
+ AttributeType.INT: lambda v: (
+ 0 if v is None or (isinstance(v, str) and v.strip() == "") else int(v)
+ ),
+ AttributeType.LONG: lambda v: (
+ 0 if v is None or (isinstance(v, str) and v.strip() == "") else int(v)
+ ),
+ AttributeType.DOUBLE: lambda v: (
+ 0.0 if v is None or (isinstance(v, str) and v.strip() == "") else
float(v)
+ ),
+ AttributeType.BOOL: lambda v: (
+ False
+ if v is None or (isinstance(v, str) and v.strip() == "")
+ else (
+ True
+ if str(v).strip().lower() == "true"
+ else (
+ False
+ if str(v).strip().lower() == "false"
+ else float(str(v).strip()) != 0
+ )
+ )
+ ),
+ AttributeType.BINARY: lambda v: (
+ (_ for _ in ()).throw(
+ ValueError(
+ "UiParameter does not support BINARY values. "
+ "Use a supported type instead."
+ )
+ )
+ ),
+ AttributeType.TIMESTAMP: lambda v: (
+ datetime.datetime.fromtimestamp(0)
Review Comment:
For an empty value this returns the 1970 start time in *local* time, so the
result shifts with the server's timezone. Use `datetime(1970, 1, 1,
tzinfo=timezone.utc)` (or `None`) so it's the same everywhere.
##########
amber/src/main/python/core/models/schema/attribute_type.py:
##########
@@ -78,6 +81,53 @@ class AttributeType(Enum):
}
+FROM_STRING_PARSER_MAPPING = {
+ AttributeType.STRING: str,
+ AttributeType.INT: lambda v: (
+ 0 if v is None or (isinstance(v, str) and v.strip() == "") else int(v)
+ ),
+ AttributeType.LONG: lambda v: (
+ 0 if v is None or (isinstance(v, str) and v.strip() == "") else int(v)
+ ),
+ AttributeType.DOUBLE: lambda v: (
+ 0.0 if v is None or (isinstance(v, str) and v.strip() == "") else
float(v)
+ ),
+ AttributeType.BOOL: lambda v: (
Review Comment:
This nested one-line if/else is hard to read. Pull it into a small
`_parse_bool(v)` helper with plain `if/elif`.
##########
amber/src/main/python/pytexera/udf/udf_operator.py:
##########
@@ -16,12 +16,127 @@
# under the License.
from abc import abstractmethod
-from typing import Iterator, Optional, Union
+from dataclasses import dataclass
+import functools
+from typing import Any, Dict, Iterator, Optional, Union
from pyamber import *
+from core.models.schema.attribute_type import AttributeType,
FROM_STRING_PARSER_MAPPING
-class UDFOperatorV2(TupleOperatorV2):
+@dataclass(frozen=True)
+class _UiParameterValue:
+ name: str
+ type: AttributeType
+ value: Any
+
+
+class _UiParameterSupport:
+ _ui_parameter_injected_values: Dict[str, Any]
+ _ui_parameter_name_types: Dict[str, AttributeType]
+ _unsupported_ui_parameter_types = {
+ AttributeType.BINARY,
+ AttributeType.LARGE_BINARY,
+ }
+
+ # Reserved hook name. Backend injector will generate this in the user's
class.
+ def _texera_injected_ui_parameters(self) -> Dict[str, Any]:
+ return {}
+
+ def _ensure_ui_parameter_state(self) -> None:
+ if "_ui_parameter_injected_values" not in self.__dict__:
+ self._ui_parameter_injected_values = {}
+ if "_ui_parameter_name_types" not in self.__dict__:
+ self._ui_parameter_name_types = {}
+
+ def _texera_apply_injected_ui_parameters(self) -> None:
+ self._ensure_ui_parameter_state()
+ values = self._texera_injected_ui_parameters()
+ self._ui_parameter_injected_values = dict(values or {})
+ self._ui_parameter_name_types = {}
+
+ def __init_subclass__(cls, **kwargs):
+ super().__init_subclass__(**kwargs)
+
+ # Wrap the effective open() method once per subclass.
+ original_open = getattr(cls, "open", None)
+ if original_open is None:
+ return
+
+ # Avoid double wrapping
+ if getattr(original_open, "__texera_ui_params_wrapped__", False):
+ return
+
+ @functools.wraps(original_open)
+ def wrapped_open(self, *args, **kwargs):
+ self._texera_apply_injected_ui_parameters()
+ return original_open(self, *args, **kwargs)
+
+ setattr(wrapped_open, "__texera_ui_params_wrapped__", True)
+ cls.open = wrapped_open
+
+ def UiParameter(
+ self, name: str, attr_type: Optional[AttributeType] = None, **kwargs:
Any
+ ) -> _UiParameterValue:
+ if "type" in kwargs:
+ if attr_type is not None:
+ raise TypeError("UiParameter.type was provided multiple
times.")
+ attr_type = kwargs.pop("type")
+
+ if kwargs:
+ unexpected_arguments = ", ".join(sorted(kwargs))
+ raise TypeError(
+ f"UiParameter got unexpected keyword argument(s): "
+ f"{unexpected_arguments}."
+ )
+
+ if attr_type is None:
+ raise TypeError("UiParameter.type is required.")
+
+ if not isinstance(attr_type, AttributeType):
+ raise TypeError(
+ f"UiParameter.type must be an AttributeType, got
{attr_type!r}."
+ )
+
+ self._ensure_ui_parameter_state()
+ existing_type = self._ui_parameter_name_types.get(name)
+ if existing_type is not None and existing_type != attr_type:
+ raise ValueError(
+ f"Duplicate UiParameter name '{name}' with conflicting types: "
+ f"{existing_type.name} vs {attr_type.name}."
+ )
+
+ self._ui_parameter_name_types[name] = attr_type
+ raw_value = self._ui_parameter_injected_values.get(name)
Review Comment:
A misspelled name (or generated code that never ran) just returns `None`
with no warning. Consider warning when a name isn't found, or when a sent value
is never used.
##########
amber/src/test/python/pytexera/udf/test_udf_operator.py:
##########
@@ -0,0 +1,194 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import datetime
+from typing import Iterator, Optional
+
+import pytest
+
+from pytexera import AttributeType, Tuple, TupleLike, UDFOperatorV2
+from pytexera.udf.udf_operator import _UiParameterSupport
+
+
+class InjectedParametersOperator(UDFOperatorV2):
+ def _texera_injected_ui_parameters(self):
+ return {
+ "count": "7",
+ "enabled": "1",
+ "created_at": "2024-01-01T00:00:00",
+ }
+
+ def open(self):
+ self.count_parameter = self.UiParameter("count", AttributeType.INT)
+ self.enabled_parameter = self.UiParameter(
+ name="enabled", type=AttributeType.BOOL
+ )
+ self.created_at_parameter = self.UiParameter(
+ "created_at", type=AttributeType.TIMESTAMP
+ )
+
+ def process_tuple(self, tuple_: Tuple, port: int) ->
Iterator[Optional[TupleLike]]:
+ yield tuple_
+
+
+class ConflictingParameterOperator(UDFOperatorV2):
+ def _texera_injected_ui_parameters(self):
+ return {"duplicate": "1"}
+
+ def open(self):
+ self.UiParameter("duplicate", AttributeType.INT)
+ self.UiParameter("duplicate", AttributeType.STRING)
+
+ def process_tuple(self, tuple_: Tuple, port: int) ->
Iterator[Optional[TupleLike]]:
+ yield tuple_
+
+
+class FirstIndependentParameterOperator(UDFOperatorV2):
+ def _texera_injected_ui_parameters(self):
+ return {"count": "1"}
+
+ def open(self):
+ self.count_parameter = self.UiParameter("count", AttributeType.INT)
+
+ def process_tuple(self, tuple_: Tuple, port: int) ->
Iterator[Optional[TupleLike]]:
+ yield tuple_
+
+
+class SecondIndependentParameterOperator(UDFOperatorV2):
+ def _texera_injected_ui_parameters(self):
+ return {"count": "2"}
+
+ def open(self):
+ self.count_parameter = self.UiParameter("count", AttributeType.INT)
+
+ def process_tuple(self, tuple_: Tuple, port: int) ->
Iterator[Optional[TupleLike]]:
+ yield tuple_
+
+
+class TestUiParameterSupport:
Review Comment:
Until the feature is fully wired up, these tests are the only check on it,
so worth adding: a `Z` timestamp, empty values for INT/LONG/DOUBLE/TIMESTAMP, a
name that wasn't sent (should be `None`), the `UiParameter` argument errors,
and a subclass that calls `super().open()`.
##########
amber/src/main/python/core/models/schema/attribute_type.py:
##########
@@ -78,6 +81,53 @@ class AttributeType(Enum):
}
+FROM_STRING_PARSER_MAPPING = {
+ AttributeType.STRING: str,
+ AttributeType.INT: lambda v: (
+ 0 if v is None or (isinstance(v, str) and v.strip() == "") else int(v)
+ ),
+ AttributeType.LONG: lambda v: (
+ 0 if v is None or (isinstance(v, str) and v.strip() == "") else int(v)
+ ),
+ AttributeType.DOUBLE: lambda v: (
+ 0.0 if v is None or (isinstance(v, str) and v.strip() == "") else
float(v)
+ ),
+ AttributeType.BOOL: lambda v: (
+ False
+ if v is None or (isinstance(v, str) and v.strip() == "")
+ else (
+ True
+ if str(v).strip().lower() == "true"
+ else (
+ False
+ if str(v).strip().lower() == "false"
+ else float(str(v).strip()) != 0
+ )
+ )
+ ),
+ AttributeType.BINARY: lambda v: (
+ (_ for _ in ()).throw(
+ ValueError(
+ "UiParameter does not support BINARY values. "
+ "Use a supported type instead."
+ )
+ )
+ ),
+ AttributeType.TIMESTAMP: lambda v: (
+ datetime.datetime.fromtimestamp(0)
+ if v is None or (isinstance(v, str) and v.strip() == "")
+ else datetime.datetime.fromisoformat(v)
Review Comment:
`fromisoformat` can't handle a trailing `Z` on Python 3.10, which we still
test. On 3.11+ it works, but gives a different result with the `Z` than without
(one carries timezone info, one doesn't). The backend's own test sends a `Z`
value, so replace it first: `datetime.fromisoformat(v.replace("Z", "+00:00"))`.
Add a test with a `Z`.
##########
amber/src/main/python/core/models/schema/attribute_type.py:
##########
@@ -78,6 +81,53 @@ class AttributeType(Enum):
}
+FROM_STRING_PARSER_MAPPING = {
+ AttributeType.STRING: str,
+ AttributeType.INT: lambda v: (
+ 0 if v is None or (isinstance(v, str) and v.strip() == "") else int(v)
+ ),
+ AttributeType.LONG: lambda v: (
+ 0 if v is None or (isinstance(v, str) and v.strip() == "") else int(v)
+ ),
+ AttributeType.DOUBLE: lambda v: (
+ 0.0 if v is None or (isinstance(v, str) and v.strip() == "") else
float(v)
+ ),
+ AttributeType.BOOL: lambda v: (
+ False
+ if v is None or (isinstance(v, str) and v.strip() == "")
+ else (
+ True
+ if str(v).strip().lower() == "true"
+ else (
+ False
+ if str(v).strip().lower() == "false"
+ else float(str(v).strip()) != 0
+ )
+ )
+ ),
+ AttributeType.BINARY: lambda v: (
+ (_ for _ in ()).throw(
+ ValueError(
Review Comment:
This `(_ for _ in ()).throw(...)` trick for raising an error inside a lambda
is hard to read, and the unsupported types are now listed in three spots (here,
the `_unsupported_ui_parameter_types` set, and `_parse`). Drop these two
entries and just reject unsupported types at the start of `_parse`.
##########
amber/src/main/python/pytexera/udf/udf_operator.py:
##########
@@ -16,12 +16,127 @@
# under the License.
from abc import abstractmethod
-from typing import Iterator, Optional, Union
+from dataclasses import dataclass
+import functools
+from typing import Any, Dict, Iterator, Optional, Union
from pyamber import *
+from core.models.schema.attribute_type import AttributeType,
FROM_STRING_PARSER_MAPPING
-class UDFOperatorV2(TupleOperatorV2):
+@dataclass(frozen=True)
+class _UiParameterValue:
+ name: str
+ type: AttributeType
+ value: Any
+
+
+class _UiParameterSupport:
+ _ui_parameter_injected_values: Dict[str, Any]
+ _ui_parameter_name_types: Dict[str, AttributeType]
+ _unsupported_ui_parameter_types = {
+ AttributeType.BINARY,
+ AttributeType.LARGE_BINARY,
+ }
+
+ # Reserved hook name. Backend injector will generate this in the user's
class.
+ def _texera_injected_ui_parameters(self) -> Dict[str, Any]:
+ return {}
+
+ def _ensure_ui_parameter_state(self) -> None:
+ if "_ui_parameter_injected_values" not in self.__dict__:
+ self._ui_parameter_injected_values = {}
+ if "_ui_parameter_name_types" not in self.__dict__:
+ self._ui_parameter_name_types = {}
+
+ def _texera_apply_injected_ui_parameters(self) -> None:
+ self._ensure_ui_parameter_state()
+ values = self._texera_injected_ui_parameters()
+ self._ui_parameter_injected_values = dict(values or {})
+ self._ui_parameter_name_types = {}
+
+ def __init_subclass__(cls, **kwargs):
+ super().__init_subclass__(**kwargs)
+
+ # Wrap the effective open() method once per subclass.
+ original_open = getattr(cls, "open", None)
+ if original_open is None:
+ return
+
+ # Avoid double wrapping
+ if getattr(original_open, "__texera_ui_params_wrapped__", False):
+ return
+
+ @functools.wraps(original_open)
+ def wrapped_open(self, *args, **kwargs):
+ self._texera_apply_injected_ui_parameters()
+ return original_open(self, *args, **kwargs)
+
+ setattr(wrapped_open, "__texera_ui_params_wrapped__", True)
+ cls.open = wrapped_open
+
+ def UiParameter(
Review Comment:
A capitalized method name reads like a class; Python style would be
`ui_parameter`. Fine if it's on purpose, just worth deciding.
##########
amber/src/main/python/core/models/schema/attribute_type.py:
##########
@@ -33,8 +33,11 @@ class AttributeType(Enum):
STRING = 1
INT = 2
+ # Java enum-name aliases accepted by the UI parameter parser.
+ INTEGER = 2
Review Comment:
Works correctly. But there are now two places mapping Java names to a type
(these aliases and `RAW_TYPE_MAPPING` below) — a one-line comment on why both
exist would help.
##########
amber/src/main/python/core/models/schema/attribute_type.py:
##########
@@ -78,6 +81,53 @@ class AttributeType(Enum):
}
+FROM_STRING_PARSER_MAPPING = {
Review Comment:
This map is really UI-parameter behavior (the empty-value default, the
"UiParameter…" error text) sitting in a general types file that other code also
uses. Consider moving it next to `_parse` and keeping this file about types
only.
##########
amber/src/main/python/pytexera/udf/udf_operator.py:
##########
@@ -16,12 +16,127 @@
# under the License.
from abc import abstractmethod
-from typing import Iterator, Optional, Union
+from dataclasses import dataclass
+import functools
+from typing import Any, Dict, Iterator, Optional, Union
from pyamber import *
+from core.models.schema.attribute_type import AttributeType,
FROM_STRING_PARSER_MAPPING
-class UDFOperatorV2(TupleOperatorV2):
+@dataclass(frozen=True)
+class _UiParameterValue:
+ name: str
+ type: AttributeType
+ value: Any
+
+
+class _UiParameterSupport:
+ _ui_parameter_injected_values: Dict[str, Any]
+ _ui_parameter_name_types: Dict[str, AttributeType]
+ _unsupported_ui_parameter_types = {
+ AttributeType.BINARY,
+ AttributeType.LARGE_BINARY,
+ }
+
+ # Reserved hook name. Backend injector will generate this in the user's
class.
+ def _texera_injected_ui_parameters(self) -> Dict[str, Any]:
+ return {}
+
+ def _ensure_ui_parameter_state(self) -> None:
+ if "_ui_parameter_injected_values" not in self.__dict__:
+ self._ui_parameter_injected_values = {}
+ if "_ui_parameter_name_types" not in self.__dict__:
+ self._ui_parameter_name_types = {}
+
+ def _texera_apply_injected_ui_parameters(self) -> None:
Review Comment:
If a user's `open()` calls `super().open()`, this setup runs twice and the
duplicate-name check gets wiped. The parameter values are still fine, but make
it run only once per `open()`, and add a short docstring explaining what this
wrapping does.
##########
amber/src/main/python/pytexera/udf/udf_operator.py:
##########
@@ -16,12 +16,127 @@
# under the License.
from abc import abstractmethod
-from typing import Iterator, Optional, Union
+from dataclasses import dataclass
+import functools
+from typing import Any, Dict, Iterator, Optional, Union
from pyamber import *
+from core.models.schema.attribute_type import AttributeType,
FROM_STRING_PARSER_MAPPING
-class UDFOperatorV2(TupleOperatorV2):
+@dataclass(frozen=True)
+class _UiParameterValue:
+ name: str
+ type: AttributeType
+ value: Any
+
+
+class _UiParameterSupport:
+ _ui_parameter_injected_values: Dict[str, Any]
+ _ui_parameter_name_types: Dict[str, AttributeType]
+ _unsupported_ui_parameter_types = {
+ AttributeType.BINARY,
+ AttributeType.LARGE_BINARY,
+ }
+
+ # Reserved hook name. Backend injector will generate this in the user's
class.
+ def _texera_injected_ui_parameters(self) -> Dict[str, Any]:
+ return {}
+
+ def _ensure_ui_parameter_state(self) -> None:
+ if "_ui_parameter_injected_values" not in self.__dict__:
+ self._ui_parameter_injected_values = {}
+ if "_ui_parameter_name_types" not in self.__dict__:
+ self._ui_parameter_name_types = {}
+
+ def _texera_apply_injected_ui_parameters(self) -> None:
+ self._ensure_ui_parameter_state()
+ values = self._texera_injected_ui_parameters()
+ self._ui_parameter_injected_values = dict(values or {})
+ self._ui_parameter_name_types = {}
+
+ def __init_subclass__(cls, **kwargs):
+ super().__init_subclass__(**kwargs)
+
+ # Wrap the effective open() method once per subclass.
+ original_open = getattr(cls, "open", None)
+ if original_open is None:
+ return
+
+ # Avoid double wrapping
+ if getattr(original_open, "__texera_ui_params_wrapped__", False):
+ return
+
+ @functools.wraps(original_open)
+ def wrapped_open(self, *args, **kwargs):
+ self._texera_apply_injected_ui_parameters()
+ return original_open(self, *args, **kwargs)
+
+ setattr(wrapped_open, "__texera_ui_params_wrapped__", True)
+ cls.open = wrapped_open
+
+ def UiParameter(
+ self, name: str, attr_type: Optional[AttributeType] = None, **kwargs:
Any
+ ) -> _UiParameterValue:
+ if "type" in kwargs:
+ if attr_type is not None:
+ raise TypeError("UiParameter.type was provided multiple
times.")
+ attr_type = kwargs.pop("type")
+
+ if kwargs:
+ unexpected_arguments = ", ".join(sorted(kwargs))
+ raise TypeError(
+ f"UiParameter got unexpected keyword argument(s): "
+ f"{unexpected_arguments}."
+ )
+
+ if attr_type is None:
+ raise TypeError("UiParameter.type is required.")
+
+ if not isinstance(attr_type, AttributeType):
+ raise TypeError(
+ f"UiParameter.type must be an AttributeType, got
{attr_type!r}."
+ )
+
+ self._ensure_ui_parameter_state()
+ existing_type = self._ui_parameter_name_types.get(name)
+ if existing_type is not None and existing_type != attr_type:
Review Comment:
The same name twice with the same type quietly succeeds, but with a
different type it errors. Pick one behavior and document it.
##########
amber/src/main/python/pytexera/__init__.py:
##########
@@ -30,6 +30,7 @@
UDFSourceOperator,
)
from core.models.type.large_binary import largebinary
+from core.models.schema.attribute_type import *
Review Comment:
`import *` pulls every name from `attribute_type` into `pytexera`, not just
`AttributeType`. Import `AttributeType` by name, or add `__all__` to that
module.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]