shunping commented on code in PR #33845:
URL: https://github.com/apache/beam/pull/33845#discussion_r1947385648


##########
sdks/python/apache_beam/ml/anomaly/specifiable.py:
##########
@@ -0,0 +1,334 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+A module that provides utilities to turn a class into a Specifiable subclass.
+"""
+
+from __future__ import annotations
+
+import dataclasses
+import inspect
+import logging
+from typing import Any
+from typing import ClassVar
+from typing import List
+from typing import Protocol
+from typing import Type
+from typing import TypeVar
+from typing import runtime_checkable
+
+from typing_extensions import Self
+
+# Public names exported by this module.
+__all__ = ["Spec", "Specifiable", "specifiable"]
+
+# Subspace name returned by `_class_to_subspace` when no class in the MRO has
+# a name listed in `_ACCEPTED_SUBSPACES`.
+_FALLBACK_SUBSPACE = "*"
+
+# Class names recognized as subspaces. `_class_to_subspace` matches them
+# against the `__name__` of each class in the MRO, and
+# `_spec_type_to_subspace` searches the registry in this list's order.
+_ACCEPTED_SUBSPACES = [
+    "EnsembleAnomalyDetector",
+    "AnomalyDetector",
+    "ThresholdFn",
+    "AggregationFn",
+    _FALLBACK_SUBSPACE,
+]
+
+#: A nested dictionary for efficient lookup of Specifiable subclasses.
+#: Structure: `_KNOWN_SPECIFIABLE[subspace][spec_type]`, where `subspace` is one
+#: of the accepted subspaces that the class belongs to and `spec_type` is the
+#: class name by default. Users can also specify a different value for
+#: `spec_type` when applying the `specifiable` decorator to an existing class.
+_KNOWN_SPECIFIABLE = {}
+
+SpecT = TypeVar('SpecT', bound='Specifiable')
+
+
+def _class_to_subspace(cls: Type) -> str:
+  """
+  Search the class hierarchy to find the subspace: the closest ancestor class 
in
+  the class's method resolution order (MRO) whose name is found in the accepted
+  subspace list. This is usually called when registering a new specifiable
+  class.
+  """
+  for c in cls.mro():
+    if c.__name__ in _ACCEPTED_SUBSPACES:
+      return c.__name__
+
+  return _FALLBACK_SUBSPACE
+
+
+def _spec_type_to_subspace(type: str) -> str:
+  """
+  Look for the subspace for a spec type. This is usually called to retrieve
+  the subspace of a registered specifiable class.
+  """
+  for subspace in _ACCEPTED_SUBSPACES:
+    if type in _KNOWN_SPECIFIABLE.get(subspace, {}):
+      return subspace
+
+  raise ValueError(f"subspace for {str} not found.")
+
+
[email protected](frozen=True)
+class Spec():
+  """
+  Dataclass for storing specifications of specifiable objects.
+  Objects can be initialized using the data in their corresponding spec.
+  """
+  #: A string indicating the concrete `Specifiable` class
+  type: str
+  #: A dictionary of keyword arguments for the `__init__` method of the class.
+  config: dict[str, Any] = dataclasses.field(default_factory=dict)
+
+
+@runtime_checkable
+class Specifiable(Protocol):
+  """Protocol that a specifiable class needs to implement.
+
+  Attributes:
+    spec_type: The value of the `type` field in the object's spec for this
+      class.
+    init_kwargs: The raw keyword arguments passed to `__init__` method during
+      object initialization.
+  """
+  spec_type: ClassVar[str]
+  init_kwargs: dict[str, Any]
+
+  # a boolean to tell whether the original `__init__` method is called
+  _initialized: bool
+  # a boolean used by new_getattr to tell whether it is in the `__init__` 
method
+  # call
+  _in_init: bool
+
+  @staticmethod
+  def _from_spec_helper(v, _run_init):
+    if isinstance(v, Spec):
+      return Specifiable.from_spec(v, _run_init)
+
+    if isinstance(v, List):
+      return [Specifiable._from_spec_helper(e, _run_init) for e in v]
+
+    return v
+
+  @classmethod
+  def from_spec(cls, spec: Spec, _run_init: bool = True) -> Self:
+    """Generate a `Specifiable` subclass object based on a spec."""
+    if spec.type is None:
+      raise ValueError(f"Spec type not found in {spec}")
+
+    subspace = _spec_type_to_subspace(spec.type)
+    subclass: Type[Self] = _KNOWN_SPECIFIABLE[subspace].get(spec.type, None)
+    if subclass is None:
+      raise ValueError(f"Unknown spec type '{spec.type}' in {spec}")
+
+    kwargs = {
+        k: Specifiable._from_spec_helper(v, _run_init)
+        for k,
+        v in spec.config.items()
+    }
+
+    if _run_init:
+      kwargs["_run_init"] = True
+    return subclass(**kwargs)
+
+  @staticmethod
+  def _to_spec_helper(v):
+    if isinstance(v, Specifiable):
+      return v.to_spec()
+
+    if isinstance(v, List):
+      return [Specifiable._to_spec_helper(e) for e in v]
+
+    return v
+
+  def to_spec(self) -> Spec:
+    """
+    Generate a spec from a `Specifiable` subclass object.
+    """
+    if getattr(type(self), 'spec_type', None) is None:
+      raise ValueError(
+          f"'{type(self).__name__}' not registered as Specifiable. "
+          f"Decorate ({type(self).__name__}) with @specifiable")
+
+    args = {k: self._to_spec_helper(v) for k, v in self.init_kwargs.items()}
+
+    return Spec(type=self.__class__.spec_type, config=args)
+
+
+# Register a `Specifiable` subclass in `KNOWN_SPECIFIABLE`
+def _register(cls, spec_type=None, error_if_exists=True) -> None:
+  if spec_type is None:
+    # By default, spec type is the class name. Users can override this with
+    # other unique identifier.
+    spec_type = cls.__name__
+
+  subspace = _class_to_subspace(cls)
+  if subspace in _KNOWN_SPECIFIABLE:
+    if spec_type in _KNOWN_SPECIFIABLE[subspace] and error_if_exists:
+      raise ValueError(
+          f"{spec_type} is already registered for "
+          f"specifiable class {_KNOWN_SPECIFIABLE[subspace]}. "
+          "Please specify a different spec_type by "
+          "@specifiable(spec_type=...) or ignore the error by "
+          "@specifiable(error_if_exists=False).")
+  else:
+    _KNOWN_SPECIFIABLE[subspace] = {}
+  _KNOWN_SPECIFIABLE[subspace][spec_type] = cls

Review Comment:
   Good idea. I've made the changes in the new commit.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to