robertwb commented on a change in pull request #16917:
URL: https://github.com/apache/beam/pull/16917#discussion_r813233151



##########
File path: sdks/python/apache_beam/ml/inference/model.py
##########
@@ -0,0 +1,74 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import abc
+
+VALID_PYTORCH_DEVICE_TYPES = ['CPU', 'GPU']
+VALID_SKLEARN_SERIALIZATION_TYPES = ['JOBLIB', 'PICKLE']
+
+
+class RunInferenceModel(object):
+  '''
+  Base class for the RunInference Model.
+  This class contains information about
+  a model that is needed
+  '''
+  def __init__(self, model_url):
+    self._model_url = model_url
+    self._validate_model()
+
+  @abc.abstractmethod
+  def _validate_model(self):
+    raise NotImplementedError("Please implement _validate_model")
+
+
+class PytorchModel(RunInferenceModel):
+  '''
+  This class wraps the PyTorch model, and other
+  PyTorch-specific parameters
+  '''
+  def __init__(self, model_url: str, device: str):
+    super().__init__(model_url)
+    self._device = device
+    self._validate_device()
+
+  def _validate_model(self):
+    pass
+
+  def _validate_device(self):
+    if self._device not in VALID_PYTORCH_DEVICE_TYPES:
+      raise ValueError(
+          'Device type must be one of ' + VALID_PYTORCH_DEVICE_TYPES)
+
+
+class SklearnModel(RunInferenceModel):
+  '''
+  This class wraps the scikit-learn model, and other
+  scikit-learn-specific parameters
+  '''
+  def __init__(self, model_url, serialization_method):
+    super().__init__(model_url)
+    self._serialization_method = serialization_method
+
+  def _validate_model(self):
+    pass
+
+  def _validate_serialization(self):

Review comment:
       Did you intend to call this?

##########
File path: sdks/python/apache_beam/ml/inference/model.py
##########
@@ -0,0 +1,74 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import abc
+
+VALID_PYTORCH_DEVICE_TYPES = ['CPU', 'GPU']
+VALID_SKLEARN_SERIALIZATION_TYPES = ['JOBLIB', 'PICKLE']
+
+
+class RunInferenceModel(object):
+  '''
+  Base class for the RunInference Model.
+  This class contains information about
+  a model that is needed
+  '''
+  def __init__(self, model_url):
+    self._model_url = model_url
+    self._validate_model()
+
+  @abc.abstractmethod
+  def _validate_model(self):
+    raise NotImplementedError("Please implement _validate_model")
+
+
+class PytorchModel(RunInferenceModel):
+  '''
+  This class wraps the PyTorch model, and other
+  PyTorch-specific parameters
+  '''
+  def __init__(self, model_url: str, device: str):
+    super().__init__(model_url)
+    self._device = device
+    self._validate_device()
+
+  def _validate_model(self):
+    pass
+
+  def _validate_device(self):
+    if self._device not in VALID_PYTORCH_DEVICE_TYPES:
+      raise ValueError(
+          'Device type must be one of ' + VALID_PYTORCH_DEVICE_TYPES)
+
+
+class SklearnModel(RunInferenceModel):
+  '''
+  This class wraps the scikit-learn model, and other
+  scikit-learn-specific parameters
+  '''
+  def __init__(self, model_url, serialization_method):

Review comment:
       Does it make sense to have a default serialization method? (I'm not sure 
what "serialization method" means here; docs would be good.)

##########
File path: sdks/python/apache_beam/ml/inference/run_inference.py
##########
@@ -0,0 +1,43 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import apache_beam as beam
+from apache_beam.ml.inference.model import (
+    PytorchModel, RunInferenceModel, SklearnModel)

Review comment:
       Ideally we should not have to import specific implementations here, just 
RunInferenceModel.

##########
File path: sdks/python/apache_beam/ml/inference/model.py
##########
@@ -0,0 +1,74 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import abc
+
+VALID_PYTORCH_DEVICE_TYPES = ['CPU', 'GPU']

Review comment:
       Should these be class-level variables on the pytorch/sklearn 
implementations?

##########
File path: sdks/python/apache_beam/ml/inference/model.py
##########
@@ -0,0 +1,74 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import abc
+
+VALID_PYTORCH_DEVICE_TYPES = ['CPU', 'GPU']
+VALID_SKLEARN_SERIALIZATION_TYPES = ['JOBLIB', 'PICKLE']
+
+
+class RunInferenceModel(object):
+  '''
+  Base class for the RunInference Model.
+  This class contains information about
+  a model that is needed
+  '''
+  def __init__(self, model_url):
+    self._model_url = model_url
+    self._validate_model()
+
+  @abc.abstractmethod
+  def _validate_model(self):
+    raise NotImplementedError("Please implement _validate_model")
+
+
+class PytorchModel(RunInferenceModel):

Review comment:
       It feels like PytorchModel and SklearnModel should each be in its own 
file, only imported if one or the other is needed, rather than having a single 
module that holds all model types. 

##########
File path: sdks/python/apache_beam/ml/inference/run_inference.py
##########
@@ -0,0 +1,43 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import apache_beam as beam
+from apache_beam.ml.inference.model import (
+    PytorchModel, RunInferenceModel, SklearnModel)
+
+
+# TODO
+#@beam.typehints.with_input_types(Union[_INPUT_TYPE, Tuple[_K, _INPUT_TYPE]])
+#@beam.typehints.with_output_types(Union[_OUTPUT_TYPE, Tuple[_K, _OUTPUT_TYPE]])
+class RunInference(beam.PTransform):
+  def __init__(self, model: RunInferenceModel, batch_size=None, **kwargs):
+    self._model = model
+    self._batch_size = batch_size
+
+  def expand(self, examples: beam.PCollection) -> beam.PCollection:
+
+    if isinstance(self._model, PytorchModel):

Review comment:
       Generally, isinstance chains like this are an anti-pattern. 
RunInferenceModel should have a sufficiently expressive interface that the 
implementation-specific details can be fully encapsulated in the various 
subclass implementations. 




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to