Github user jkbradley commented on a diff in the pull request:
https://github.com/apache/spark/pull/10469#discussion_r50903200
--- Diff: python/pyspark/ml/util.py ---
@@ -52,3 +71,133 @@ def _randomUID(cls):
concatenates the class name, "_", and 12 random hex chars.
"""
return cls.__name__ + "_" + uuid.uuid4().hex[12:]
+
+
+@inherit_doc
+class MLWriter(object):
+ """
+ .. note:: Experimental
+
+ Utility class that can save ML instances.
+
+ .. versionadded:: 2.0.0
+ """
+
+ def __init__(self, instance):
+ instance._transfer_params_to_java()
+ self._jwrite = instance._java_obj.write()
+
+ @since("2.0.0")
+ def save(self, path):
+ """Saves the ML instances to the input path."""
+ self._jwrite.save(path)
+
+ @since("2.0.0")
+ def overwrite(self):
+ """Overwrites if the output path already exists."""
+ self._jwrite.overwrite()
+ return self
+
+ @since("2.0.0")
+ def context(self, sqlContext):
+ """Sets the SQL context to use for saving."""
+ self._jwrite.context(sqlContext._ssql_ctx)
+ return self
+
+
+@inherit_doc
+class MLWritable(object):
+ """
+ .. note:: Experimental
+
+ Mixin for ML instances that provide MLWriter through their Scala
+ implementation.
+
+ .. versionadded:: 2.0.0
+ """
+
+ @since("2.0.0")
+ def write(self):
+ """Returns an MLWriter instance for this ML instance."""
+ return MLWriter(self)
+
+ @since("2.0.0")
+ def save(self, path):
+ """Save this ML instance to the given path, a shortcut of `write().save(path)`."""
+ if not isinstance(path, basestring):
+ raise TypeError("path should be a basestring, got type %s" % type(path))
+ self.write().save(path)
+
+
+@inherit_doc
+class MLReader(object):
+ """
+ .. note:: Experimental
+
+ Utility class that can load ML instances.
+
+ .. versionadded:: 2.0.0
+ """
+
+ def __init__(self, instance):
+ self._instance = instance
+ self._jread = instance._java_obj.read()
+
+ @since("2.0.0")
+ def load(self, path):
+ """Loads the ML component from the input path."""
+ java_obj = self._jread.load(path)
+ self._instance._java_obj = java_obj
+ self._instance.uid = java_obj.uid()
+ self._instance._transfer_params_from_java(True)
+ return self._instance
+
+ @since("2.0.0")
+ def context(self, sqlContext):
+ """Sets the SQL context to use for loading."""
+ self._jread.context(sqlContext._ssql_ctx)
+ return self
+
+
+@inherit_doc
+class MLReadable(object):
+ """
+ .. note:: Experimental
+
+ Mixin for objects that provide MLReader using its Scala implementation.
--- End diff --
Could this be kept more general by putting JVM-specific things in MLReader?
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working,
please contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]