GitHub user jkbradley commented on a diff in the pull request:

    https://github.com/apache/spark/pull/18742#discussion_r131332176
  
    --- Diff: python/pyspark/ml/util.py ---
    @@ -283,3 +341,198 @@ def numFeatures(self):
             Returns the number of features the model was trained on. If unknown, returns -1
             """
             return self._call_java("numFeatures")
    +
    +
    +@inherit_doc
    +class DefaultParamsWritable(MLWritable):
    +    """
    +    .. note:: DeveloperApi
    +
    +    Helper trait for making simple `Params` types writable.  If a `Params` class stores
    +    all data as [[pyspark.ml.param.Param]] values, then extending this trait will provide
    +    a default implementation of writing saved instances of the class.
    +    This only handles simple [[pyspark.ml.param.Param]] types; e.g., it will not handle
    +    [[pyspark.sql.Dataset]].
    +
    +    @see `DefaultParamsReadable`, the counterpart to this trait
    +
    +    .. versionadded:: 2.3.0
    +    """
    +
    +    def write(self):
    +        """Returns a DefaultParamsWriter instance for this class."""
    +        if isinstance(self, Params):
    +            return DefaultParamsWriter(self)
    +        else:
    +            raise TypeError("Cannot use DefaultParamsWritable with type %s because it does not "
    +                            "extend Params." % type(self))
    +
    +
    +@inherit_doc
    +class DefaultParamsWriter(MLWriter):
    +    """
    +    .. note:: DeveloperApi
    +
    +    Class for writing Estimators and Transformers whose parameters are JSON-serializable.
    +
    +    .. versionadded:: 2.3.0
    +    """
    +
    +    def __init__(self, instance):
    +        super(DefaultParamsWriter, self).__init__()
    +        self.instance = instance
    +
    +    def saveImpl(self, path):
    +        DefaultParamsWriter.save_metadata(self.instance, path, self.sc)
    +
    +    @staticmethod
    +    def save_metadata(instance, path, sc, extraMetadata=None, paramMap=None):
    +        """
    +        Saves metadata + Params to: path + "/metadata"
    +        - class
    +        - timestamp
    +        - sparkVersion
    +        - uid
    +        - paramMap
    +        - (optionally, extra metadata)
    +        @param extraMetadata  Extra metadata to be saved at the same level as uid, paramMap, etc.
    +        @param paramMap  If given, this is saved in the "paramMap" field.
    +        """
    +        metadataPath = os.path.join(path, "metadata")
    +        metadataJson = DefaultParamsWriter._get_metadata_to_save(instance,
    +                                                                 sc,
    +                                                                 extraMetadata,
    +                                                                 paramMap)
    +        sc.parallelize([metadataJson], 1).saveAsTextFile(metadataPath)
    +
    +    @staticmethod
    +    def _get_metadata_to_save(instance, sc, extraMetadata=None, paramMap=None):
    +        """
    +        Helper for [[save_metadata()]] which extracts the JSON to save.
    +        This is useful for ensemble models which need to save metadata for many sub-models.
    +
    +        @see [[save_metadata()]] for details on what this includes.
    +        """
    +        uid = instance.uid
    +        cls = instance.__module__ + '.' + instance.__class__.__name__
    +        params = instance.extractParamMap()
    +        jsonParams = {}
    +        if paramMap is not None:
    +            jsonParams = paramMap
    +        else:
    +            for p in params:
    +                jsonParams[p.name] = params[p]
    +        basicMetadata = {"class": cls, "timestamp": long(round(time.time() * 1000)),
    +                         "sparkVersion": sc.version, "uid": uid, "paramMap": jsonParams}
    +        if extraMetadata is not None:
    +            basicMetadata.update(extraMetadata)
    +        return json.dumps(basicMetadata, separators=[',', ':'])
    +
    +
    +@inherit_doc
    +class DefaultParamsReadable(MLReadable):
    +    """
    +    .. note:: DeveloperApi
    +
    +    Helper trait for making simple `Params` types readable.  If a `Params` class stores
    +    all data as [[pyspark.ml.param.Param]] values, then extending this trait will provide
    --- End diff --
    
    For Python docs, follow other examples such as ```:py:class:`MLWriter` ```.
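
    For example, the `DefaultParamsWritable` docstring above could use that style in place of the Scala-style `[[...]]` links. A sketch of the suggested wording (mapping `[[pyspark.sql.Dataset]]` to ```:py:class:`pyspark.sql.DataFrame` ``` is my assumption, since the Python API has no `Dataset`):

    ```python
    @inherit_doc
    class DefaultParamsWritable(MLWritable):
        """
        .. note:: DeveloperApi

        Helper trait for making simple :py:class:`Params` types writable.
        If a :py:class:`Params` class stores all data as
        :py:class:`~pyspark.ml.param.Param` values, then extending this
        trait will provide a default implementation of writing saved
        instances of the class. This only handles simple
        :py:class:`~pyspark.ml.param.Param` types; e.g., it will not
        handle :py:class:`~pyspark.sql.DataFrame`.

        .. seealso:: :py:class:`DefaultParamsReadable`, the counterpart to this trait.

        .. versionadded:: 2.3.0
        """
    ```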


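    For context, here is a quick usage sketch of the new API (hypothetical, not part of this PR: `MyTransformer` and the sample metadata line are illustrative; it assumes an active SparkSession and the `DefaultParamsReader` counterpart added elsewhere in this diff):

    ```python
    from pyspark.sql import SparkSession
    from pyspark.ml import Transformer
    from pyspark.ml.util import DefaultParamsReadable, DefaultParamsWritable

    # save_metadata() writes through self.sc, so a SparkContext must be running.
    spark = SparkSession.builder.master("local[1]").getOrCreate()


    class MyTransformer(Transformer, DefaultParamsWritable, DefaultParamsReadable):
        """A no-op Transformer; all of its Params are JSON-serializable."""

        def _transform(self, dataset):
            return dataset


    tf = MyTransformer()
    tf.write().save("/tmp/my-transformer")   # writes /tmp/my-transformer/metadata
    loaded = MyTransformer.load("/tmp/my-transformer")
    assert loaded.uid == tf.uid

    # The metadata file holds a single JSON line shaped like:
    # {"class": "__main__.MyTransformer", "timestamp": 1501234567890,
    #  "sparkVersion": "2.3.0", "uid": "MyTransformer_abc123", "paramMap": {}}
    ```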