spark git commit: [SPARK-10094] Pyspark ML Feature transformers marked as experimental

meng Tue, 08 Sep 2015 21:26:40 -0700

Repository: spark
Updated Branches:
  refs/heads/master 3a11e50e2 -> 0e2f21633



[SPARK-10094] Pyspark ML Feature transformers marked as experimental

Modified the class-level docstrings of all feature transformers in pyspark.ml
to mark them as experimental by adding a `.. note:: Experimental` line.

Author: noelsmith <m...@noelsmith.com>

Closes #8623 from noel-smith/SPARK-10094-mark-pyspark-ml-trans-exp.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0e2f2163
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0e2f2163
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0e2f2163

Branch: refs/heads/master
Commit: 0e2f2163314972f6be18e3453c64314d1bee7bb9
Parents: 3a11e50
Author: noelsmith <m...@noelsmith.com>
Authored: Tue Sep 8 21:26:20 2015 -0700
Committer: Xiangrui Meng <m...@databricks.com>
Committed: Tue Sep 8 21:26:20 2015 -0700

----------------------------------------------------------------------
 python/pyspark/ml/feature.py | 52 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/0e2f2163/python/pyspark/ml/feature.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index d955307..a7c5b2b 100644
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -36,6 +36,8 @@ __all__ = ['Binarizer', 'Bucketizer', 'DCT', 'ElementwiseProduct', 'HashingTF',
 @inherit_doc
 class Binarizer(JavaTransformer, HasInputCol, HasOutputCol):
     """
+    .. note:: Experimental
+
     Binarize a column of continuous features given a threshold.
 
     >>> df = sqlContext.createDataFrame([(0.5,)], ["values"])
@@ -92,6 +94,8 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol):
 @inherit_doc
 class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol):
     """
+    .. note:: Experimental
+
     Maps a column of continuous features to a column of feature buckets.
 
     >>> df = sqlContext.createDataFrame([(0.1,), (0.4,), (1.2,), (1.5,)], ["values"])
@@ -169,6 +173,8 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol):
 @inherit_doc
 class DCT(JavaTransformer, HasInputCol, HasOutputCol):
     """
+    .. note:: Experimental
+
     A feature transformer that takes the 1D discrete cosine transform
     of a real vector. No zero padding is performed on the input vector.
     It returns a real vector of the same length representing the DCT.
@@ -232,6 +238,8 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol):
 @inherit_doc
 class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol):
     """
+    .. note:: Experimental
+
     Outputs the Hadamard product (i.e., the element-wise product) of each input vector
     with a provided "weight" vector. In other words, it scales each column of the dataset
     by a scalar multiplier.
@@ -289,6 +297,8 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol):
 @inherit_doc
 class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures):
     """
+    .. note:: Experimental
+
     Maps a sequence of terms to their term frequencies using the
     hashing trick.
 
@@ -327,6 +337,8 @@ class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures):
 @inherit_doc
 class IDF(JavaEstimator, HasInputCol, HasOutputCol):
     """
+    .. note:: Experimental
+
     Compute the Inverse Document Frequency (IDF) given a collection of documents.
 
     >>> from pyspark.mllib.linalg import DenseVector
@@ -387,6 +399,8 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol):
 
 class IDFModel(JavaModel):
     """
+    .. note:: Experimental
+
     Model fitted by IDF.
     """
 
@@ -395,6 +409,8 @@ class IDFModel(JavaModel):
 @ignore_unicode_prefix
 class NGram(JavaTransformer, HasInputCol, HasOutputCol):
     """
+    .. note:: Experimental
+
     A feature transformer that converts the input array of strings into an array of n-grams. Null
     values in the input array are ignored.
     It returns an array of n-grams where each n-gram is represented by a space-separated string of
@@ -463,6 +479,8 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol):
 @inherit_doc
 class Normalizer(JavaTransformer, HasInputCol, HasOutputCol):
     """
+    .. note:: Experimental
+
      Normalize a vector to have unit norm using the given p-norm.
 
     >>> from pyspark.mllib.linalg import Vectors
@@ -519,6 +537,8 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol):
 @inherit_doc
 class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol):
     """
+    .. note:: Experimental
+
     A one-hot encoder that maps a column of category indices to a
     column of binary vectors, with at most a single one-value per row
     that indicates the input category index.
@@ -591,6 +611,8 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol):
 @inherit_doc
 class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol):
     """
+    .. note:: Experimental
+
     Perform feature expansion in a polynomial space. As said in wikipedia of Polynomial Expansion,
     which is available at `http://en.wikipedia.org/wiki/Polynomial_expansion`, "In mathematics, an
     expansion of a product of sums expresses it as a sum of products by using the fact that
@@ -649,6 +671,8 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol):
 @ignore_unicode_prefix
 class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
     """
+    .. note:: Experimental
+
     A regex based tokenizer that extracts tokens either by using the
     provided regex pattern (in Java dialect) to split the text
     (default) or repeatedly matching the regex (if gaps is false).
@@ -746,6 +770,8 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
 @inherit_doc
 class SQLTransformer(JavaTransformer):
     """
+    .. note:: Experimental
+
     Implements the transforms which are defined by SQL statement.
     Currently we only support SQL syntax like 'SELECT ... FROM __THIS__'
     where '__THIS__' represents the underlying table of the input dataset.
@@ -797,6 +823,8 @@ class SQLTransformer(JavaTransformer):
 @inherit_doc
 class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol):
     """
+    .. note:: Experimental
+
     Standardizes features by removing the mean and scaling to unit variance using column summary
     statistics on the samples in the training set.
 
@@ -870,6 +898,8 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol):
 
 class StandardScalerModel(JavaModel):
     """
+    .. note:: Experimental
+
     Model fitted by StandardScaler.
     """
 
@@ -891,6 +921,8 @@ class StandardScalerModel(JavaModel):
 @inherit_doc
 class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol):
     """
+    .. note:: Experimental
+
     A label indexer that maps a string column of labels to an ML column of label indices.
     If the input column is numeric, we cast it to string and index the string values.
     The indices are in [0, numLabels), ordered by label frequencies.
@@ -929,6 +961,8 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol):
 
 class StringIndexerModel(JavaModel):
     """
+    .. note:: Experimental
+
     Model fitted by StringIndexer.
     """
 
@@ -1006,6 +1040,8 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol):
 @ignore_unicode_prefix
 class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol):
     """
+    .. note:: Experimental
+
     A tokenizer that converts the input string to lowercase and then
     splits it by white spaces.
 
@@ -1051,6 +1087,8 @@ class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol):
 @inherit_doc
 class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol):
     """
+    .. note:: Experimental
+
     A feature transformer that merges multiple columns into a vector column.
 
     >>> df = sqlContext.createDataFrame([(1, 0, 3)], ["a", "b", "c"])
@@ -1087,6 +1125,8 @@ class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol):
 @inherit_doc
 class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol):
     """
+    .. note:: Experimental
+
     Class for indexing categorical feature columns in a dataset of [[Vector]].
 
     This has 2 usage modes:
@@ -1186,6 +1226,8 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol):
 
 class VectorIndexerModel(JavaModel):
     """
+    .. note:: Experimental
+
     Model fitted by VectorIndexer.
     """
 
@@ -1194,6 +1236,8 @@ class VectorIndexerModel(JavaModel):
 @ignore_unicode_prefix
 class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, HasOutputCol):
     """
+    .. note:: Experimental
+
     Word2Vec trains a model of `Map(String, Vector)`, i.e. transforms a word into a code for further
     natural language processing or machine learning process.
 
@@ -1307,6 +1351,8 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
 
 class Word2VecModel(JavaModel):
     """
+    .. note:: Experimental
+
     Model fitted by Word2Vec.
     """
 
@@ -1332,6 +1378,8 @@ class Word2VecModel(JavaModel):
 @inherit_doc
 class PCA(JavaEstimator, HasInputCol, HasOutputCol):
     """
+    .. note:: Experimental
+
     PCA trains a model to project vectors to a low-dimensional space using PCA.
 
     >>> from pyspark.mllib.linalg import Vectors
@@ -1387,6 +1435,8 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol):
 
 class PCAModel(JavaModel):
     """
+    .. note:: Experimental
+
     Model fitted by PCA.
     """
 
@@ -1470,6 +1520,8 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol):
 
 class RFormulaModel(JavaModel):
     """
+    .. note:: Experimental
+
     Model fitted by :py:class:`RFormula`.
     """
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-10094] Pyspark ML Feature transformers marked as experimental

Reply via email to