Repository: spark Updated Branches: refs/heads/branch-2.3 abd3e1bb5 -> e66c66cd2
[SPARK-23163][DOC][PYTHON] Sync ML Python API with Scala ## What changes were proposed in this pull request? This syncs the ML Python API with Scala for differences found after the 2.3 QA audit. ## How was this patch tested? NA Author: Bryan Cutler <[email protected]> Closes #20354 from BryanCutler/pyspark-ml-doc-sync-23163. (cherry picked from commit 39ee2acf96f1e1496cff8e4d2614d27fca76d43b) Signed-off-by: Felix Cheung <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e66c66cd Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e66c66cd Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e66c66cd Branch: refs/heads/branch-2.3 Commit: e66c66cd2d4f0cd67cbc2aa6f95135176f1165e4 Parents: abd3e1b Author: Bryan Cutler <[email protected]> Authored: Thu Jan 25 01:48:11 2018 -0800 Committer: Felix Cheung <[email protected]> Committed: Thu Jan 25 01:48:34 2018 -0800 ---------------------------------------------------------------------- python/pyspark/ml/evaluation.py | 8 +++++++- python/pyspark/ml/feature.py | 2 +- python/pyspark/ml/fpm.py | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/e66c66cd/python/pyspark/ml/evaluation.py ---------------------------------------------------------------------- diff --git a/python/pyspark/ml/evaluation.py b/python/pyspark/ml/evaluation.py index aa8dbe7..0cbce9b 100644 --- a/python/pyspark/ml/evaluation.py +++ b/python/pyspark/ml/evaluation.py @@ -334,7 +334,13 @@ class ClusteringEvaluator(JavaEvaluator, HasPredictionCol, HasFeaturesCol, .. note:: Experimental Evaluator for Clustering results, which expects two input - columns: prediction and features. + columns: prediction and features. The metric computes the Silhouette + measure using the squared Euclidean distance. + + The Silhouette is a measure for the validation of the consistency + within clusters. It ranges between 1 and -1, where a value close to + 1 means that the points in a cluster are close to the other points + in the same cluster and far from the points of the other clusters. >>> from pyspark.ml.linalg import Vectors >>> featureAndPredictions = map(lambda x: (Vectors.dense(x[0]), x[1]), http://git-wip-us.apache.org/repos/asf/spark/blob/e66c66cd/python/pyspark/ml/feature.py ---------------------------------------------------------------------- diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index eb79b19..da85ba7 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -3440,7 +3440,7 @@ class ChiSqSelector(JavaEstimator, HasFeaturesCol, HasOutputCol, HasLabelCol, Ja selectorType = Param(Params._dummy(), "selectorType", "The selector type of the ChisqSelector. " + - "Supported options: numTopFeatures (default), percentile and fpr.", + "Supported options: numTopFeatures (default), percentile, fpr, fdr, fwe.", typeConverter=TypeConverters.toString) numTopFeatures = \ http://git-wip-us.apache.org/repos/asf/spark/blob/e66c66cd/python/pyspark/ml/fpm.py ---------------------------------------------------------------------- diff --git a/python/pyspark/ml/fpm.py b/python/pyspark/ml/fpm.py index dd7dda5..b8dafd4 100644 --- a/python/pyspark/ml/fpm.py +++ b/python/pyspark/ml/fpm.py @@ -144,7 +144,7 @@ class FPGrowthModel(JavaModel, JavaMLWritable, JavaMLReadable): @since("2.2.0") def associationRules(self): """ - Data with three columns: + DataFrame with three columns: * `antecedent` - Array of the same type as the input column. * `consequent` - Array of the same type as the input column. * `confidence` - Confidence for the rule (`DoubleType`). --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
