Repository: spark Updated Branches: refs/heads/master 828fab035 -> 4bab8f599
[SPARK-21856] Add probability and rawPrediction to MLPC for Python. Probability and rawPrediction have been added to MultilayerPerceptronClassifier for Python. Add unit test. Author: Chunsheng Ji <chunsheng...@gmail.com> Closes #19172 from chunshengji/SPARK-21856. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4bab8f59 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4bab8f59 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4bab8f59 Branch: refs/heads/master Commit: 4bab8f5996d94a468a40fde2961ebebafc393508 Parents: 828fab0 Author: Chunsheng Ji <chunsheng...@gmail.com> Authored: Mon Sep 11 16:52:48 2017 +0800 Committer: Yanbo Liang <yblia...@gmail.com> Committed: Mon Sep 11 16:52:48 2017 +0800 ---------------------------------------------------------------------- python/pyspark/ml/classification.py | 15 ++++++++++----- python/pyspark/ml/tests.py | 20 ++++++++++++++++++++ 2 files changed, 30 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/4bab8f59/python/pyspark/ml/classification.py ---------------------------------------------------------------------- diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index f0f42a3..aa747f3 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -1356,7 +1356,8 @@ class NaiveBayesModel(JavaModel, JavaClassificationModel, JavaMLWritable, JavaML @inherit_doc class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter, HasTol, HasSeed, HasStepSize, HasSolver, - JavaMLWritable, JavaMLReadable): + JavaMLWritable, JavaMLReadable, HasProbabilityCol, + HasRawPredictionCol): """ Classifier trainer based on the Multilayer Perceptron. Each layer has sigmoid activation function, output layer has softmax. 
@@ -1425,11 +1426,13 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, @keyword_only def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", maxIter=100, tol=1e-6, seed=None, layers=None, blockSize=128, stepSize=0.03, - solver="l-bfgs", initialWeights=None): + solver="l-bfgs", initialWeights=None, probabilityCol="probability", + rawPredicitionCol="rawPrediction"): """ __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, tol=1e-6, seed=None, layers=None, blockSize=128, stepSize=0.03, \ - solver="l-bfgs", initialWeights=None) + solver="l-bfgs", initialWeights=None, probabilityCol="probability", \ + rawPredicitionCol="rawPrediction") """ super(MultilayerPerceptronClassifier, self).__init__() self._java_obj = self._new_java_obj( @@ -1442,11 +1445,13 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, @since("1.6.0") def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", maxIter=100, tol=1e-6, seed=None, layers=None, blockSize=128, stepSize=0.03, - solver="l-bfgs", initialWeights=None): + solver="l-bfgs", initialWeights=None, probabilityCol="probability", + rawPredicitionCol="rawPrediction"): """ setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, tol=1e-6, seed=None, layers=None, blockSize=128, stepSize=0.03, \ - solver="l-bfgs", initialWeights=None) + solver="l-bfgs", initialWeights=None, probabilityCol="probability", \ + rawPredicitionCol="rawPrediction"): Sets params for MultilayerPerceptronClassifier. 
""" kwargs = self._input_kwargs http://git-wip-us.apache.org/repos/asf/spark/blob/4bab8f59/python/pyspark/ml/tests.py ---------------------------------------------------------------------- diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index 509698f..15d6c76 100755 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -1655,6 +1655,26 @@ class LogisticRegressionTest(SparkSessionTestCase): np.allclose(model.interceptVector.toArray(), [-0.9057, -1.1392, -0.0033], atol=1E-4)) +class MultilayerPerceptronClassifierTest(SparkSessionTestCase): + + def test_raw_and_probability_prediction(self): + + data_path = "data/mllib/sample_multiclass_classification_data.txt" + df = self.spark.read.format("libsvm").load(data_path) + + mlp = MultilayerPerceptronClassifier(maxIter=100, layers=[4, 5, 4, 3], + blockSize=128, seed=123) + model = mlp.fit(df) + test = self.sc.parallelize([Row(features=Vectors.dense(0.1, 0.1, 0.25, 0.25))]).toDF() + result = model.transform(test).head() + expected_prediction = 2.0 + expected_probability = [0.0, 0.0, 1.0] + expected_rawPrediction = [57.3955, -124.5462, 67.9943] + self.assertTrue(result.prediction, expected_prediction) + self.assertTrue(np.allclose(result.probability, expected_probability, atol=1E-4)) + self.assertTrue(np.allclose(result.rawPrediction, expected_rawPrediction, atol=1E-4)) + + class FPGrowthTests(SparkSessionTestCase): def setUp(self): super(FPGrowthTests, self).setUp() --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org