spark git commit: [SPARK-21856] Add probability and rawPrediction to MLPC for Python

yliang Mon, 11 Sep 2017 01:53:11 -0700

Repository: spark
Updated Branches:
  refs/heads/master 828fab035 -> 4bab8f599



[SPARK-21856] Add probability and rawPrediction to MLPC for Python

Probability and rawPrediction have been added to MultilayerPerceptronClassifier
for Python.

Add unit test.

Author: Chunsheng Ji <[email protected]>

Closes #19172 from chunshengji/SPARK-21856.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4bab8f59
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4bab8f59
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4bab8f59

Branch: refs/heads/master
Commit: 4bab8f5996d94a468a40fde2961ebebafc393508
Parents: 828fab0
Author: Chunsheng Ji <[email protected]>
Authored: Mon Sep 11 16:52:48 2017 +0800
Committer: Yanbo Liang <[email protected]>
Committed: Mon Sep 11 16:52:48 2017 +0800

----------------------------------------------------------------------
 python/pyspark/ml/classification.py | 15 ++++++++++-----
 python/pyspark/ml/tests.py          | 20 ++++++++++++++++++++
 2 files changed, 30 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4bab8f59/python/pyspark/ml/classification.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/classification.py 
b/python/pyspark/ml/classification.py
index f0f42a3..aa747f3 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -1356,7 +1356,8 @@ class NaiveBayesModel(JavaModel, JavaClassificationModel, 
JavaMLWritable, JavaML
 @inherit_doc
 class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, 
HasLabelCol, HasPredictionCol,
                                      HasMaxIter, HasTol, HasSeed, HasStepSize, 
HasSolver,
-                                     JavaMLWritable, JavaMLReadable):
+                                     JavaMLWritable, JavaMLReadable, 
HasProbabilityCol,
+                                     HasRawPredictionCol):
     """
     Classifier trainer based on the Multilayer Perceptron.
     Each layer has sigmoid activation function, output layer has softmax.
@@ -1425,11 +1426,13 @@ class MultilayerPerceptronClassifier(JavaEstimator, 
HasFeaturesCol, HasLabelCol,
     @keyword_only
     def __init__(self, featuresCol="features", labelCol="label", 
predictionCol="prediction",
                  maxIter=100, tol=1e-6, seed=None, layers=None, blockSize=128, 
stepSize=0.03,
-                 solver="l-bfgs", initialWeights=None):
+                 solver="l-bfgs", initialWeights=None, 
probabilityCol="probability",
+                 rawPredicitionCol="rawPrediction"):
         """
         __init__(self, featuresCol="features", labelCol="label", 
predictionCol="prediction", \
                  maxIter=100, tol=1e-6, seed=None, layers=None, blockSize=128, 
stepSize=0.03, \
-                 solver="l-bfgs", initialWeights=None)
+                 solver="l-bfgs", initialWeights=None, 
probabilityCol="probability", \
+                 rawPredicitionCol="rawPrediction")
         """
         super(MultilayerPerceptronClassifier, self).__init__()
         self._java_obj = self._new_java_obj(
@@ -1442,11 +1445,13 @@ class MultilayerPerceptronClassifier(JavaEstimator, 
HasFeaturesCol, HasLabelCol,
     @since("1.6.0")
     def setParams(self, featuresCol="features", labelCol="label", 
predictionCol="prediction",
                   maxIter=100, tol=1e-6, seed=None, layers=None, 
blockSize=128, stepSize=0.03,
-                  solver="l-bfgs", initialWeights=None):
+                  solver="l-bfgs", initialWeights=None, 
probabilityCol="probability",
+                  rawPredicitionCol="rawPrediction"):
         """
         setParams(self, featuresCol="features", labelCol="label", 
predictionCol="prediction", \
                   maxIter=100, tol=1e-6, seed=None, layers=None, 
blockSize=128, stepSize=0.03, \
-                  solver="l-bfgs", initialWeights=None)
+                  solver="l-bfgs", initialWeights=None, 
probabilityCol="probability", \
+                  rawPredicitionCol="rawPrediction"):
         Sets params for MultilayerPerceptronClassifier.
         """
         kwargs = self._input_kwargs

http://git-wip-us.apache.org/repos/asf/spark/blob/4bab8f59/python/pyspark/ml/tests.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py
index 509698f..15d6c76 100755
--- a/python/pyspark/ml/tests.py
+++ b/python/pyspark/ml/tests.py
@@ -1655,6 +1655,26 @@ class LogisticRegressionTest(SparkSessionTestCase):
             np.allclose(model.interceptVector.toArray(), [-0.9057, -1.1392, 
-0.0033], atol=1E-4))
 
 
+class MultilayerPerceptronClassifierTest(SparkSessionTestCase):
+
+    def test_raw_and_probability_prediction(self):
+
+        data_path = "data/mllib/sample_multiclass_classification_data.txt"
+        df = self.spark.read.format("libsvm").load(data_path)
+
+        mlp = MultilayerPerceptronClassifier(maxIter=100, layers=[4, 5, 4, 3],
+                                             blockSize=128, seed=123)
+        model = mlp.fit(df)
+        test = self.sc.parallelize([Row(features=Vectors.dense(0.1, 0.1, 0.25, 
0.25))]).toDF()
+        result = model.transform(test).head()
+        expected_prediction = 2.0
+        expected_probability = [0.0, 0.0, 1.0]
+        expected_rawPrediction = [57.3955, -124.5462, 67.9943]
+        self.assertTrue(result.prediction, expected_prediction)
+        self.assertTrue(np.allclose(result.probability, expected_probability, 
atol=1E-4))
+        self.assertTrue(np.allclose(result.rawPrediction, 
expected_rawPrediction, atol=1E-4))
+
+
 class FPGrowthTests(SparkSessionTestCase):
     def setUp(self):
         super(FPGrowthTests, self).setUp()


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

spark git commit: [SPARK-21856] Add probability and rawPrediction to MLPC for Python

Reply via email to