spark git commit: [SPARK-11820][ML][PYSPARK] PySpark LiR & LoR should support weightCol

meng Wed, 18 Nov 2015 13:32:31 -0800

Repository: spark
Updated Branches:
  refs/heads/master e222d7584 -> 603a721c2



[SPARK-11820][ML][PYSPARK] PySpark LiR & LoR should support weightCol

[SPARK-7685](https://issues.apache.org/jira/browse/SPARK-7685) and 
[SPARK-9642](https://issues.apache.org/jira/browse/SPARK-9642) have already 
supported setting weight column for ```LogisticRegression``` and 
```LinearRegression```. It's a very important feature, PySpark should also 
support. mengxr

Author: Yanbo Liang <yblia...@gmail.com>

Closes #9811 from yanboliang/spark-11820.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/603a721c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/603a721c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/603a721c

Branch: refs/heads/master
Commit: 603a721c21488e17c15c45ce1de893e6b3d02274
Parents: e222d75
Author: Yanbo Liang <yblia...@gmail.com>
Authored: Wed Nov 18 13:32:06 2015 -0800
Committer: Xiangrui Meng <m...@databricks.com>
Committed: Wed Nov 18 13:32:06 2015 -0800

----------------------------------------------------------------------
 python/pyspark/ml/classification.py | 17 +++++++++--------
 python/pyspark/ml/regression.py     | 16 ++++++++--------
 2 files changed, 17 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/603a721c/python/pyspark/ml/classification.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/classification.py 
b/python/pyspark/ml/classification.py
index 603f2c7..4a2982e 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -36,7 +36,8 @@ __all__ = ['LogisticRegression', 'LogisticRegressionModel', 
'DecisionTreeClassif
 @inherit_doc
 class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, 
HasPredictionCol, HasMaxIter,
                          HasRegParam, HasTol, HasProbabilityCol, 
HasRawPredictionCol,
-                         HasElasticNetParam, HasFitIntercept, 
HasStandardization, HasThresholds):
+                         HasElasticNetParam, HasFitIntercept, 
HasStandardization, HasThresholds,
+                         HasWeightCol):
     """
     Logistic regression.
     Currently, this class only supports binary classification.
@@ -44,9 +45,9 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, 
HasLabelCol, HasPredicti
     >>> from pyspark.sql import Row
     >>> from pyspark.mllib.linalg import Vectors
     >>> df = sc.parallelize([
-    ...     Row(label=1.0, features=Vectors.dense(1.0)),
-    ...     Row(label=0.0, features=Vectors.sparse(1, [], []))]).toDF()
-    >>> lr = LogisticRegression(maxIter=5, regParam=0.01)
+    ...     Row(label=1.0, weight=2.0, features=Vectors.dense(1.0)),
+    ...     Row(label=0.0, weight=2.0, features=Vectors.sparse(1, [], 
[]))]).toDF()
+    >>> lr = LogisticRegression(maxIter=5, regParam=0.01, weightCol="weight")
     >>> model = lr.fit(df)
     >>> model.weights
     DenseVector([5.5...])
@@ -80,12 +81,12 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, 
HasLabelCol, HasPredicti
     def __init__(self, featuresCol="features", labelCol="label", 
predictionCol="prediction",
                  maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, 
fitIntercept=True,
                  threshold=0.5, thresholds=None, probabilityCol="probability",
-                 rawPredictionCol="rawPrediction", standardization=True):
+                 rawPredictionCol="rawPrediction", standardization=True, 
weightCol=None):
         """
         __init__(self, featuresCol="features", labelCol="label", 
predictionCol="prediction", \
                  maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, 
fitIntercept=True, \
                  threshold=0.5, thresholds=None, probabilityCol="probability", 
\
-                 rawPredictionCol="rawPrediction", standardization=True)
+                 rawPredictionCol="rawPrediction", standardization=True, 
weightCol=None)
         If the threshold and thresholds Params are both set, they must be 
equivalent.
         """
         super(LogisticRegression, self).__init__()
@@ -105,12 +106,12 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, 
HasLabelCol, HasPredicti
     def setParams(self, featuresCol="features", labelCol="label", 
predictionCol="prediction",
                   maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, 
fitIntercept=True,
                   threshold=0.5, thresholds=None, probabilityCol="probability",
-                  rawPredictionCol="rawPrediction", standardization=True):
+                  rawPredictionCol="rawPrediction", standardization=True, 
weightCol=None):
         """
         setParams(self, featuresCol="features", labelCol="label", 
predictionCol="prediction", \
                   maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, 
fitIntercept=True, \
                   threshold=0.5, thresholds=None, 
probabilityCol="probability", \
-                  rawPredictionCol="rawPrediction", standardization=True)
+                  rawPredictionCol="rawPrediction", standardization=True, 
weightCol=None)
         Sets params for logistic regression.
         If the threshold and thresholds Params are both set, they must be 
equivalent.
         """

http://git-wip-us.apache.org/repos/asf/spark/blob/603a721c/python/pyspark/ml/regression.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index 7648bf1..944e648 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -35,7 +35,7 @@ __all__ = ['AFTSurvivalRegression', 
'AFTSurvivalRegressionModel',
 @inherit_doc
 class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, 
HasPredictionCol, HasMaxIter,
                        HasRegParam, HasTol, HasElasticNetParam, 
HasFitIntercept,
-                       HasStandardization, HasSolver):
+                       HasStandardization, HasSolver, HasWeightCol):
     """
     Linear regression.
 
@@ -50,9 +50,9 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, 
HasLabelCol, HasPrediction
 
     >>> from pyspark.mllib.linalg import Vectors
     >>> df = sqlContext.createDataFrame([
-    ...     (1.0, Vectors.dense(1.0)),
-    ...     (0.0, Vectors.sparse(1, [], []))], ["label", "features"])
-    >>> lr = LinearRegression(maxIter=5, regParam=0.0, solver="normal")
+    ...     (1.0, 2.0, Vectors.dense(1.0)),
+    ...     (0.0, 2.0, Vectors.sparse(1, [], []))], ["label", "weight", 
"features"])
+    >>> lr = LinearRegression(maxIter=5, regParam=0.0, solver="normal", 
weightCol="weight")
     >>> model = lr.fit(df)
     >>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], 
["features"])
     >>> abs(model.transform(test0).head().prediction - (-1.0)) < 0.001
@@ -75,11 +75,11 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, 
HasLabelCol, HasPrediction
     @keyword_only
     def __init__(self, featuresCol="features", labelCol="label", 
predictionCol="prediction",
                  maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, 
fitIntercept=True,
-                 standardization=True, solver="auto"):
+                 standardization=True, solver="auto", weightCol=None):
         """
         __init__(self, featuresCol="features", labelCol="label", 
predictionCol="prediction", \
                  maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, 
fitIntercept=True, \
-                 standardization=True, solver="auto")
+                 standardization=True, solver="auto", weightCol=None)
         """
         super(LinearRegression, self).__init__()
         self._java_obj = self._new_java_obj(
@@ -92,11 +92,11 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, 
HasLabelCol, HasPrediction
     @since("1.4.0")
     def setParams(self, featuresCol="features", labelCol="label", 
predictionCol="prediction",
                   maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, 
fitIntercept=True,
-                  standardization=True, solver="auto"):
+                  standardization=True, solver="auto", weightCol=None):
         """
         setParams(self, featuresCol="features", labelCol="label", 
predictionCol="prediction", \
                   maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, 
fitIntercept=True, \
-                  standardization=True, solver="auto")
+                  standardization=True, solver="auto", weightCol=None)
         Sets params for linear regression.
         """
         kwargs = self.setParams._input_kwargs


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-11820][ML][PYSPARK] PySpark LiR & LoR should support weightCol

Reply via email to