Repository: spark Updated Branches: refs/heads/master bba5d7999 -> 99f3c8277
[SPARK-14615][ML][FOLLOWUP] Fix Python examples to use the new ML Vector and Matrix APIs in the ML pipeline based algorithms ## What changes were proposed in this pull request? This PR fixes Python examples to use the new ML Vector and Matrix APIs in the ML pipeline based algorithms. I first ran the shell command `grep -r "from pyspark.mllib" .` and then executed all of the examples it listed. Some of the tests in `ml` produced error messages such as the following: ``` pyspark.sql.utils.IllegalArgumentException: u'requirement failed: Input type must be VectorUDT but got org.apache.spark.mllib.linalg.VectorUDT@f71b0bce.' ``` So I fixed them to use the new APIs, in the same way that some Python tests were fixed in https://github.com/apache/spark/pull/12627 ## How was this patch tested? Manually tested for all the examples listed by `grep -r "from pyspark.mllib" .`. Author: hyukjinkwon <[email protected]> Closes #13393 from HyukjinKwon/SPARK-14615. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/99f3c827 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/99f3c827 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/99f3c827 Branch: refs/heads/master Commit: 99f3c82776fe5ea4f89a9965a288c7447585dc2c Parents: bba5d79 Author: hyukjinkwon <[email protected]> Authored: Fri Jun 10 18:29:26 2016 -0700 Committer: Joseph K. 
Bradley <[email protected]> Committed: Fri Jun 10 18:29:26 2016 -0700 ---------------------------------------------------------------------- .../main/python/ml/aft_survival_regression.py | 2 +- .../src/main/python/ml/chisq_selector_example.py | 2 +- examples/src/main/python/ml/dct_example.py | 2 +- .../python/ml/elementwise_product_example.py | 2 +- .../ml/estimator_transformer_param_example.py | 2 +- examples/src/main/python/ml/pca_example.py | 2 +- .../python/ml/polynomial_expansion_example.py | 2 +- .../src/main/python/ml/simple_params_example.py | 19 +++++++++---------- .../main/python/ml/vector_assembler_example.py | 2 +- .../src/main/python/ml/vector_slicer_example.py | 2 +- 10 files changed, 18 insertions(+), 19 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/99f3c827/examples/src/main/python/ml/aft_survival_regression.py ---------------------------------------------------------------------- diff --git a/examples/src/main/python/ml/aft_survival_regression.py b/examples/src/main/python/ml/aft_survival_regression.py index 9879679..060f017 100644 --- a/examples/src/main/python/ml/aft_survival_regression.py +++ b/examples/src/main/python/ml/aft_survival_regression.py @@ -19,7 +19,7 @@ from __future__ import print_function # $example on$ from pyspark.ml.regression import AFTSurvivalRegression -from pyspark.mllib.linalg import Vectors +from pyspark.ml.linalg import Vectors # $example off$ from pyspark.sql import SparkSession http://git-wip-us.apache.org/repos/asf/spark/blob/99f3c827/examples/src/main/python/ml/chisq_selector_example.py ---------------------------------------------------------------------- diff --git a/examples/src/main/python/ml/chisq_selector_example.py b/examples/src/main/python/ml/chisq_selector_example.py index 8bafb94..5e19ef1 100644 --- a/examples/src/main/python/ml/chisq_selector_example.py +++ b/examples/src/main/python/ml/chisq_selector_example.py @@ 
-20,7 +20,7 @@ from __future__ import print_function from pyspark.sql import SparkSession # $example on$ from pyspark.ml.feature import ChiSqSelector -from pyspark.mllib.linalg import Vectors +from pyspark.ml.linalg import Vectors # $example off$ if __name__ == "__main__": http://git-wip-us.apache.org/repos/asf/spark/blob/99f3c827/examples/src/main/python/ml/dct_example.py ---------------------------------------------------------------------- diff --git a/examples/src/main/python/ml/dct_example.py b/examples/src/main/python/ml/dct_example.py index e36fcde..a4f25df 100644 --- a/examples/src/main/python/ml/dct_example.py +++ b/examples/src/main/python/ml/dct_example.py @@ -19,7 +19,7 @@ from __future__ import print_function # $example on$ from pyspark.ml.feature import DCT -from pyspark.mllib.linalg import Vectors +from pyspark.ml.linalg import Vectors # $example off$ from pyspark.sql import SparkSession http://git-wip-us.apache.org/repos/asf/spark/blob/99f3c827/examples/src/main/python/ml/elementwise_product_example.py ---------------------------------------------------------------------- diff --git a/examples/src/main/python/ml/elementwise_product_example.py b/examples/src/main/python/ml/elementwise_product_example.py index 41727ed..598deae 100644 --- a/examples/src/main/python/ml/elementwise_product_example.py +++ b/examples/src/main/python/ml/elementwise_product_example.py @@ -19,7 +19,7 @@ from __future__ import print_function # $example on$ from pyspark.ml.feature import ElementwiseProduct -from pyspark.mllib.linalg import Vectors +from pyspark.ml.linalg import Vectors # $example off$ from pyspark.sql import SparkSession http://git-wip-us.apache.org/repos/asf/spark/blob/99f3c827/examples/src/main/python/ml/estimator_transformer_param_example.py ---------------------------------------------------------------------- diff --git a/examples/src/main/python/ml/estimator_transformer_param_example.py b/examples/src/main/python/ml/estimator_transformer_param_example.py 
index 0fcae0e..3bd3fd3 100644 --- a/examples/src/main/python/ml/estimator_transformer_param_example.py +++ b/examples/src/main/python/ml/estimator_transformer_param_example.py @@ -20,7 +20,7 @@ Estimator Transformer Param Example. """ # $example on$ -from pyspark.mllib.linalg import Vectors +from pyspark.ml.linalg import Vectors from pyspark.ml.classification import LogisticRegression # $example off$ from pyspark.sql import SparkSession http://git-wip-us.apache.org/repos/asf/spark/blob/99f3c827/examples/src/main/python/ml/pca_example.py ---------------------------------------------------------------------- diff --git a/examples/src/main/python/ml/pca_example.py b/examples/src/main/python/ml/pca_example.py index f1b3cde..414629f 100644 --- a/examples/src/main/python/ml/pca_example.py +++ b/examples/src/main/python/ml/pca_example.py @@ -19,7 +19,7 @@ from __future__ import print_function # $example on$ from pyspark.ml.feature import PCA -from pyspark.mllib.linalg import Vectors +from pyspark.ml.linalg import Vectors # $example off$ from pyspark.sql import SparkSession http://git-wip-us.apache.org/repos/asf/spark/blob/99f3c827/examples/src/main/python/ml/polynomial_expansion_example.py ---------------------------------------------------------------------- diff --git a/examples/src/main/python/ml/polynomial_expansion_example.py b/examples/src/main/python/ml/polynomial_expansion_example.py index 08882bc..9475e33 100644 --- a/examples/src/main/python/ml/polynomial_expansion_example.py +++ b/examples/src/main/python/ml/polynomial_expansion_example.py @@ -19,7 +19,7 @@ from __future__ import print_function # $example on$ from pyspark.ml.feature import PolynomialExpansion -from pyspark.mllib.linalg import Vectors +from pyspark.ml.linalg import Vectors # $example off$ from pyspark.sql import SparkSession http://git-wip-us.apache.org/repos/asf/spark/blob/99f3c827/examples/src/main/python/ml/simple_params_example.py 
---------------------------------------------------------------------- diff --git a/examples/src/main/python/ml/simple_params_example.py b/examples/src/main/python/ml/simple_params_example.py index c57e59d..54fbc2c 100644 --- a/examples/src/main/python/ml/simple_params_example.py +++ b/examples/src/main/python/ml/simple_params_example.py @@ -21,9 +21,8 @@ import pprint import sys from pyspark.ml.classification import LogisticRegression -from pyspark.mllib.linalg import DenseVector -from pyspark.mllib.regression import LabeledPoint -from pyspark.sql import SparkSession +from pyspark.ml.linalg import DenseVector +from pyspark.sql import Row, SparkSession """ A simple example demonstrating ways to specify parameters for Estimators and Transformers. @@ -42,10 +41,10 @@ if __name__ == "__main__": # A LabeledPoint is an Object with two fields named label and features # and Spark SQL identifies these fields and creates the schema appropriately. training = spark.createDataFrame([ - LabeledPoint(1.0, DenseVector([0.0, 1.1, 0.1])), - LabeledPoint(0.0, DenseVector([2.0, 1.0, -1.0])), - LabeledPoint(0.0, DenseVector([2.0, 1.3, 1.0])), - LabeledPoint(1.0, DenseVector([0.0, 1.2, -0.5]))]) + Row(label=1.0, features=DenseVector([0.0, 1.1, 0.1])), + Row(label=0.0, features=DenseVector([2.0, 1.0, -1.0])), + Row(label=0.0, features=DenseVector([2.0, 1.3, 1.0])), + Row(label=1.0, features=DenseVector([0.0, 1.2, -0.5]))]) # Create a LogisticRegression instance with maxIter = 10. # This instance is an Estimator. @@ -77,9 +76,9 @@ if __name__ == "__main__": # prepare test data. 
test = spark.createDataFrame([ - LabeledPoint(1.0, DenseVector([-1.0, 1.5, 1.3])), - LabeledPoint(0.0, DenseVector([3.0, 2.0, -0.1])), - LabeledPoint(0.0, DenseVector([0.0, 2.2, -1.5]))]) + Row(label=1.0, features=DenseVector([-1.0, 1.5, 1.3])), + Row(label=0.0, features=DenseVector([3.0, 2.0, -0.1])), + Row(label=0.0, features=DenseVector([0.0, 2.2, -1.5]))]) # Make predictions on test data using the Transformer.transform() method. # LogisticRegressionModel.transform will only use the 'features' column. http://git-wip-us.apache.org/repos/asf/spark/blob/99f3c827/examples/src/main/python/ml/vector_assembler_example.py ---------------------------------------------------------------------- diff --git a/examples/src/main/python/ml/vector_assembler_example.py b/examples/src/main/python/ml/vector_assembler_example.py index b955ff0..bbfc316 100644 --- a/examples/src/main/python/ml/vector_assembler_example.py +++ b/examples/src/main/python/ml/vector_assembler_example.py @@ -18,7 +18,7 @@ from __future__ import print_function # $example on$ -from pyspark.mllib.linalg import Vectors +from pyspark.ml.linalg import Vectors from pyspark.ml.feature import VectorAssembler # $example off$ from pyspark.sql import SparkSession http://git-wip-us.apache.org/repos/asf/spark/blob/99f3c827/examples/src/main/python/ml/vector_slicer_example.py ---------------------------------------------------------------------- diff --git a/examples/src/main/python/ml/vector_slicer_example.py b/examples/src/main/python/ml/vector_slicer_example.py index b833a89..d2f46b1 100644 --- a/examples/src/main/python/ml/vector_slicer_example.py +++ b/examples/src/main/python/ml/vector_slicer_example.py @@ -19,7 +19,7 @@ from __future__ import print_function # $example on$ from pyspark.ml.feature import VectorSlicer -from pyspark.mllib.linalg import Vectors +from pyspark.ml.linalg import Vectors from pyspark.sql.types import Row # $example off$ from pyspark.sql import SparkSession 
--------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
