Repository: spark
Updated Branches:
  refs/heads/master 569788a55 -> e9746f87d


[SPARK-18133][EXAMPLES][ML] Python ML Pipeline Example has syntax errors

## What changes were proposed in this pull request?

In Python 3 there is only one integer type (int), which mostly behaves like
the long type in Python 2. Since Python 3 does not accept the "L" suffix on
integer literals, the suffix has been removed from all examples.
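A minimal illustration of the incompatibility (interpreter behavior as in CPython):

```python
# Python 2 had a separate long type, written with an "L" suffix:
#   >>> type(4L)        # Python 2 only
#   <type 'long'>
# Python 3 unified int and long into one arbitrary-precision int,
# and the "L" suffix is now a syntax error:
#   >>> n = 4L          # Python 3
#   SyntaxError: invalid syntax
n = 4                   # plain literal, valid on both Python 2 and 3
print(type(n))          # <class 'int'> on Python 3
```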

## How was this patch tested?

Unit tests.
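As an additional sanity check (illustrative only, not part of this patch), the
touched files can be byte-compiled under Python 3 to confirm the "L" removal
leaves no syntax errors; paths are taken from the diffstat below:

```python
import py_compile

# Files touched by this patch, per the diffstat.
examples = [
    "examples/src/main/python/ml/cross_validator.py",
    "examples/src/main/python/ml/gaussian_mixture_example.py",
    "examples/src/main/python/ml/pipeline_example.py",
    "examples/src/main/python/mllib/binary_classification_metrics_example.py",
    "examples/src/main/python/mllib/multi_class_metrics_example.py",
]
for path in examples:
    # doraise=True turns syntax errors into py_compile.PyCompileError.
    py_compile.compile(path, doraise=True)
print("All touched examples compile under this interpreter.")
```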


Author: Jagadeesan <a...@us.ibm.com>

Closes #15660 from jagadeesanas2/SPARK-18133.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e9746f87
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e9746f87
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e9746f87

Branch: refs/heads/master
Commit: e9746f87d0b553b8115948acb79f7e32c23dfd86
Parents: 569788a
Author: Jagadeesan <a...@us.ibm.com>
Authored: Fri Oct 28 02:26:55 2016 -0700
Committer: Yanbo Liang <yblia...@gmail.com>
Committed: Fri Oct 28 02:26:55 2016 -0700

----------------------------------------------------------------------
 examples/src/main/python/ml/cross_validator.py      |  8 ++++----
 .../src/main/python/ml/gaussian_mixture_example.py  |  2 +-
 examples/src/main/python/ml/pipeline_example.py     | 16 ++++++++--------
 .../mllib/binary_classification_metrics_example.py  |  2 +-
 .../python/mllib/multi_class_metrics_example.py     |  2 +-
 5 files changed, 15 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/e9746f87/examples/src/main/python/ml/cross_validator.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/cross_validator.py b/examples/src/main/python/ml/cross_validator.py
index 907eec6..db70543 100644
--- a/examples/src/main/python/ml/cross_validator.py
+++ b/examples/src/main/python/ml/cross_validator.py
@@ -84,10 +84,10 @@ if __name__ == "__main__":
 
     # Prepare test documents, which are unlabeled.
     test = spark.createDataFrame([
-        (4L, "spark i j k"),
-        (5L, "l m n"),
-        (6L, "mapreduce spark"),
-        (7L, "apache hadoop")
+        (4, "spark i j k"),
+        (5, "l m n"),
+        (6, "mapreduce spark"),
+        (7, "apache hadoop")
     ], ["id", "text"])
 
     # Make predictions on test documents. cvModel uses the best model found (lrModel).

http://git-wip-us.apache.org/repos/asf/spark/blob/e9746f87/examples/src/main/python/ml/gaussian_mixture_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/gaussian_mixture_example.py b/examples/src/main/python/ml/gaussian_mixture_example.py
index 8ad450b..e4a0d31 100644
--- a/examples/src/main/python/ml/gaussian_mixture_example.py
+++ b/examples/src/main/python/ml/gaussian_mixture_example.py
@@ -38,7 +38,7 @@ if __name__ == "__main__":
     # loads data
     dataset = spark.read.format("libsvm").load("data/mllib/sample_kmeans_data.txt")
 
-    gmm = GaussianMixture().setK(2).setSeed(538009335L)
+    gmm = GaussianMixture().setK(2).setSeed(538009335)
     model = gmm.fit(dataset)
 
     print("Gaussians shown as a DataFrame: ")

http://git-wip-us.apache.org/repos/asf/spark/blob/e9746f87/examples/src/main/python/ml/pipeline_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/pipeline_example.py b/examples/src/main/python/ml/pipeline_example.py
index f63e4db..e1fab7c 100644
--- a/examples/src/main/python/ml/pipeline_example.py
+++ b/examples/src/main/python/ml/pipeline_example.py
@@ -35,10 +35,10 @@ if __name__ == "__main__":
     # $example on$
     # Prepare training documents from a list of (id, text, label) tuples.
     training = spark.createDataFrame([
-        (0L, "a b c d e spark", 1.0),
-        (1L, "b d", 0.0),
-        (2L, "spark f g h", 1.0),
-        (3L, "hadoop mapreduce", 0.0)
+        (0, "a b c d e spark", 1.0),
+        (1, "b d", 0.0),
+        (2, "spark f g h", 1.0),
+        (3, "hadoop mapreduce", 0.0)
     ], ["id", "text", "label"])
 
     # Configure an ML pipeline, which consists of three stages: tokenizer, hashingTF, and lr.
@@ -52,10 +52,10 @@ if __name__ == "__main__":
 
     # Prepare test documents, which are unlabeled (id, text) tuples.
     test = spark.createDataFrame([
-        (4L, "spark i j k"),
-        (5L, "l m n"),
-        (6L, "spark hadoop spark"),
-        (7L, "apache hadoop")
+        (4, "spark i j k"),
+        (5, "l m n"),
+        (6, "spark hadoop spark"),
+        (7, "apache hadoop")
     ], ["id", "text"])
 
     # Make predictions on test documents and print columns of interest.

http://git-wip-us.apache.org/repos/asf/spark/blob/e9746f87/examples/src/main/python/mllib/binary_classification_metrics_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/mllib/binary_classification_metrics_example.py b/examples/src/main/python/mllib/binary_classification_metrics_example.py
index daf000e..91f8378 100644
--- a/examples/src/main/python/mllib/binary_classification_metrics_example.py
+++ b/examples/src/main/python/mllib/binary_classification_metrics_example.py
@@ -39,7 +39,7 @@ if __name__ == "__main__":
         .rdd.map(lambda row: LabeledPoint(row[0], row[1]))
 
     # Split data into training (60%) and test (40%)
-    training, test = data.randomSplit([0.6, 0.4], seed=11L)
+    training, test = data.randomSplit([0.6, 0.4], seed=11)
     training.cache()
 
     # Run training algorithm to build the model

http://git-wip-us.apache.org/repos/asf/spark/blob/e9746f87/examples/src/main/python/mllib/multi_class_metrics_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/mllib/multi_class_metrics_example.py b/examples/src/main/python/mllib/multi_class_metrics_example.py
index cd56b3c..7dc5fb4 100644
--- a/examples/src/main/python/mllib/multi_class_metrics_example.py
+++ b/examples/src/main/python/mllib/multi_class_metrics_example.py
@@ -32,7 +32,7 @@ if __name__ == "__main__":
     data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_multiclass_classification_data.txt")
 
     # Split data into training (60%) and test (40%)
-    training, test = data.randomSplit([0.6, 0.4], seed=11L)
+    training, test = data.randomSplit([0.6, 0.4], seed=11)
     training.cache()
 
     # Run training algorithm to build the model

