Repository: spark
Updated Branches:
  refs/heads/master b8afe3052 -> 9032f7c0d


SPARK-1160: Deprecate toArray in RDD

https://spark-project.atlassian.net/browse/SPARK-1160

reported by @mateiz: "It's redundant with collect() and the name doesn't make 
sense in Java, where we return a List (we can't return an array due to the way 
Java generics work). It's also missing in Python."

In this patch, I deprecated the method and updated the source files that used it,
replacing toArray with collect() directly.
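
For callers the change is mechanical; a minimal sketch of the migration in Scala
(assuming an existing SparkContext named sc, not part of this patch):

  // Before (now deprecated): materialize the RDD on the driver with toArray()
  // val xs: Array[Int] = sc.parallelize(1 to 10).toArray()

  // After: collect() returns the same Array[T] on the driver
  val xs: Array[Int] = sc.parallelize(1 to 10).collect()

In the Java API, toArray() was already just an alias for collect(), which returns
a java.util.List, so Java callers switch names with no change in return type.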

Author: CodingCat <zhunans...@gmail.com>

Closes #105 from CodingCat/SPARK-1060 and squashes the following commits:

286f163 [CodingCat] deprecate in JavaRDDLike
ee17b4e [CodingCat] add message and since
2ff7319 [CodingCat] deprecate toArray in RDD


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9032f7c0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9032f7c0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9032f7c0

Branch: refs/heads/master
Commit: 9032f7c0d5f1ae7985a20d54ca04c297201aae85
Parents: b8afe30
Author: CodingCat <zhunans...@gmail.com>
Authored: Wed Mar 12 17:43:12 2014 -0700
Committer: Aaron Davidson <aa...@databricks.com>
Committed: Wed Mar 12 17:43:12 2014 -0700

----------------------------------------------------------------------
 .../src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala | 1 +
 .../src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala | 2 +-
 core/src/main/scala/org/apache/spark/rdd/RDD.scala             | 1 +
 core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala      | 4 ++--
 .../src/main/scala/org/apache/spark/examples/SparkALS.scala    | 4 ++--
 .../main/scala/org/apache/spark/examples/mllib/SparkSVD.scala  | 2 +-
 mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala   | 4 ++--
 .../test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala    | 6 +++---
 8 files changed, 13 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/9032f7c0/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
index a89419b..3df68d4 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
@@ -283,6 +283,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
   /**
    * Return an array that contains all of the elements in this RDD.
    */
+  @deprecated("use collect", "1.0.0")
   def toArray(): JList[T] = collect()
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/9032f7c0/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
index 2384c8f..b20ed99 100644
--- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
@@ -423,7 +423,7 @@ class PairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)])
    * Return the key-value pairs in this RDD to the master as a Map.
    */
   def collectAsMap(): Map[K, V] = {
-    val data = self.toArray()
+    val data = self.collect()
     val map = new mutable.HashMap[K, V]
     map.sizeHint(data.length)
     data.foreach { case (k, v) => map.put(k, v) }
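
A usage note, not part of this patch: collectAsMap() brings the pairs back to the
driver and fills a HashMap, so at most one value is kept per key. A minimal sketch
(assuming an existing SparkContext named sc and the Spark 1.x pair-RDD implicits):

  import org.apache.spark.SparkContext._  // brings PairRDDFunctions into scope
  val m = sc.parallelize(Seq((1, "a"), (1, "b"), (2, "c"))).collectAsMap()
  // m has two entries: one value survives for key 1, and key 2 -> "c"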

http://git-wip-us.apache.org/repos/asf/spark/blob/9032f7c0/core/src/main/scala/org/apache/spark/rdd/RDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index 4afa752..b50c996 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -658,6 +658,7 @@ abstract class RDD[T: ClassTag](
   /**
    * Return an array that contains all of the elements in this RDD.
    */
+  @deprecated("use collect", "1.0.0")
   def toArray(): Array[T] = collect()
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/9032f7c0/core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala b/core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala
index b50307c..4ceea55 100644
--- a/core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala
@@ -26,13 +26,13 @@ import cern.jet.random.engine.DRand
 
 import org.apache.spark.{Partition, TaskContext}
 
-@deprecated("Replaced by PartitionwiseSampledRDDPartition", "1.0")
+@deprecated("Replaced by PartitionwiseSampledRDDPartition", "1.0.0")
 private[spark]
class SampledRDDPartition(val prev: Partition, val seed: Int) extends Partition with Serializable {
   override val index: Int = prev.index
 }
 
-@deprecated("Replaced by PartitionwiseSampledRDD", "1.0")
+@deprecated("Replaced by PartitionwiseSampledRDD", "1.0.0")
 class SampledRDD[T: ClassTag](
     prev: RDD[T],
     withReplacement: Boolean,
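
Scala's @deprecated annotation takes a deprecation message and a "since" version
string; the existing annotations above are updated from "1.0" to "1.0.0" so they
match the version string used by the new annotations in this patch. A minimal
sketch with a hypothetical method (not from Spark):

  class Legacy {
    @deprecated("use newMethod", "1.0.0")
    def oldMethod(): Int = newMethod()

    def newMethod(): Int = 42
  }
  // Callers of oldMethod() now get a compile-time deprecation warning.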

http://git-wip-us.apache.org/repos/asf/spark/blob/9032f7c0/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala
index 17bafc2..ce4b3c8 100644
--- a/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala
@@ -128,11 +128,11 @@ object SparkALS {
       println("Iteration " + iter + ":")
       ms = sc.parallelize(0 until M, slices)
                 .map(i => update(i, msb.value(i), usb.value, Rc.value))
-                .toArray
+                .collect()
       msb = sc.broadcast(ms) // Re-broadcast ms because it was updated
       us = sc.parallelize(0 until U, slices)
                .map(i => update(i, usb.value(i), msb.value, algebra.transpose(Rc.value)))
-                .toArray
+                .collect()
       usb = sc.broadcast(us) // Re-broadcast us because it was updated
       println("RMSE = " + rmse(R, ms, us))
       println()

http://git-wip-us.apache.org/repos/asf/spark/blob/9032f7c0/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala
index 19676fc..ce2b133 100644
--- a/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala
@@ -54,6 +54,6 @@ object SparkSVD {
     val s = decomposed.S.data
     val v = decomposed.V.data
 
-    println("singular values = " + s.toArray.mkString)
+    println("singular values = " + s.collect().mkString)
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/9032f7c0/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
index 8803c4c..e4a26ee 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
@@ -109,7 +109,7 @@ object SVD {
 
     // Construct jblas A^T A locally
     val ata = DoubleMatrix.zeros(n, n)
-    for (entry <- emits.toArray) {
+    for (entry <- emits.collect()) {
       ata.put(entry._1._1, entry._1._2, entry._2)
     }
 
@@ -178,7 +178,7 @@ object SVD {
     val s = decomposed.S.data
     val v = decomposed.V.data
     
-    println("Computed " + s.toArray.length + " singular values and vectors")
+    println("Computed " + s.collect().length + " singular values and vectors")
     u.saveAsTextFile(output_u)
     s.saveAsTextFile(output_s)
     v.saveAsTextFile(output_v)

http://git-wip-us.apache.org/repos/asf/spark/blob/9032f7c0/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala
index 32f3f14..a923868 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala
@@ -50,7 +50,7 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
     val m = matrix.m
     val n = matrix.n
     val ret = DoubleMatrix.zeros(m, n)
-    matrix.data.toArray.map(x => ret.put(x.i, x.j, x.mval))
+    matrix.data.collect().map(x => ret.put(x.i, x.j, x.mval))
     ret
   }
 
@@ -106,7 +106,7 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
     val u = decomposed.U
     val s = decomposed.S
     val v = decomposed.V
-    val retrank = s.data.toArray.length
+    val retrank = s.data.collect().length
 
     assert(retrank == 1, "rank returned not one")
 
@@ -139,7 +139,7 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
     val u = decomposed.U
     val s = decomposed.S
     val v = decomposed.V
-    val retrank = s.data.toArray.length
+    val retrank = s.data.collect().length
 
     val densea = getDenseMatrix(a)
     val svd = Singular.sparseSVD(densea)
