Repository: spark
Updated Branches:
  refs/heads/master a67864249 -> 010c460d6
[SPARK-2061] Made splits deprecated in JavaRDDLike

The JIRA for the issue can be found at:
https://issues.apache.org/jira/browse/SPARK-2061

Most of Spark has moved over to consistently using `partitions` instead
of `splits`. We should do likewise and add a `partitions` method to
JavaRDDLike and have `splits` just call that. We should also go through
all cases where other APIs (e.g. Python) call `splits` and change those
to use the newer API.

Author: Anant <[email protected]>

Closes #1062 from anantasty/SPARK-2061 and squashes the following commits:

b83ce6b [Anant] Fixed syntax issue
21f9210 [Anant] Fixed version number in deprecation string
9315b76 [Anant] made related changes to use partitions in python api
8c62dd1 [Anant] Made splits deprecated in JavaRDDLike

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/010c460d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/010c460d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/010c460d

Branch: refs/heads/master
Commit: 010c460d627c1917dc47b09e59fd41172bbf90b3
Parents: a678642
Author: Anant <[email protected]>
Authored: Fri Jun 20 18:54:00 2014 -0700
Committer: Patrick Wendell <[email protected]>
Committed: Fri Jun 20 18:57:24 2014 -0700
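For reference, the change is a pure rename at the call site: `splits()` keeps
working but now carries a deprecation warning, and `partitions()` returns the
same set of partitions. A minimal standalone sketch of the migration in the
Java API (not part of this commit; the class name, master URL, and app name
are placeholders):

    import java.util.Arrays;
    import java.util.List;

    import org.apache.spark.Partition;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;

    public class PartitionsMigrationExample {
      public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext("local", "PartitionsMigrationExample");
        JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 2);

        // Deprecated as of 1.1.0: still compiles and runs, but emits a warning.
        List<Partition> viaSplits = rdd.splits();

        // Preferred: same behavior under the new name.
        List<Partition> viaPartitions = rdd.partitions();

        // Both accessors report the same partition count (2 here).
        System.out.println(viaSplits.size() + " == " + viaPartitions.size());

        sc.stop();
      }
    }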
----------------------------------------------------------------------
 core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala | 5 ++++-
 core/src/test/java/org/apache/spark/JavaAPISuite.java           | 2 +-
 python/pyspark/context.py                                       | 2 +-
 python/pyspark/rdd.py                                           | 4 ++--
 4 files changed, 8 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/010c460d/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
index 330569a..f917cfd 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
@@ -43,8 +43,11 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
 
   def rdd: RDD[T]
 
-  /** Set of partitions in this RDD. */
+  @deprecated("Use partitions() instead.", "1.1.0")
   def splits: JList[Partition] = new java.util.ArrayList(rdd.partitions.toSeq)
 
+  /** Set of partitions in this RDD. */
+  def partitions: JList[Partition] = new java.util.ArrayList(rdd.partitions.toSeq)
+
   /** The [[org.apache.spark.SparkContext]] that this RDD was created on. */
   def context: SparkContext = rdd.context


http://git-wip-us.apache.org/repos/asf/spark/blob/010c460d/core/src/test/java/org/apache/spark/JavaAPISuite.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/spark/JavaAPISuite.java b/core/src/test/java/org/apache/spark/JavaAPISuite.java
index 761f2d6..1d7a7be 100644
--- a/core/src/test/java/org/apache/spark/JavaAPISuite.java
+++ b/core/src/test/java/org/apache/spark/JavaAPISuite.java
@@ -741,7 +741,7 @@ public class JavaAPISuite implements Serializable {
   public void iterator() {
     JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 2);
     TaskContext context = new TaskContext(0, 0, 0, false, new TaskMetrics());
-    Assert.assertEquals(1, rdd.iterator(rdd.splits().get(0), context).next().intValue());
+    Assert.assertEquals(1, rdd.iterator(rdd.partitions().get(0), context).next().intValue());
   }
 
   @Test


http://git-wip-us.apache.org/repos/asf/spark/blob/010c460d/python/pyspark/context.py
----------------------------------------------------------------------
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 062bec2..95c54e7 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -704,7 +704,7 @@ class SparkContext(object):
         [0, 1, 16, 25]
         """
         if partitions == None:
-            partitions = range(rdd._jrdd.splits().size())
+            partitions = range(rdd._jrdd.partitions().size())
         javaPartitions = ListConverter().convert(partitions, self._gateway._gateway_client)
 
         # Implementation note: This is implemented as a mapPartitions followed


http://git-wip-us.apache.org/repos/asf/spark/blob/010c460d/python/pyspark/rdd.py
----------------------------------------------------------------------
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 1d55c35..f64f48e 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -321,7 +321,7 @@ class RDD(object):
         >>> rdd.getNumPartitions()
         2
         """
-        return self._jrdd.splits().size()
+        return self._jrdd.partitions().size()
 
     def filter(self, f):
         """
@@ -922,7 +922,7 @@ class RDD(object):
         [91, 92, 93]
         """
         items = []
-        totalParts = self._jrdd.splits().size()
+        totalParts = self._jrdd.partitions().size()
         partsScanned = 0
         while len(items) < num and partsScanned < totalParts:
