Repository: spark
Updated Branches:
  refs/heads/master db4d317cc -> 28bcb9e9e


[SPARK-6370][core] Documentation: Improve all 3 docs for RDD.sample

The docs for the `sample` method were insufficient, now less so.

Author: mbonaci <mbon...@gmail.com>

Closes #5097 from mbonaci/master and squashes the following commits:

a6a9d97 [mbonaci] [SPARK-6370][core] Documentation: Improve all 3 docs for RDD.sample method


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/28bcb9e9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/28bcb9e9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/28bcb9e9

Branch: refs/heads/master
Commit: 28bcb9e9e86a4b643fbf96b2b7e03928ebcfc060
Parents: db4d317
Author: mbonaci <mbon...@gmail.com>
Authored: Fri Mar 20 18:30:45 2015 +0000
Committer: Sean Owen <so...@cloudera.com>
Committed: Fri Mar 20 18:33:53 2015 +0000

----------------------------------------------------------------------
 .../main/scala/org/apache/spark/api/java/JavaRDD.scala   | 11 +++++++++++
 core/src/main/scala/org/apache/spark/rdd/RDD.scala       |  6 ++++++
 python/pyspark/rdd.py                                    |  6 ++++++
 3 files changed, 23 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/28bcb9e9/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala
index 645dc3b..3e9beb6 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala
@@ -101,12 +101,23 @@ class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T])
 
   /**
    * Return a sampled subset of this RDD.
+   * 
+   * @param withReplacement can elements be sampled multiple times (replaced when sampled out)
+   * @param fraction expected size of the sample as a fraction of this RDD's size
+   *  without replacement: probability that each element is chosen; fraction must be [0, 1]
+   *  with replacement: expected number of times each element is chosen; fraction must be >= 0
    */
   def sample(withReplacement: Boolean, fraction: Double): JavaRDD[T] =
     sample(withReplacement, fraction, Utils.random.nextLong)
     
   /**
    * Return a sampled subset of this RDD.
+   * 
+   * @param withReplacement can elements be sampled multiple times (replaced when sampled out)
+   * @param fraction expected size of the sample as a fraction of this RDD's size
+   *  without replacement: probability that each element is chosen; fraction must be [0, 1]
+   *  with replacement: expected number of times each element is chosen; fraction must be >= 0
+   * @param seed seed for the random number generator
    */
   def sample(withReplacement: Boolean, fraction: Double, seed: Long): JavaRDD[T] =
     wrapRDD(rdd.sample(withReplacement, fraction, seed))

http://git-wip-us.apache.org/repos/asf/spark/blob/28bcb9e9/core/src/main/scala/org/apache/spark/rdd/RDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index a139780..a4c74ed 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -377,6 +377,12 @@ abstract class RDD[T: ClassTag](
 
   /**
    * Return a sampled subset of this RDD.
+   * 
+   * @param withReplacement can elements be sampled multiple times (replaced when sampled out)
+   * @param fraction expected size of the sample as a fraction of this RDD's size
+   *  without replacement: probability that each element is chosen; fraction must be [0, 1]
+   *  with replacement: expected number of times each element is chosen; fraction must be >= 0
+   * @param seed seed for the random number generator
    */
   def sample(withReplacement: Boolean,
       fraction: Double,

http://git-wip-us.apache.org/repos/asf/spark/blob/28bcb9e9/python/pyspark/rdd.py
----------------------------------------------------------------------
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index bf17f51..c337a43 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -346,6 +346,12 @@ class RDD(object):
         """
         Return a sampled subset of this RDD.
 
+        :param withReplacement: can elements be sampled multiple times (replaced when sampled out)
+        :param fraction: expected size of the sample as a fraction of this RDD's size
+            without replacement: probability that each element is chosen; fraction must be [0, 1]
+            with replacement: expected number of times each element is chosen; fraction must be >= 0
+        :param seed: seed for the random number generator
+
         >>> rdd = sc.parallelize(range(100), 4)
         >>> rdd.sample(False, 0.1, 81).count()
         10


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to