Github user mengxr commented on a diff in the pull request:
https://github.com/apache/spark/pull/2041#discussion_r16443540
--- Diff: mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala ---
@@ -42,213 +42,165 @@ object RandomRDDs {
   *
   * @param sc SparkContext used to create the RDD.
   * @param size Size of the RDD.
-  * @param numPartitions Number of partitions in the RDD.
-  * @param seed Seed for the RNG that generates the seed for the generator in each partition.
+  * @param numPartitions Number of partitions in the RDD (default: `sc.defaultParallelism`).
+  * @param seed Random seed (default: a random long integer).
   * @return RDD[Double] comprised of i.i.d. samples ~ U[0.0, 1.0].
   */
- @Experimental
- def uniformRDD(sc: SparkContext, size: Long, numPartitions: Int, seed: Long): RDD[Double] = {
+ def uniformRDD(
+     sc: SparkContext,
+     size: Long,
+     numPartitions: Int = 0,
+     seed: Long = Utils.random.nextLong()): RDD[Double] = {
    val uniform = new UniformGenerator()
-   randomRDD(sc, uniform, size, numPartitions, seed)
+   randomRDD(sc, uniform, size, numPartitionsOrDefault(sc, numPartitions), seed)
  }

  /**
-  * :: Experimental ::
-  * Generates an RDD comprised of i.i.d. samples from the uniform distribution on [0.0, 1.0].
-  *
-  * To transform the distribution in the generated RDD from U[0.0, 1.0] to U[a, b], use
-  * `RandomRDDGenerators.uniformRDD(sc, n, p).map(v => a + (b - a) * v)`.
-  *
-  * @param sc SparkContext used to create the RDD.
-  * @param size Size of the RDD.
-  * @param numPartitions Number of partitions in the RDD.
-  * @return RDD[Double] comprised of i.i.d. samples ~ U[0.0, 1.0].
+  * Java-friendly version of [[RandomRDDs#uniformRDD]].
   */
- @Experimental
- def uniformRDD(sc: SparkContext, size: Long, numPartitions: Int): RDD[Double] = {
-   uniformRDD(sc, size, numPartitions, Utils.random.nextLong)
+ def uniformJavaRDD(
+     jsc: JavaSparkContext,
+     size: Long,
+     numPartitions: Int,
+     seed: Long): JavaDoubleRDD = {
+   JavaDoubleRDD.fromRDD(uniformRDD(jsc.sc, size, numPartitions, seed))
  }

  /**
-  * :: Experimental ::
-  * Generates an RDD comprised of i.i.d. samples from the uniform distribution on [0.0, 1.0].
-  * sc.defaultParallelism used for the number of partitions in the RDD.
-  *
-  * To transform the distribution in the generated RDD from U[0.0, 1.0] to U[a, b], use
-  * `RandomRDDGenerators.uniformRDD(sc, n).map(v => a + (b - a) * v)`.
-  *
-  * @param sc SparkContext used to create the RDD.
-  * @param size Size of the RDD.
-  * @return RDD[Double] comprised of i.i.d. samples ~ U[0.0, 1.0].
+  * [[RandomRDDs#uniformJavaRDD]] with the default seed.
   */
- @Experimental
- def uniformRDD(sc: SparkContext, size: Long): RDD[Double] = {
-   uniformRDD(sc, size, sc.defaultParallelism, Utils.random.nextLong)
+ def uniformJavaRDD(jsc: JavaSparkContext, size: Long, numPartitions: Int): JavaDoubleRDD = {
+   JavaDoubleRDD.fromRDD(uniformRDD(jsc.sc, size, numPartitions))
  }

  /**
-  * :: Experimental ::
-  * Generates an RDD comprised of i.i.d. samples from the standard normal distribution.
-  *
-  * To transform the distribution in the generated RDD from standard normal to some other normal
-  * N(mean, sigma), use `RandomRDDGenerators.normalRDD(sc, n, p, seed).map(v => mean + sigma * v)`.
-  *
-  * @param sc SparkContext used to create the RDD.
-  * @param size Size of the RDD.
-  * @param numPartitions Number of partitions in the RDD.
-  * @param seed Seed for the RNG that generates the seed for the generator in each partition.
-  * @return RDD[Double] comprised of i.i.d. samples ~ N(0.0, 1.0).
+  * [[RandomRDDs#uniformJavaRDD]] with the default number of partitions and the default seed.
   */
- @Experimental
- def normalRDD(sc: SparkContext, size: Long, numPartitions: Int, seed: Long): RDD[Double] = {
-   val normal = new StandardNormalGenerator()
-   randomRDD(sc, normal, size, numPartitions, seed)
+ def uniformJavaRDD(jsc: JavaSparkContext, size: Long): JavaDoubleRDD = {
+   JavaDoubleRDD.fromRDD(uniformRDD(jsc.sc, size))
  }

  /**
-  * :: Experimental ::
   * Generates an RDD comprised of i.i.d. samples from the standard normal distribution.
   *
   * To transform the distribution in the generated RDD from standard normal to some other normal
-  * N(mean, sigma), use `RandomRDDGenerators.normalRDD(sc, n, p).map(v => mean + sigma * v)`.
+  * N(mean, sigma), use `RandomRDDGenerators.normalRDD(sc, n, p, seed).map(v => mean + sigma * v)`.
--- End diff --
Good catch!
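
For readers skimming the thread, here is a minimal usage sketch of the API after this change: it exercises the new default `numPartitions`/`seed` arguments, the shift-and-scale transforms suggested in the scaladoc, and one of the Java-friendly wrappers (called from Scala here). The sizes, bounds, `mean`, `sigma`, and seed below are arbitrary placeholders, and a local SparkContext is assumed; this is an illustration, not part of the patch.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.mllib.random.RandomRDDs

object RandomRDDsSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setMaster("local[2]").setAppName("RandomRDDsSketch"))

    // With the new defaults, only sc and size are required: numPartitions
    // falls back to sc.defaultParallelism and seed to a random long.
    val u = RandomRDDs.uniformRDD(sc, 1000L)

    // Shift/scale U[0.0, 1.0] to U[a, b] as the scaladoc suggests
    // (a and b are placeholder bounds).
    val (a, b) = (-1.0, 1.0)
    val uab = u.map(v => a + (b - a) * v)

    // Same idea for N(0, 1) -> N(mean, sigma), here with explicit
    // partitions and seed (placeholder values).
    val (mean, sigma) = (5.0, 2.0)
    val n = RandomRDDs.normalRDD(sc, 1000L, numPartitions = 4, seed = 42L)
      .map(v => mean + sigma * v)

    // Java-friendly wrapper, exercised from Scala via a JavaSparkContext
    // that wraps the same SparkContext.
    val jsc = new JavaSparkContext(sc)
    val ju = RandomRDDs.uniformJavaRDD(jsc, 1000L)

    println(s"first U[a, b] samples: ${uab.take(3).mkString(", ")}")
    println(s"first N(mean, sigma) samples: ${n.take(3).mkString(", ")}")
    println(s"Java-friendly uniform mean: ${ju.mean()}")

    sc.stop()
  }
}

Note that the simplest call works because the default `numPartitions = 0` is routed through `numPartitionsOrDefault(sc, numPartitions)`, which substitutes `sc.defaultParallelism`, as shown in the diff above.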