Repository: spark Updated Branches: refs/heads/master 7278f792a -> 870b8a2ed
[SPARK-10706] [MLLIB] Add java wrapper for random vector rdd Add java wrapper for random vector rdd holdenk srowen Author: Meihua Wu <meihu...@umich.edu> Closes #8841 from rotationsymmetry/SPARK-10706. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/870b8a2e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/870b8a2e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/870b8a2e Branch: refs/heads/master Commit: 870b8a2edd44c9274c43ca0db4ef5b0998e16fd8 Parents: 7278f79 Author: Meihua Wu <meihu...@umich.edu> Authored: Tue Sep 22 11:05:24 2015 +0100 Committer: Sean Owen <so...@cloudera.com> Committed: Tue Sep 22 11:05:24 2015 +0100 ---------------------------------------------------------------------- .../apache/spark/mllib/random/RandomRDDs.scala | 42 ++++++++++++++++++++ .../spark/mllib/random/JavaRandomRDDsSuite.java | 17 ++++++++ 2 files changed, 59 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/870b8a2e/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala index f8ff26b..41d7c4d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala @@ -856,6 +856,48 @@ object RandomRDDs { } /** + * Java-friendly version of [[RandomRDDs#randomVectorRDD]]. + */ + @DeveloperApi + @Since("1.6.0") + def randomJavaVectorRDD( + jsc: JavaSparkContext, + generator: RandomDataGenerator[Double], + numRows: Long, + numCols: Int, + numPartitions: Int, + seed: Long): JavaRDD[Vector] = { + randomVectorRDD(jsc.sc, generator, numRows, numCols, numPartitions, seed).toJavaRDD() + } + + /** + * [[RandomRDDs#randomJavaVectorRDD]] with the default seed. + */ + @DeveloperApi + @Since("1.6.0") + def randomJavaVectorRDD( + jsc: JavaSparkContext, + generator: RandomDataGenerator[Double], + numRows: Long, + numCols: Int, + numPartitions: Int): JavaRDD[Vector] = { + randomVectorRDD(jsc.sc, generator, numRows, numCols, numPartitions).toJavaRDD() + } + + /** + * [[RandomRDDs#randomJavaVectorRDD]] with the default number of partitions and the default seed. + */ + @DeveloperApi + @Since("1.6.0") + def randomJavaVectorRDD( + jsc: JavaSparkContext, + generator: RandomDataGenerator[Double], + numRows: Long, + numCols: Int): JavaRDD[Vector] = { + randomVectorRDD(jsc.sc, generator, numRows, numCols).toJavaRDD() + } + + /** * Returns `numPartitions` if it is positive, or `sc.defaultParallelism` otherwise. */ private def numPartitionsOrDefault(sc: SparkContext, numPartitions: Int): Int = { http://git-wip-us.apache.org/repos/asf/spark/blob/870b8a2e/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java ---------------------------------------------------------------------- diff --git a/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java b/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java index fce5f67..5728df5 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/random/JavaRandomRDDsSuite.java @@ -246,6 +246,23 @@ public class JavaRandomRDDsSuite { Assert.assertEquals(2, rdd.first().length()); } } + + @Test + @SuppressWarnings("unchecked") + public void testRandomVectorRDD() { + UniformGenerator generator = new UniformGenerator(); + long m = 100L; + int n = 10; + int p = 2; + long seed = 1L; + JavaRDD<Vector> rdd1 = randomJavaVectorRDD(sc, generator, m, n); + JavaRDD<Vector> rdd2 = randomJavaVectorRDD(sc, generator, m, n, p); + JavaRDD<Vector> rdd3 = randomJavaVectorRDD(sc, generator, m, n, p, seed); + for (JavaRDD<Vector> rdd: Arrays.asList(rdd1, rdd2, rdd3)) { + Assert.assertEquals(m, rdd.count()); + Assert.assertEquals(n, rdd.first().size()); + } + } } // This is just a test generator, it always returns a string of 42 --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org