Repository: spark Updated Branches: refs/heads/master ca1a7b9d6 -> 54040f8d3
[SPARK-13715][MLLIB] Remove last usages of jblas in tests ## What changes were proposed in this pull request? Remove last usage of jblas, in tests ## How was this patch tested? Jenkins tests -- the same ones that are being modified. Author: Sean Owen <so...@cloudera.com> Closes #11560 from srowen/SPARK-13715. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/54040f8d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/54040f8d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/54040f8d Branch: refs/heads/master Commit: 54040f8d350d2aad3078dcffef808c62b7c0b73d Parents: ca1a7b9 Author: Sean Owen <so...@cloudera.com> Authored: Tue Mar 8 17:47:55 2016 +0000 Committer: Sean Owen <so...@cloudera.com> Committed: Tue Mar 8 17:47:55 2016 +0000 ---------------------------------------------------------------------- LICENSE | 1 - docs/mllib-data-types.md | 2 +- mllib/pom.xml | 6 -- .../mllib/recommendation/JavaALSSuite.java | 39 +++++----- .../regression/JavaRidgeRegressionSuite.java | 17 +++-- .../spark/mllib/classification/SVMSuite.scala | 7 +- .../spark/mllib/optimization/NNLSSuite.scala | 71 +++++++++--------- .../spark/mllib/recommendation/ALSSuite.scala | 76 ++++++++++---------- .../mllib/regression/RidgeRegressionSuite.scala | 8 +-- pom.xml | 1 - 10 files changed, 107 insertions(+), 121 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/54040f8d/LICENSE ---------------------------------------------------------------------- diff --git a/LICENSE b/LICENSE index 9b78f3b..3c6117f 100644 --- a/LICENSE +++ b/LICENSE @@ -237,7 +237,6 @@ The text of each license is also included at licenses/LICENSE-[project].txt. (BSD 3 Clause) netlib core (com.github.fommil.netlib:core:1.1.2 - https://github.com/fommil/netlib-java/core) (BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.2.7 - https://github.com/jpmml/jpmml-model) - (BSD 3-clause style license) jblas (org.jblas:jblas:1.2.4 - http://jblas.org/) (BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/) (BSD licence) ANTLR ST4 4.0.4 (org.antlr:ST4:4.0.4 - http://www.stringtemplate.org) (BSD licence) ANTLR StringTemplate (org.antlr:stringtemplate:3.2.1 - http://www.stringtemplate.org) http://git-wip-us.apache.org/repos/asf/spark/blob/54040f8d/docs/mllib-data-types.md ---------------------------------------------------------------------- diff --git a/docs/mllib-data-types.md b/docs/mllib-data-types.md index 363dc7c..5e3ee47 100644 --- a/docs/mllib-data-types.md +++ b/docs/mllib-data-types.md @@ -11,7 +11,7 @@ MLlib supports local vectors and matrices stored on a single machine, as well as distributed matrices backed by one or more RDDs. Local vectors and local matrices are simple data models that serve as public interfaces. The underlying linear algebra operations are provided by -[Breeze](http://www.scalanlp.org/) and [jblas](http://jblas.org/). +[Breeze](http://www.scalanlp.org/). A training example used in supervised learning is called a "labeled point" in MLlib. ## Local vector http://git-wip-us.apache.org/repos/asf/spark/blob/54040f8d/mllib/pom.xml ---------------------------------------------------------------------- diff --git a/mllib/pom.xml b/mllib/pom.xml index 816f3f6..428176d 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -63,12 +63,6 @@ <version>${project.version}</version> </dependency> <dependency> - <groupId>org.jblas</groupId> - <artifactId>jblas</artifactId> - <version>${jblas.version}</version> - <scope>test</scope> - </dependency> - <dependency> <groupId>org.scalanlp</groupId> <artifactId>breeze_${scala.binary.version}</artifactId> <version>0.11.2</version> http://git-wip-us.apache.org/repos/asf/spark/blob/54040f8d/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java ---------------------------------------------------------------------- diff --git a/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java b/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java index a6631ed..d0bf7f5 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java @@ -24,7 +24,6 @@ import java.util.List; import scala.Tuple2; import scala.Tuple3; -import org.jblas.DoubleMatrix; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -48,14 +47,14 @@ public class JavaALSSuite implements Serializable { sc = null; } - void validatePrediction( + private void validatePrediction( MatrixFactorizationModel model, int users, int products, - DoubleMatrix trueRatings, + double[] trueRatings, double matchThreshold, boolean implicitPrefs, - DoubleMatrix truePrefs) { + double[] truePrefs) { List<Tuple2<Integer, Integer>> localUsersProducts = new ArrayList<>(users * products); for (int u=0; u < users; ++u) { for (int p=0; p < products; ++p) { @@ -68,7 +67,7 @@ public class JavaALSSuite implements Serializable { if (!implicitPrefs) { for (Rating r: predictedRatings) { double prediction = r.rating(); - double correct = trueRatings.get(r.user(), r.product()); + double correct = trueRatings[r.product() * users + r.user()]; Assert.assertTrue(String.format("Prediction=%2.4f not below match threshold of %2.2f", prediction, matchThreshold), Math.abs(prediction - correct) < matchThreshold); } @@ -79,9 +78,9 @@ public class JavaALSSuite implements Serializable { double denom = 0.0; for (Rating r: predictedRatings) { double prediction = r.rating(); - double truePref = truePrefs.get(r.user(), r.product()); + double truePref = truePrefs[r.product() * users + r.user()]; double confidence = 1.0 + - /* alpha = */ 1.0 * Math.abs(trueRatings.get(r.user(), r.product())); + /* alpha = 1.0 * ... */ Math.abs(trueRatings[r.product() * users + r.user()]); double err = confidence * (truePref - prediction) * (truePref - prediction); sqErr += err; denom += confidence; @@ -98,8 +97,8 @@ public class JavaALSSuite implements Serializable { int iterations = 15; int users = 50; int products = 100; - Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList( - users, products, features, 0.7, false, false); + Tuple3<List<Rating>, double[], double[]> testData = + ALSSuite.generateRatingsAsJava(users, products, features, 0.7, false, false); JavaRDD<Rating> data = sc.parallelize(testData._1()); MatrixFactorizationModel model = ALS.train(data.rdd(), features, iterations); @@ -112,8 +111,8 @@ public class JavaALSSuite implements Serializable { int iterations = 15; int users = 100; int products = 200; - Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList( - users, products, features, 0.7, false, false); + Tuple3<List<Rating>, double[], double[]> testData = + ALSSuite.generateRatingsAsJava(users, products, features, 0.7, false, false); JavaRDD<Rating> data = sc.parallelize(testData._1()); @@ -129,8 +128,8 @@ public class JavaALSSuite implements Serializable { int iterations = 15; int users = 80; int products = 160; - Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList( - users, products, features, 0.7, true, false); + Tuple3<List<Rating>, double[], double[]> testData = + ALSSuite.generateRatingsAsJava(users, products, features, 0.7, true, false); JavaRDD<Rating> data = sc.parallelize(testData._1()); MatrixFactorizationModel model = ALS.trainImplicit(data.rdd(), features, iterations); @@ -143,8 +142,8 @@ public class JavaALSSuite implements Serializable { int iterations = 15; int users = 100; int products = 200; - Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList( - users, products, features, 0.7, true, false); + Tuple3<List<Rating>, double[], double[]> testData = + ALSSuite.generateRatingsAsJava(users, products, features, 0.7, true, false); JavaRDD<Rating> data = sc.parallelize(testData._1()); @@ -161,8 +160,8 @@ public class JavaALSSuite implements Serializable { int iterations = 15; int users = 80; int products = 160; - Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList( - users, products, features, 0.7, true, true); + Tuple3<List<Rating>, double[], double[]> testData = + ALSSuite.generateRatingsAsJava(users, products, features, 0.7, true, true); JavaRDD<Rating> data = sc.parallelize(testData._1()); MatrixFactorizationModel model = new ALS().setRank(features) @@ -179,9 +178,9 @@ public class JavaALSSuite implements Serializable { int iterations = 10; int users = 200; int products = 50; - Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = ALSSuite.generateRatingsAsJavaList( - users, products, features, 0.7, true, false); - JavaRDD<Rating> data = sc.parallelize(testData._1()); + List<Rating> testData = ALSSuite.generateRatingsAsJava( + users, products, features, 0.7, true, false)._1(); + JavaRDD<Rating> data = sc.parallelize(testData); MatrixFactorizationModel model = new ALS().setRank(features) .setIterations(iterations) .setImplicitPrefs(true) http://git-wip-us.apache.org/repos/asf/spark/blob/54040f8d/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java ---------------------------------------------------------------------- diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java index 7266eec..c56db70 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java @@ -19,14 +19,13 @@ package org.apache.spark.mllib.regression; import java.io.Serializable; import java.util.List; +import java.util.Random; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import org.jblas.DoubleMatrix; - import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.util.LinearDataGenerator; @@ -45,7 +44,8 @@ public class JavaRidgeRegressionSuite implements Serializable { sc = null; } - double predictionError(List<LabeledPoint> validationData, RidgeRegressionModel model) { + private static double predictionError(List<LabeledPoint> validationData, + RidgeRegressionModel model) { double errorSum = 0; for (LabeledPoint point: validationData) { Double prediction = model.predict(point.features()); @@ -54,11 +54,14 @@ public class JavaRidgeRegressionSuite implements Serializable { return errorSum / validationData.size(); } - List<LabeledPoint> generateRidgeData(int numPoints, int numFeatures, double std) { - org.jblas.util.Random.seed(42); + private static List<LabeledPoint> generateRidgeData(int numPoints, int numFeatures, double std) { // Pick weights as random values distributed uniformly in [-0.5, 0.5] - DoubleMatrix w = DoubleMatrix.rand(numFeatures, 1).subi(0.5); - return LinearDataGenerator.generateLinearInputAsList(0.0, w.data, numPoints, 42, std); + Random random = new Random(42); + double[] w = new double[numFeatures]; + for (int i = 0; i < w.length; i++) { + w[i] = random.nextDouble() - 0.5; + } + return LinearDataGenerator.generateLinearInputAsList(0.0, w, numPoints, 42, std); } @Test http://git-wip-us.apache.org/repos/asf/spark/blob/54040f8d/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala index 1a47344..3676d9c 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.mllib.classification import scala.collection.JavaConverters._ import scala.util.Random -import org.jblas.DoubleMatrix +import breeze.linalg.{DenseVector => BDV} import org.apache.spark.{SparkException, SparkFunSuite} import org.apache.spark.mllib.linalg.Vectors @@ -45,12 +45,11 @@ object SVMSuite { nPoints: Int, seed: Int): Seq[LabeledPoint] = { val rnd = new Random(seed) - val weightsMat = new DoubleMatrix(1, weights.length, weights: _*) + val weightsMat = new BDV(weights) val x = Array.fill[Array[Double]](nPoints)( Array.fill[Double](weights.length)(rnd.nextDouble() * 2.0 - 1.0)) val y = x.map { xi => - val yD = new DoubleMatrix(1, xi.length, xi: _*).dot(weightsMat) + - intercept + 0.01 * rnd.nextGaussian() + val yD = new BDV(xi).dot(weightsMat) + intercept + 0.01 * rnd.nextGaussian() if (yD < 0) 0.0 else 1.0 } y.zip(x).map(p => LabeledPoint(p._1, Vectors.dense(p._2))) http://git-wip-us.apache.org/repos/asf/spark/blob/54040f8d/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala index d8f9b8c..4ec3dc0 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala @@ -19,28 +19,22 @@ package org.apache.spark.mllib.optimization import scala.util.Random -import org.jblas.{DoubleMatrix, SimpleBlas} +import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV} import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.util.TestingUtils._ class NNLSSuite extends SparkFunSuite { /** Generate an NNLS problem whose optimal solution is the all-ones vector. */ - def genOnesData(n: Int, rand: Random): (DoubleMatrix, DoubleMatrix) = { - val A = new DoubleMatrix(n, n, Array.fill(n*n)(rand.nextDouble()): _*) - val b = A.mmul(DoubleMatrix.ones(n, 1)) - - val ata = A.transpose.mmul(A) - val atb = A.transpose.mmul(b) - - (ata, atb) + def genOnesData(n: Int, rand: Random): (BDM[Double], BDV[Double]) = { + val A = new BDM(n, n, Array.fill(n*n)(rand.nextDouble())) + val b = A * new BDV(Array.fill(n)(1.0)) + (A.t * A, A.t * b) } /** Compute the objective value */ - def computeObjectiveValue(ata: DoubleMatrix, atb: DoubleMatrix, x: DoubleMatrix): Double = { - val res = (x.transpose().mmul(ata).mmul(x)).mul(0.5).sub(atb.dot(x)) - res.get(0) - } + def computeObjectiveValue(ata: BDM[Double], atb: BDV[Double], x: BDV[Double]): Double = + (x.t * ata * x) / 2.0 - atb.dot(x) test("NNLS: exact solution cases") { val n = 20 @@ -54,12 +48,15 @@ class NNLSSuite extends SparkFunSuite { for (k <- 0 until 100) { val (ata, atb) = genOnesData(n, rand) - val x = new DoubleMatrix(NNLS.solve(ata.data, atb.data, ws)) + val x = new BDV(NNLS.solve(ata.data, atb.data, ws)) assert(x.length === n) - val answer = DoubleMatrix.ones(n, 1) - SimpleBlas.axpy(-1.0, answer, x) - val solved = (x.norm2 < 1e-2) && (x.normmax < 1e-3) - if (solved) numSolved = numSolved + 1 + val answer = new BDV(Array.fill(n)(1.0)) + val solved = + (breeze.linalg.norm(x - answer) < 0.01) && // L2 norm + ((x - answer).toArray.map(_.abs).max < 0.001) // inf norm + if (solved) { + numSolved += 1 + } } assert(numSolved > 50) @@ -67,20 +64,18 @@ class NNLSSuite extends SparkFunSuite { test("NNLS: nonnegativity constraint active") { val n = 5 - // scalastyle:off - val ata = new DoubleMatrix(Array( - Array( 4.377, -3.531, -1.306, -0.139, 3.418), - Array(-3.531, 4.344, 0.934, 0.305, -2.140), - Array(-1.306, 0.934, 2.644, -0.203, -0.170), - Array(-0.139, 0.305, -0.203, 5.883, 1.428), - Array( 3.418, -2.140, -0.170, 1.428, 4.684))) - // scalastyle:on - val atb = new DoubleMatrix(Array(-1.632, 2.115, 1.094, -1.025, -0.636)) + val ata = Array( + 4.377, -3.531, -1.306, -0.139, 3.418, + -3.531, 4.344, 0.934, 0.305, -2.140, + -1.306, 0.934, 2.644, -0.203, -0.170, + -0.139, 0.305, -0.203, 5.883, 1.428, + 3.418, -2.140, -0.170, 1.428, 4.684) + val atb = Array(-1.632, 2.115, 1.094, -1.025, -0.636) val goodx = Array(0.13025, 0.54506, 0.2874, 0.0, 0.028628) val ws = NNLS.createWorkspace(n) - val x = NNLS.solve(ata.data, atb.data, ws) + val x = NNLS.solve(ata, atb, ws) for (i <- 0 until n) { assert(x(i) ~== goodx(i) absTol 1E-3) assert(x(i) >= 0) @@ -89,23 +84,21 @@ class NNLSSuite extends SparkFunSuite { test("NNLS: objective value test") { val n = 5 - val ata = new DoubleMatrix(5, 5 - , 517399.13534, 242529.67289, -153644.98976, 130802.84503, -798452.29283 - , 242529.67289, 126017.69765, -75944.21743, 81785.36128, -405290.60884 - , -153644.98976, -75944.21743, 46986.44577, -45401.12659, 247059.51049 - , 130802.84503, 81785.36128, -45401.12659, 67457.31310, -253747.03819 - , -798452.29283, -405290.60884, 247059.51049, -253747.03819, 1310939.40814 - ) - val atb = new DoubleMatrix(5, 1, - -31755.05710, 13047.14813, -20191.24443, 25993.77580, 11963.55017) + val ata = new BDM(5, 5, Array( + 517399.13534, 242529.67289, -153644.98976, 130802.84503, -798452.29283, + 242529.67289, 126017.69765, -75944.21743, 81785.36128, -405290.60884, + -153644.98976, -75944.21743, 46986.44577, -45401.12659, 247059.51049, + 130802.84503, 81785.36128, -45401.12659, 67457.31310, -253747.03819, + -798452.29283, -405290.60884, 247059.51049, -253747.03819, 1310939.40814)) + val atb = new BDV(Array(-31755.05710, 13047.14813, -20191.24443, 25993.77580, 11963.55017)) /** reference solution obtained from matlab function quadprog */ - val refx = new DoubleMatrix(Array(34.90751, 103.96254, 0.00000, 27.82094, 58.79627)) + val refx = new BDV(Array(34.90751, 103.96254, 0.00000, 27.82094, 58.79627)) val refObj = computeObjectiveValue(ata, atb, refx) val ws = NNLS.createWorkspace(n) - val x = new DoubleMatrix(NNLS.solve(ata.data, atb.data, ws)) + val x = new BDV(NNLS.solve(ata.data, atb.data, ws)) val obj = computeObjectiveValue(ata, atb, x) assert(obj < refObj + 1E-5) http://git-wip-us.apache.org/repos/asf/spark/blob/54040f8d/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala index 045135f..d9dc557 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala @@ -21,7 +21,7 @@ import scala.collection.JavaConverters._ import scala.math.abs import scala.util.Random -import org.jblas.DoubleMatrix +import breeze.linalg.{DenseMatrix => BDM} import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext @@ -29,16 +29,16 @@ import org.apache.spark.storage.StorageLevel object ALSSuite { - def generateRatingsAsJavaList( + def generateRatingsAsJava( users: Int, products: Int, features: Int, samplingRate: Double, implicitPrefs: Boolean, - negativeWeights: Boolean): (java.util.List[Rating], DoubleMatrix, DoubleMatrix) = { + negativeWeights: Boolean): (java.util.List[Rating], Array[Double], Array[Double]) = { val (sampledRatings, trueRatings, truePrefs) = - generateRatings(users, products, features, samplingRate, implicitPrefs) - (sampledRatings.asJava, trueRatings, truePrefs) + generateRatings(users, products, features, samplingRate, implicitPrefs, negativeWeights) + (sampledRatings.asJava, trueRatings.toArray, if (truePrefs == null) null else truePrefs.toArray) } def generateRatings( @@ -48,35 +48,36 @@ object ALSSuite { samplingRate: Double, implicitPrefs: Boolean = false, negativeWeights: Boolean = false, - negativeFactors: Boolean = true): (Seq[Rating], DoubleMatrix, DoubleMatrix) = { + negativeFactors: Boolean = true): (Seq[Rating], BDM[Double], BDM[Double]) = { val rand = new Random(42) // Create a random matrix with uniform values from -1 to 1 def randomMatrix(m: Int, n: Int) = { if (negativeFactors) { - new DoubleMatrix(m, n, Array.fill(m * n)(rand.nextDouble() * 2 - 1): _*) + new BDM(m, n, Array.fill(m * n)(rand.nextDouble() * 2 - 1)) } else { - new DoubleMatrix(m, n, Array.fill(m * n)(rand.nextDouble()): _*) + new BDM(m, n, Array.fill(m * n)(rand.nextDouble())) } } val userMatrix = randomMatrix(users, features) val productMatrix = randomMatrix(features, products) - val (trueRatings, truePrefs) = implicitPrefs match { - case true => + val (trueRatings, truePrefs) = + if (implicitPrefs) { // Generate raw values from [0,9], or if negativeWeights, from [-2,7] - val raw = new DoubleMatrix(users, products, + val raw = new BDM(users, products, Array.fill(users * products)( - (if (negativeWeights) -2 else 0) + rand.nextInt(10).toDouble): _*) + (if (negativeWeights) -2 else 0) + rand.nextInt(10).toDouble)) val prefs = - new DoubleMatrix(users, products, raw.data.map(v => if (v > 0) 1.0 else 0.0): _*) + new BDM(users, products, raw.data.map(v => if (v > 0) 1.0 else 0.0)) (raw, prefs) - case false => (userMatrix.mmul(productMatrix), null) - } + } else { + (userMatrix * productMatrix, null) + } val sampledRatings = { for (u <- 0 until users; p <- 0 until products if rand.nextDouble() < samplingRate) - yield Rating(u, p, trueRatings.get(u, p)) + yield Rating(u, p, trueRatings(u, p)) } (sampledRatings, trueRatings, truePrefs) @@ -149,8 +150,8 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext { .setSeed(1) .setFinalRDDStorageLevel(storageLevel) .run(ratings) - assert(model.productFeatures.getStorageLevel == storageLevel); - assert(model.userFeatures.getStorageLevel == storageLevel); + assert(model.productFeatures.getStorageLevel == storageLevel) + assert(model.userFeatures.getStorageLevel == storageLevel) storageLevel = StorageLevel.DISK_ONLY model = new ALS() .setRank(5) @@ -160,8 +161,8 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext { .setSeed(1) .setFinalRDDStorageLevel(storageLevel) .run(ratings) - assert(model.productFeatures.getStorageLevel == storageLevel); - assert(model.userFeatures.getStorageLevel == storageLevel); + assert(model.productFeatures.getStorageLevel == storageLevel) + assert(model.userFeatures.getStorageLevel == storageLevel) } test("negative ids") { @@ -178,7 +179,7 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext { val u = r.user + 25 val p = r.product + 25 val v = r.rating - val error = v - correct.get(u, p) + val error = v - correct(u, p) assert(math.abs(error) < 0.4) } } @@ -197,7 +198,7 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext { * @param samplingRate what fraction of the user-product pairs are known * @param matchThreshold max difference allowed to consider a predicted rating correct * @param implicitPrefs flag to test implicit feedback - * @param bulkPredict flag to test bulk predicition + * @param bulkPredict flag to test bulk prediction * @param negativeWeights whether the generated data can contain negative values * @param numUserBlocks number of user blocks to partition users into * @param numProductBlocks number of product blocks to partition products into @@ -234,30 +235,31 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext { .setNonnegative(!negativeFactors) .run(sc.parallelize(sampledRatings)) - val predictedU = new DoubleMatrix(users, features) + val predictedU = new BDM[Double](users, features) for ((u, vec) <- model.userFeatures.collect(); i <- 0 until features) { - predictedU.put(u, i, vec(i)) + predictedU(u, i) = vec(i) } - val predictedP = new DoubleMatrix(products, features) + val predictedP = new BDM[Double](products, features) for ((p, vec) <- model.productFeatures.collect(); i <- 0 until features) { - predictedP.put(p, i, vec(i)) + predictedP(p, i) = vec(i) } - val predictedRatings = bulkPredict match { - case false => predictedU.mmul(predictedP.transpose) - case true => - val allRatings = new DoubleMatrix(users, products) + val predictedRatings = + if (bulkPredict) { + val allRatings = new BDM[Double](users, products) val usersProducts = for (u <- 0 until users; p <- 0 until products) yield (u, p) val userProductsRDD = sc.parallelize(usersProducts) model.predict(userProductsRDD).collect().foreach { elem => - allRatings.put(elem.user, elem.product, elem.rating) + allRatings(elem.user, elem.product) = elem.rating } allRatings - } + } else { + predictedU * predictedP.t + } if (!implicitPrefs) { for (u <- 0 until users; p <- 0 until products) { - val prediction = predictedRatings.get(u, p) - val correct = trueRatings.get(u, p) + val prediction = predictedRatings(u, p) + val correct = trueRatings(u, p) if (math.abs(prediction - correct) > matchThreshold) { fail(("Model failed to predict (%d, %d): %f vs %f\ncorr: %s\npred: %s\nU: %s\n P: %s") .format(u, p, correct, prediction, trueRatings, predictedRatings, predictedU, @@ -269,9 +271,9 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext { var sqErr = 0.0 var denom = 0.0 for (u <- 0 until users; p <- 0 until products) { - val prediction = predictedRatings.get(u, p) - val truePref = truePrefs.get(u, p) - val confidence = 1 + 1.0 * abs(trueRatings.get(u, p)) + val prediction = predictedRatings(u, p) + val truePref = truePrefs(u, p) + val confidence = 1.0 + abs(trueRatings(u, p)) val err = confidence * (truePref - prediction) * (truePref - prediction) sqErr += err denom += confidence http://git-wip-us.apache.org/repos/asf/spark/blob/54040f8d/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala index a200e94..815be32 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala @@ -19,8 +19,6 @@ package org.apache.spark.mllib.regression import scala.util.Random -import org.jblas.DoubleMatrix - import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.util.{LinearDataGenerator, LocalClusterSparkContext, @@ -49,12 +47,12 @@ class RidgeRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { val numExamples = 50 val numFeatures = 20 - org.jblas.util.Random.seed(42) // Pick weights as random values distributed uniformly in [-0.5, 0.5] - val w = DoubleMatrix.rand(numFeatures, 1).subi(0.5) + val random = new Random(42) + val w = Array.fill(numFeatures)(random.nextDouble() - 0.5) // Use half of data for training and other half for validation - val data = LinearDataGenerator.generateLinearInput(3.0, w.toArray, 2 * numExamples, 42, 10.0) + val data = LinearDataGenerator.generateLinearInput(3.0, w, 2 * numExamples, 42, 10.0) val testData = data.take(numExamples) val validationData = data.takeRight(numExamples) http://git-wip-us.apache.org/repos/asf/spark/blob/54040f8d/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index dccfd10..90f4672 100644 --- a/pom.xml +++ b/pom.xml @@ -144,7 +144,6 @@ <derby.version>10.10.1.1</derby.version> <parquet.version>1.7.0</parquet.version> <hive.parquet.version>1.6.0</hive.parquet.version> - <jblas.version>1.2.4</jblas.version> <jetty.version>8.1.14.v20131031</jetty.version> <orbit.version>3.0.0.v201112011016</orbit.version> <chill.version>0.7.4</chill.version> --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org