spark git commit: [SPARK-13715][MLLIB] Remove last usages of jblas in tests

srowen Tue, 08 Mar 2016 09:48:39 -0800

Repository: spark
Updated Branches:
  refs/heads/master ca1a7b9d6 -> 54040f8d3



[SPARK-13715][MLLIB] Remove last usages of jblas in tests

## What changes were proposed in this pull request?

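Remove the last remaining usages of jblas, all of which were in tests, replacing them with Breeze or plain arrays. The jblas test dependency, its version property, its LICENSE entry, and its mention in the docs are dropped as well.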

## How was this patch tested?

Jenkins tests, which include the same test suites that this patch modifies.

Author: Sean Owen <so...@cloudera.com>

Closes #11560 from srowen/SPARK-13715.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/54040f8d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/54040f8d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/54040f8d

Branch: refs/heads/master
Commit: 54040f8d350d2aad3078dcffef808c62b7c0b73d
Parents: ca1a7b9
Author: Sean Owen <so...@cloudera.com>
Authored: Tue Mar 8 17:47:55 2016 +0000
Committer: Sean Owen <so...@cloudera.com>
Committed: Tue Mar 8 17:47:55 2016 +0000

----------------------------------------------------------------------
 LICENSE                                         |  1 -
 docs/mllib-data-types.md                        |  2 +-
 mllib/pom.xml                                   |  6 --
 .../mllib/recommendation/JavaALSSuite.java      | 39 +++++-----
 .../regression/JavaRidgeRegressionSuite.java    | 17 +++--
 .../spark/mllib/classification/SVMSuite.scala   |  7 +-
 .../spark/mllib/optimization/NNLSSuite.scala    | 71 +++++++++---------
 .../spark/mllib/recommendation/ALSSuite.scala   | 76 ++++++++++----------
 .../mllib/regression/RidgeRegressionSuite.scala |  8 +--
 pom.xml                                         |  1 -
 10 files changed, 107 insertions(+), 121 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/54040f8d/LICENSE
----------------------------------------------------------------------
diff --git a/LICENSE b/LICENSE
index 9b78f3b..3c6117f 100644
--- a/LICENSE
+++ b/LICENSE
@@ -237,7 +237,6 @@ The text of each license is also included at 
licenses/LICENSE-[project].txt.
 
      (BSD 3 Clause) netlib core (com.github.fommil.netlib:core:1.1.2 - 
https://github.com/fommil/netlib-java/core)
      (BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.2.7 - 
https://github.com/jpmml/jpmml-model)
-     (BSD 3-clause style license) jblas (org.jblas:jblas:1.2.4 - 
http://jblas.org/)
      (BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - 
http://www.antlr.org/)
      (BSD licence) ANTLR ST4 4.0.4 (org.antlr:ST4:4.0.4 - 
http://www.stringtemplate.org)
      (BSD licence) ANTLR StringTemplate (org.antlr:stringtemplate:3.2.1 - 
http://www.stringtemplate.org)

http://git-wip-us.apache.org/repos/asf/spark/blob/54040f8d/docs/mllib-data-types.md
----------------------------------------------------------------------
diff --git a/docs/mllib-data-types.md b/docs/mllib-data-types.md
index 363dc7c..5e3ee47 100644
--- a/docs/mllib-data-types.md
+++ b/docs/mllib-data-types.md
@@ -11,7 +11,7 @@ MLlib supports local vectors and matrices stored on a single 
machine,
 as well as distributed matrices backed by one or more RDDs.
 Local vectors and local matrices are simple data models 
 that serve as public interfaces. The underlying linear algebra operations are 
provided by
-[Breeze](http://www.scalanlp.org/) and [jblas](http://jblas.org/).
+[Breeze](http://www.scalanlp.org/).
 A training example used in supervised learning is called a "labeled point" in 
MLlib.
 
 ## Local vector

http://git-wip-us.apache.org/repos/asf/spark/blob/54040f8d/mllib/pom.xml
----------------------------------------------------------------------
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 816f3f6..428176d 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -63,12 +63,6 @@
       <version>${project.version}</version>
     </dependency>
     <dependency>
-      <groupId>org.jblas</groupId>
-      <artifactId>jblas</artifactId>
-      <version>${jblas.version}</version>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
       <groupId>org.scalanlp</groupId>
       <artifactId>breeze_${scala.binary.version}</artifactId>
       <version>0.11.2</version>

http://git-wip-us.apache.org/repos/asf/spark/blob/54040f8d/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java 
b/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java
index a6631ed..d0bf7f5 100644
--- 
a/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java
+++ 
b/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java
@@ -24,7 +24,6 @@ import java.util.List;
 import scala.Tuple2;
 import scala.Tuple3;
 
-import org.jblas.DoubleMatrix;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
@@ -48,14 +47,14 @@ public class JavaALSSuite implements Serializable {
     sc = null;
   }
 
-  void validatePrediction(
+  private void validatePrediction(
       MatrixFactorizationModel model,
       int users,
       int products,
-      DoubleMatrix trueRatings,
+      double[] trueRatings,
       double matchThreshold,
       boolean implicitPrefs,
-      DoubleMatrix truePrefs) {
+      double[] truePrefs) {
     List<Tuple2<Integer, Integer>> localUsersProducts = new ArrayList<>(users 
* products);
     for (int u=0; u < users; ++u) {
       for (int p=0; p < products; ++p) {
@@ -68,7 +67,7 @@ public class JavaALSSuite implements Serializable {
     if (!implicitPrefs) {
       for (Rating r: predictedRatings) {
         double prediction = r.rating();
-        double correct = trueRatings.get(r.user(), r.product());
+        double correct = trueRatings[r.product() * users + r.user()];
         Assert.assertTrue(String.format("Prediction=%2.4f not below match 
threshold of %2.2f",
           prediction, matchThreshold), Math.abs(prediction - correct) < 
matchThreshold);
       }
@@ -79,9 +78,9 @@ public class JavaALSSuite implements Serializable {
       double denom = 0.0;
       for (Rating r: predictedRatings) {
         double prediction = r.rating();
-        double truePref = truePrefs.get(r.user(), r.product());
+        double truePref = truePrefs[r.product() * users + r.user()];
         double confidence = 1.0 +
-          /* alpha = */ 1.0 * Math.abs(trueRatings.get(r.user(), r.product()));
+          /* alpha = 1.0 * ... */ Math.abs(trueRatings[r.product() * users + 
r.user()]);
         double err = confidence * (truePref - prediction) * (truePref - 
prediction);
         sqErr += err;
         denom += confidence;
@@ -98,8 +97,8 @@ public class JavaALSSuite implements Serializable {
     int iterations = 15;
     int users = 50;
     int products = 100;
-    Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = 
ALSSuite.generateRatingsAsJavaList(
-        users, products, features, 0.7, false, false);
+    Tuple3<List<Rating>, double[], double[]> testData =
+        ALSSuite.generateRatingsAsJava(users, products, features, 0.7, false, 
false);
 
     JavaRDD<Rating> data = sc.parallelize(testData._1());
     MatrixFactorizationModel model = ALS.train(data.rdd(), features, 
iterations);
@@ -112,8 +111,8 @@ public class JavaALSSuite implements Serializable {
     int iterations = 15;
     int users = 100;
     int products = 200;
-    Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = 
ALSSuite.generateRatingsAsJavaList(
-        users, products, features, 0.7, false, false);
+    Tuple3<List<Rating>, double[], double[]> testData =
+        ALSSuite.generateRatingsAsJava(users, products, features, 0.7, false, 
false);
 
     JavaRDD<Rating> data = sc.parallelize(testData._1());
 
@@ -129,8 +128,8 @@ public class JavaALSSuite implements Serializable {
     int iterations = 15;
     int users = 80;
     int products = 160;
-    Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = 
ALSSuite.generateRatingsAsJavaList(
-        users, products, features, 0.7, true, false);
+    Tuple3<List<Rating>, double[], double[]> testData =
+        ALSSuite.generateRatingsAsJava(users, products, features, 0.7, true, 
false);
 
     JavaRDD<Rating> data = sc.parallelize(testData._1());
     MatrixFactorizationModel model = ALS.trainImplicit(data.rdd(), features, 
iterations);
@@ -143,8 +142,8 @@ public class JavaALSSuite implements Serializable {
     int iterations = 15;
     int users = 100;
     int products = 200;
-    Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = 
ALSSuite.generateRatingsAsJavaList(
-        users, products, features, 0.7, true, false);
+    Tuple3<List<Rating>, double[], double[]> testData =
+        ALSSuite.generateRatingsAsJava(users, products, features, 0.7, true, 
false);
 
     JavaRDD<Rating> data = sc.parallelize(testData._1());
 
@@ -161,8 +160,8 @@ public class JavaALSSuite implements Serializable {
     int iterations = 15;
     int users = 80;
     int products = 160;
-    Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = 
ALSSuite.generateRatingsAsJavaList(
-        users, products, features, 0.7, true, true);
+    Tuple3<List<Rating>, double[], double[]> testData =
+        ALSSuite.generateRatingsAsJava(users, products, features, 0.7, true, 
true);
 
     JavaRDD<Rating> data = sc.parallelize(testData._1());
     MatrixFactorizationModel model = new ALS().setRank(features)
@@ -179,9 +178,9 @@ public class JavaALSSuite implements Serializable {
     int iterations = 10;
     int users = 200;
     int products = 50;
-    Tuple3<List<Rating>, DoubleMatrix, DoubleMatrix> testData = 
ALSSuite.generateRatingsAsJavaList(
-        users, products, features, 0.7, true, false);
-    JavaRDD<Rating> data = sc.parallelize(testData._1());
+    List<Rating> testData = ALSSuite.generateRatingsAsJava(
+        users, products, features, 0.7, true, false)._1();
+    JavaRDD<Rating> data = sc.parallelize(testData);
     MatrixFactorizationModel model = new ALS().setRank(features)
       .setIterations(iterations)
       .setImplicitPrefs(true)

http://git-wip-us.apache.org/repos/asf/spark/blob/54040f8d/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java
 
b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java
index 7266eec..c56db70 100644
--- 
a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java
+++ 
b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java
@@ -19,14 +19,13 @@ package org.apache.spark.mllib.regression;
 
 import java.io.Serializable;
 import java.util.List;
+import java.util.Random;
 
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 
-import org.jblas.DoubleMatrix;
-
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.mllib.util.LinearDataGenerator;
@@ -45,7 +44,8 @@ public class JavaRidgeRegressionSuite implements Serializable 
{
       sc = null;
   }
 
-  double predictionError(List<LabeledPoint> validationData, 
RidgeRegressionModel model) {
+  private static double predictionError(List<LabeledPoint> validationData,
+                                        RidgeRegressionModel model) {
     double errorSum = 0;
     for (LabeledPoint point: validationData) {
       Double prediction = model.predict(point.features());
@@ -54,11 +54,14 @@ public class JavaRidgeRegressionSuite implements 
Serializable {
     return errorSum / validationData.size();
   }
 
-  List<LabeledPoint> generateRidgeData(int numPoints, int numFeatures, double 
std) {
-    org.jblas.util.Random.seed(42);
+  private static List<LabeledPoint> generateRidgeData(int numPoints, int 
numFeatures, double std) {
     // Pick weights as random values distributed uniformly in [-0.5, 0.5]
-    DoubleMatrix w = DoubleMatrix.rand(numFeatures, 1).subi(0.5);
-    return LinearDataGenerator.generateLinearInputAsList(0.0, w.data, 
numPoints, 42, std);
+    Random random = new Random(42);
+    double[] w = new double[numFeatures];
+    for (int i = 0; i < w.length; i++) {
+      w[i] = random.nextDouble() - 0.5;
+    }
+    return LinearDataGenerator.generateLinearInputAsList(0.0, w, numPoints, 
42, std);
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/spark/blob/54040f8d/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala 
b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
index 1a47344..3676d9c 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
@@ -20,7 +20,7 @@ package org.apache.spark.mllib.classification
 import scala.collection.JavaConverters._
 import scala.util.Random
 
-import org.jblas.DoubleMatrix
+import breeze.linalg.{DenseVector => BDV}
 
 import org.apache.spark.{SparkException, SparkFunSuite}
 import org.apache.spark.mllib.linalg.Vectors
@@ -45,12 +45,11 @@ object SVMSuite {
     nPoints: Int,
     seed: Int): Seq[LabeledPoint] = {
     val rnd = new Random(seed)
-    val weightsMat = new DoubleMatrix(1, weights.length, weights: _*)
+    val weightsMat = new BDV(weights)
     val x = Array.fill[Array[Double]](nPoints)(
         Array.fill[Double](weights.length)(rnd.nextDouble() * 2.0 - 1.0))
     val y = x.map { xi =>
-      val yD = new DoubleMatrix(1, xi.length, xi: _*).dot(weightsMat) +
-        intercept + 0.01 * rnd.nextGaussian()
+      val yD = new BDV(xi).dot(weightsMat) + intercept + 0.01 * 
rnd.nextGaussian()
       if (yD < 0) 0.0 else 1.0
     }
     y.zip(x).map(p => LabeledPoint(p._1, Vectors.dense(p._2)))

http://git-wip-us.apache.org/repos/asf/spark/blob/54040f8d/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala 
b/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala
index d8f9b8c..4ec3dc0 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala
@@ -19,28 +19,22 @@ package org.apache.spark.mllib.optimization
 
 import scala.util.Random
 
-import org.jblas.{DoubleMatrix, SimpleBlas}
+import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV}
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.mllib.util.TestingUtils._
 
 class NNLSSuite extends SparkFunSuite {
   /** Generate an NNLS problem whose optimal solution is the all-ones vector. 
*/
-  def genOnesData(n: Int, rand: Random): (DoubleMatrix, DoubleMatrix) = {
-    val A = new DoubleMatrix(n, n, Array.fill(n*n)(rand.nextDouble()): _*)
-    val b = A.mmul(DoubleMatrix.ones(n, 1))
-
-    val ata = A.transpose.mmul(A)
-    val atb = A.transpose.mmul(b)
-
-    (ata, atb)
+  def genOnesData(n: Int, rand: Random): (BDM[Double], BDV[Double]) = {
+    val A = new BDM(n, n, Array.fill(n*n)(rand.nextDouble()))
+    val b = A * new BDV(Array.fill(n)(1.0))
+    (A.t * A, A.t * b)
   }
 
   /** Compute the objective value */
-  def computeObjectiveValue(ata: DoubleMatrix, atb: DoubleMatrix, x: 
DoubleMatrix): Double = {
-    val res = (x.transpose().mmul(ata).mmul(x)).mul(0.5).sub(atb.dot(x))
-    res.get(0)
-  }
+  def computeObjectiveValue(ata: BDM[Double], atb: BDV[Double], x: 
BDV[Double]): Double =
+    (x.t * ata * x) / 2.0 - atb.dot(x)
 
   test("NNLS: exact solution cases") {
     val n = 20
@@ -54,12 +48,15 @@ class NNLSSuite extends SparkFunSuite {
 
     for (k <- 0 until 100) {
       val (ata, atb) = genOnesData(n, rand)
-      val x = new DoubleMatrix(NNLS.solve(ata.data, atb.data, ws))
+      val x = new BDV(NNLS.solve(ata.data, atb.data, ws))
       assert(x.length === n)
-      val answer = DoubleMatrix.ones(n, 1)
-      SimpleBlas.axpy(-1.0, answer, x)
-      val solved = (x.norm2 < 1e-2) && (x.normmax < 1e-3)
-      if (solved) numSolved = numSolved + 1
+      val answer = new BDV(Array.fill(n)(1.0))
+      val solved =
+        (breeze.linalg.norm(x - answer) < 0.01) &&    // L2 norm
+        ((x - answer).toArray.map(_.abs).max < 0.001) // inf norm
+      if (solved) {
+        numSolved += 1
+      }
     }
 
     assert(numSolved > 50)
@@ -67,20 +64,18 @@ class NNLSSuite extends SparkFunSuite {
 
   test("NNLS: nonnegativity constraint active") {
     val n = 5
-    // scalastyle:off
-    val ata = new DoubleMatrix(Array(
-      Array( 4.377, -3.531, -1.306, -0.139,  3.418),
-      Array(-3.531,  4.344,  0.934,  0.305, -2.140),
-      Array(-1.306,  0.934,  2.644, -0.203, -0.170),
-      Array(-0.139,  0.305, -0.203,  5.883,  1.428),
-      Array( 3.418, -2.140, -0.170,  1.428,  4.684)))
-    // scalastyle:on
-    val atb = new DoubleMatrix(Array(-1.632, 2.115, 1.094, -1.025, -0.636))
+    val ata = Array(
+       4.377, -3.531, -1.306, -0.139, 3.418,
+      -3.531, 4.344, 0.934, 0.305, -2.140,
+      -1.306, 0.934, 2.644, -0.203, -0.170,
+      -0.139, 0.305, -0.203, 5.883, 1.428,
+       3.418, -2.140, -0.170, 1.428, 4.684)
+    val atb = Array(-1.632, 2.115, 1.094, -1.025, -0.636)
 
     val goodx = Array(0.13025, 0.54506, 0.2874, 0.0, 0.028628)
 
     val ws = NNLS.createWorkspace(n)
-    val x = NNLS.solve(ata.data, atb.data, ws)
+    val x = NNLS.solve(ata, atb, ws)
     for (i <- 0 until n) {
       assert(x(i) ~== goodx(i) absTol 1E-3)
       assert(x(i) >= 0)
@@ -89,23 +84,21 @@ class NNLSSuite extends SparkFunSuite {
 
   test("NNLS: objective value test") {
     val n = 5
-    val ata = new DoubleMatrix(5, 5
-      , 517399.13534, 242529.67289, -153644.98976, 130802.84503, -798452.29283
-      , 242529.67289, 126017.69765, -75944.21743, 81785.36128, -405290.60884
-      , -153644.98976, -75944.21743, 46986.44577, -45401.12659, 247059.51049
-      , 130802.84503, 81785.36128, -45401.12659, 67457.31310, -253747.03819
-      , -798452.29283, -405290.60884, 247059.51049, -253747.03819, 
1310939.40814
-    )
-    val atb = new DoubleMatrix(5, 1,
-      -31755.05710, 13047.14813, -20191.24443, 25993.77580, 11963.55017)
+    val ata = new BDM(5, 5, Array(
+      517399.13534, 242529.67289, -153644.98976, 130802.84503, -798452.29283,
+      242529.67289, 126017.69765, -75944.21743, 81785.36128, -405290.60884,
+      -153644.98976, -75944.21743, 46986.44577, -45401.12659, 247059.51049,
+      130802.84503, 81785.36128, -45401.12659, 67457.31310, -253747.03819,
+      -798452.29283, -405290.60884, 247059.51049, -253747.03819, 
1310939.40814))
+    val atb = new BDV(Array(-31755.05710, 13047.14813, -20191.24443, 
25993.77580, 11963.55017))
 
     /** reference solution obtained from matlab function quadprog */
-    val refx = new DoubleMatrix(Array(34.90751, 103.96254, 0.00000, 27.82094, 
58.79627))
+    val refx = new BDV(Array(34.90751, 103.96254, 0.00000, 27.82094, 58.79627))
     val refObj = computeObjectiveValue(ata, atb, refx)
 
 
     val ws = NNLS.createWorkspace(n)
-    val x = new DoubleMatrix(NNLS.solve(ata.data, atb.data, ws))
+    val x = new BDV(NNLS.solve(ata.data, atb.data, ws))
     val obj = computeObjectiveValue(ata, atb, x)
 
     assert(obj < refObj + 1E-5)

http://git-wip-us.apache.org/repos/asf/spark/blob/54040f8d/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala 
b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
index 045135f..d9dc557 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
@@ -21,7 +21,7 @@ import scala.collection.JavaConverters._
 import scala.math.abs
 import scala.util.Random
 
-import org.jblas.DoubleMatrix
+import breeze.linalg.{DenseMatrix => BDM}
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.mllib.util.MLlibTestSparkContext
@@ -29,16 +29,16 @@ import org.apache.spark.storage.StorageLevel
 
 object ALSSuite {
 
-  def generateRatingsAsJavaList(
+  def generateRatingsAsJava(
       users: Int,
       products: Int,
       features: Int,
       samplingRate: Double,
       implicitPrefs: Boolean,
-      negativeWeights: Boolean): (java.util.List[Rating], DoubleMatrix, 
DoubleMatrix) = {
+      negativeWeights: Boolean): (java.util.List[Rating], Array[Double], 
Array[Double]) = {
     val (sampledRatings, trueRatings, truePrefs) =
-      generateRatings(users, products, features, samplingRate, implicitPrefs)
-    (sampledRatings.asJava, trueRatings, truePrefs)
+      generateRatings(users, products, features, samplingRate, implicitPrefs, 
negativeWeights)
+    (sampledRatings.asJava, trueRatings.toArray, if (truePrefs == null) null 
else truePrefs.toArray)
   }
 
   def generateRatings(
@@ -48,35 +48,36 @@ object ALSSuite {
       samplingRate: Double,
       implicitPrefs: Boolean = false,
       negativeWeights: Boolean = false,
-      negativeFactors: Boolean = true): (Seq[Rating], DoubleMatrix, 
DoubleMatrix) = {
+      negativeFactors: Boolean = true): (Seq[Rating], BDM[Double], 
BDM[Double]) = {
     val rand = new Random(42)
 
     // Create a random matrix with uniform values from -1 to 1
     def randomMatrix(m: Int, n: Int) = {
       if (negativeFactors) {
-        new DoubleMatrix(m, n, Array.fill(m * n)(rand.nextDouble() * 2 - 1): 
_*)
+        new BDM(m, n, Array.fill(m * n)(rand.nextDouble() * 2 - 1))
       } else {
-        new DoubleMatrix(m, n, Array.fill(m * n)(rand.nextDouble()): _*)
+        new BDM(m, n, Array.fill(m * n)(rand.nextDouble()))
       }
     }
 
     val userMatrix = randomMatrix(users, features)
     val productMatrix = randomMatrix(features, products)
-    val (trueRatings, truePrefs) = implicitPrefs match {
-      case true =>
+    val (trueRatings, truePrefs) =
+      if (implicitPrefs) {
         // Generate raw values from [0,9], or if negativeWeights, from [-2,7]
-        val raw = new DoubleMatrix(users, products,
+        val raw = new BDM(users, products,
           Array.fill(users * products)(
-            (if (negativeWeights) -2 else 0) + rand.nextInt(10).toDouble): _*)
+            (if (negativeWeights) -2 else 0) + rand.nextInt(10).toDouble))
         val prefs =
-          new DoubleMatrix(users, products, raw.data.map(v => if (v > 0) 1.0 
else 0.0): _*)
+          new BDM(users, products, raw.data.map(v => if (v > 0) 1.0 else 0.0))
         (raw, prefs)
-      case false => (userMatrix.mmul(productMatrix), null)
-    }
+      } else {
+        (userMatrix * productMatrix, null)
+      }
 
     val sampledRatings = {
       for (u <- 0 until users; p <- 0 until products if rand.nextDouble() < 
samplingRate)
-        yield Rating(u, p, trueRatings.get(u, p))
+        yield Rating(u, p, trueRatings(u, p))
     }
 
     (sampledRatings, trueRatings, truePrefs)
@@ -149,8 +150,8 @@ class ALSSuite extends SparkFunSuite with 
MLlibTestSparkContext {
       .setSeed(1)
       .setFinalRDDStorageLevel(storageLevel)
       .run(ratings)
-    assert(model.productFeatures.getStorageLevel == storageLevel);
-    assert(model.userFeatures.getStorageLevel == storageLevel);
+    assert(model.productFeatures.getStorageLevel == storageLevel)
+    assert(model.userFeatures.getStorageLevel == storageLevel)
     storageLevel = StorageLevel.DISK_ONLY
     model = new ALS()
       .setRank(5)
@@ -160,8 +161,8 @@ class ALSSuite extends SparkFunSuite with 
MLlibTestSparkContext {
       .setSeed(1)
       .setFinalRDDStorageLevel(storageLevel)
       .run(ratings)
-    assert(model.productFeatures.getStorageLevel == storageLevel);
-    assert(model.userFeatures.getStorageLevel == storageLevel);
+    assert(model.productFeatures.getStorageLevel == storageLevel)
+    assert(model.userFeatures.getStorageLevel == storageLevel)
   }
 
   test("negative ids") {
@@ -178,7 +179,7 @@ class ALSSuite extends SparkFunSuite with 
MLlibTestSparkContext {
       val u = r.user + 25
       val p = r.product + 25
       val v = r.rating
-      val error = v - correct.get(u, p)
+      val error = v - correct(u, p)
       assert(math.abs(error) < 0.4)
     }
   }
@@ -197,7 +198,7 @@ class ALSSuite extends SparkFunSuite with 
MLlibTestSparkContext {
    * @param samplingRate what fraction of the user-product pairs are known
    * @param matchThreshold max difference allowed to consider a predicted 
rating correct
    * @param implicitPrefs flag to test implicit feedback
-   * @param bulkPredict flag to test bulk predicition
+   * @param bulkPredict flag to test bulk prediction
    * @param negativeWeights whether the generated data can contain negative 
values
    * @param numUserBlocks number of user blocks to partition users into
    * @param numProductBlocks number of product blocks to partition products 
into
@@ -234,30 +235,31 @@ class ALSSuite extends SparkFunSuite with 
MLlibTestSparkContext {
       .setNonnegative(!negativeFactors)
       .run(sc.parallelize(sampledRatings))
 
-    val predictedU = new DoubleMatrix(users, features)
+    val predictedU = new BDM[Double](users, features)
     for ((u, vec) <- model.userFeatures.collect(); i <- 0 until features) {
-      predictedU.put(u, i, vec(i))
+      predictedU(u, i) = vec(i)
     }
-    val predictedP = new DoubleMatrix(products, features)
+    val predictedP = new BDM[Double](products, features)
     for ((p, vec) <- model.productFeatures.collect(); i <- 0 until features) {
-      predictedP.put(p, i, vec(i))
+      predictedP(p, i) = vec(i)
     }
-    val predictedRatings = bulkPredict match {
-      case false => predictedU.mmul(predictedP.transpose)
-      case true =>
-        val allRatings = new DoubleMatrix(users, products)
+    val predictedRatings =
+      if (bulkPredict) {
+        val allRatings = new BDM[Double](users, products)
         val usersProducts = for (u <- 0 until users; p <- 0 until products) 
yield (u, p)
         val userProductsRDD = sc.parallelize(usersProducts)
         model.predict(userProductsRDD).collect().foreach { elem =>
-          allRatings.put(elem.user, elem.product, elem.rating)
+          allRatings(elem.user, elem.product) = elem.rating
         }
         allRatings
-    }
+      } else {
+        predictedU * predictedP.t
+      }
 
     if (!implicitPrefs) {
       for (u <- 0 until users; p <- 0 until products) {
-        val prediction = predictedRatings.get(u, p)
-        val correct = trueRatings.get(u, p)
+        val prediction = predictedRatings(u, p)
+        val correct = trueRatings(u, p)
         if (math.abs(prediction - correct) > matchThreshold) {
           fail(("Model failed to predict (%d, %d): %f vs %f\ncorr: %s\npred: 
%s\nU: %s\n P: %s")
             .format(u, p, correct, prediction, trueRatings, predictedRatings, 
predictedU,
@@ -269,9 +271,9 @@ class ALSSuite extends SparkFunSuite with 
MLlibTestSparkContext {
       var sqErr = 0.0
       var denom = 0.0
       for (u <- 0 until users; p <- 0 until products) {
-        val prediction = predictedRatings.get(u, p)
-        val truePref = truePrefs.get(u, p)
-        val confidence = 1 + 1.0 * abs(trueRatings.get(u, p))
+        val prediction = predictedRatings(u, p)
+        val truePref = truePrefs(u, p)
+        val confidence = 1.0 + abs(trueRatings(u, p))
         val err = confidence * (truePref - prediction) * (truePref - 
prediction)
         sqErr += err
         denom += confidence

http://git-wip-us.apache.org/repos/asf/spark/blob/54040f8d/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
index a200e94..815be32 100644
--- 
a/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
@@ -19,8 +19,6 @@ package org.apache.spark.mllib.regression
 
 import scala.util.Random
 
-import org.jblas.DoubleMatrix
-
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.mllib.linalg.Vectors
 import org.apache.spark.mllib.util.{LinearDataGenerator, 
LocalClusterSparkContext,
@@ -49,12 +47,12 @@ class RidgeRegressionSuite extends SparkFunSuite with 
MLlibTestSparkContext {
     val numExamples = 50
     val numFeatures = 20
 
-    org.jblas.util.Random.seed(42)
     // Pick weights as random values distributed uniformly in [-0.5, 0.5]
-    val w = DoubleMatrix.rand(numFeatures, 1).subi(0.5)
+    val random = new Random(42)
+    val w = Array.fill(numFeatures)(random.nextDouble() - 0.5)
 
     // Use half of data for training and other half for validation
-    val data = LinearDataGenerator.generateLinearInput(3.0, w.toArray, 2 * 
numExamples, 42, 10.0)
+    val data = LinearDataGenerator.generateLinearInput(3.0, w, 2 * 
numExamples, 42, 10.0)
     val testData = data.take(numExamples)
     val validationData = data.takeRight(numExamples)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/54040f8d/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index dccfd10..90f4672 100644
--- a/pom.xml
+++ b/pom.xml
@@ -144,7 +144,6 @@
     <derby.version>10.10.1.1</derby.version>
     <parquet.version>1.7.0</parquet.version>
     <hive.parquet.version>1.6.0</hive.parquet.version>
-    <jblas.version>1.2.4</jblas.version>
     <jetty.version>8.1.14.v20131031</jetty.version>
     <orbit.version>3.0.0.v201112011016</orbit.version>
     <chill.version>0.7.4</chill.version>


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-13715][MLLIB] Remove last usages of jblas in tests

Reply via email to