MAHOUT-1754: Distance and squared distance matrices routines (dlyubimov) MAHOUT-1753: First and second moment routines (dlyubimov)
MAHOUT-1746: mxA ^ 2, mxA ^ 0.5 to mean the same thing as mxA * mxA and mxA ::= sqrt _ (dlyubimov) This closes apache/mahout#145 Squashed commit of the following: commit a6fc57810abfdcf854c2e06a4a8aa87e357901a0 Author: Dmitriy Lyubimov <[email protected]> Date: Wed Jun 24 22:49:20 2015 -0700 formula typo fix. commit 8bd70c043e7486ecf20f26f98094934fb16a51f2 Author: Dmitriy Lyubimov <[email protected]> Date: Wed Jun 24 16:45:44 2015 -0700 Adding comments per public review request commit 9394ac997f014f3e32439cbdd4e40deb9f03d6c5 Author: Dmitriy Lyubimov <[email protected]> Date: Tue Jun 23 16:02:45 2015 -0700 adding `dist` functions commit 7c5576ce1536e8873c08e0e35b6fc032b278ed5d Author: Dmitriy Lyubimov <[email protected]> Date: Tue Jun 23 15:38:28 2015 -0700 un-privatizing some of new functions. commit 526bfd626fbc398886b1b5dec37c6e2939ea7c4a Author: Dmitriy Lyubimov <[email protected]> Date: Tue Jun 23 14:40:32 2015 -0700 MAHOUT-1746: a ^ 2 to mean a * a not pow (a, 2.0) commit 806000a700450b7186f511486ca1ca828225abb3 Author: Dmitriy Lyubimov <[email protected]> Date: Mon Jun 22 18:03:51 2015 -0700 Added distance functions commit 637e050ed3a52b06e2ce1f691c5dfb6a77074a43 Author: Dmitriy Lyubimov <[email protected]> Date: Mon Jun 22 11:56:38 2015 -0700 First port of mu-variance-covariance functions Project: http://git-wip-us.apache.org/repos/asf/mahout/repo Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/349b94d8 Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/349b94d8 Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/349b94d8 Branch: refs/heads/mahout-0.10.x Commit: 349b94d887c1fb11fd00318717531f5cd25eab57 Parents: 31ec019 Author: Dmitriy Lyubimov <[email protected]> Authored: Wed Jul 1 16:01:31 2015 -0700 Committer: Dmitriy Lyubimov <[email protected]> Committed: Wed Jul 1 16:02:20 2015 -0700 ---------------------------------------------------------------------- CHANGELOG | 6 + .../apache/mahout/math/drm/RLikeDrmOps.scala | 10 +- 
.../org/apache/mahout/math/drm/package.scala | 161 ++++++++++++++++++- .../math/scalabindings/RLikeMatrixOps.scala | 11 +- .../math/scalabindings/RLikeVectorOps.scala | 10 +- .../mahout/math/scalabindings/package.scala | 92 ++++++++--- .../mahout/math/drm/DrmLikeOpsSuiteBase.scala | 24 +++ .../mahout/math/scalabindings/MathSuite.scala | 39 ++++- 8 files changed, 321 insertions(+), 32 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mahout/blob/349b94d8/CHANGELOG ---------------------------------------------------------------------- diff --git a/CHANGELOG b/CHANGELOG index dd65b0e..38c7d17 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,12 @@ Mahout Change Log Release 0.10.2 - unreleased + MAHOUT-1754: Distance and squared distance matrices routines (dlyubimov) + + MAHOUT-1753: First and second moment routines (dlyubimov) + + MAHOUT-1746: mxA ^ 2, mxA ^ 0.5 to mean the same thing as mxA * mxA and mxA ::= sqrt _ (dlyubimov) + MAHOUT-1660: Hadoop1HDFSUtil.readDRMHEader should be taking Hadoop conf (dlyubimov) MAHOUT-1713: Performance and parallelization improvements for AB', A'B, A'A spark physical operators (dlyubimov) http://git-wip-us.apache.org/repos/asf/mahout/blob/349b94d8/math-scala/src/main/scala/org/apache/mahout/math/drm/RLikeDrmOps.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/RLikeDrmOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/RLikeDrmOps.scala index 7927e51..aac7da1 100644 --- a/math-scala/src/main/scala/org/apache/mahout/math/drm/RLikeDrmOps.scala +++ b/math-scala/src/main/scala/org/apache/mahout/math/drm/RLikeDrmOps.scala @@ -50,7 +50,15 @@ class RLikeDrmOps[K: ClassTag](drm: DrmLike[K]) extends DrmLikeOps[K](drm) { def *:(that: Double): DrmLike[K] = OpAewUnaryFunc[K](A = this, f = that * _) - def ^(that: Double): DrmLike[K] = OpAewUnaryFunc[K](A = this, f = 
math.pow(_, that)) + def ^(that: Double): DrmLike[K] = that match { + // Special handling of x ^2 and x ^ 0.5: we want consistent handling of x ^ 2 and x * x since + // pow(x,2) function return results different from x * x; but much of the code uses this + // interchangeably. Not having this done will create things like NaN entries on main diagonal + // of a distance matrix. + case 2.0 ⇒ OpAewUnaryFunc[K](A = this, f = x ⇒ x * x) + case 0.5 ⇒ OpAewUnaryFunc[K](A = this, f = math.sqrt _) + case _ ⇒ OpAewUnaryFunc[K](A = this, f = math.pow(_, that)) + } def /(that: Double): DrmLike[K] = OpAewUnaryFunc[K](A = this, f = _ / that, evalZeros = that == 0.0) http://git-wip-us.apache.org/repos/asf/mahout/blob/349b94d8/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala index d865b58..e972dd8 100644 --- a/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala +++ b/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala @@ -17,7 +17,7 @@ package org.apache.mahout.math -import org.apache.mahout.math.drm.DistributedContext +import org.apache.mahout.math.drm._ import org.apache.mahout.math.indexeddataset.{IndexedDataset, DefaultIndexedDatasetReadSchema, Schema} import org.apache.mahout.math.scalabindings.RLikeOps._ import org.apache.mahout.math.scalabindings._ @@ -160,6 +160,165 @@ package object drm { def dsqrt[K: ClassTag](drmA: DrmLike[K]): DrmLike[K] = new OpAewUnaryFunc[K](drmA, math.sqrt) def dsignum[K: ClassTag](drmA: DrmLike[K]): DrmLike[K] = new OpAewUnaryFunc[K](drmA, math.signum) + + /////////////////////////////////////////////////////////// + // Misc. math utilities. + + /** + * Compute column wise means and variances -- distributed version. + * + * @param drmA Note: will pin input to cache if not yet pinned. 
+ * @tparam K + * @return colMeans → colVariances + */ + def dcolMeanVars[K: ClassTag](drmA: DrmLike[K]): (Vector, Vector) = { + + import RLikeDrmOps._ + + val drmAcp = drmA.checkpoint() + + val mu = drmAcp colMeans + + // Compute variance using mean(x^2) - mean(x)^2 + val variances = (drmAcp ^ 2 colMeans) -=: mu * mu + + mu → variances + } + + /** + * Compute column wise means and standard deviations -- distributed version. + * @param drmA note: input will be pinned to cache if not yet pinned + * @return colMeans → colStdevs + */ + def dcolMeanStdevs[K: ClassTag](drmA: DrmLike[K]): (Vector, Vector) = { + val (mu, vars) = dcolMeanVars(drmA) + mu → (vars ::= math.sqrt _) + } + + /** + * Thin column-wise mean and covariance matrix computation. Same as [[dcolMeanCov()]] but suited for + * thin and tall inputs where covariance matrix can be reduced and finalized in driver memory. + * + * @param drmA note: will pin input to cache if not yet pinned. + * @return mean → covariance matrix (in core) + */ + def dcolMeanCovThin[K: ClassTag](drmA: DrmLike[K]):(Vector, Matrix) = { + + import RLikeDrmOps._ + + val drmAcp = drmA.checkpoint() + val mu = drmAcp colMeans + val mxCov = (drmAcp.t %*% drmAcp).collect /= drmAcp.nrow -= (mu cross mu) + mu → mxCov + } + + /** + * Compute COV(X) matrix and mean of row-wise data set. X is presented as row-wise input matrix A. + * + * This is a "wide" procedure, covariance matrix is returned as a DRM. + * + * @param drmA note: will pin input into cache if not yet pinned. + * @return mean → covariance DRM + */ + def dcolMeanCov[K: ClassTag](drmA: DrmLike[K]): (Vector, DrmLike[Int]) = { + + import RLikeDrmOps._ + + implicit val ctx = drmA.context + val drmAcp = drmA.checkpoint() + + val bcastMu = drmBroadcast(drmAcp colMeans) + + // We use multivaraite analogue COV(X)=E(XX')-mu*mu'. In our case E(XX') = (A'A)/A.nrow. + // Compute E(XX') + val drmSigma = (drmAcp.t %*% drmAcp / drmAcp.nrow) + + // Subtract mu*mu'. 
In this case we assume mu*mu' may still be big enough to be treated by + // driver alone, so we redistribute this operation as well. Hence it may look a bit cryptic. + .mapBlock() { case (keys, block) ⇒ + + // Pin mu as vector reference to memory. + val mu:Vector = bcastMu + + keys → (block := { (r, c, v) ⇒ v - mu(keys(r)) * mu(c) }) + } + + // return (mu, cov(X) ("bigSigma")). + (bcastMu: Vector) → drmSigma + } + + /** Distributed Squared distance matrix computation. */ + def dsqDist(drmX: DrmLike[Int]): DrmLike[Int] = { + + // This is a specific case of pairwise distances of X and Y. + + import RLikeDrmOps._ + + // Context needed + implicit val ctx = drmX.context + + // Pin to cache if hasn't been pinned yet + val drmXcp = drmX.checkpoint() + + // Compute column sum of squares + val s = drmXcp ^ 2 rowSums + + val sBcast = drmBroadcast(s) + + (drmXcp %*% drmXcp.t) + + // Apply second part of the formula as per in-core algorithm + .mapBlock() { case (keys, block) ⇒ + + // Slurp broadcast to memory + val s = sBcast: Vector + + // Update in-place + block := { (r, c, x) ⇒ s(keys(r)) + s(c) - 2 * x} + + keys → block + } + } + + + /** + * Compute fold-in distances (distributed version). Here, we use pretty much the same math as with + * squared distances. + * + * D_sq = s*1' + 1*t' - 2*X*Y' + * + * where s is row sums of hadamard product(X, X), and, similarly, + * s is row sums of Hadamard product(Y, Y). + * + * @param drmX m x d row-wise dataset. Pinned to cache if not yet pinned. + * @param drmY n x d row-wise dataset. Pinned to cache if not yet pinned. 
+ * @return m x d pairwise squared distance matrix (between rows of X and Y) + */ + def dsqDist(drmX: DrmLike[Int], drmY: DrmLike[Int]): DrmLike[Int] = { + + import RLikeDrmOps._ + + implicit val ctx = drmX.context + + val drmXcp = drmX.checkpoint() + val drmYcp = drmY.checkpoint() + + val sBcast = drmBroadcast(drmXcp ^ 2 rowSums) + val tBcast = drmBroadcast(drmYcp ^ 2 rowSums) + + (drmX %*% drmY.t) + + // Apply the rest of the formula + .mapBlock() { case (keys, block) => + + // Cache broadcast representations in local task variable + val s = sBcast: Vector + val t = tBcast: Vector + + block := { (r, c, x) => s(keys(r)) + t(c) - 2 * x} + keys → block + } + } } http://git-wip-us.apache.org/repos/asf/mahout/blob/349b94d8/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOps.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOps.scala index 7091c53..e994e31 100644 --- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOps.scala +++ b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOps.scala @@ -108,8 +108,15 @@ class RLikeMatrixOps(m: Matrix) extends MatrixOps(m) { } def ^=(that: Double) = { - m ::= { x ⇒ math.pow(x, that) } - m + that match { + // Special handling of x ^2 and x ^ 0.5: we want consistent handling of x ^ 2 and x * x since + // pow(x,2) function return results different from x * x; but much of the code uses this + // interchangeably. Not having this done will create things like NaN entries on main diagonal + // of a distance matrix. 
+ case 2.0 ⇒ m ::= { x ⇒ x * x } + case 0.5 ⇒ m ::= math.sqrt _ + case _ ⇒ m ::= { x ⇒ math.pow(x, that) } + } } def ^(that: Double) = m.cloned ^= that http://git-wip-us.apache.org/repos/asf/mahout/blob/349b94d8/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeVectorOps.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeVectorOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeVectorOps.scala index 38a55d6..bf1bb30 100644 --- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeVectorOps.scala +++ b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeVectorOps.scala @@ -67,7 +67,15 @@ class RLikeVectorOps(_v: Vector) extends VectorOps(_v) { /** Elementwise right-associative / */ def /:(that: Vector) = that.cloned /= v - def ^=(that: Double) = v.assign(Functions.POW, that) + def ^=(that: Double) = that match { + // Special handling of x ^2 and x ^ 0.5: we want consistent handling of x ^ 2 and x * x since + // pow(x,2) function return results different from x * x; but much of the code uses this + // interchangeably. Not having this done will create things like NaN entries on main diagonal + // of a distance matrix. 
+ case 2.0 ⇒ v.assign(Functions.SQUARE) + case 0.5 ⇒ v.assign(Functions.SQRT) + case _ ⇒ v.assign (Functions.POW, that) + } def ^=(that: Vector) = v.assign(that, Functions.POW) http://git-wip-us.apache.org/repos/asf/mahout/blob/349b94d8/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala index 20dc9cd..7ff09bf 100644 --- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala +++ b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala @@ -125,34 +125,34 @@ package object scalabindings { */ def dense[R](rows: R*): DenseMatrix = { import RLikeOps._ - val data = for (r <- rows) yield { + val data = for (r ← rows) yield { r match { - case n: Number => Array(n.doubleValue()) - case t: Vector => Array.tabulate(t.length)(t(_)) - case t: Array[Double] => t - case t: Iterable[_] => + case n: Number ⇒ Array(n.doubleValue()) + case t: Vector ⇒ Array.tabulate(t.length)(t(_)) + case t: Array[Double] ⇒ t + case t: Iterable[_] ⇒ t.head match { - case ss: Double => t.asInstanceOf[Iterable[Double]].toArray - case vv: Vector => + case ss: Double ⇒ t.asInstanceOf[Iterable[Double]].toArray + case vv: Vector ⇒ val m = new DenseMatrix(t.size, t.head.asInstanceOf[Vector].length) t.asInstanceOf[Iterable[Vector]].view.zipWithIndex.foreach { - case (v, idx) => m(idx, ::) := v + case (v, idx) ⇒ m(idx, ::) := v } return m } - case t: Product => t.productIterator.map(_.asInstanceOf[Number].doubleValue()).toArray - case t: Array[Array[Double]] => if (rows.size == 1) + case t: Product ⇒ t.productIterator.map(_.asInstanceOf[Number].doubleValue()).toArray + case t: Array[Array[Double]] ⇒ if (rows.size == 1) return new DenseMatrix(t) else throw new IllegalArgumentException( "double[][] data 
parameter can be the only argument for dense()") - case t: Array[Vector] => + case t: Array[Vector] ⇒ val m = new DenseMatrix(t.size, t.head.length) t.view.zipWithIndex.foreach { - case (v, idx) => m(idx, ::) := v + case (v, idx) ⇒ m(idx, ::) := v } return m - case _ => throw new IllegalArgumentException("unsupported type in the inline Matrix initializer") + case _ ⇒ throw new IllegalArgumentException("unsupported type in the inline Matrix initializer") } } new DenseMatrix(data.toArray) @@ -179,7 +179,7 @@ package object scalabindings { val nrow = rows.size val ncol = rows.map(_.size()).max val m = new SparseRowMatrix(nrow, ncol) - m := rows.map { row => + m := rows.map { row ⇒ if (row.length < ncol) { val newRow = row.like(ncol) newRow(0 until row.length) := row @@ -200,7 +200,7 @@ package object scalabindings { val cardinality = if (sdata.size > 0) sdata.map(_._1).max + 1 else 0 val initialCapacity = sdata.size val sv = new RandomAccessSparseVector(cardinality, initialCapacity) - sdata.foreach(t => sv.setQuick(t._1, t._2.asInstanceOf[Number].doubleValue())) + sdata.foreach(t ⇒ sv.setQuick(t._1, t._2.asInstanceOf[Number].doubleValue())) sv } @@ -337,12 +337,64 @@ package object scalabindings { /** Matrix-matrix unary func */ - type MMUnaryFunc = (Matrix, Option[Matrix]) => Matrix + type MMUnaryFunc = (Matrix, Option[Matrix]) ⇒ Matrix /** Binary matrix-matrix operations which may save result in-place, optionally */ - type MMBinaryFunc = (Matrix, Matrix, Option[Matrix]) => Matrix - type MVBinaryFunc = (Matrix, Vector, Option[Matrix]) => Matrix - type VMBinaryFunc = (Vector, Matrix, Option[Matrix]) => Matrix - type MDBinaryFunc = (Matrix, Double, Option[Matrix]) => Matrix + type MMBinaryFunc = (Matrix, Matrix, Option[Matrix]) ⇒ Matrix + type MVBinaryFunc = (Matrix, Vector, Option[Matrix]) ⇒ Matrix + type VMBinaryFunc = (Vector, Matrix, Option[Matrix]) ⇒ Matrix + type MDBinaryFunc = (Matrix, Double, Option[Matrix]) ⇒ Matrix + ///////////////////////////////////// + // 
Miscellaneous in-core utilities + + /** + * Compute column-wise means and variances. + * + * @return colMeans → colVariances + */ + def colMeanVars(mxA:Matrix): (Vector, Vector) = { + val mu = mxA.colMeans() + val variance = (mxA * mxA colMeans) -= mu ^ 2 + mu → variance + } + + /** + * Compute column-wise means and stdevs. + * @param mxA input + * @return colMeans → colStdevs + */ + def colMeanStdevs(mxA:Matrix) = { + val (mu, variance) = colMeanVars(mxA) + mu → (variance ::= math.sqrt _) + } + + /** Compute square distance matrix. We assume data points are row-wise, similar to R's dist(). */ + def sqDist(mxX: Matrix): Matrix = { + + val s = mxX ^ 2 rowSums + + (mxX %*% mxX.t) := { (r, c, x) ⇒ s(r) + s(c) - 2 * x} + } + + /** + * Pairwise squared distance computation. + * @param mxX X, m x d + * @param mxY Y, n x d + * @return pairwise squaired distances of row-wise data points in X and Y (m x n) + */ + def sqDist(mxX: Matrix, mxY: Matrix): Matrix = { + + val s = mxX ^ 2 rowSums + + val t = mxY ^ 2 rowSums + + // D = s*1' + 1*t' - 2XY' + (mxX %*% mxY.t) := { (r, c, d) ⇒ s(r) + t(c) - 2.0 * d} + } + + def dist(mxX: Matrix): Matrix = sqDist(mxX) := sqrt _ + + def dist(mxX: Matrix, mxY: Matrix): Matrix = sqDist(mxX, mxY) := sqrt _ + } http://git-wip-us.apache.org/repos/asf/mahout/blob/349b94d8/math-scala/src/test/scala/org/apache/mahout/math/drm/DrmLikeOpsSuiteBase.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/test/scala/org/apache/mahout/math/drm/DrmLikeOpsSuiteBase.scala b/math-scala/src/test/scala/org/apache/mahout/math/drm/DrmLikeOpsSuiteBase.scala index bb42121..fdfb3f9 100644 --- a/math-scala/src/test/scala/org/apache/mahout/math/drm/DrmLikeOpsSuiteBase.scala +++ b/math-scala/src/test/scala/org/apache/mahout/math/drm/DrmLikeOpsSuiteBase.scala @@ -110,4 +110,28 @@ trait DrmLikeOpsSuiteBase extends DistributedMahoutSuite with Matchers { } + test("dsqDist(X,Y)") { + val m = 100 + val n = 300 + val d = 7 + 
val mxX = Matrices.symmetricUniformView(m, d, 12345).cloned -= 5 + val mxY = Matrices.symmetricUniformView(n, d, 1234).cloned += 10 + val (drmX, drmY) = (drmParallelize(mxX, 3), drmParallelize(mxY, 4)) + + val mxDsq = dsqDist(drmX, drmY).collect + val mxDsqControl = new DenseMatrix(m, n) := { (r, c, _) ⇒ (mxX(r, ::) - mxY(c, ::)) ^= 2 sum } + (mxDsq - mxDsqControl).norm should be < 1e-7 + } + + test("dsqDist(X)") { + val m = 100 + val d = 7 + val mxX = Matrices.symmetricUniformView(m, d, 12345).cloned -= 5 + val drmX = drmParallelize(mxX, 3) + + val mxDsq = dsqDist(drmX).collect + val mxDsqControl = sqDist(drmX) + (mxDsq - mxDsqControl).norm should be < 1e-7 + } + } http://git-wip-us.apache.org/repos/asf/mahout/blob/349b94d8/math-scala/src/test/scala/org/apache/mahout/math/scalabindings/MathSuite.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/test/scala/org/apache/mahout/math/scalabindings/MathSuite.scala b/math-scala/src/test/scala/org/apache/mahout/math/scalabindings/MathSuite.scala index b10cde3..bcfe109 100644 --- a/math-scala/src/test/scala/org/apache/mahout/math/scalabindings/MathSuite.scala +++ b/math-scala/src/test/scala/org/apache/mahout/math/scalabindings/MathSuite.scala @@ -17,6 +17,7 @@ package org.apache.mahout.math.scalabindings +import org.apache.mahout.logging._ import org.scalatest.{Matchers, FunSuite} import org.apache.mahout.math._ import scala.math._ @@ -28,6 +29,8 @@ import org.apache.mahout.common.RandomUtils class MathSuite extends FunSuite with MahoutSuite { + private final implicit val log = getLog(classOf[MathSuite]) + test("chol") { // try to solve Ax=b with cholesky: @@ -41,26 +44,26 @@ class MathSuite extends FunSuite with MahoutSuite { // make sure it is symmetric for a valid solution a := a.t %*% a - printf("A= \n%s\n", a) + trace(s"A= \n$a") val b = dense((9, 8, 7)).t - printf("b = \n%s\n", b) + trace(s"b = \n$b") - // fails if chol(a,true) + // Fails if chol(a, true) val ch = 
chol(a) - printf("L = \n%s\n", ch.getL) + trace(s"L = \n${ch.getL}") - printf("(L^-1)b =\n%s\n", ch.solveLeft(b)) + trace(s"(L^-1)b =\n${ch.solveLeft(b)}\n") val x = ch.solveRight(eye(3)) %*% ch.solveLeft(b) - printf("x = \n%s\n", x.toString) + trace(s"x = \n$x") val axmb = (a %*% x) - b - printf("AX - B = \n%s\n", axmb.toString) + trace(s"AX - B = \n$axmb") axmb.norm should be < 1e-10 @@ -211,4 +214,26 @@ class MathSuite extends FunSuite with MahoutSuite { } + test("sqDist(X,Y)") { + val m = 100 + val n = 300 + val d = 7 + val mxX = Matrices.symmetricUniformView(m, d, 12345).cloned -= 5 + val mxY = Matrices.symmetricUniformView(n, d, 1234).cloned += 10 + + val mxDsq = sqDist(mxX, mxY) + val mxDsqControl = new DenseMatrix(m, n) := { (r, c, _) ⇒ (mxX(r, ::) - mxY(c, ::)) ^= 2 sum } + (mxDsq - mxDsqControl).norm should be < 1e-7 + } + + test("sqDist(X)") { + val m = 100 + val d = 7 + val mxX = Matrices.symmetricUniformView(m, d, 12345).cloned -= 5 + + val mxDsq = sqDist(mxX) + val mxDsqControl = sqDist(mxX, mxX) + (mxDsq - mxDsqControl).norm should be < 1e-7 + } + }
