[23/37] mahout git commit: MAHOUT-1754: Distance and squared distance matrices routines (dlyubimov)

smarthi Fri, 31 Jul 2015 20:27:18 -0700

  MAHOUT-1754: Distance and squared distance matrices routines (dlyubimov)

  MAHOUT-1753: First and second moment routines (dlyubimov)


  MAHOUT-1746: mxA ^ 2, mxA ^ 0.5 to mean the same thing as mxA * mxA and mxA 
::= sqrt _ (dlyubimov)

This closes apache/mahout#145

Squashed commit of the following:

commit a6fc57810abfdcf854c2e06a4a8aa87e357901a0
Author: Dmitriy Lyubimov <[email protected]>
Date:   Wed Jun 24 22:49:20 2015 -0700

    formula typo fix.

commit 8bd70c043e7486ecf20f26f98094934fb16a51f2
Author: Dmitriy Lyubimov <[email protected]>
Date:   Wed Jun 24 16:45:44 2015 -0700

    Adding comments per public review request

commit 9394ac997f014f3e32439cbdd4e40deb9f03d6c5
Author: Dmitriy Lyubimov <[email protected]>
Date:   Tue Jun 23 16:02:45 2015 -0700

    adding `dist` functions

commit 7c5576ce1536e8873c08e0e35b6fc032b278ed5d
Author: Dmitriy Lyubimov <[email protected]>
Date:   Tue Jun 23 15:38:28 2015 -0700

    un-privatizing some of new functions.

commit 526bfd626fbc398886b1b5dec37c6e2939ea7c4a
Author: Dmitriy Lyubimov <[email protected]>
Date:   Tue Jun 23 14:40:32 2015 -0700

    MAHOUT-1746: a ^ 2 to mean a * a not pow (a, 2.0)

commit 806000a700450b7186f511486ca1ca828225abb3
Author: Dmitriy Lyubimov <[email protected]>
Date:   Mon Jun 22 18:03:51 2015 -0700

    Added distance functions

commit 637e050ed3a52b06e2ce1f691c5dfb6a77074a43
Author: Dmitriy Lyubimov <[email protected]>
Date:   Mon Jun 22 11:56:38 2015 -0700

    First port of mu-variance-covariance functions


Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/349b94d8
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/349b94d8
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/349b94d8

Branch: refs/heads/mahout-0.10.x
Commit: 349b94d887c1fb11fd00318717531f5cd25eab57
Parents: 31ec019
Author: Dmitriy Lyubimov <[email protected]>
Authored: Wed Jul 1 16:01:31 2015 -0700
Committer: Dmitriy Lyubimov <[email protected]>
Committed: Wed Jul 1 16:02:20 2015 -0700

----------------------------------------------------------------------
 CHANGELOG                                       |   6 +
 .../apache/mahout/math/drm/RLikeDrmOps.scala    |  10 +-
 .../org/apache/mahout/math/drm/package.scala    | 161 ++++++++++++++++++-
 .../math/scalabindings/RLikeMatrixOps.scala     |  11 +-
 .../math/scalabindings/RLikeVectorOps.scala     |  10 +-
 .../mahout/math/scalabindings/package.scala     |  92 ++++++++---
 .../mahout/math/drm/DrmLikeOpsSuiteBase.scala   |  24 +++
 .../mahout/math/scalabindings/MathSuite.scala   |  39 ++++-
 8 files changed, 321 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mahout/blob/349b94d8/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index dd65b0e..38c7d17 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,12 @@ Mahout Change Log
 
 Release 0.10.2 - unreleased
 
+  MAHOUT-1754: Distance and squared distance matrices routines (dlyubimov)
+
+  MAHOUT-1753: First and second moment routines (dlyubimov)
+
+  MAHOUT-1746: mxA ^ 2, mxA ^ 0.5 to mean the same thing as mxA * mxA and mxA 
::= sqrt _ (dlyubimov)
+
   MAHOUT-1660: Hadoop1HDFSUtil.readDRMHEader should be taking Hadoop conf 
(dlyubimov)
 
   MAHOUT-1713: Performance and parallelization improvements for AB', A'B, A'A 
spark physical operators (dlyubimov)

http://git-wip-us.apache.org/repos/asf/mahout/blob/349b94d8/math-scala/src/main/scala/org/apache/mahout/math/drm/RLikeDrmOps.scala
----------------------------------------------------------------------
diff --git 
a/math-scala/src/main/scala/org/apache/mahout/math/drm/RLikeDrmOps.scala 
b/math-scala/src/main/scala/org/apache/mahout/math/drm/RLikeDrmOps.scala
index 7927e51..aac7da1 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/drm/RLikeDrmOps.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/drm/RLikeDrmOps.scala
@@ -50,7 +50,15 @@ class RLikeDrmOps[K: ClassTag](drm: DrmLike[K]) extends 
DrmLikeOps[K](drm) {
 
   def *:(that: Double): DrmLike[K] = OpAewUnaryFunc[K](A = this, f = that * _)
 
-  def ^(that: Double): DrmLike[K] = OpAewUnaryFunc[K](A = this, f = 
math.pow(_, that))
+  def ^(that: Double): DrmLike[K] = that match {
+    // Special handling of x ^2 and x ^ 0.5: we want consistent handling of x 
^ 2 and x * x since
+    // pow(x,2) function return results different from x * x; but much of the 
code uses this
+    // interchangeably. Not having this done will create things like NaN 
entries on main diagonal
+    // of a distance matrix.
+    case 2.0 ⇒ OpAewUnaryFunc[K](A = this, f = x ⇒ x * x)
+    case 0.5 ⇒ OpAewUnaryFunc[K](A = this, f = math.sqrt _)
+    case _ ⇒ OpAewUnaryFunc[K](A = this, f = math.pow(_, that))
+  }
 
   def /(that: Double): DrmLike[K] = OpAewUnaryFunc[K](A = this, f = _ / that, 
evalZeros = that == 0.0)
 

http://git-wip-us.apache.org/repos/asf/mahout/blob/349b94d8/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala 
b/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala
index d865b58..e972dd8 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala
@@ -17,7 +17,7 @@
 
 package org.apache.mahout.math
 
-import org.apache.mahout.math.drm.DistributedContext
+import org.apache.mahout.math.drm._
 import org.apache.mahout.math.indexeddataset.{IndexedDataset, 
DefaultIndexedDatasetReadSchema, Schema}
 import org.apache.mahout.math.scalabindings.RLikeOps._
 import org.apache.mahout.math.scalabindings._
@@ -160,6 +160,165 @@ package object drm {
   def dsqrt[K: ClassTag](drmA: DrmLike[K]): DrmLike[K] = new 
OpAewUnaryFunc[K](drmA, math.sqrt)
 
   def dsignum[K: ClassTag](drmA: DrmLike[K]): DrmLike[K] = new 
OpAewUnaryFunc[K](drmA, math.signum)
+  
+  ///////////////////////////////////////////////////////////
+  // Misc. math utilities.
+
+  /**
+   * Compute column wise means and variances -- distributed version.
+   *
+   * @param drmA Note: will pin input to cache if not yet pinned.
+   * @tparam K
+   * @return colMeans → colVariances
+   */
+  def dcolMeanVars[K: ClassTag](drmA: DrmLike[K]): (Vector, Vector) = {
+
+    import RLikeDrmOps._
+
+    val drmAcp = drmA.checkpoint()
+    
+    val mu = drmAcp colMeans
+
+    // Compute variance using mean(x^2) - mean(x)^2
+    val variances = (drmAcp ^ 2 colMeans) -=: mu * mu
+
+    mu → variances
+  }
+
+  /**
+   * Compute column wise means and standard deviations -- distributed version.
+   * @param drmA note: input will be pinned to cache if not yet pinned
+   * @return colMeans → colStdevs
+   */
+  def dcolMeanStdevs[K: ClassTag](drmA: DrmLike[K]): (Vector, Vector) = {
+    val (mu, vars) = dcolMeanVars(drmA)
+    mu → (vars ::= math.sqrt _)
+  }
+
+  /**
+   * Thin column-wise mean and covariance matrix computation. Same as 
[[dcolMeanCov()]] but suited for
+   * thin and tall inputs where covariance matrix can be reduced and finalized 
in driver memory.
+   * 
+   * @param drmA note: will pin input to cache if not yet pinned.
+   * @return mean → covariance matrix (in core)
+   */
+  def dcolMeanCovThin[K: ClassTag](drmA: DrmLike[K]):(Vector, Matrix) = {
+
+    import RLikeDrmOps._
+
+    val drmAcp = drmA.checkpoint()
+    val mu = drmAcp colMeans
+    val mxCov = (drmAcp.t %*% drmAcp).collect /= drmAcp.nrow -= (mu cross mu)
+    mu → mxCov
+  }
+
+  /**
+   * Compute COV(X) matrix and mean of row-wise data set. X is presented as 
row-wise input matrix A.
+   *
+   * This is a "wide" procedure, covariance matrix is returned as a DRM.
+   *
+   * @param drmA note: will pin input into cache if not yet pinned.
+   * @return mean → covariance DRM
+   */
+  def dcolMeanCov[K: ClassTag](drmA: DrmLike[K]): (Vector, DrmLike[Int]) = {
+
+    import RLikeDrmOps._
+
+    implicit val ctx = drmA.context
+    val drmAcp = drmA.checkpoint()
+
+    val bcastMu = drmBroadcast(drmAcp colMeans)
+
+    // We use multivariate analogue COV(X)=E(XX')-mu*mu'. In our case E(XX') = 
(A'A)/A.nrow.
+    // Compute E(XX')
+    val drmSigma = (drmAcp.t %*% drmAcp / drmAcp.nrow)
+
+      // Subtract mu*mu'. In this case we assume mu*mu' may still be big 
enough to be treated by
+      // driver alone, so we redistribute this operation as well. Hence it may 
look a bit cryptic.
+      .mapBlock() { case (keys, block) ⇒
+
+      // Pin mu as vector reference to memory.
+      val mu:Vector = bcastMu
+
+      keys → (block := { (r, c, v) ⇒ v - mu(keys(r)) * mu(c) })
+    }
+
+    // return (mu, cov(X) ("bigSigma")).
+    (bcastMu: Vector) → drmSigma
+  }
+
+  /** Distributed Squared distance matrix computation. */
+  def dsqDist(drmX: DrmLike[Int]): DrmLike[Int] = {
+
+    // This is a specific case of pairwise distances of X and Y.
+
+    import RLikeDrmOps._
+
+    // Context needed
+    implicit val ctx = drmX.context
+
+    // Pin to cache if hasn't been pinned yet
+    val drmXcp = drmX.checkpoint()
+
+    // Compute row sums of squares
+    val s = drmXcp ^ 2 rowSums
+
+    val sBcast = drmBroadcast(s)
+
+    (drmXcp %*% drmXcp.t)
+
+      // Apply second part of the formula as per in-core algorithm
+      .mapBlock() { case (keys, block) ⇒
+
+      // Slurp broadcast to memory
+      val s = sBcast: Vector
+
+      // Update in-place
+      block := { (r, c, x) ⇒ s(keys(r)) + s(c) - 2 * x}
+
+      keys → block
+    }
+  }
+
+
+  /**
+   * Compute fold-in distances (distributed version). Here, we use pretty much 
the same math as with
+   * squared distances.
+   *
+   * D_sq = s*1' + 1*t' - 2*X*Y'
+   *
+   * where s is row sums of Hadamard product(X, X), and, similarly,
+   * t is row sums of Hadamard product(Y, Y).
+   *
+   * @param drmX m x d row-wise dataset. Pinned to cache if not yet pinned.
+   * @param drmY n x d row-wise dataset. Pinned to cache if not yet pinned.
+   * @return m x n pairwise squared distance matrix (between rows of X and Y)
+   */
+  def dsqDist(drmX: DrmLike[Int], drmY: DrmLike[Int]): DrmLike[Int] = {
+
+    import RLikeDrmOps._
+
+    implicit val ctx = drmX.context
+
+    val drmXcp = drmX.checkpoint()
+    val drmYcp = drmY.checkpoint()
+
+    val sBcast = drmBroadcast(drmXcp ^ 2 rowSums)
+    val tBcast = drmBroadcast(drmYcp ^ 2 rowSums)
+
+    (drmX %*% drmY.t)
+
+      // Apply the rest of the formula
+      .mapBlock() { case (keys, block) =>
+
+      // Cache broadcast representations in local task variable
+      val s = sBcast: Vector
+      val t = tBcast: Vector
+
+      block := { (r, c, x) => s(keys(r)) + t(c) - 2 * x}
+      keys → block
+    }
+  }
 
 }
 

http://git-wip-us.apache.org/repos/asf/mahout/blob/349b94d8/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOps.scala
----------------------------------------------------------------------
diff --git 
a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOps.scala
 
b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOps.scala
index 7091c53..e994e31 100644
--- 
a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOps.scala
+++ 
b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOps.scala
@@ -108,8 +108,15 @@ class RLikeMatrixOps(m: Matrix) extends MatrixOps(m) {
   }
 
   def ^=(that: Double) = {
-    m ::= { x ⇒ math.pow(x, that) }
-    m
+    that match {
+      // Special handling of x ^2 and x ^ 0.5: we want consistent handling of 
x ^ 2 and x * x since
+      // pow(x,2) function return results different from x * x; but much of 
the code uses this
+      // interchangeably. Not having this done will create things like NaN 
entries on main diagonal
+      // of a distance matrix.
+      case 2.0 ⇒ m ::= { x ⇒ x * x }
+      case 0.5 ⇒ m ::= math.sqrt _
+      case _ ⇒ m ::= { x ⇒ math.pow(x, that) }
+    }
   }
 
   def ^(that: Double) = m.cloned ^= that

http://git-wip-us.apache.org/repos/asf/mahout/blob/349b94d8/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeVectorOps.scala
----------------------------------------------------------------------
diff --git 
a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeVectorOps.scala
 
b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeVectorOps.scala
index 38a55d6..bf1bb30 100644
--- 
a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeVectorOps.scala
+++ 
b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeVectorOps.scala
@@ -67,7 +67,15 @@ class RLikeVectorOps(_v: Vector) extends VectorOps(_v) {
   /** Elementwise right-associative / */
   def /:(that: Vector) = that.cloned /= v
 
-  def ^=(that: Double) = v.assign(Functions.POW, that)
+  def ^=(that: Double) = that match {
+    // Special handling of x ^2 and x ^ 0.5: we want consistent handling of x 
^ 2 and x * x since
+    // pow(x,2) function return results different from x * x; but much of the 
code uses this
+    // interchangeably. Not having this done will create things like NaN 
entries on main diagonal
+    // of a distance matrix.
+    case 2.0 ⇒ v.assign(Functions.SQUARE)
+    case 0.5 ⇒ v.assign(Functions.SQRT)
+    case _ ⇒ v.assign (Functions.POW, that)
+  }
 
   def ^=(that: Vector) = v.assign(that, Functions.POW)
 

http://git-wip-us.apache.org/repos/asf/mahout/blob/349b94d8/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala
----------------------------------------------------------------------
diff --git 
a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala 
b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala
index 20dc9cd..7ff09bf 100644
--- 
a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala
+++ 
b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala
@@ -125,34 +125,34 @@ package object scalabindings {
    */
   def dense[R](rows: R*): DenseMatrix = {
     import RLikeOps._
-    val data = for (r <- rows) yield {
+    val data = for (r ← rows) yield {
       r match {
-        case n: Number => Array(n.doubleValue())
-        case t: Vector => Array.tabulate(t.length)(t(_))
-        case t: Array[Double] => t
-        case t: Iterable[_] =>
+        case n: Number ⇒ Array(n.doubleValue())
+        case t: Vector ⇒ Array.tabulate(t.length)(t(_))
+        case t: Array[Double] ⇒ t
+        case t: Iterable[_] ⇒
           t.head match {
-            case ss: Double => t.asInstanceOf[Iterable[Double]].toArray
-            case vv: Vector =>
+            case ss: Double ⇒ t.asInstanceOf[Iterable[Double]].toArray
+            case vv: Vector ⇒
               val m = new DenseMatrix(t.size, 
t.head.asInstanceOf[Vector].length)
               t.asInstanceOf[Iterable[Vector]].view.zipWithIndex.foreach {
-                case (v, idx) => m(idx, ::) := v
+                case (v, idx) ⇒ m(idx, ::) := v
               }
               return m
           }
-        case t: Product => 
t.productIterator.map(_.asInstanceOf[Number].doubleValue()).toArray
-        case t: Array[Array[Double]] => if (rows.size == 1)
+        case t: Product ⇒ 
t.productIterator.map(_.asInstanceOf[Number].doubleValue()).toArray
+        case t: Array[Array[Double]] ⇒ if (rows.size == 1)
           return new DenseMatrix(t)
         else
           throw new IllegalArgumentException(
             "double[][] data parameter can be the only argument for dense()")
-        case t: Array[Vector] =>
+        case t: Array[Vector] ⇒
           val m = new DenseMatrix(t.size, t.head.length)
           t.view.zipWithIndex.foreach {
-            case (v, idx) => m(idx, ::) := v
+            case (v, idx) ⇒ m(idx, ::) := v
           }
           return m
-        case _ => throw new IllegalArgumentException("unsupported type in the 
inline Matrix initializer")
+        case _ ⇒ throw new IllegalArgumentException("unsupported type in the 
inline Matrix initializer")
       }
     }
     new DenseMatrix(data.toArray)
@@ -179,7 +179,7 @@ package object scalabindings {
     val nrow = rows.size
     val ncol = rows.map(_.size()).max
     val m = new SparseRowMatrix(nrow, ncol)
-    m := rows.map { row =>
+    m := rows.map { row ⇒
       if (row.length < ncol) {
         val newRow = row.like(ncol)
         newRow(0 until row.length) := row
@@ -200,7 +200,7 @@ package object scalabindings {
     val cardinality = if (sdata.size > 0) sdata.map(_._1).max + 1 else 0
     val initialCapacity = sdata.size
     val sv = new RandomAccessSparseVector(cardinality, initialCapacity)
-    sdata.foreach(t => sv.setQuick(t._1, 
t._2.asInstanceOf[Number].doubleValue()))
+    sdata.foreach(t ⇒ sv.setQuick(t._1, 
t._2.asInstanceOf[Number].doubleValue()))
     sv
   }
 
@@ -337,12 +337,64 @@ package object scalabindings {
 
 
   /** Matrix-matrix unary func */
-  type MMUnaryFunc = (Matrix, Option[Matrix]) => Matrix
+  type MMUnaryFunc = (Matrix, Option[Matrix]) ⇒ Matrix
   /** Binary matrix-matrix operations which may save result in-place, 
optionally */
-  type MMBinaryFunc = (Matrix, Matrix, Option[Matrix]) => Matrix
-  type MVBinaryFunc = (Matrix, Vector, Option[Matrix]) => Matrix
-  type VMBinaryFunc = (Vector, Matrix, Option[Matrix]) => Matrix
-  type MDBinaryFunc = (Matrix, Double, Option[Matrix]) => Matrix
+  type MMBinaryFunc = (Matrix, Matrix, Option[Matrix]) ⇒ Matrix
+  type MVBinaryFunc = (Matrix, Vector, Option[Matrix]) ⇒ Matrix
+  type VMBinaryFunc = (Vector, Matrix, Option[Matrix]) ⇒ Matrix
+  type MDBinaryFunc = (Matrix, Double, Option[Matrix]) ⇒ Matrix
 
 
+  /////////////////////////////////////
+  // Miscellaneous in-core utilities
+
+  /**
+   * Compute column-wise means and variances.
+   *
+   * @return colMeans → colVariances
+   */
+  def colMeanVars(mxA:Matrix): (Vector, Vector) = {
+    val mu = mxA.colMeans()
+    val variance = (mxA * mxA colMeans) -= mu ^ 2
+    mu → variance
+  }
+
+  /**
+   * Compute column-wise means and stdevs.
+   * @param mxA input
+   * @return colMeans → colStdevs
+   */
+  def colMeanStdevs(mxA:Matrix) = {
+    val (mu, variance) = colMeanVars(mxA)
+    mu → (variance ::= math.sqrt _)
+  }
+
+  /** Compute square distance matrix. We assume data points are row-wise, 
similar to R's dist(). */
+  def sqDist(mxX: Matrix): Matrix = {
+
+    val s = mxX ^ 2 rowSums
+
+    (mxX %*% mxX.t) := { (r, c, x) ⇒ s(r) + s(c) - 2 * x}
+  }
+
+  /**
+   * Pairwise squared distance computation.
+   * @param mxX X, m x d
+   * @param mxY Y, n x d
+   * @return pairwise squared distances of row-wise data points in X and Y (m 
x n)
+   */
+  def sqDist(mxX: Matrix, mxY: Matrix): Matrix = {
+
+    val s = mxX ^ 2 rowSums
+
+    val t = mxY ^ 2 rowSums
+
+    // D = s*1' + 1*t' - 2XY'
+    (mxX %*% mxY.t) := { (r, c, d) ⇒ s(r) + t(c) - 2.0 * d}
+  }
+
+  def dist(mxX: Matrix): Matrix = sqDist(mxX) := sqrt _
+
+  def dist(mxX: Matrix, mxY: Matrix): Matrix = sqDist(mxX, mxY) := sqrt _
+
 }

http://git-wip-us.apache.org/repos/asf/mahout/blob/349b94d8/math-scala/src/test/scala/org/apache/mahout/math/drm/DrmLikeOpsSuiteBase.scala
----------------------------------------------------------------------
diff --git 
a/math-scala/src/test/scala/org/apache/mahout/math/drm/DrmLikeOpsSuiteBase.scala
 
b/math-scala/src/test/scala/org/apache/mahout/math/drm/DrmLikeOpsSuiteBase.scala
index bb42121..fdfb3f9 100644
--- 
a/math-scala/src/test/scala/org/apache/mahout/math/drm/DrmLikeOpsSuiteBase.scala
+++ 
b/math-scala/src/test/scala/org/apache/mahout/math/drm/DrmLikeOpsSuiteBase.scala
@@ -110,4 +110,28 @@ trait DrmLikeOpsSuiteBase extends DistributedMahoutSuite 
with Matchers {
 
   }
 
+  test("dsqDist(X,Y)") {
+    val m = 100
+    val n = 300
+    val d = 7
+    val mxX = Matrices.symmetricUniformView(m, d, 12345).cloned -= 5
+    val mxY = Matrices.symmetricUniformView(n, d, 1234).cloned += 10
+    val (drmX, drmY) = (drmParallelize(mxX, 3), drmParallelize(mxY, 4))
+
+    val mxDsq = dsqDist(drmX, drmY).collect
+    val mxDsqControl = new DenseMatrix(m, n) := { (r, c, _) ⇒ (mxX(r, ::) - 
mxY(c, ::)) ^= 2 sum }
+    (mxDsq - mxDsqControl).norm should be < 1e-7
+  }
+
+  test("dsqDist(X)") {
+    val m = 100
+    val d = 7
+    val mxX = Matrices.symmetricUniformView(m, d, 12345).cloned -= 5
+    val drmX = drmParallelize(mxX, 3)
+
+    val mxDsq = dsqDist(drmX).collect
+    val mxDsqControl = sqDist(drmX)
+    (mxDsq - mxDsqControl).norm should be < 1e-7
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/mahout/blob/349b94d8/math-scala/src/test/scala/org/apache/mahout/math/scalabindings/MathSuite.scala
----------------------------------------------------------------------
diff --git 
a/math-scala/src/test/scala/org/apache/mahout/math/scalabindings/MathSuite.scala
 
b/math-scala/src/test/scala/org/apache/mahout/math/scalabindings/MathSuite.scala
index b10cde3..bcfe109 100644
--- 
a/math-scala/src/test/scala/org/apache/mahout/math/scalabindings/MathSuite.scala
+++ 
b/math-scala/src/test/scala/org/apache/mahout/math/scalabindings/MathSuite.scala
@@ -17,6 +17,7 @@
 
 package org.apache.mahout.math.scalabindings
 
+import org.apache.mahout.logging._
 import org.scalatest.{Matchers, FunSuite}
 import org.apache.mahout.math._
 import scala.math._
@@ -28,6 +29,8 @@ import org.apache.mahout.common.RandomUtils
 
 class MathSuite extends FunSuite with MahoutSuite {
 
+  private final implicit val log = getLog(classOf[MathSuite])
+
   test("chol") {
 
     // try to solve Ax=b with cholesky:
@@ -41,26 +44,26 @@ class MathSuite extends FunSuite with MahoutSuite {
     // make sure it is symmetric for a valid solution
     a := a.t %*% a
 
-    printf("A= \n%s\n", a)
+    trace(s"A= \n$a")
 
     val b = dense((9, 8, 7)).t
 
-    printf("b = \n%s\n", b)
+    trace(s"b = \n$b")
 
-    // fails if chol(a,true)
+    // Fails if chol(a, true)
     val ch = chol(a)
 
-    printf("L = \n%s\n", ch.getL)
+    trace(s"L = \n${ch.getL}")
 
-    printf("(L^-1)b =\n%s\n", ch.solveLeft(b))
+    trace(s"(L^-1)b =\n${ch.solveLeft(b)}\n")
 
     val x = ch.solveRight(eye(3)) %*% ch.solveLeft(b)
 
-    printf("x = \n%s\n", x.toString)
+    trace(s"x = \n$x")
 
     val axmb = (a %*% x) - b
 
-    printf("AX - B = \n%s\n", axmb.toString)
+    trace(s"AX - B = \n$axmb")
 
     axmb.norm should be < 1e-10
 
@@ -211,4 +214,26 @@ class MathSuite extends FunSuite with MahoutSuite {
 
   }
 
+  test("sqDist(X,Y)") {
+    val m = 100
+    val n = 300
+    val d = 7
+    val mxX = Matrices.symmetricUniformView(m, d, 12345).cloned -= 5
+    val mxY = Matrices.symmetricUniformView(n, d, 1234).cloned += 10
+
+    val mxDsq = sqDist(mxX, mxY)
+    val mxDsqControl = new DenseMatrix(m, n) := { (r, c, _) ⇒ (mxX(r, ::) - 
mxY(c, ::)) ^= 2 sum }
+    (mxDsq - mxDsqControl).norm should be < 1e-7
+  }
+
+  test("sqDist(X)") {
+    val m = 100
+    val d = 7
+    val mxX = Matrices.symmetricUniformView(m, d, 12345).cloned -= 5
+
+    val mxDsq = sqDist(mxX)
+    val mxDsqControl = sqDist(mxX, mxX)
+    (mxDsq - mxDsqControl).norm should be < 1e-7
+  }
+
 }

[23/37] mahout git commit: MAHOUT-1754: Distance and squared distance matrices routines (dlyubimov)

Reply via email to