Repository: mahout Updated Branches: refs/heads/master 63cebf76e -> e4ba7887f
MAHOUT-1529: third collection of various edits against private branch Squashed commit of the following: commit 4328aae135bc56c02b944c21af2cfd6629b262bf Author: Dmitriy Lyubimov <[email protected]> Date: Mon Jul 7 13:13:26 2014 -0700 Various 1529-related stuff collected from private fork Project: http://git-wip-us.apache.org/repos/asf/mahout/repo Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/e4ba7887 Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/e4ba7887 Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/e4ba7887 Branch: refs/heads/master Commit: e4ba7887fc6dbf17c3d73f8d4aa1045eeb48d53e Parents: 63cebf7 Author: Dmitriy Lyubimov <[email protected]> Authored: Mon Jul 7 13:15:30 2014 -0700 Committer: Dmitriy Lyubimov <[email protected]> Committed: Mon Jul 7 13:15:30 2014 -0700 ---------------------------------------------------------------------- .../apache/mahout/math/decompositions/ALS.scala | 14 +++++++----- .../mahout/math/decompositions/package.scala | 10 +++++++-- .../mahout/math/scalabindings/MatrixOps.scala | 9 ++++---- .../mahout/math/scalabindings/RLikeOps.scala | 3 +++ .../mahout/math/scalabindings/VectorOps.scala | 23 +++++++++++++++++--- .../mahout/math/scalabindings/package.scala | 15 +++++++++++-- .../mahout/sparkbindings/SparkEngine.scala | 2 +- .../mahout/sparkbindings/blas/package.scala | 2 +- .../apache/mahout/sparkbindings/package.scala | 9 +++++--- .../mahout/math/decompositions/MathSuite.scala | 10 ++++----- 10 files changed, 71 insertions(+), 26 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/math-scala/src/main/scala/org/apache/mahout/math/decompositions/ALS.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/decompositions/ALS.scala b/math-scala/src/main/scala/org/apache/mahout/math/decompositions/ALS.scala index 5103e1c..5aed649 100644 --- a/math-scala/src/main/scala/org/apache/mahout/math/decompositions/ALS.scala +++ b/math-scala/src/main/scala/org/apache/mahout/math/decompositions/ALS.scala @@ -29,7 +29,7 @@ import math._ import org.apache.mahout.common.RandomUtils /** Simple ALS factorization algotithm. To solve, use train() method. */ -object ALS { +private[math] object ALS { private val log = Logger.getLogger(ALS.getClass) @@ -46,8 +46,13 @@ object ALS { def toTuple = (drmU, drmV, iterationsRMSE) } + /** Result class for in-core results */ + class InCoreResult(val inCoreU: Matrix, inCoreV: Matrix, val iterationsRMSE: Iterable[Double]) { + def toTuple = (inCoreU, inCoreV, iterationsRMSE) + } + /** - * Run ALS. + * Run Distributed ALS. * <P> * * Example: @@ -69,7 +74,7 @@ object ALS { * @tparam K row key type of the input (100 is probably more than enough) * @return { @link org.apache.mahout.math.drm.decompositions.ALS.Result} */ - def als[K: ClassTag]( + def dals[K: ClassTag]( drmInput: DrmLike[K], k: Int = 50, lambda: Double = 0.0, @@ -80,7 +85,6 @@ object ALS { assert(convergenceThreshold < 1.0, "convergenceThreshold") assert(maxIterations >= 1, "maxIterations") - val drmA = drmInput val drmAt = drmInput.t @@ -101,7 +105,7 @@ object ALS { while (!stop && i < maxIterations) { // Alternate. This is really what ALS is. - if ( drmV != null) drmV.uncache() + if (drmV != null) drmV.uncache() drmV = (drmAt %*% drmU %*% solve(drmU.t %*% drmU -: diag(lambda, k))).checkpoint() drmU.uncache() http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/math-scala/src/main/scala/org/apache/mahout/math/decompositions/package.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/decompositions/package.scala b/math-scala/src/main/scala/org/apache/mahout/math/decompositions/package.scala index 852a977..1ed9695 100644 --- a/math-scala/src/main/scala/org/apache/mahout/math/decompositions/package.scala +++ b/math-scala/src/main/scala/org/apache/mahout/math/decompositions/package.scala @@ -92,6 +92,12 @@ package object decompositions { def dspca[K: ClassTag](A: DrmLike[K], k: Int, p: Int = 15, q: Int = 0): (DrmLike[K], DrmLike[Int], Vector) = DSPCA.dspca(A, k, p, q) + /** Result for distributed ALS-type two-component factorization algorithms */ + type FactorizationResult[K] = ALS.Result[K] + + /** Result for distributed ALS-type two-component factorization algorithms, in-core matrices */ + type FactorizationResultInCore = ALS.InCoreResult + /** * Run ALS. * <P> @@ -121,7 +127,7 @@ package object decompositions { lambda: Double = 0.0, maxIterations: Int = 10, convergenceThreshold: Double = 0.10 - ): ALS.Result[K] = - ALS.als(drmInput, k, lambda, maxIterations, convergenceThreshold) + ): FactorizationResult[K] = + ALS.dals(drmInput, k, lambda, maxIterations, convergenceThreshold) } http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala index 28acc5a..bb77ae1 100644 --- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala +++ b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala @@ -19,7 +19,7 @@ package org.apache.mahout.math.scalabindings import org.apache.mahout.math.{Matrices, QRDecomposition, Vector, Matrix} import scala.collection.JavaConversions._ -import org.apache.mahout.math.function.{VectorFunction, DoubleFunction, Functions} +import org.apache.mahout.math.function.{DoubleDoubleFunction, VectorFunction, DoubleFunction, Functions} import scala.math._ class MatrixOps(val m: Matrix) { @@ -43,9 +43,10 @@ class MatrixOps(val m: Matrix) { def -=(that: Double) = +=(-that) - def -=:(that: Double) = m.assign(new DoubleFunction { - def apply(x: Double): Double = that - x - }) + def -=:(that: Double) = m.assign(Functions.minus(that)) + + /** A := B - A which is -(A - B) */ + def -=:(that: Matrix) = m.assign(that, Functions.chain(Functions.NEGATE, Functions.MINUS)) def +(that: Matrix) = cloned += that http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeOps.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeOps.scala index e12fc0f..c96526f 100644 --- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeOps.scala +++ b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/RLikeOps.scala @@ -26,8 +26,11 @@ object RLikeOps { implicit def v2vOps(v: Vector) = new RLikeVectorOps(v) + implicit def el2elOps(el: Vector.Element) = new ElementOps(el) + implicit def times2timesOps(m: MatrixTimesOps) = new RLikeTimesOps(m) implicit def m2mOps(m: Matrix) = new RLikeMatrixOps(m) + } http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/VectorOps.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/VectorOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/VectorOps.scala index 0a81bcd..c1b5a69 100644 --- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/VectorOps.scala +++ b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/VectorOps.scala @@ -25,7 +25,7 @@ import org.apache.mahout.math.function.Functions * Syntactic sugar for mahout vectors * @param v Mahout vector */ -class VectorOps(val v: Vector) { +class VectorOps(private[scalabindings] val v: Vector) { import RLikeOps._ @@ -118,5 +118,22 @@ class VectorOps(val v: Vector) { } -object VectorOps { -} +class ElementOps(private[scalabindings] val el: Vector.Element) { + + def apply = el.get() + + def update(v: Double) = el.set(v) + + def :=(v: Double) = el.set(v) + + def +(that: Double) = el.get() + that + + def -(that: Double) = el.get() - that + + def :-(that: Double) = that - el.get() + + def /(that: Double) = el.get() / that + + def :/(that: Double) = that / el.get() + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala index 2b7773b..8e0c07f 100644 --- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala +++ b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/package.scala @@ -130,7 +130,16 @@ package object scalabindings { case t: Product => t.productIterator.map(_.asInstanceOf[Number].doubleValue()).toArray case t: Vector => Array.tabulate(t.length)(t(_)) case t: Array[Double] => t - case t: Iterable[Double] => t.toArray + case t: Iterable[_] => + t.head match { + case ss: Double => t.asInstanceOf[Iterable[Double]].toArray + case vv: Vector => + val m = new DenseMatrix(t.size, t.head.asInstanceOf[Vector].length) + t.asInstanceOf[Iterable[Vector]].view.zipWithIndex.foreach { + case (v, idx) => m(idx, ::) := v + } + return m + } case t: Array[Array[Double]] => if (rows.size == 1) return new DenseMatrix(t) else @@ -138,7 +147,9 @@ package object scalabindings { "double[][] data parameter can be the only argument for dense()") case t:Array[Vector] => val m = new DenseMatrix(t.size,t.head.length) - t.view.zipWithIndex.foreach({case(v,idx) => m(idx,::) := v}) + t.view.zipWithIndex.foreach{ + case(v,idx) => m(idx,::) := v + } return m case _ => throw new IllegalArgumentException("unsupported type in the inline Matrix initializer") } http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/spark/src/main/scala/org/apache/mahout/sparkbindings/SparkEngine.scala ---------------------------------------------------------------------- diff --git a/spark/src/main/scala/org/apache/mahout/sparkbindings/SparkEngine.scala b/spark/src/main/scala/org/apache/mahout/sparkbindings/SparkEngine.scala index dbdc934..b68a98e 100644 --- a/spark/src/main/scala/org/apache/mahout/sparkbindings/SparkEngine.scala +++ b/spark/src/main/scala/org/apache/mahout/sparkbindings/SparkEngine.scala @@ -219,7 +219,7 @@ object SparkEngine extends DistributedEngine { new CheckpointedDrmSpark[Long](rdd, nrow, ncol) } - private def cacheHint2Spark(cacheHint: CacheHint.CacheHint): StorageLevel = cacheHint match { + private[mahout] def cacheHint2Spark(cacheHint: CacheHint.CacheHint): StorageLevel = cacheHint match { case CacheHint.NONE => StorageLevel.NONE case CacheHint.DISK_ONLY => StorageLevel.DISK_ONLY case CacheHint.DISK_ONLY_2 => StorageLevel.DISK_ONLY_2 http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/spark/src/main/scala/org/apache/mahout/sparkbindings/blas/package.scala ---------------------------------------------------------------------- diff --git a/spark/src/main/scala/org/apache/mahout/sparkbindings/blas/package.scala b/spark/src/main/scala/org/apache/mahout/sparkbindings/blas/package.scala index d2d5340..32d6fb5 100644 --- a/spark/src/main/scala/org/apache/mahout/sparkbindings/blas/package.scala +++ b/spark/src/main/scala/org/apache/mahout/sparkbindings/blas/package.scala @@ -20,7 +20,7 @@ package org.apache.mahout.sparkbindings import scala.reflect.ClassTag /** - * This package contains distributed algorithms that distributed matrix expression optimizer picks + * This validation contains distributed algorithms that distributed matrix expression optimizer picks * from. */ package object blas { http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/spark/src/main/scala/org/apache/mahout/sparkbindings/package.scala ---------------------------------------------------------------------- diff --git a/spark/src/main/scala/org/apache/mahout/sparkbindings/package.scala b/spark/src/main/scala/org/apache/mahout/sparkbindings/package.scala index e9fd7ac..8726766 100644 --- a/spark/src/main/scala/org/apache/mahout/sparkbindings/package.scala +++ b/spark/src/main/scala/org/apache/mahout/sparkbindings/package.scala @@ -52,7 +52,9 @@ package object sparkbindings { * @param customJars * @return */ - def mahoutSparkContext(masterUrl: String, appName: String, + def mahoutSparkContext( + masterUrl: String, + appName: String, customJars: TraversableOnce[String] = Nil, sparkConf: SparkConf = new SparkConf(), addMahoutJars: Boolean = true @@ -177,13 +179,14 @@ package object sparkbindings { def drmWrap[K : ClassTag]( rdd: DrmRdd[K], nrow: Int = -1, - ncol: Int = -1 + ncol: Int = -1, + cacheHint:CacheHint.CacheHint = CacheHint.NONE ): CheckpointedDrm[K] = new CheckpointedDrmSpark[K]( rdd = rdd, _nrow = nrow, _ncol = ncol, - _cacheStorageLevel = StorageLevel.NONE + _cacheStorageLevel = SparkEngine.cacheHint2Spark(cacheHint) ) http://git-wip-us.apache.org/repos/asf/mahout/blob/e4ba7887/spark/src/test/scala/org/apache/mahout/math/decompositions/MathSuite.scala ---------------------------------------------------------------------- diff --git a/spark/src/test/scala/org/apache/mahout/math/decompositions/MathSuite.scala b/spark/src/test/scala/org/apache/mahout/math/decompositions/MathSuite.scala index 6060dfd..03c7190 100644 --- a/spark/src/test/scala/org/apache/mahout/math/decompositions/MathSuite.scala +++ b/spark/src/test/scala/org/apache/mahout/math/decompositions/MathSuite.scala @@ -17,8 +17,6 @@ package org.apache.mahout.math.decompositions -import org.scalatest.{Matchers, FunSuite} -import org.apache.mahout.sparkbindings.test.MahoutLocalContext import org.apache.mahout.math._ import drm._ import scalabindings._ @@ -27,6 +25,8 @@ import RLikeDrmOps._ import org.apache.mahout.sparkbindings._ import org.apache.mahout.common.RandomUtils import scala.math._ +import org.scalatest.{Matchers, FunSuite} +import org.apache.mahout.sparkbindings.test.MahoutLocalContext class MathSuite extends FunSuite with Matchers with MahoutLocalContext { @@ -171,7 +171,7 @@ class MathSuite extends FunSuite with Matchers with MahoutLocalContext { } - test("als") { + test("dals") { val rnd = RandomUtils.getRandom @@ -202,8 +202,8 @@ class MathSuite extends FunSuite with Matchers with MahoutLocalContext { printf("ALS factorized approximation block:\n%s\n", predict(0 until 3, 0 until 3)) val err = (inCoreA - predict).norm - printf ("norm of residuals %f\n",err) - printf ("train iteration rmses: %s\n", rmse) + printf("norm of residuals %f\n", err) + printf("train iteration rmses: %s\n", rmse) err should be < 1e-2
