Repository: mahout Updated Branches: refs/heads/master 74c10743d -> f870a6302
http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/main/scala/org/apache/mahout/h2obindings/H2ODistributedContext.scala ---------------------------------------------------------------------- diff --git a/h2o/src/main/scala/org/apache/mahout/h2obindings/H2ODistributedContext.scala b/h2o/src/main/scala/org/apache/mahout/h2obindings/H2ODistributedContext.scala new file mode 100644 index 0000000..e562229 --- /dev/null +++ b/h2o/src/main/scala/org/apache/mahout/h2obindings/H2ODistributedContext.scala @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */

package org.apache.mahout.h2obindings

import org.apache.mahout.math.drm._

/**
 * Distributed context for the H2O bindings.
 *
 * Constructing an instance creates an `H2OContext` for `masterUrl`; the
 * engine exposed by this context is always `H2OEngine`.
 *
 * @param masterUrl passed unchanged to the `H2OContext` constructor
 */
class H2ODistributedContext(val masterUrl: String) extends DistributedContext {
  val h2octx = new H2OContext(masterUrl)

  /** Currently a no-op: nothing is released when the context is closed. */
  def close(): Unit = ()

  val engine: DistributedEngine = H2OEngine
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/main/scala/org/apache/mahout/h2obindings/H2OEngine.scala ---------------------------------------------------------------------- diff --git a/h2o/src/main/scala/org/apache/mahout/h2obindings/H2OEngine.scala b/h2o/src/main/scala/org/apache/mahout/h2obindings/H2OEngine.scala new file mode 100644 index 0000000..860fb84 --- /dev/null +++ b/h2o/src/main/scala/org/apache/mahout/h2obindings/H2OEngine.scala @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */

package org.apache.mahout.h2obindings

import scala.reflect._
import org.apache.mahout.math._
import org.apache.mahout.math.drm._
import org.apache.mahout.math.drm.logical._
import org.apache.mahout.h2obindings.ops._
import org.apache.mahout.h2obindings.drm._

/**
 * H2O implementation of the distributed engine.
 *
 * Statistics and DRM construction are delegated to `H2OHelper` / `H2OHdfs`;
 * logical plans are turned into physical H2O DRMs by `tr2phys`.
 */
object H2OEngine extends DistributedEngine {

  /** Column means of the DRM's backing frame, via `H2OHelper.colMeans`. */
  def colMeans[K: ClassTag](drm: CheckpointedDrm[K]): Vector = {
    H2OHelper.colMeans(drm.h2odrm.frame)
  }

  /** Column sums of the DRM's backing frame, via `H2OHelper.colSums`. */
  def colSums[K: ClassTag](drm: CheckpointedDrm[K]): Vector = {
    H2OHelper.colSums(drm.h2odrm.frame)
  }

  /**
   * Returns whatever `H2OHelper.sumSqr` computes for the backing frame.
   *
   * NOTE(review): a norm is normally the square root of the sum of squares;
   * confirm that `sumSqr` applies the square root.
   */
  def norm[K: ClassTag](drm: CheckpointedDrm[K]): Double = {
    H2OHelper.sumSqr(drm.h2odrm.frame)
  }

  /** Per-column non-zero counts, via `H2OHelper.nonZeroCnt`. */
  def numNonZeroElementsPerColumn[K: ClassTag](drm: CheckpointedDrm[K]): Vector = {
    H2OHelper.nonZeroCnt(drm.h2odrm.frame)
  }

  /** Wraps an in-core matrix in an `H2OBCast`. */
  def drmBroadcast(m: Matrix)(implicit dc: DistributedContext): BCast[Matrix] = {
    new H2OBCast(m)
  }

  /** Wraps an in-core vector in an `H2OBCast`. */
  def drmBroadcast(v: Vector)(implicit dc: DistributedContext): BCast[Vector] = {
    new H2OBCast(v)
  }

  /** Builds a checkpointed DRM from `H2OHdfs.drm_from_file(path, parMin)`. */
  def drmFromHDFS(path: String, parMin: Int = 0)(implicit dc: DistributedContext): CheckpointedDrm[_] = {
    val loaded = H2OHdfs.drm_from_file(path, parMin)
    new CheckpointedDrmH2O(loaded, dc)
  }

  /** Int-keyed DRM created by `H2OHelper.empty_drm`. */
  def drmParallelizeEmpty(nrow: Int, ncol: Int, numPartitions: Int)
                         (implicit dc: DistributedContext): CheckpointedDrm[Int] = {
    val empty = H2OHelper.empty_drm(nrow, ncol, numPartitions, -1)
    new CheckpointedDrmH2O[Int](empty, dc)
  }

  /** Long-keyed DRM created by `H2OHelper.empty_drm`. */
  def drmParallelizeEmptyLong(nrow: Long, ncol: Int, numPartitions: Int)
                             (implicit dc: DistributedContext): CheckpointedDrm[Long] = {
    val empty = H2OHelper.empty_drm(nrow, ncol, numPartitions, -1)
    new CheckpointedDrmH2O[Long](empty, dc)
  }

  /** Int-keyed DRM created from an in-core matrix by `H2OHelper.drm_from_matrix`. */
  def drmParallelizeWithRowIndices(m: Matrix, numPartitions: Int)
                                  (implicit dc: DistributedContext): CheckpointedDrm[Int] = {
    val distributed = H2OHelper.drm_from_matrix(m, numPartitions, -1)
    new CheckpointedDrmH2O[Int](distributed, dc)
  }

  /** String-keyed DRM created from an in-core matrix by `H2OHelper.drm_from_matrix`. */
  def drmParallelizeWithRowLabels(m: Matrix, numPartitions: Int)
                                 (implicit dc: DistributedContext): CheckpointedDrm[String] = {
    val distributed = H2OHelper.drm_from_matrix(m, numPartitions, -1)
    new CheckpointedDrmH2O[String](distributed, dc)
  }

  /**
   * Executes a logical plan and wraps the result in a checkpointed DRM that
   * shares the plan's context. The cache hint `ch` is not consulted here.
   */
  def toPhysical[K: ClassTag](plan: DrmLike[K], ch: CacheHint.CacheHint): CheckpointedDrm[K] = {
    val physical = tr2phys(plan)
    new CheckpointedDrmH2O[K](physical, plan.context)
  }

  // H2O specific

  /**
   * Recursively translates a logical operator tree into a physical `H2ODrm`.
   * Operands are translated first, using the class tags carried by the
   * operator. An already-checkpointed DRM is returned as its backing
   * `h2odrm`. An operator with no matching case raises
   * `IllegalArgumentException`.
   */
  private def tr2phys[K: ClassTag](oper: DrmLike[K]): H2ODrm = oper match {
    // Must precede the OpAt case, as in the original ordering.
    case OpAtAnyKey(_) =>
      throw new IllegalArgumentException("\"A\" must be Int-keyed in this A.t expression.")

    // Arithmetic
    case node @ OpAt(a) =>
      At.At(tr2phys(a)(node.classTagA))
    case node @ OpABt(a, b) =>
      ABt.ABt(tr2phys(a)(node.classTagA), tr2phys(b)(node.classTagB))
    case node @ OpAtB(a, b) =>
      AtB.AtB(tr2phys(a)(node.classTagA), tr2phys(b)(node.classTagB))
    case node @ OpAtA(a) =>
      AtA.AtA(tr2phys(a)(node.classTagA))
    case node @ OpAx(a, v) =>
      Ax.Ax(tr2phys(a)(node.classTagA), v)
    case node @ OpAtx(a, v) =>
      Atx.Atx(tr2phys(a)(node.classTagA), v)
    case node @ OpAewB(a, b, opId) =>
      AewB.AewB(tr2phys(a)(node.classTagA), tr2phys(b)(node.classTagB), opId)

    // Non arithmetic
    case node @ OpCbind(a, b) =>
      Cbind.Cbind(tr2phys(a)(node.classTagA), tr2phys(b)(node.classTagB))
    case node @ OpRbind(a, b) =>
      Rbind.Rbind(tr2phys(a)(node.classTagA), tr2phys(b)(node.classTagB))
    case node @ OpAewScalar(a, s, opId) =>
      AewScalar.AewScalar(tr2phys(a)(node.classTagA), s, opId)
    case node @ OpRowRange(a, r) =>
      RowRange.RowRange(tr2phys(a)(node.classTagA), r)
    case node @ OpTimesRightMatrix(a, m) =>
      TimesRightMatrix.TimesRightMatrix(tr2phys(a)(node.classTagA), m)

    // Custom operators, we just execute them
    case mapBlock: OpMapBlock[K, _] =>
      MapBlock.exec(
        tr2phys(mapBlock.A)(mapBlock.classTagA),
        mapBlock.ncol,
        mapBlock.bmf,
        (mapBlock.classTagK == implicitly[ClassTag[String]]),
        mapBlock.classTagA,
        mapBlock.classTagK)
    case node @ OpPar(a, m, e) =>
      Par.exec(tr2phys(a)(node.classTagA), m, e)

    // Already physical
    case checkpointed: CheckpointedDrm[K] =>
      checkpointed.h2odrm

    case _ =>
      throw new IllegalArgumentException(
        "Internal:Optimizer has no exec policy for operator %s.".format(oper))
  }

  /** Unchecked downcast that gives access to the H2O-specific members of a checkpointed DRM. */
  implicit def cp2cph2o[K: ClassTag](drm: CheckpointedDrm[K]): CheckpointedDrmH2O[K] = {
    drm.asInstanceOf[CheckpointedDrmH2O[K]]
  }
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/main/scala/org/apache/mahout/h2obindings/drm/CheckpointedDrmH2O.scala ---------------------------------------------------------------------- diff --git a/h2o/src/main/scala/org/apache/mahout/h2obindings/drm/CheckpointedDrmH2O.scala b/h2o/src/main/scala/org/apache/mahout/h2obindings/drm/CheckpointedDrmH2O.scala new file mode 100644 index 0000000..8d10cbc --- /dev/null +++ b/h2o/src/main/scala/org/apache/mahout/h2obindings/drm/CheckpointedDrmH2O.scala @@ -0,0 +1,32 @@ +package org.apache.mahout.h2obindings.drm + +import org.apache.mahout.math.{Matrix, Vector} +import org.apache.mahout.math.scalabindings._ +import RLikeOps._ +import org.apache.mahout.math.drm._ +import org.apache.mahout.h2obindings._ + +import scala.reflect._ + +/** H2O-specific optimizer-checkpointed DRM. 
*/ +class CheckpointedDrmH2O[K: ClassTag]( + val h2odrm: H2ODrm, + val context: DistributedContext +) extends CheckpointedDrm[K] { + + def collect: Matrix = H2OHelper.matrix_from_drm(h2odrm) + /* XXX: call frame.remove */ + def uncache(): this.type = this + + def writeDRM(path: String): Unit = H2OHdfs.drm_to_file(path, h2odrm) + + def checkpoint(cacheHint: CacheHint.CacheHint): CheckpointedDrm[K] = this + + def ncol: Int = h2odrm.frame.numCols + + def nrow: Long = h2odrm.frame.numRows + + def canHaveMissingRows: Boolean = false + + protected[mahout] def partitioningTag: Long = h2odrm.frame.anyVec.group.hashCode +} http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/main/scala/org/apache/mahout/h2obindings/ops/MapBlockHelper.scala ---------------------------------------------------------------------- diff --git a/h2o/src/main/scala/org/apache/mahout/h2obindings/ops/MapBlockHelper.scala b/h2o/src/main/scala/org/apache/mahout/h2obindings/ops/MapBlockHelper.scala new file mode 100644 index 0000000..0384826 --- /dev/null +++ b/h2o/src/main/scala/org/apache/mahout/h2obindings/ops/MapBlockHelper.scala @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */

package org.apache.mahout.h2obindings.ops

import org.apache.mahout.math.Matrix
import org.apache.mahout.math.drm.BlockMapFunc
import scala.reflect.ClassTag

import water.fvec.{Vec,NewChunk}
import water.parser.ValueString

object MapBlockHelper {
  /**
   * Applies a block map function to one block of rows.
   *
   * The row keys handed to the function are rebuilt from the key type `K`:
   * consecutive `Int` or `Long` values starting at `startlong`, or, for
   * `String` keys, the labels read from `labels` at rows
   * `startlong .. startlong + in.rowSize - 1`.
   *
   * @param bmf       the block map function; cast to `BlockMapFunc[K, R]`
   * @param in        the input block
   * @param startlong index of the block's first row
   * @param labels    source of row labels; only read when `K` is `String`
   * @param nclabel   receives the output keys; only written when `R` is `String`
   * @return the matrix half of the function's result
   * @throws MatchError if `K` is not `Int`, `Long` or `String`
   */
  def exec[K: ClassTag, R: ClassTag](bmf: Object, in: Matrix, startlong: Long, labels: Vec, nclabel: NewChunk): Matrix = {
    val intTag = implicitly[ClassTag[Int]]
    val longTag = implicitly[ClassTag[Long]]
    val stringTag = implicitly[ClassTag[String]]

    val inarray = implicitly[ClassTag[K]] match {
      case `intTag` =>
        val startint: Int = startlong.asInstanceOf[Int]
        (startint until (startint + in.rowSize)).toArray
      case `longTag` =>
        (startlong until (startlong + in.rowSize)).toArray
      case `stringTag` =>
        val arr = new Array[String](in.rowSize)
        val vstr = new ValueString
        // `until`, not `to`: the array has exactly in.rowSize slots, so the
        // inclusive bound used previously wrote one element past the end.
        for (row <- 0 until in.rowSize) {
          arr(row) = labels.atStr(vstr, row + startlong).toString
        }
        arr
    }

    val _bmf = bmf.asInstanceOf[BlockMapFunc[K,R]]
    val out = _bmf((inarray.asInstanceOf[Array[K]], in))

    // Only String output keys are written back; other key types need no labels here.
    implicitly[ClassTag[R]] match {
      case `stringTag` =>
        val vstr = new ValueString
        for (str <- out._1) {
          nclabel.addStr(vstr.setTo(str.asInstanceOf[String]))
        }
      case _ => ()
    }
    out._2
  }
}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/main/scala/org/apache/mahout/h2obindings/package.scala ---------------------------------------------------------------------- diff --git a/h2o/src/main/scala/org/apache/mahout/h2obindings/package.scala b/h2o/src/main/scala/org/apache/mahout/h2obindings/package.scala new file mode 100644 index 0000000..79d9c5b --- /dev/null +++ b/h2o/src/main/scala/org/apache/mahout/h2obindings/package.scala @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */

package org.apache.mahout

package object h2obindings {

  /**
   * Factory for an H2O-backed distributed context.
   *
   * @param masterURL forwarded to the `H2ODistributedContext` constructor
   * @return a newly constructed `H2ODistributedContext`
   */
  def mahoutH2OContext(masterURL: String): H2ODistributedContext =
    new H2ODistributedContext(masterURL)
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/test/scala/org/apache/mahout/h2obindings/drm/DrmLikeOpsSuite.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/h2obindings/drm/DrmLikeOpsSuite.scala b/h2o/src/test/scala/org/apache/mahout/h2obindings/drm/DrmLikeOpsSuite.scala new file mode 100644 index 0000000..c6deb35 --- /dev/null +++ b/h2o/src/test/scala/org/apache/mahout/h2obindings/drm/DrmLikeOpsSuite.scala @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */

package org.apache.mahout.h2obindings.drm

import org.apache.mahout.math._
import scalabindings._
import drm._
import RLikeOps._
import RLikeDrmOps._
import org.apache.mahout.h2obindings._
import org.scalatest.FunSuite
import org.apache.mahout.h2obindings.test.DistributedH2OSuite

/**
 * Tests for DrmLikeOps.
 *
 * Declares no tests itself: the cases come from `DrmLikeOpsSuiteBase`, mixed
 * in together with the `DistributedH2OSuite` fixture.
 */
class DrmLikeOpsSuite
  extends FunSuite
  with DistributedH2OSuite
  with DrmLikeOpsSuiteBase
http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/test/scala/org/apache/mahout/h2obindings/drm/DrmLikeSuite.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/h2obindings/drm/DrmLikeSuite.scala b/h2o/src/test/scala/org/apache/mahout/h2obindings/drm/DrmLikeSuite.scala new file mode 100644 index 0000000..07eb9d7 --- /dev/null +++ b/h2o/src/test/scala/org/apache/mahout/h2obindings/drm/DrmLikeSuite.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */

package org.apache.mahout.h2obindings.drm

import org.scalatest.FunSuite
import org.apache.mahout.math._
import scalabindings._
import drm._
import RLikeOps._
import RLikeDrmOps._
import org.apache.mahout.h2obindings.test.DistributedH2OSuite


/**
 * Declares no tests itself: the cases come from `DrmLikeSuiteBase`, mixed in
 * together with the `DistributedH2OSuite` fixture.
 */
class DrmLikeSuite
  extends FunSuite
  with DistributedH2OSuite
  with DrmLikeSuiteBase
http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/test/scala/org/apache/mahout/h2obindings/drm/RLikeDrmOpsSuite.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/h2obindings/drm/RLikeDrmOpsSuite.scala b/h2o/src/test/scala/org/apache/mahout/h2obindings/drm/RLikeDrmOpsSuite.scala new file mode 100644 index 0000000..f052247 --- /dev/null +++ b/h2o/src/test/scala/org/apache/mahout/h2obindings/drm/RLikeDrmOpsSuite.scala @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */

package org.apache.mahout.h2obindings.drm

import org.scalatest.FunSuite
import org.apache.mahout.math._
import drm._
import org.apache.mahout.h2obindings._
import test.DistributedH2OSuite

/**
 * ==R-like DRM DSL operation tests -- H2O==
 *
 * Declares no tests itself: the cases come from `RLikeDrmOpsSuiteBase`, mixed
 * in together with the `DistributedH2OSuite` fixture.
 */
class RLikeDrmOpsSuite
  extends FunSuite
  with DistributedH2OSuite
  with RLikeDrmOpsSuiteBase
http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/ABtSuite.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/ABtSuite.scala b/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/ABtSuite.scala new file mode 100644 index 0000000..b0d2ad7 --- /dev/null +++ b/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/ABtSuite.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */

package org.apache.mahout.h2obindings.ops

import org.apache.mahout.h2obindings.test.DistributedH2OSuite
import org.scalatest.FunSuite
import org.apache.mahout.math.scalabindings._
import org.apache.mahout.math.drm._
import org.apache.mahout.h2obindings._
import org.apache.mahout.h2obindings.drm._
import RLikeOps._
import RLikeDrmOps._
import org.apache.mahout.math.drm._

/** Tests for AB' operator algorithms */
class ABtSuite extends FunSuite with DistributedH2OSuite {

  test("ABt") {
    // In-core operands: A is 3x3, B is 2x3.
    val inCoreA = dense((1, 2, 3), (2, 3, 4), (3, 4, 5))
    val inCoreB = dense((3, 4, 5), (5, 6, 7))

    // Distributed product A %*% B' ...
    val drmA = drmParallelize(m = inCoreA, numPartitions = 3)
    val drmB = drmParallelize(m = inCoreB, numPartitions = 2)
    val drmABt = drmA %*% drmB.t

    // ... compared against the same product computed in core.
    val expected = inCoreA %*% inCoreB.t
    val actual = drmABt.collect

    assert((actual - expected).norm < 1E-5)

    println(actual)
  }
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/AewBSuite.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/AewBSuite.scala b/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/AewBSuite.scala new file mode 100644 index 0000000..2f2133f --- /dev/null +++ b/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/AewBSuite.scala @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */

package org.apache.mahout.h2obindings.ops

import org.scalatest.FunSuite
import org.apache.mahout.h2obindings.test.DistributedH2OSuite
import org.apache.mahout.math.scalabindings._
import RLikeOps._
import org.apache.mahout.math.drm._
import RLikeDrmOps._
import org.apache.mahout.math.drm.logical._

/**
 * Elementwise matrix operation tests.
 *
 * Each test computes the same elementwise expression on distributed and on
 * in-core operands and requires the norm of the difference to be below 1E-10.
 */
class AewBSuite extends FunSuite with DistributedH2OSuite {

  test("A * B Hadamard") {
    val inCoreA = dense((1, 2, 3), (2, 3, 4), (3, 4, 5), (7, 8, 9))
    val inCoreB = dense((3, 4, 5), (5, 6, 7), (0, 0, 0), (9, 8, 7))
    val drmA = drmParallelize(m = inCoreA, numPartitions = 2)
    val drmB = drmParallelize(m = inCoreB)

    val actual = (drmA * drmB).collect
    val expected = inCoreA * inCoreB

    assert((actual - expected).norm < 1E-10)
  }

  test("A + B Elementwise") {
    val inCoreA = dense((1, 2, 3), (2, 3, 4), (3, 4, 5), (7, 8, 9))
    val inCoreB = dense((3, 4, 5), (5, 6, 7), (0, 0, 0), (9, 8, 7))
    val drmA = drmParallelize(m = inCoreA, numPartitions = 2)
    val drmB = drmParallelize(m = inCoreB)

    val actual = (drmA + drmB).collect
    val expected = inCoreA + inCoreB

    assert((actual - expected).norm < 1E-10)
  }

  test("A - B Elementwise") {
    val inCoreA = dense((1, 2, 3), (2, 3, 4), (3, 4, 5), (7, 8, 9))
    val inCoreB = dense((3, 4, 5), (5, 6, 7), (0, 0, 0), (9, 8, 7))
    val drmA = drmParallelize(m = inCoreA, numPartitions = 2)
    val drmB = drmParallelize(m = inCoreB)

    val actual = (drmA - drmB).collect
    val expected = inCoreA - inCoreB

    assert((actual - expected).norm < 1E-10)
  }

  test("A / B Elementwise") {
    // The divisor has no zero entries; the dividend has one.
    val inCoreA = dense((1, 2, 3), (2, 3, 4), (3, 4, 0), (7, 8, 9))
    val inCoreB = dense((3, 4, 5), (5, 6, 7), (10, 20, 30), (9, 8, 7))
    val drmA = drmParallelize(m = inCoreA, numPartitions = 2)
    val drmB = drmParallelize(m = inCoreB)

    val actual = (drmA / drmB).collect
    val expected = inCoreA / inCoreB

    assert((actual - expected).norm < 1E-10)
  }
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/AtASuite.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/AtASuite.scala b/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/AtASuite.scala new file mode 100644 index 0000000..3dc3fcc --- /dev/null +++ b/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/AtASuite.scala @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */

package org.apache.mahout.h2obindings.ops

import org.scalatest.FunSuite
import org.apache.mahout.h2obindings.test.DistributedH2OSuite
import org.apache.mahout.math.scalabindings._
import org.apache.mahout.math.drm._
import org.apache.mahout.h2obindings._
import org.apache.mahout.h2obindings.drm._
import RLikeOps._
import RLikeDrmOps._
import org.apache.mahout.math.drm._

/** Tests for {@link XtX}: the distributed A'A product must equal the in-core one. */
class AtASuite extends FunSuite with DistributedH2OSuite {

  test("AtA slim") {
    val inCoreA = dense((1, 2), (2, 3))
    val distributedA = drmParallelize(inCoreA)

    // Distributed result first ...
    val actual = (distributedA.t %*% distributedA).collect
    println(actual)

    // ... then the in-core control value.
    val expected = inCoreA.t %*% inCoreA
    println(expected)

    assert(expected === actual)
  }
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/AtSuite.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/AtSuite.scala b/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/AtSuite.scala new file mode 100644 index 0000000..c521388 --- /dev/null +++ b/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/AtSuite.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */

package org.apache.mahout.h2obindings.ops

import org.scalatest.FunSuite
import org.apache.mahout.h2obindings.test.DistributedH2OSuite
import org.apache.mahout.math.scalabindings._
import org.apache.mahout.math.drm._
import org.apache.mahout.h2obindings._
import org.apache.mahout.h2obindings.drm._
import RLikeOps._
import RLikeDrmOps._
import org.apache.mahout.math.drm._

/** Tests for A' algorithms */
class AtSuite extends FunSuite with DistributedH2OSuite {

  test("At") {
    val inCoreA = dense((1, 2, 3), (2, 3, 4), (3, 4, 5))
    val distributedA = drmParallelize(m = inCoreA, numPartitions = 2)

    // Transpose on the distributed side, then bring the result in core.
    val actual = distributedA.t.collect
    // Control value: the in-core transpose.
    val expected = inCoreA.t

    println(actual)
    assert((actual - expected).norm < 1E-5)
  }
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/test/scala/org/apache/mahout/h2obindings/test/DistributedH2OSuite.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/h2obindings/test/DistributedH2OSuite.scala b/h2o/src/test/scala/org/apache/mahout/h2obindings/test/DistributedH2OSuite.scala new file mode 100644 index 0000000..4568fad --- /dev/null +++ b/h2o/src/test/scala/org/apache/mahout/h2obindings/test/DistributedH2OSuite.scala @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */

package org.apache.mahout.h2obindings.test

import org.scalatest.Suite
import org.apache.mahout.h2obindings._
import org.apache.mahout.test.{DistributedMahoutSuite,MahoutSuite}
import org.apache.mahout.math.drm.DistributedContext

/**
 * Test fixture that provides an implicit H2O-backed `DistributedContext`.
 *
 * A fresh context is created before every test and closed after it.
 */
trait DistributedH2OSuite extends DistributedMahoutSuite with LoggerConfiguration {
  this: Suite =>

  // Reassigned around every test, hence a var; null between tests.
  protected implicit var mahoutCtx: DistributedContext = _

  /** Runs the parent setup, then creates the context for the upcoming test. */
  override protected def beforeEach(): Unit = {
    super.beforeEach()

    mahoutCtx = mahoutH2OContext("mah2out")
  }

  /**
   * Closes the context if one was created, then runs the parent teardown.
   * The reference is cleared and the parent teardown runs even if `close()`
   * throws.
   */
  override protected def afterEach(): Unit = {
    try {
      if (mahoutCtx != null) {
        mahoutCtx.close()
      }
    } finally {
      mahoutCtx = null
      super.afterEach()
    }
  }
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/test/scala/org/apache/mahout/h2obindings/test/LoggerConfiguration.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/h2obindings/test/LoggerConfiguration.scala b/h2o/src/test/scala/org/apache/mahout/h2obindings/test/LoggerConfiguration.scala new file mode 100644 index 0000000..b0cfd30 --- /dev/null +++ b/h2o/src/test/scala/org/apache/mahout/h2obindings/test/LoggerConfiguration.scala @@ -0,0 +1,13 @@
package org.apache.mahout.h2obindings.test

import org.scalatest.{Suite, ConfigMap}
import org.apache.log4j.{Level, Logger, BasicConfigurator}

/**
 * Extends the common Mahout test logger configuration by setting the
 * `org.apache.mahout.h2obindings` logger to DEBUG before the suite runs.
 */
trait LoggerConfiguration extends org.apache.mahout.test.LoggerConfiguration {
  this: Suite =>

  override protected def beforeAll(configMap: ConfigMap): Unit = {
    super.beforeAll(configMap)
    Logger.getLogger("org.apache.mahout.h2obindings").setLevel(Level.DEBUG)
  }
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/test/scala/org/apache/mahout/math/decompositions/DistributedDecompositionsSuite.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/math/decompositions/DistributedDecompositionsSuite.scala b/h2o/src/test/scala/org/apache/mahout/math/decompositions/DistributedDecompositionsSuite.scala new file mode 100644 index 0000000..00cf0ba --- /dev/null +++ b/h2o/src/test/scala/org/apache/mahout/math/decompositions/DistributedDecompositionsSuite.scala @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */

package org.apache.mahout.math.decompositions

import org.apache.mahout.math._
import drm._
import scalabindings._
import RLikeOps._
import RLikeDrmOps._
import org.apache.mahout.h2obindings._
import org.apache.mahout.common.RandomUtils
import scala.math._
import org.scalatest.{Matchers, FunSuite}
import org.apache.mahout.h2obindings.test.DistributedH2OSuite

/**
 * Declares no tests itself: the cases come from
 * `DistributedDecompositionsSuiteBase`, mixed in together with the
 * `DistributedH2OSuite` fixture.
 */
class DistributedDecompositionsSuite
  extends FunSuite
  with DistributedH2OSuite
  with DistributedDecompositionsSuiteBase
http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index ef9ae03..89ed1a7 100644 --- a/pom.xml +++ b/pom.xml @@ -110,6 +110,7 @@ <scala.major>2.10</scala.major> <scala.version>2.10.4</scala.version> <spark.version>1.0.1</spark.version> + <h2o.version>0.1.5</h2o.version> </properties> <issueManagement> <system>Jira</system> @@ -700,6 +701,7 @@ <module>math-scala</module> <module>spark</module> <module>spark-shell</module> + <module>h2o</module> </modules> <profiles> <profile>
