http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAewUnaryFuncFusion.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAewUnaryFuncFusion.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAewUnaryFuncFusion.scala deleted file mode 100644 index 19bdc64..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAewUnaryFuncFusion.scala +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.drm.logical - -import scala.reflect.ClassTag -import org.apache.mahout.math.drm.DrmLike -import scala.util.Random - -/** - * Composition of unary elementwise functions. 
- */ -case class OpAewUnaryFuncFusion[K]( - override var A: DrmLike[K], - var ff:List[OpAewUnaryFunc[K]] = Nil - ) extends AbstractUnaryOp[K,K] with TEwFunc { - - override protected[mahout] lazy val partitioningTag: Long = - if (A.canHaveMissingRows) - Random.nextLong() - else A.partitioningTag - - /** Stuff like `A +1` is always supposed to fix this */ - override protected[mahout] lazy val canHaveMissingRows: Boolean = false - - /** - * Explicit extraction of key class Tag since traits don't support context bound access; but actual - * implementation knows it - */ - override def keyClassTag: ClassTag[K] = A.keyClassTag - - /** R-like syntax for number of rows. */ - def nrow: Long = A.nrow - - /** R-like syntax for number of columns */ - def ncol: Int = A.ncol - - /** Apply to degenerate elements? */ - override def evalZeros: Boolean = ff.exists(_.evalZeros) - - /** the function itself */ - override def f: (Double) => Double = { - - // Make sure composed collection becomes an attribute of this closure because we will be sending - // it to the backend. - val composedFunc = ff.map(_.f) - - // Create functional closure and return. - (x: Double) => (composedFunc :\ x) { case (f, xarg) => f(xarg)} - - } -} -
http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAt.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAt.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAt.scala deleted file mode 100644 index 59c71bd..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAt.scala +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.drm.logical - -import org.apache.mahout.math.drm._ - -import scala.reflect.ClassTag - -/** Logical A' */ -case class OpAt( - override var A: DrmLike[Int]) - extends AbstractUnaryOp[Int, Int] { - - /** - * Explicit extraction of key class Tag since traits don't support context bound access; but actual - * implementation knows it - */ - override def keyClassTag = ClassTag.Int - - /** R-like syntax for number of rows. */ - def nrow: Long = A.ncol - - /** R-like syntax for number of columns */ - def ncol: Int = safeToNonNegInt(A.nrow) - - /** A' after simplifications cannot produce missing rows, ever. 
*/ - override protected[mahout] lazy val canHaveMissingRows: Boolean = false -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAtA.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAtA.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAtA.scala deleted file mode 100644 index 4c01f46..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAtA.scala +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.drm.logical - -import scala.reflect.ClassTag -import org.apache.mahout.math.drm.DrmLike - -/** A'A */ -case class OpAtA[K]( - override var A: DrmLike[K] - ) extends AbstractUnaryOp[K, Int] { - - /** - * Explicit extraction of key class Tag since traits don't support context bound access; but actual - * implementation knows it - */ - override def keyClassTag = ClassTag.Int - - /** R-like syntax for number of rows. 
*/ - def nrow: Long = A.ncol - - /** R-like syntax for number of columns */ - def ncol: Int = A.ncol - - override protected[mahout] lazy val canHaveMissingRows: Boolean = false - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAtAnyKey.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAtAnyKey.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAtAnyKey.scala deleted file mode 100644 index b23dca7..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAtAnyKey.scala +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.mahout.math.drm.logical - -import scala.reflect.ClassTag -import org.apache.mahout.math.drm._ - -/** Logical A' for any row key to support A'A optimizations */ -case class OpAtAnyKey[A]( - override var A: DrmLike[A]) - extends AbstractUnaryOp[A, Int] { - - /** - * Explicit extraction of key class Tag since traits don't support context bound access; but actual - * implementation knows it - */ - override def keyClassTag = ClassTag.Int - - /** R-like syntax for number of rows. */ - def nrow: Long = A.ncol - - /** R-like syntax for number of columns */ - def ncol: Int = safeToNonNegInt(A.nrow) - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAtB.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAtB.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAtB.scala deleted file mode 100644 index 7ec8585..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAtB.scala +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.mahout.math.drm.logical - -import scala.reflect.ClassTag -import org.apache.mahout.math.drm.DrmLike - -/** Logical A'B */ -case class OpAtB[A]( - override var A: DrmLike[A], - override var B: DrmLike[A]) - extends AbstractBinaryOp[A, A, Int] { - - assert(A.nrow == B.nrow, "Incompatible operand geometry") - - /** - * Explicit extraction of key class Tag since traits don't support context bound access; but actual - * implementation knows it - */ - override def keyClassTag = ClassTag.Int - - /** R-like syntax for number of rows. */ - def nrow: Long = A.ncol - - /** R-like syntax for number of columns */ - def ncol: Int = B.ncol - - /** Non-zero element count */ - def nNonZero: Long = - // TODO: for purposes of cost calculation, approximate based on operands - throw new UnsupportedOperationException - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAtx.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAtx.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAtx.scala deleted file mode 100644 index 97b6de1..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAtx.scala +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.drm.logical - -import org.apache.mahout.math.Vector -import org.apache.mahout.math.scalabindings._ -import RLikeOps._ -import org.apache.mahout.math.drm._ - -import scala.reflect.ClassTag - -/** Logical A'x. */ -case class OpAtx( - override var A: DrmLike[Int], - val x: Vector - ) extends AbstractUnaryOp[Int, Int] { - - override protected[mahout] lazy val partitioningTag: Long = A.partitioningTag - - assert(A.nrow == x.length, "Incompatible operand geometry") - - /** - * Explicit extraction of key class Tag since traits don't support context bound access; but actual - * implementation knows it - */ - override val keyClassTag = ClassTag.Int - - /** R-like syntax for number of rows. */ - def nrow: Long = safeToNonNegInt(A.ncol) - - /** R-like syntax for number of columns */ - def ncol: Int = 1 - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAx.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAx.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAx.scala deleted file mode 100644 index d25e0d9..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpAx.scala +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.drm.logical - -import scala.reflect.ClassTag -import org.apache.mahout.math.Vector -import org.apache.mahout.math.scalabindings._ -import RLikeOps._ -import org.apache.mahout.math.drm.DrmLike - -/** Logical Ax. */ -case class OpAx[K]( - override var A: DrmLike[K], - val x: Vector - ) extends AbstractUnaryOp[K, K] { - - override protected[mahout] lazy val partitioningTag: Long = A.partitioningTag - - assert(A.ncol == x.length, "Incompatible operand geometry") - - /** - * Explicit extraction of key class Tag since traits don't support context bound access; but actual - * implementation knows it - */ - override def keyClassTag: ClassTag[K] = A.keyClassTag - - /** R-like syntax for number of rows. 
*/ - def nrow: Long = A.nrow - - /** R-like syntax for number of columns */ - def ncol: Int = 1 - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpCbind.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpCbind.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpCbind.scala deleted file mode 100644 index cbc20ae..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpCbind.scala +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.mahout.math.drm.logical - -import org.apache.mahout.math.drm.DrmLike -import scala.util.Random - -/** cbind() logical operator */ -case class OpCbind[K]( - override var A: DrmLike[K], - override var B: DrmLike[K] - ) extends AbstractBinaryOp[K, K, K] { - - assert(A.nrow == B.nrow, "arguments must have same number of rows") - require(A.keyClassTag == B.keyClassTag, "arguments must have same row key") - - /** - * Explicit extraction of key class Tag since traits don't support context bound access; but actual - * implementation knows it - */ - override def keyClassTag = A.keyClassTag - - override protected[mahout] lazy val partitioningTag: Long = - if (A.partitioningTag == B.partitioningTag) A.partitioningTag - else Random.nextLong() - - /** R-like syntax for number of rows. */ - def nrow: Long = A.nrow - - /** R-like syntax for number of columns */ - def ncol: Int = A.ncol + B.ncol - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpCbindScalar.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpCbindScalar.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpCbindScalar.scala deleted file mode 100644 index c3775ed..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpCbindScalar.scala +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.mahout.math.drm.logical - -import org.apache.mahout.math.drm.DrmLike - -case class OpCbindScalar[K]( - override var A:DrmLike[K], - var x:Double, - val leftBind:Boolean ) extends AbstractUnaryOp[K,K] { - - override protected[mahout] lazy val canHaveMissingRows: Boolean = false - - override protected[mahout] lazy val partitioningTag: Long = A.partitioningTag - - /** - * Explicit extraction of key class Tag since traits don't support context bound access; but actual - * implementation knows it - */ - override def keyClassTag = A.keyClassTag - - /** R-like syntax for number of rows. */ - def nrow: Long = A.nrow - - /** R-like syntax for number of columns */ - def ncol: Int = A.ncol + 1 - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpMapBlock.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpMapBlock.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpMapBlock.scala deleted file mode 100644 index 95e690b..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpMapBlock.scala +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.drm.logical - -import org.apache.mahout.math.drm.{BlockMapFunc, DrmLike} - -import scala.reflect.{ClassTag, classTag} -import scala.util.Random - -case class OpMapBlock[S, R: ClassTag]( - override var A: DrmLike[S], - val bmf: BlockMapFunc[S, R], - val _ncol: Int = -1, - val _nrow: Long = -1, - identicallyPartitioned:Boolean - ) extends AbstractUnaryOp[S, R] { - - override protected[mahout] lazy val partitioningTag: Long = - if (identicallyPartitioned) A.partitioningTag else Random.nextLong() - - /** - * Explicit extraction of key class Tag since traits don't support context bound access; but actual - * implementation knows it - */ - override def keyClassTag = classTag[R] - - /** R-like syntax for number of rows. 
*/ - def nrow: Long = if (_nrow >= 0) _nrow else A.nrow - - /** R-like syntax for number of columns */ - def ncol: Int = if (_ncol >= 0) _ncol else A.ncol - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpPar.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpPar.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpPar.scala deleted file mode 100644 index 2402b1f..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpPar.scala +++ /dev/null @@ -1,23 +0,0 @@ -package org.apache.mahout.math.drm.logical - -import org.apache.mahout.math.drm.DrmLike - -/** Parallelism operator */ -case class OpPar[K]( - override var A: DrmLike[K], - val minSplits: Int = -1, - val exactSplits: Int = -1) - extends AbstractUnaryOp[K, K] { - - /** - * Explicit extraction of key class Tag since traits don't support context bound access; but actual - * implementation knows it - */ - override def keyClassTag = A.keyClassTag - - /** R-like syntax for number of rows. */ - def nrow: Long = A.nrow - - /** R-like syntax for number of columns */ - def ncol: Int = A.ncol -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpRbind.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpRbind.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpRbind.scala deleted file mode 100644 index 1c67868..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpRbind.scala +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.drm.logical - -import org.apache.mahout.math.drm.DrmLike -import scala.util.Random - -/** rbind() logical operator */ -case class OpRbind[K]( - override var A: DrmLike[K], - override var B: DrmLike[K] - ) extends AbstractBinaryOp[K, K, K] { - - assert(A.ncol == B.ncol, "arguments must have same number of columns") - require(A.keyClassTag == B.keyClassTag, "arguments of rbind() must have the same row key type") - - override protected[mahout] lazy val partitioningTag: Long = Random.nextLong() - - /** - * Explicit extraction of key class Tag since traits don't support context bound access; but actual - * implementation knows it - */ - override def keyClassTag = A.keyClassTag - - /** R-like syntax for number of rows. 
*/ - def nrow: Long = A.nrow + B.nrow - - /** R-like syntax for number of columns */ - def ncol: Int = A.ncol - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpRowRange.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpRowRange.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpRowRange.scala deleted file mode 100644 index c7d3bfa..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpRowRange.scala +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.mahout.math.drm.logical - -import org.apache.mahout.math.drm.DrmLike - -import scala.reflect.ClassTag - -/** Logical row-range slicing */ -case class OpRowRange( - override var A: DrmLike[Int], - val rowRange: Range - ) extends AbstractUnaryOp[Int, Int] { - - assert(rowRange.head >= 0 && rowRange.last < A.nrow, "row range out of range") - - /** - * Explicit extraction of key class Tag since traits don't support context bound access; but actual - * implementation knows it - */ - override val keyClassTag = ClassTag.Int - - /** R-like syntax for number of rows. */ - def nrow: Long = rowRange.length - - /** R-like syntax for number of columns */ - def ncol: Int = A.ncol - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpTimesLeftMatrix.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpTimesLeftMatrix.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpTimesLeftMatrix.scala deleted file mode 100644 index 016171d..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpTimesLeftMatrix.scala +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.drm.logical - -import org.apache.mahout.math.Matrix -import org.apache.mahout.math.scalabindings._ -import RLikeOps._ -import org.apache.mahout.math.drm.DrmLike - -import scala.reflect.ClassTag - -/** Logical Times-left over in-core matrix operand */ -case class OpTimesLeftMatrix( - left: Matrix, - override var A: DrmLike[Int] - ) extends AbstractUnaryOp[Int, Int] { - - assert(left.ncol == A.nrow, "Incompatible operand geometry") - - /** - * Explicit extraction of key class Tag since traits don't support context bound access; but actual - * implementation knows it - */ - override val keyClassTag = ClassTag.Int - - /** R-like syntax for number of rows. */ - def nrow: Long = left.nrow - - /** R-like syntax for number of columns */ - def ncol: Int = A.ncol - - /** Non-zero element count */ - // TODO - def nNonZero: Long = throw new UnsupportedOperationException - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpTimesRightMatrix.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpTimesRightMatrix.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpTimesRightMatrix.scala deleted file mode 100644 index 94104bb..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/OpTimesRightMatrix.scala +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.drm.logical - -import org.apache.mahout.math.Matrix -import org.apache.mahout.math.scalabindings._ -import RLikeOps._ -import org.apache.mahout.math.drm.DrmLike - -/** Logical times-right over in-core matrix operand. */ -case class OpTimesRightMatrix[K]( - override var A: DrmLike[K], - val right: Matrix - ) extends AbstractUnaryOp[K, K] { - - override protected[mahout] lazy val partitioningTag: Long = A.partitioningTag - - assert(A.ncol == right.nrow, "Incompatible operand geometry") - - /** - * Explicit extraction of key class Tag since traits don't support context bound access; but actual - * implementation knows it - */ - override lazy val keyClassTag = A.keyClassTag - - /** R-like syntax for number of rows. 
*/ - def nrow: Long = A.nrow - - /** R-like syntax for number of columns */ - def ncol: Int = right.ncol - - /** Non-zero element count */ - // TODO - def nNonZero: Long = throw new UnsupportedOperationException - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/TEwFunc.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/TEwFunc.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/TEwFunc.scala deleted file mode 100644 index 0eb5f65..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/drm/logical/TEwFunc.scala +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.drm.logical - -/** - * Trait denoting logical operators providing elementwise operations that work as unary operators - * on each element of a matrix. - */ -trait TEwFunc { - - /** Apply to degenerate elments? */ - def evalZeros: Boolean - - /** the function itself */ - def f: (Double) => Double - - /** - * Self assignment ok? 
If yes, may cause side effects if works off non-serialized cached object - * tree! - */ - def selfAssignOk: Boolean = false -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala deleted file mode 100644 index cdec954..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala +++ /dev/null @@ -1,375 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math - -import org.apache.mahout.math.drm._ -import org.apache.mahout.math.scalabindings.RLikeOps._ -import org.apache.mahout.math.scalabindings._ - -import scala.reflect.ClassTag -import org.apache.mahout.math.drm.logical.OpAewUnaryFunc - -import collection._ - -package object drm { - - /** Drm row-wise tuple */ - type DrmTuple[K] = (K, Vector) - - /** Drm block-wise tuple: Array of row keys and the matrix block. 
*/ - type BlockifiedDrmTuple[K] = (Array[K], _ <: Matrix) - - - /** Block-map func */ - type BlockMapFunc[S, R] = BlockifiedDrmTuple[S] ⇒ BlockifiedDrmTuple[R] - - type BlockMapFunc2[S] = BlockifiedDrmTuple[S] ⇒ Matrix - - type BlockReduceFunc = (Matrix, Matrix) ⇒ Matrix - - /** CacheHint type */ - // type CacheHint = CacheHint.CacheHint - - def safeToNonNegInt(x: Long): Int = { - assert(x == x << -31 >>> -31, "transformation from long to Int is losing significant bits, or is a negative number") - x.toInt - } - - /** Broadcast support API */ - def drmBroadcast(m:Matrix)(implicit ctx:DistributedContext):BCast[Matrix] = ctx.drmBroadcast(m) - - /** Broadcast support API */ - def drmBroadcast(v:Vector)(implicit ctx:DistributedContext):BCast[Vector] = ctx.drmBroadcast(v) - - /** Load DRM from hdfs (as in Mahout DRM format) */ - def drmDfsRead (path: String)(implicit ctx: DistributedContext): CheckpointedDrm[_] = ctx.drmDfsRead(path) - - /** Shortcut to parallelizing matrices with indices, ignore row labels. */ - def drmParallelize(m: Matrix, numPartitions: Int = 1) - (implicit sc: DistributedContext): CheckpointedDrm[Int] = drmParallelizeWithRowIndices(m, numPartitions)(sc) - - /** Parallelize in-core matrix as a distributed matrix, using row ordinal indices as data set keys. */ - def drmParallelizeWithRowIndices(m: Matrix, numPartitions: Int = 1) - (implicit ctx: DistributedContext): CheckpointedDrm[Int] = ctx.drmParallelizeWithRowIndices(m, numPartitions) - - /** Parallelize in-core matrix as a distributed matrix, using row labels as a data set keys. */ - def drmParallelizeWithRowLabels(m: Matrix, numPartitions: Int = 1) - (implicit ctx: DistributedContext): CheckpointedDrm[String] = ctx.drmParallelizeWithRowLabels(m, numPartitions) - - /** This creates an empty DRM with specified number of partitions and cardinality. 
*/ - def drmParallelizeEmpty(nrow: Int, ncol: Int, numPartitions: Int = 10) - (implicit ctx: DistributedContext): CheckpointedDrm[Int] = ctx.drmParallelizeEmpty(nrow, ncol, numPartitions) - - /** Creates empty DRM with non-trivial height */ - def drmParallelizeEmptyLong(nrow: Long, ncol: Int, numPartitions: Int = 10) - (implicit ctx: DistributedContext): CheckpointedDrm[Long] = ctx.drmParallelizeEmptyLong(nrow, ncol, numPartitions) - - /** Implicit broadcast -> value conversion. */ - implicit def bcast2val[T](bcast: BCast[T]): T = bcast.value - - /** Just throw all engine operations into context as well. */ - implicit def ctx2engine(ctx: DistributedContext): DistributedEngine = ctx.engine - - implicit def drm2drmCpOps[K](drm: CheckpointedDrm[K]): CheckpointedOps[K] = - new CheckpointedOps[K](drm) - - /** - * We assume that whenever computational action is invoked without explicit checkpoint, the user - * doesn't imply caching - */ - implicit def drm2Checkpointed[K](drm: DrmLike[K]): CheckpointedDrm[K] = drm.checkpoint(CacheHint.NONE) - - /** Implicit conversion to in-core with NONE caching of the result. */ - implicit def drm2InCore[K](drm: DrmLike[K]): Matrix = drm.collect - - /** Do vertical concatenation of collection of blockified tuples */ - private[mahout] def rbind[K:ClassTag](blocks: Iterable[BlockifiedDrmTuple[K]]): BlockifiedDrmTuple[K] = { - assert(blocks.nonEmpty, "rbind: 0 blocks passed in") - if (blocks.size == 1) { - // No coalescing required. 
- blocks.head - } else { - // compute total number of rows in a new block - val m = blocks.view.map(_._2.nrow).sum - val n = blocks.head._2.ncol - val coalescedBlock = blocks.head._2.like(m, n) - val coalescedKeys = new Array[K](m) - var row = 0 - for (elem <- blocks.view) { - val block = elem._2 - val rowEnd = row + block.nrow - coalescedBlock(row until rowEnd, ::) := block - elem._1.copyToArray(coalescedKeys, row) - row = rowEnd - } - coalescedKeys -> coalescedBlock - } - } - - /** - * Convert arbitrarily-keyed matrix to int-keyed matrix. Some algebra will accept only int-numbered - * row matrices. So this method is to help. - * - * @param drmX input to be transcoded - * @param computeMap collect `old key -> int key` map to front-end? - * @tparam K key type - * @return Sequentially keyed matrix + (optionally) map from non-int key to [[Int]] key. If the - * key type is actually Int, then we just return the argument with None for the map, - * regardless of computeMap parameter. - */ - def drm2IntKeyed[K](drmX: DrmLike[K], computeMap: Boolean = false): (DrmLike[Int], Option[DrmLike[K]]) = - drmX.context.engine.drm2IntKeyed(drmX, computeMap) - - /** - * (Optional) Sampling operation. Consistent with Spark semantics of the same. 
- * @param drmX - * @param fraction - * @param replacement - * @tparam K - * @return samples - */ - def drmSampleRows[K](drmX: DrmLike[K], fraction: Double, replacement: Boolean = false): DrmLike[K] = - drmX.context.engine.drmSampleRows(drmX, fraction, replacement) - - def drmSampleKRows[K](drmX: DrmLike[K], numSamples: Int, replacement: Boolean = false): Matrix = - drmX.context.engine.drmSampleKRows(drmX, numSamples, replacement) - - /** - * Convert a DRM sample into a Tab Separated Vector (TSV) to be loaded into an R-DataFrame - * for plotting and sketching - * @param drmX - DRM - * @param samplePercent - Percentage of Sample elements from the DRM to be fished out for plotting - * @tparam K - * @return TSV String - */ - def drmSampleToTSV[K](drmX: DrmLike[K], samplePercent: Double = 1): String = { - - val drmSize = drmX.checkpoint().numRows() - val sampleRatio: Double = 1.0 * samplePercent / 100 - val numSamples: Int = (drmSize * sampleRatio).toInt - - val plotMatrix = drmSampleKRows(drmX, numSamples, replacement = false) - - // Plot Matrix rows - val matrixRows = plotMatrix.numRows() - val matrixCols = plotMatrix.numCols() - - // Convert the Plot Matrix Rows to TSV - var str = "" - - for (i <- 0 until matrixRows) { - for (j <- 0 until matrixCols) { - str += plotMatrix(i, j) - if (j <= matrixCols - 2) { - str += '\t' - } - } - str += '\n' - } - - str - } - - /////////////////////////////////////////////////////////// - // Elementwise unary functions on distributed operands. 
- def dexp[K](drmA: DrmLike[K]): DrmLike[K] = new OpAewUnaryFunc[K](drmA, math.exp, true) - - def dlog[K](drmA: DrmLike[K]): DrmLike[K] = new OpAewUnaryFunc[K](drmA, math.log, true) - - def dabs[K](drmA: DrmLike[K]): DrmLike[K] = new OpAewUnaryFunc[K](drmA, math.abs) - - def dsqrt[K](drmA: DrmLike[K]): DrmLike[K] = new OpAewUnaryFunc[K](drmA, math.sqrt) - - def dsignum[K](drmA: DrmLike[K]): DrmLike[K] = new OpAewUnaryFunc[K](drmA, math.signum) - - /////////////////////////////////////////////////////////// - // Misc. math utilities. - - /** - * Compute column wise means and variances -- distributed version. - * - * @param drmA Note: will pin input to cache if not yet pinned. - * @tparam K - * @return colMeans → colVariances - */ - def dcolMeanVars[K](drmA: DrmLike[K]): (Vector, Vector) = { - - import RLikeDrmOps._ - - val drmAcp = drmA.checkpoint() - - val mu = drmAcp colMeans - - // Compute variance using mean(x^2) - mean(x)^2 - val variances = (drmAcp ^ 2 colMeans) -=: mu * mu - - mu → variances - } - - /** - * Compute column wise means and standard deviations -- distributed version. - * @param drmA note: input will be pinned to cache if not yet pinned - * @return colMeans → colStdevs - */ - def dcolMeanStdevs[K](drmA: DrmLike[K]): (Vector, Vector) = { - val (mu, vars) = dcolMeanVars(drmA) - mu → (vars ::= math.sqrt _) - } - - /** - * Thin column-wise mean and covariance matrix computation. Same as [[dcolMeanCov()]] but suited for - * thin and tall inputs where covariance matrix can be reduced and finalized in driver memory. - * - * @param drmA note: will pin input to cache if not yet pinned. - * @return mean → covariance matrix (in core) - */ - def dcolMeanCovThin[K: ClassTag](drmA: DrmLike[K]):(Vector, Matrix) = { - - import RLikeDrmOps._ - - val drmAcp = drmA.checkpoint() - val mu = drmAcp colMeans - val mxCov = (drmAcp.t %*% drmAcp).collect /= drmAcp.nrow -= (mu cross mu) - mu → mxCov - } - - /** - * Compute COV(X) matrix and mean of row-wise data set. 
X is presented as row-wise input matrix A. - * - * This is a "wide" procedure, covariance matrix is returned as a DRM. - * - * @param drmA note: will pin input into cache if not yet pinned. - * @return mean → covariance DRM - */ - def dcolMeanCov[K: ClassTag](drmA: DrmLike[K]): (Vector, DrmLike[Int]) = { - - import RLikeDrmOps._ - - implicit val ctx = drmA.context - val drmAcp = drmA.checkpoint() - - val bcastMu = drmBroadcast(drmAcp colMeans) - - // We use multivaraite analogue COV(X)=E(XX')-mu*mu'. In our case E(XX') = (A'A)/A.nrow. - // Compute E(XX') - val drmSigma = (drmAcp.t %*% drmAcp / drmAcp.nrow) - - // Subtract mu*mu'. In this case we assume mu*mu' may still be big enough to be treated by - // driver alone, so we redistribute this operation as well. Hence it may look a bit cryptic. - .mapBlock() { case (keys, block) ⇒ - - // Pin mu as vector reference to memory. - val mu:Vector = bcastMu - - keys → (block := { (r, c, v) ⇒ v - mu(keys(r)) * mu(c) }) - } - - // return (mu, cov(X) ("bigSigma")). - (bcastMu: Vector) → drmSigma - } - - /** Distributed Squared distance matrix computation. */ - def dsqDist(drmX: DrmLike[Int]): DrmLike[Int] = { - - // This is a specific case of pairwise distances of X and Y. - - import RLikeDrmOps._ - - // Context needed - implicit val ctx = drmX.context - - // Pin to cache if hasn't been pinned yet - val drmXcp = drmX.checkpoint() - - // Compute column sum of squares - val s = drmXcp ^ 2 rowSums - - val sBcast = drmBroadcast(s) - - (drmXcp %*% drmXcp.t) - - // Apply second part of the formula as per in-core algorithm - .mapBlock() { case (keys, block) ⇒ - - // Slurp broadcast to memory - val s = sBcast: Vector - - // Update in-place - block := { (r, c, x) ⇒ s(keys(r)) + s(c) - 2 * x} - - keys → block - } - } - - - /** - * Compute fold-in distances (distributed version). Here, we use pretty much the same math as with - * squared distances. 
- * - * D_sq = s*1' + 1*t' - 2*X*Y' - * - * where s is row sums of hadamard product(X, X), and, similarly, - * s is row sums of Hadamard product(Y, Y). - * - * @param drmX m x d row-wise dataset. Pinned to cache if not yet pinned. - * @param drmY n x d row-wise dataset. Pinned to cache if not yet pinned. - * @return m x d pairwise squared distance matrix (between rows of X and Y) - */ - def dsqDist(drmX: DrmLike[Int], drmY: DrmLike[Int]): DrmLike[Int] = { - - import RLikeDrmOps._ - - implicit val ctx = drmX.context - - val drmXcp = drmX.checkpoint() - val drmYcp = drmY.checkpoint() - - val sBcast = drmBroadcast(drmXcp ^ 2 rowSums) - val tBcast = drmBroadcast(drmYcp ^ 2 rowSums) - - (drmX %*% drmY.t) - - // Apply the rest of the formula - .mapBlock() { case (keys, block) => - - // Cache broadcast representations in local task variable - val s = sBcast: Vector - val t = tBcast: Vector - - block := { (r, c, x) => s(keys(r)) + t(c) - 2 * x} - keys → block - } - } -} - -package object indexeddataset { - /** Load IndexedDataset from text delimited files */ - def indexedDatasetDFSRead(src: String, - schema: Schema = DefaultIndexedDatasetReadSchema, - existingRowIDs: Option[BiDictionary] = None) - (implicit ctx: DistributedContext): - IndexedDataset = ctx.indexedDatasetDFSRead(src, schema, existingRowIDs) - - def indexedDatasetDFSReadElements(src: String, - schema: Schema = DefaultIndexedDatasetReadSchema, - existingRowIDs: Option[BiDictionary] = None) - (implicit ctx: DistributedContext): - IndexedDataset = ctx.indexedDatasetDFSReadElements(src, schema, existingRowIDs) - -} - http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/indexeddataset/BiMap.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/indexeddataset/BiMap.scala b/math-scala/src/main/scala/org/apache/mahout/math/indexeddataset/BiMap.scala deleted file mode 100644 
index 6c0d432..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/indexeddataset/BiMap.scala +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.mahout.math.indexeddataset - -import scala.collection.immutable.HashMap - -/** - * Immutable Bi-directional Map. - * @param m Map to use for forward reference - * @param i optional reverse map of value to key, will create one lazily if none is provided - * and is required to have no duplicate reverse mappings. - */ -class BiMap[K, V] ( - private val m: Map[K, V], - // if this is serialized we allow i to be discarded and recalculated when deserialized - @transient private var i: Option[BiMap[V, K]] = None - ) extends Serializable { - - // NOTE: make inverse's inverse point back to current BiMap - // if this is serialized we allow inverse to be discarded and recalculated when deserialized - @transient lazy val inverse: BiMap[V, K] = { - if( i == null.asInstanceOf[Option[BiMap[V, K]]] ) - i = None - i.getOrElse { - val rev = m.map(_.swap) - require((rev.size == m.size), "Failed to create reversed map. 
Cannot have duplicated values.") - new BiMap(rev, Some(this)) - } - } - - // forces inverse to be calculated in the constructor when deserialized - // not when first used - @transient val size_ = inverse.size - - def get(k: K): Option[V] = m.get(k) - - def getOrElse(k: K, default: => V): V = m.getOrElse(k, default) - - def contains(k: K): Boolean = m.contains(k) - - def apply(k: K): V = m.apply(k) - - /** - * Converts to a map. - * @return a map of type immutable.Map[K, V] - */ - def toMap: Map[K, V] = m - - /** - * Converts to a sequence. - * @return a sequence containing all elements of this map - */ - def toSeq: Seq[(K, V)] = m.toSeq - - def size: Int = m.size - - def take(n: Int) = BiMap(m.take(n)) - - override def toString = m.toString -} - -object BiMap { - - /** Extra constructor from a map */ - def apply[K, V](x: Map[K, V]): BiMap[K, V] = new BiMap(x) - -} - -/** BiDictionary is a specialized BiMap that has non-negative Ints as values for use as DRM keys */ -class BiDictionary ( - private val m: Map[String, Int], - @transient private val i: Option[BiMap[Int, String]] = None ) - extends BiMap[String, Int](m, i) { - - /** - * Create a new BiDictionary with the keys supplied and values ranging from 0 to size -1 - * @param keys a set of String - */ - def this(keys: Seq[String]) = { - this(HashMap(keys.view.zipWithIndex: _*)) - } - - def merge( - keys: Seq[String]): BiDictionary = { - - var newIDs = List[String]() - - for (key <- keys) { - if (!m.contains(key)) newIDs = key +: newIDs - } - if(newIDs.isEmpty) this else new BiDictionary(m ++ HashMap(newIDs.view.zip (Stream from size): _*)) - - } - -} - -/** BiDictionary is a specialized BiMap that has non-negative Ints as values for use as DRM keys. - * The companion object provides modification methods specific to maintaining contiguous Int values - * and unique String keys */ -object BiDictionary { - - /** - * Append new keys to an existing BiDictionary and return the result. 
The values will start - * at m.size and increase to create a continuous non-zero value set from 0 to size - 1 - * @param keys new keys to append, not checked for uniqueness so may be dangerous - * @param biDi merge keys to this BiDictionary and create new values buy incremeting from the highest Int value - * @return a BiDictionary with added mappings - */ - /*def append(keys: Seq[String], biDi: BiDictionary): BiDictionary = { - val hm = HashMap(keys.view.zip (Stream from biDi.size): _*) - new BiDictionary(biDi.m ++ hm) - }*/ - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/indexeddataset/IndexedDataset.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/indexeddataset/IndexedDataset.scala b/math-scala/src/main/scala/org/apache/mahout/math/indexeddataset/IndexedDataset.scala deleted file mode 100644 index eeca736..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/indexeddataset/IndexedDataset.scala +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.mahout.math.indexeddataset - -import org.apache.mahout.math.drm.{DistributedContext, CheckpointedDrm} - -/** - * Wrap an [[org.apache.mahout.math.drm.DrmLike]] with bidirectional ID mappings [[org.apache.mahout.math.indexeddataset.BiDictionary]] - * so a user specified labels/IDs can be stored and mapped to and from the Mahout Int ID used internal to Mahout - * core code. - * @todo Often no need for both or perhaps either dictionary, so save resources by allowing to be not created - * when not needed. - */ - -trait IndexedDataset { - val matrix: CheckpointedDrm[Int] - val rowIDs: BiDictionary - val columnIDs: BiDictionary - - /** - * Write a text delimited file(s) with the row and column IDs from dictionaries. - * @param dest write location, usually a directory - * @param schema params to control writing - * @param sc the [[org.apache.mahout.math.drm.DistributedContext]] used to do a distributed write - */ - def dfsWrite(dest: String, schema: Schema)(implicit sc: DistributedContext): Unit - - /** Factory method, creates the extending class and returns a new instance */ - def create(matrix: CheckpointedDrm[Int], rowIDs: BiDictionary, columnIDs: BiDictionary): - IndexedDataset - - /** - * Adds the equivalent of blank rows to the sparse CheckpointedDrm, which only changes the row cardinality value. - * No changes are made to the underlying drm. - * @param n number to use for new row cardinality, should be larger than current - * @return a new IndexedDataset or extending class with new cardinality - * @note should be done before any optimizer actions are performed on the matrix or you'll get unpredictable - * results. 
- */ - def newRowCardinality(n: Int): IndexedDataset = { - // n is validated in matrix - this.create(matrix.newRowCardinality(n), rowIDs, columnIDs) - } - -} - http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/indexeddataset/ReaderWriter.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/indexeddataset/ReaderWriter.scala b/math-scala/src/main/scala/org/apache/mahout/math/indexeddataset/ReaderWriter.scala deleted file mode 100644 index 65c0d8f..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/indexeddataset/ReaderWriter.scala +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.indexeddataset - -import org.apache.mahout.math.drm.DistributedContext -import org.apache.mahout.math.indexeddataset - -/** - * Reader trait is abstract in the sense that the elementReader and rowReader functions must be supplied by an - * extending trait, which also defines the type to be read. - * @tparam T type of object to read. 
- */ -trait Reader[T]{ - - val mc: DistributedContext - val readSchema: Schema - - /** - * Override in extending trait to supply T and perform a parallel read of collection elements - * @param mc a [[org.apache.mahout.math.drm.DistributedContext]] to read from - * @param readSchema map of parameters controlling formating and how the read is executed - * @param source list of comma delimited files to read from - * @param existingRowIDs [[indexeddataset.BiDictionary]] containing row IDs that have already - * been applied to this collection--used to synchronize row IDs between several - * collections - * @return a new collection of type T - */ - protected def elementReader( - mc: DistributedContext, - readSchema: Schema, - source: String, - existingRowIDs: Option[BiDictionary] = None): T - - /** - * Override in extending trait to supply T and perform a parallel read of collection rows - * @param mc a [[org.apache.mahout.math.drm.DistributedContext]] to read from - * @param readSchema map of parameters controlling formating and how the read is executed - * @param source list of comma delimited files to read from - * @param existingRowIDs [[indexeddataset.BiDictionary]] containing row IDs that have already - * been applied to this collection--used to synchronize row IDs between several - * collections - * @return a new collection of type T - */ - protected def rowReader( - mc: DistributedContext, - readSchema: Schema, - source: String, - existingRowIDs: Option[BiDictionary] = None): T - - /** - * Public method called to perform the element-wise read. 
Usually no need to override - * @param source comma delimited URIs to read from - * @param existingRowIDs a [[indexeddataset.BiDictionary]] containing previously used id mappings--used - * to synchronize all row ids is several collections - * @return a new collection of type T - */ - def readElementsFrom( - source: String, - existingRowIDs: Option[BiDictionary] = None): T = - elementReader(mc, readSchema, source, existingRowIDs) - - /** - * Public method called to perform the row-wise read. Usually no need to override. - * @param source comma delimited URIs to read from - * @param existingRowIDs a [[indexeddataset.BiDictionary]] containing previously used id mappings--used - * to synchronize all row ids is several collections - * @return a new collection of type T - */ - def readRowsFrom( - source: String, - existingRowIDs: Option[BiDictionary] = None): T = - rowReader(mc, readSchema, source, existingRowIDs) -} - -/** - * Writer trait is abstract in the sense that the writer method must be supplied by an extending trait, - * which also defines the type to be written. - * @tparam T type of object to write, usually a matrix type thing. - */ -trait Writer[T]{ - - val mc: DistributedContext - val sort: Boolean - val writeSchema: Schema - - /** - * Override to provide writer method - * @param mc context used to do distributed write - * @param writeSchema map with params to control format and execution of the write - * @param dest root directory to write to - * @param collection usually a matrix like collection to write - * @param sort flags whether to sort the rows by value descending - */ - protected def writer(mc: DistributedContext, writeSchema: Schema, dest: String, collection: T, sort: Boolean): Unit - - /** - * Call this method to perform the write, usually no need to override. 
- * @param collection what to write - * @param dest root directory to write to - */ - def writeTo(collection: T, dest: String) = writer(mc, writeSchema, dest, collection, sort) -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/indexeddataset/Schema.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/indexeddataset/Schema.scala b/math-scala/src/main/scala/org/apache/mahout/math/indexeddataset/Schema.scala deleted file mode 100644 index b7f120b..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/indexeddataset/Schema.scala +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.indexeddataset - -import scala.collection.mutable.HashMap - -/** - * Syntactic sugar for mutable.HashMap[String, Any] - * @param params list of mappings for instantiation {{{val mySchema = new Schema("one" -> 1, "two" -> "2", ...)}}} - */ -class Schema(params: Tuple2[String, Any]*) extends HashMap[String, Any] { - // note: this require a mutable HashMap, do we care? 
- this ++= params - - /** - * Constructor for copying an existing Schema - * @param schemaToClone return a copy of this Schema - */ - def this(schemaToClone: Schema){ - this() - this ++= schemaToClone - } -} - -// These can be used to keep the text in and out fairly standard to Mahout, where an application specific -// format is not required. These apply to formatting of [[org.apache.mahout.math.indexeddataset.IndexedDataset]] -// which can be used to create a Mahout DRM for DSL ops. - -/** - * Simple default Schema for typical text delimited element file input - * This tells the reader to input elements of the default (rowID<comma, tab, or space>columnID - * <comma, tab, or space>here may be other ignored text...) - */ -object DefaultIndexedDatasetElementReadSchema extends Schema( - "delim" -> "[,\t ]", //comma, tab or space - "filter" -> "", - "rowIDColumn" -> 0, - "columnIDPosition" -> 1, - "filterColumn" -> -1) - -/** - * Default Schema for text delimited [[org.apache.mahout.math.indexeddataset.IndexedDataset]] file output with - * one row per line. - * The default form: - * (rowID<tab>columnID1:score1<space>columnID2:score2...) - */ -object DefaultIndexedDatasetWriteSchema extends Schema( - "rowKeyDelim" -> "\t", - "columnIdStrengthDelim" -> ":", - "elementDelim" -> " ", - "omitScore" -> false) - -/** - * Default Schema for typical text delimited [[org.apache.mahout.math.indexeddataset.IndexedDataset]] file - * row-wise input. This tells the reader to input text lines of the form: - * (rowID<tab>columnID1:score1,columnID2:score2,...) - */ -object DefaultIndexedDatasetReadSchema extends Schema( - "rowKeyDelim" -> "\t", - "columnIdStrengthDelim" -> ":", - "elementDelim" -> " ", - "omitScore" -> false) - -/** - * Default Schema for reading a text delimited [[org.apache.mahout.math.indexeddataset.IndexedDataset]] file where - * the score of any element is ignored. 
- * This tells the reader to input DRM lines of the form - * (rowID<tab>columnID1:score1<space>columnID2:score2...) remember the score is ignored. - * Alternatively the format can be - * (rowID<tab>columnID1<space>columnID2 ...) where presence indicates a score of 1. This is the default - * output format for [[IndexedDatasetWriteBooleanSchema]] - */ -object IndexedDatasetReadBooleanSchema extends Schema( - "rowKeyDelim" -> "\t", - "columnIdStrengthDelim" -> ":", - "elementDelim" -> " ", - "omitScore" -> true) - -/** - * Default Schema for typical text delimited [[org.apache.mahout.math.indexeddataset.IndexedDataset]] file output - * where the score of a element is omitted. This tells the writer to output - * [[org.apache.mahout.math.indexeddataset.IndexedDataset]] row of the form - * (rowID<tab>columnID1<space>columnID2...) - */ -object IndexedDatasetWriteBooleanSchema extends Schema( - "rowKeyDelim" -> "\t", - "columnIdStrengthDelim" -> ":", - "elementDelim" -> " ", - "omitScore" -> true) - http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MMul.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MMul.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MMul.scala deleted file mode 100644 index f9bda8a..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MMul.scala +++ /dev/null @@ -1,295 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.scalabindings - -import org.apache.mahout.math._ -import org.apache.mahout.math.flavor.{BackEnum, TraversingStructureEnum} -import org.apache.mahout.math.function.Functions -import RLikeOps._ -import org.apache.mahout.logging._ -import org.apache.mahout.math.backend.incore.MMulSolver - -import scala.collection.JavaConversions._ - -object MMul extends MMulSolver { - - private final implicit val log = getLog(MMul.getClass) - - override def apply(a: Matrix, b: Matrix, r: Option[Matrix]): Matrix = { - - require(a.ncol == b.nrow, "Incompatible matrix sizes in matrix multiplication.") - - val (af, bf) = (a.getFlavor, b.getFlavor) - val backs = (af.getBacking, bf.getBacking) - val sd = (af.getStructure, densityAnalysis(a), bf.getStructure, densityAnalysis(b)) - - val alg: MMulAlg = backs match { - - // Both operands are jvm memory backs. - case (BackEnum.JVMMEM, BackEnum.JVMMEM) ⇒ - - sd match { - - // Multiplication cases by a diagonal matrix. 
- case (TraversingStructureEnum.VECTORBACKED, _, TraversingStructureEnum.COLWISE, _) - if a.isInstanceOf[DiagonalMatrix] ⇒ jvmDiagCW - case (TraversingStructureEnum.VECTORBACKED, _, TraversingStructureEnum.SPARSECOLWISE, _) - if a.isInstanceOf[DiagonalMatrix] ⇒ jvmDiagCW - case (TraversingStructureEnum.VECTORBACKED, _, TraversingStructureEnum.ROWWISE, _) - if a.isInstanceOf[DiagonalMatrix] ⇒ jvmDiagRW - case (TraversingStructureEnum.VECTORBACKED, _, TraversingStructureEnum.SPARSEROWWISE, _) - if a.isInstanceOf[DiagonalMatrix] ⇒ jvmDiagRW - - case (TraversingStructureEnum.COLWISE, _, TraversingStructureEnum.VECTORBACKED, _) - if b.isInstanceOf[DiagonalMatrix] ⇒ jvmCWDiag - case (TraversingStructureEnum.SPARSECOLWISE, _, TraversingStructureEnum.VECTORBACKED, _) - if b.isInstanceOf[DiagonalMatrix] ⇒ jvmCWDiag - case (TraversingStructureEnum.ROWWISE, _, TraversingStructureEnum.VECTORBACKED, _) - if b.isInstanceOf[DiagonalMatrix] ⇒ jvmRWDiag - case (TraversingStructureEnum.SPARSEROWWISE, _, TraversingStructureEnum.VECTORBACKED, _) - if b.isInstanceOf[DiagonalMatrix] ⇒ jvmRWDiag - - // Dense-dense cases - case (TraversingStructureEnum.ROWWISE, true, TraversingStructureEnum.COLWISE, true) if a eq b.t ⇒ jvmDRWAAt - case (TraversingStructureEnum.ROWWISE, true, TraversingStructureEnum.COLWISE, true) if a.t eq b ⇒ jvmDRWAAt - case (TraversingStructureEnum.ROWWISE, true, TraversingStructureEnum.COLWISE, true) ⇒ jvmRWCW - case (TraversingStructureEnum.ROWWISE, true, TraversingStructureEnum.ROWWISE, true) ⇒ jvmRWRW - case (TraversingStructureEnum.COLWISE, true, TraversingStructureEnum.COLWISE, true) ⇒ jvmCWCW - case (TraversingStructureEnum.COLWISE, true, TraversingStructureEnum.ROWWISE, true) if a eq b.t ⇒ jvmDCWAAt - case (TraversingStructureEnum.COLWISE, true, TraversingStructureEnum.ROWWISE, true) if a.t eq b ⇒ jvmDCWAAt - case (TraversingStructureEnum.COLWISE, true, TraversingStructureEnum.ROWWISE, true) ⇒ jvmCWRW - - // Sparse row matrix x sparse row matrix (array of 
vectors) - case (TraversingStructureEnum.ROWWISE, false, TraversingStructureEnum.ROWWISE, false) ⇒ jvmSparseRWRW - case (TraversingStructureEnum.ROWWISE, false, TraversingStructureEnum.COLWISE, false) ⇒ jvmSparseRWCW - case (TraversingStructureEnum.COLWISE, false, TraversingStructureEnum.ROWWISE, false) ⇒ jvmSparseCWRW - case (TraversingStructureEnum.COLWISE, false, TraversingStructureEnum.COLWISE, false) ⇒ jvmSparseCWCW - - // Sparse matrix x sparse matrix (hashtable of vectors) - case (TraversingStructureEnum.SPARSEROWWISE, false, TraversingStructureEnum.SPARSEROWWISE, false) ⇒ - jvmSparseRowRWRW - case (TraversingStructureEnum.SPARSEROWWISE, false, TraversingStructureEnum.SPARSECOLWISE, false) ⇒ - jvmSparseRowRWCW - case (TraversingStructureEnum.SPARSECOLWISE, false, TraversingStructureEnum.SPARSEROWWISE, false) ⇒ - jvmSparseRowCWRW - case (TraversingStructureEnum.SPARSECOLWISE, false, TraversingStructureEnum.SPARSECOLWISE, false) ⇒ - jvmSparseRowCWCW - - // Sparse matrix x non-like - case (TraversingStructureEnum.SPARSEROWWISE, false, TraversingStructureEnum.ROWWISE, _) ⇒ jvmSparseRowRWRW - case (TraversingStructureEnum.SPARSEROWWISE, false, TraversingStructureEnum.COLWISE, _) ⇒ jvmSparseRowRWCW - case (TraversingStructureEnum.SPARSECOLWISE, false, TraversingStructureEnum.ROWWISE, _) ⇒ jvmSparseRowCWRW - case (TraversingStructureEnum.SPARSECOLWISE, false, TraversingStructureEnum.COLWISE, _) ⇒ jvmSparseCWCW - case (TraversingStructureEnum.ROWWISE, _, TraversingStructureEnum.SPARSEROWWISE, false) ⇒ jvmSparseRWRW - case (TraversingStructureEnum.ROWWISE, _, TraversingStructureEnum.SPARSECOLWISE, false) ⇒ jvmSparseRWCW - case (TraversingStructureEnum.COLWISE, _, TraversingStructureEnum.SPARSEROWWISE, false) ⇒ jvmSparseCWRW - case (TraversingStructureEnum.COLWISE, _, TraversingStructureEnum.SPARSECOLWISE, false) ⇒ jvmSparseRowCWCW - - // Everything else including at least one sparse LHS or RHS argument - case (TraversingStructureEnum.ROWWISE, false, 
TraversingStructureEnum.ROWWISE, _) ⇒ jvmSparseRWRW - case (TraversingStructureEnum.ROWWISE, false, TraversingStructureEnum.COLWISE, _) ⇒ jvmSparseRWCW - case (TraversingStructureEnum.COLWISE, false, TraversingStructureEnum.ROWWISE, _) ⇒ jvmSparseCWRW - case (TraversingStructureEnum.COLWISE, false, TraversingStructureEnum.COLWISE, _) ⇒ jvmSparseCWCW2flips - - // Sparse methods are only effective if the first argument is sparse, so we need to do a swap. - case (_, _, _, false) ⇒ (a, b, r) ⇒ apply(b.t, a.t, r.map {_.t}).t - - // Default jvm-jvm case. - case _ ⇒ jvmRWCW - } - } - - alg(a, b, r) - } - - type MMulAlg = MMBinaryFunc - - @inline - private def jvmRWCW(a: Matrix, b: Matrix, r: Option[Matrix] = None): Matrix = { - - require(r.forall(mxR ⇒ mxR.nrow == a.nrow && mxR.ncol == b.ncol)) - val (m, n) = (a.nrow, b.ncol) - - val mxR = r.getOrElse(if (densityAnalysis(a)) a.like(m, n) else b.like(m, n)) - - for (row ← 0 until mxR.nrow; col ← 0 until mxR.ncol) { - // this vector-vector should be sort of optimized, right? - mxR(row, col) = a(row, ::) dot b(::, col) - } - mxR - } - - - @inline - private def jvmRWRW(a: Matrix, b: Matrix, r: Option[Matrix] = None): Matrix = { - - // A bit hackish: currently, this relies a bit on the fact that like produces RW(?) - val bclone = b.like(b.ncol, b.nrow).t - for (brow ← b) bclone(brow.index(), ::) := brow - - require(bclone.getFlavor.getStructure == TraversingStructureEnum.COLWISE || bclone.getFlavor.getStructure == - TraversingStructureEnum.SPARSECOLWISE, "COL wise conversion assumption of RHS is wrong, do over this code.") - - jvmRWCW(a, bclone, r) - } - - private def jvmCWCW(a: Matrix, b: Matrix, r: Option[Matrix] = None): Matrix = { - jvmRWRW(b.t, a.t, r.map(_.t)).t - } - - private def jvmCWRW(a: Matrix, b: Matrix, r: Option[Matrix] = None): Matrix = { - // This is a primary contender with Outer Prod sum algo. - // Here, we force-reorient both matrices and run RWCW. 
- // A bit hackish: currently, this relies a bit on the fact that clone always produces RW(?) - val aclone = a.cloned - - require(aclone.getFlavor.getStructure == TraversingStructureEnum.ROWWISE || aclone.getFlavor.getStructure == - TraversingStructureEnum.SPARSEROWWISE, "Row wise conversion assumption of RHS is wrong, do over this code.") - - jvmRWRW(aclone, b, r) - } - - private def jvmSparseRWRW(a: Matrix, b: Matrix, r: Option[Matrix] = None): Matrix = { - val mxR = r.getOrElse(b.like(a.nrow, b.ncol)) - - // This is basically almost the algorithm from SparseMatrix.times - for (arow ← a; ael ← arow.nonZeroes) - mxR(arow.index(), ::).assign(b(ael.index, ::), Functions.plusMult(ael)) - - mxR - } - - private def jvmSparseRowRWRW(a: Matrix, b: Matrix, r: Option[Matrix] = None): Matrix = { - val mxR = r.getOrElse(b.like(a.nrow, b.ncol)) - for (arow ← a.iterateNonEmpty(); ael ← arow.vector.nonZeroes) - mxR(arow.index(), ::).assign(b(ael.index, ::), Functions.plusMult(ael)) - - mxR - } - - private def jvmSparseRowCWCW(a: Matrix, b: Matrix, r: Option[Matrix] = None) = - jvmSparseRowRWRW(b.t, a.t, r.map(_.t)).t - - private def jvmSparseRowCWCW2flips(a: Matrix, b: Matrix, r: Option[Matrix] = None) = - jvmSparseRowRWRW(a cloned, b cloned, r) - - private def jvmSparseRowRWCW(a: Matrix, b: Matrix, r: Option[Matrix]) = - jvmSparseRowRWRW(a, b cloned, r) - - - private def jvmSparseRowCWRW(a: Matrix, b: Matrix, r: Option[Matrix]) = - jvmSparseRowRWRW(a cloned, b, r) - - private def jvmSparseRWCW(a: Matrix, b: Matrix, r: Option[Matrix] = None) = - jvmSparseRWRW(a, b.cloned, r) - - private def jvmSparseCWRW(a: Matrix, b: Matrix, r: Option[Matrix] = None) = - jvmSparseRWRW(a cloned, b, r) - - private def jvmSparseCWCW(a: Matrix, b: Matrix, r: Option[Matrix] = None) = - jvmSparseRWRW(b.t, a.t, r.map(_.t)).t - - private def jvmSparseCWCW2flips(a: Matrix, b: Matrix, r: Option[Matrix] = None) = - jvmSparseRWRW(a cloned, b cloned, r) - - private def jvmDiagRW(diagm:Matrix, b:Matrix, 
r:Option[Matrix] = None):Matrix = { - val mxR = r.getOrElse(b.like(diagm.nrow, b.ncol)) - - for (del ← diagm.diagv.nonZeroes()) - mxR(del.index, ::).assign(b(del.index, ::), Functions.plusMult(del)) - - mxR - } - - private def jvmDiagCW(diagm: Matrix, b: Matrix, r: Option[Matrix] = None): Matrix = { - val mxR = r.getOrElse(b.like(diagm.nrow, b.ncol)) - for (bcol ← b.t) mxR(::, bcol.index()) := bcol * diagm.diagv - mxR - } - - private def jvmCWDiag(a: Matrix, diagm: Matrix, r: Option[Matrix] = None) = - jvmDiagRW(diagm, a.t, r.map {_.t}).t - - private def jvmRWDiag(a: Matrix, diagm: Matrix, r: Option[Matrix] = None) = - jvmDiagCW(diagm, a.t, r.map {_.t}).t - - - /** Dense column-wise AA' */ - private def jvmDCWAAt(a:Matrix, b:Matrix, r:Option[Matrix] = None) = { - // a.t must be equiv. to b. Cloning must rewrite to row-wise. - jvmDRWAAt(a.cloned,null,r) - } - - /** Dense Row-wise AA' */ - private def jvmDRWAAt(a:Matrix, b:Matrix, r:Option[Matrix] = None) = { - // a.t must be equiv to b. - - debug("AAt computation detected.") - - // Check dimensions if result is supplied. - require(r.forall(mxR ⇒ mxR.nrow == a.nrow && mxR.ncol == a.nrow)) - - val mxR = r.getOrElse(a.like(a.nrow, a.nrow)) - - // This is symmetric computation. Compile upper triangular first. - for (row ← 0 until mxR.nrow) { - // diagonal value - mxR(row, row) = a(row, ::).aggregate(Functions.PLUS, Functions.SQUARE) - - for ( col ← row + 1 until mxR.ncol) { - // this vector-vector should be sort of optimized, right? - val v = a(row, ::) dot a(col, ::) - - mxR(row, col) = v - mxR(col,row) = v - } - } - - mxR - } - - private def jvmOuterProdSum(a: Matrix, b: Matrix, r: Option[Matrix] = None): Matrix = { - - // This may be already laid out for outer product computation, which may be faster than reorienting - // both matrices? need to check. - val (m, n) = (a.nrow, b.ncol) - - // Prefer col-wise result iff a is dense and b is sparse. In all other cases default to row-wise. 
- val preferColWiseR = densityAnalysis(a) && !densityAnalysis(b) - - val mxR = r.getOrElse { - (densityAnalysis(a), preferColWiseR) match { - case (false, false) ⇒ b.like(m, n) - case (false, true) ⇒ b.like(n, m).t - case (true, false) ⇒ a.like(m, n) - case (true, true) ⇒ a.like(n, m).t - } - } - - // Loop outer products - if (preferColWiseR) { - // this means B is sparse and A is not, so we need to iterate over b values and update R columns with += - // one at a time. - for ((acol, brow) ← a.t.zip(b); bel ← brow.nonZeroes) mxR(::, bel.index()) += bel * acol - } else { - for ((acol, brow) ← a.t.zip(b); ael ← acol.nonZeroes()) mxR(ael.index(), ::) += ael * brow - } - - mxR - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MahoutCollections.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MahoutCollections.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MahoutCollections.scala deleted file mode 100644 index 8251b3a..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MahoutCollections.scala +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.scalabindings - -import org.apache.mahout.math.Vector - -class MahoutVectorInterfaces(v: Vector) { - /** Convert to Array[Double] */ - def toArray: Array[Double] = { - var a = new Array[Double](v.size) - for (i <- 0 until v.size){ - a(i) = v.get(i) - } - a - } - - /** Convert to Map[Int, Double] */ - def toMap: Map[Int, Double] = { - import collection.JavaConverters._ - val ms = collection.mutable.Map[Int, Double]() - for (e <- v.nonZeroes().asScala) { - ms += (e.index -> e.get) - } - ms.toMap - } - -} - -object MahoutCollections { - implicit def v2scalaish(v: Vector) = new MahoutVectorInterfaces(v) -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatlabLikeMatrixOps.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatlabLikeMatrixOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatlabLikeMatrixOps.scala deleted file mode 100644 index 13d80ea..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatlabLikeMatrixOps.scala +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.mahout.math.scalabindings - -import org.apache.mahout.math.{Vector, Matrix} -import scala.collection.JavaConversions._ -import RLikeOps._ - -class MatlabLikeMatrixOps(_m: Matrix) extends MatrixOps(_m) { - - /** - * matrix-matrix multiplication - * @param that - * @return - */ - def *(that: Matrix) = m.times(that) - - /** - * matrix-vector multiplication - * @param that - * @return - */ - def *(that: Vector) = m.times(that) - - /** - * Hadamard product - * - * @param that - * @return - */ - - private[math] def *@(that: Matrix) = cloned *= that - - private[math] def *@(that: Double) = cloned *= that - - /** - * in-place Hadamard product. 
We probably don't want to use assign - * to optimize for sparse operations, in case of Hadamard product - * it really can be done - * @param that - */ - private[math] def *@=(that: Matrix) = { - m.zip(that).foreach(t => t._1.vector *= t._2.vector) - m - } - - private[math] def *@=(that: Double) = { - m.foreach(_.vector() *= that) - m - } -} - http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatlabLikeOps.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatlabLikeOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatlabLikeOps.scala deleted file mode 100644 index 8304af7..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatlabLikeOps.scala +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.scalabindings - -import org.apache.mahout.math.{Vector, MatrixTimesOps, Matrix} - -/** - * Matlab-like operators. Declare <code>import MatlabLikeOps._</code> to enable. 
- * - * (This option is mutually exclusive to other translations such as RLikeOps). - */ -object MatlabLikeOps { - - implicit def v2vOps(v: Vector) = new MatlabLikeVectorOps(v) - - implicit def times2timesOps(m: MatrixTimesOps) = new MatlabLikeTimesOps(m) - - implicit def m2mOps(m: Matrix) = new MatlabLikeMatrixOps(m) - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatlabLikeTimesOps.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatlabLikeTimesOps.scala b/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatlabLikeTimesOps.scala deleted file mode 100644 index 9af179a..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/math/scalabindings/MatlabLikeTimesOps.scala +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.scalabindings - -import org.apache.mahout.math.{Matrix, MatrixTimesOps} - -class MatlabLikeTimesOps(m: MatrixTimesOps) { - - def :*(that: Matrix) = m.timesRight(that) - - def *:(that: Matrix) = m.timesLeft(that) - -}
