http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/flink/src/test/scala/org/apache/mahout/flinkbindings/standard/RegressionTestsSuite.scala ---------------------------------------------------------------------- diff --git a/flink/src/test/scala/org/apache/mahout/flinkbindings/standard/RegressionTestsSuite.scala b/flink/src/test/scala/org/apache/mahout/flinkbindings/standard/RegressionTestsSuite.scala deleted file mode 100644 index 8ddab41..0000000 --- a/flink/src/test/scala/org/apache/mahout/flinkbindings/standard/RegressionTestsSuite.scala +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.flinkbindings.standard - -import org.apache.mahout.flinkbindings.DistributedFlinkSuite -import org.apache.mahout.math.algorithms.RegressionTestsSuiteBase -import org.scalatest.FunSuite - -class RegressionTestsSuite extends FunSuite - with DistributedFlinkSuite with RegressionTestsSuiteBase -
http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/README.md ---------------------------------------------------------------------- diff --git a/h2o/README.md b/h2o/README.md deleted file mode 100644 index 0776354..0000000 --- a/h2o/README.md +++ /dev/null @@ -1,57 +0,0 @@ -# Introduction - -This document demonstrates the integration between Mahout (http://mahout.apache.org) and H2O (http://www.h2o.ai). The integration provides a H2O backend to the Mahout algebra DSL (similar to the Spark backend.) - -## Setup - -The integration depends on h2o-core maven artifact. This can either be fetched automatically through sonatype, or can be installed locally from source (run 'gradle install -x test' in http://github.com/0xdata/h2o-dev) - -## Test - -The integration with H2O can be used in either a local mode (single node) or a clustered mode. - -### Simple (single node/local) test - -Testing in local mode is pretty straight forward. Just run 'mvn test' as shown below. - - sh:~/mahout$ cd h2o - sh:~/mahout/h2o$ mvn test - ... - ... - All tests passed. - ... - sh:~/mahout/h2o$ - -### Distributed test - -H2O is fundamentally a peer-to-peer system. H2O nodes join together to form a cloud on which high performance distributed math can be executed. Each node joins a cloud of a given name. Multiple clouds can exist on the same network at the same time as long as their names are different. Multiple nodes can exist on the same server as well (even belonging to the same cloud.) - -The Mahout H2O integration is fit into this model by having N-1 "worker" nodes and one driver node, all belonging to the same cloud name. The default cloud name used for the integration is "mah2out". Clouds have to be spun up per task/job. - -**WARNING**: Some Linux systems have default firewall rules which might block traffic required for the following tests. In order to successfully run the tests you might need to temporarily turn off firewall rules with `sh# iptables -F` - -First bring up worker nodes: - - host-1:~/mahout$ ./bin/mahout h2o-node - ... - .. INFO: Cloud of size 1 formed [/W.X.Y.Z:54321] - -Similarly, - - host-2:~/mahout$ ./bin/mahout h2o-node - ... - .. INFO: Cloud of size 2 formed [/A.B.C.D:54322] - -... and so on. For the purpose of testing multiple (even all) instances can be run on the same system too. - -The nodes discover each other over a multicast channel and establish consensus with Paxos. Next, start the driver just like running in local mode. - - host-N:~/mahout/h2o$ mvn test - ... - .. INFO: Cloud of size 3 formed [/E.F.G.H:54323] - ... - All tests passed. - ... - host-N:~/mahout/h2o$ - -The workers have to be restarted when when the driver node terminates (automating this is a future task.) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/pom.xml ---------------------------------------------------------------------- diff --git a/h2o/pom.xml b/h2o/pom.xml deleted file mode 100644 index 22aa1a3..0000000 --- a/h2o/pom.xml +++ /dev/null @@ -1,243 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> - -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> - -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> - <modelVersion>4.0.0</modelVersion> - - <parent> - <groupId>org.apache.mahout</groupId> - <artifactId>mahout</artifactId> - <version>0.13.1-SNAPSHOT</version> - <relativePath>../pom.xml</relativePath> - </parent> - - <artifactId>mahout-h2o_${scala.compat.version}</artifactId> - <name>Mahout H2O backend</name> - <description> - H2O Backend for Mahout DSL - </description> - - <packaging>jar</packaging> - - - - <build> - - <plugins> - <!-- copy jars to top directory, which is MAHOUT_HOME --> - <plugin> - <artifactId>maven-antrun-plugin</artifactId> - <version>1.4</version> - <executions> - <execution> - <id>copy</id> - <phase>package</phase> - <configuration> - <tasks> - <copy file="target/mahout-h2o_${scala.compat.version}-${version}.jar" tofile="../mahout-h2o_${scala.compat.version}-${version}.jar" /> - </tasks> - </configuration> - <goals> - <goal>run</goal> - </goals> - </execution> - </executions> - </plugin> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-assembly-plugin</artifactId> - <executions> - <execution> - <id>dependency-reduced</id> - <phase>package</phase> - <goals> - <goal>single</goal> - </goals> - <configuration> - <descriptors> - <descriptor>src/main/assembly/dependency-reduced.xml</descriptor> - </descriptors> - </configuration> - </execution> - </executions> - </plugin> - - <plugin> - <artifactId>maven-javadoc-plugin</artifactId> - </plugin> - - <plugin> - <artifactId>maven-source-plugin</artifactId> - </plugin> - - <plugin> - <groupId>net.alchim31.maven</groupId> - <artifactId>scala-maven-plugin</artifactId> - <executions> - <execution> - <id>add-scala-sources</id> - <phase>initialize</phase> - <goals> - <goal>add-source</goal> - </goals> - </execution> - <execution> - <id>scala-compile</id> - <phase>process-resources</phase> - <goals> - <goal>compile</goal> - </goals> - </execution> - <execution> - <id>scala-test-compile</id> - <phase>process-test-resources</phase> - <goals> - <goal>testCompile</goal> - </goals> - </execution> - </executions> - </plugin> - - <!--this is what scalatest recommends to do to enable scala tests --> - - <!-- disable surefire --> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-surefire-plugin</artifactId> - <configuration> - <skipTests>true</skipTests> - </configuration> - </plugin> - <!-- enable scalatest --> - <plugin> - <groupId>org.scalatest</groupId> - <artifactId>scalatest-maven-plugin</artifactId> - <executions> - <execution> - <id>test</id> - <goals> - <goal>test</goal> - </goals> - </execution> - </executions> - </plugin> - <!-- remove jars from top directory on clean --> - <plugin> - <artifactId>maven-clean-plugin</artifactId> - <version>3.0.0</version> - <configuration> - <filesets> - <fileset> - <directory>../</directory> - <includes> - <include>mahout-h2o*.jar</include> - </includes> - <followSymlinks>false</followSymlinks> - </fileset> - </filesets> - </configuration> - </plugin> - </plugins> - </build> - - <dependencies> - <dependency> - <groupId>org.scala-lang</groupId> - <artifactId>scala-library</artifactId> - <version>${scala.version}</version> - </dependency> - - <dependency> - <groupId>org.apache.mahout</groupId> - <artifactId>mahout-math-scala_${scala.compat.version}</artifactId> - <version>${project.version}</version> - </dependency> - - <!-- for MatrixWritable and VectorWritable --> - <dependency> - <groupId>org.apache.mahout</groupId> - <artifactId>mahout-hdfs</artifactId> - <version>${project.version}</version> - </dependency> - - <dependency> - <groupId>org.apache.mahout</groupId> - <artifactId>mahout-math-scala_${scala.compat.version}</artifactId> - <classifier>tests</classifier> - <scope>test</scope> - </dependency> - - <dependency> - <groupId>org.apache.mahout</groupId> - <artifactId>mahout-math</artifactId> - <version>${project.version}</version> - </dependency> - - <!-- 3rd-party --> - - <!-- H2O --> - - <dependency> - <groupId>ai.h2o</groupId> - <artifactId>h2o-core</artifactId> - <version>${h2o.version}</version> - <exclusions> - <exclusion> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-common</artifactId> - </exclusion> - </exclusions> - </dependency> - - <!-- scala stuff --> - <dependency> - <groupId>org.scalatest</groupId> - <artifactId>scalatest_${scala.compat.version}</artifactId> - </dependency> - - </dependencies> - - <profiles> - <profile> - <id>mahout-release</id> - <build> - <plugins> - <plugin> - <groupId>net.alchim31.maven</groupId> - <artifactId>scala-maven-plugin</artifactId> - <executions> - <execution> - <id>generate-scaladoc</id> - <goals> - <goal>doc</goal> - </goals> - </execution> - <execution> - <id>attach-scaladoc-jar</id> - <goals> - <goal>doc-jar</goal> - </goals> - </execution> - </executions> - </plugin> - </plugins> - </build> - </profile> - </profiles> -</project> http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/assembly/dependency-reduced.xml ---------------------------------------------------------------------- diff --git a/h2o/src/main/assembly/dependency-reduced.xml b/h2o/src/main/assembly/dependency-reduced.xml deleted file mode 100644 index 0636f1d..0000000 --- a/h2o/src/main/assembly/dependency-reduced.xml +++ /dev/null @@ -1,44 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> -<assembly - xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0" - xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 - http://maven.apache.org/xsd/assembly-1.1.0.xsd"> - <id>dependency-reduced</id> - <formats> - <format>jar</format> - </formats> - <includeBaseDirectory>false</includeBaseDirectory> - <dependencySets> - <dependencySet> - <unpack>true</unpack> - <unpackOptions> - <!-- MAHOUT-1126 --> - <excludes> - <exclude>META-INF/LICENSE</exclude> - </excludes> - </unpackOptions> - <scope>runtime</scope> - <outputDirectory>/</outputDirectory> - <useTransitiveFiltering>true</useTransitiveFiltering> - <includes> - <include>ai.h2o:h2o-core</include> - <include>org.scala-lang:scala-library</include> - </includes> - </dependencySet> - </dependencySets> -</assembly> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/java/org/apache/mahout/h2obindings/H2OBlockMatrix.java ---------------------------------------------------------------------- diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/H2OBlockMatrix.java b/h2o/src/main/java/org/apache/mahout/h2obindings/H2OBlockMatrix.java deleted file mode 100644 index 378f7b6..0000000 --- a/h2o/src/main/java/org/apache/mahout/h2obindings/H2OBlockMatrix.java +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings; - -import org.apache.mahout.math.Matrix; -import org.apache.mahout.math.Vector; -import org.apache.mahout.math.AbstractMatrix; -import org.apache.mahout.math.DenseMatrix; -import org.apache.mahout.math.SparseMatrix; -import org.apache.mahout.math.flavor.MatrixFlavor; - -import water.fvec.Chunk; - -/** - * A Matrix implementation to represent a vertical Block of DRM. - * - * Creation of the matrix is an O(1) operation with negligible - * overhead, and will remain so as long as the matrix is only - * read from (no modifications). - * - * On the first modification, create a copy on write Matrix and - * all further operations happen on this cow matrix. - * - * The benefit is, mapBlock() closures which never modify the - * input matrix save on the copy overhead. - */ -public class H2OBlockMatrix extends AbstractMatrix { - /** Backing chunks which store the original matrix data */ - private Chunk chks[]; - /** Copy on write matrix created on demand when original matrix is modified */ - private Matrix cow; - - /** Class constructor. */ - public H2OBlockMatrix(Chunk chks[]) { - super(chks[0].len(), chks.length); - this.chks = chks; - } - - /** - * Internal method to create the copy on write matrix. - * - * Once created, all further operations are performed on the CoW matrix - */ - private void cow() { - if (cow != null) { - return; - } - - if (chks[0].isSparse()) { - cow = new SparseMatrix(chks[0].len(), chks.length); - } else { - cow = new DenseMatrix(chks[0].len(), chks.length); - } - - for (int c = 0; c < chks.length; c++) { - for (int r = 0; r < chks[0].len(); r++) { - cow.setQuick(r, c, chks[c].atd(r)); - } - } - } - - @Override - public void setQuick(int row, int col, double val) { - cow(); - cow.setQuick(row, col, val); - } - - @Override - public Matrix like(int nrow, int ncol) { - if (chks[0].isSparse()) { - return new SparseMatrix(nrow, ncol); - } else { - return new DenseMatrix(nrow, ncol); - } - } - - @Override - public Matrix like() { - if (chks[0].isSparse()) { - return new SparseMatrix(rowSize(), columnSize()); - } else { - return new DenseMatrix(rowSize(), columnSize()); - } - } - - @Override - public double getQuick(int row, int col) { - if (cow != null) { - return cow.getQuick(row, col); - } else { - return chks[col].atd(row); - } - } - - @Override - public Matrix assignRow(int row, Vector v) { - cow(); - cow.assignRow(row, v); - return cow; - } - - @Override - public Matrix assignColumn(int col, Vector v) { - cow(); - cow.assignColumn(col, v); - return cow; - } - - @Override - public MatrixFlavor getFlavor() { - if (cow != null) { - return cow.getFlavor(); - } else if (chks[0].isSparse()) { - return MatrixFlavor.SPARSELIKE; - } else { - return MatrixFlavor.DENSELIKE; - } - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/java/org/apache/mahout/h2obindings/H2OContext.java ---------------------------------------------------------------------- diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/H2OContext.java b/h2o/src/main/java/org/apache/mahout/h2obindings/H2OContext.java deleted file mode 100644 index 96a2f8f..0000000 --- a/h2o/src/main/java/org/apache/mahout/h2obindings/H2OContext.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings; - -import water.H2O; - -/** - * Context to an H2O Cloud. - */ -public class H2OContext { - /** - * Class constructor. - * - * @param masterURL The cloud name (name of cluster) to which all the H2O - * worker nodes "join into". This is not a hostname or IP - * address of a server, but a string which all cluster - * members agree on. - */ - public H2OContext(String masterURL) { - H2O.main(new String[]{"-md5skip", "-name", masterURL}); - H2O.joinOthers(); - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHdfs.java ---------------------------------------------------------------------- diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHdfs.java b/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHdfs.java deleted file mode 100644 index c0f1ee7..0000000 --- a/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHdfs.java +++ /dev/null @@ -1,245 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.SequenceFile; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.util.ReflectionUtils; -import org.apache.mahout.h2obindings.drm.H2ODrm; -import org.apache.mahout.math.DenseVector; -import org.apache.mahout.math.SequentialAccessSparseVector; -import org.apache.mahout.math.Vector; -import org.apache.mahout.math.VectorWritable; -import water.Futures; -import water.fvec.Frame; -import water.fvec.Vec; -import water.parser.ValueString; -import water.util.FrameUtils; - -import java.io.File; -import java.io.IOException; -import java.net.URI; - -/** - * SequenceFile I/O class (on HDFS) - */ -public class H2OHdfs { - /** - * Predicate to check if a given filename is a SequenceFile. - * - * Inspect the first three bytes to determine the format of the file. - * - * @param filename Name of the file to check. - * @return True if file is of SequenceFile format. - */ - public static boolean isSeqfile(String filename) { - try { - Configuration conf = new Configuration(); - Path path = new Path(filename); - FileSystem fs = FileSystem.get(URI.create(filename), conf); - FSDataInputStream fin = fs.open(path); - byte seq[] = new byte[3]; - - fin.read(seq); - fin.close(); - - return seq[0] == 'S' && seq[1] == 'E' && seq[2] == 'Q'; - } catch (IOException e) { - return false; - } - } - - /** - * Create DRM from SequenceFile. - * - * Create a Mahout DRM backed on H2O from the specified SequenceFile. - * - * @param filename Name of the sequence file. - * @param parMin Minimum number of data partitions in the DRM. - * @return DRM object created. - */ - public static H2ODrm drmFromFile(String filename, int parMin) { - try { - if (isSeqfile(filename)) { - return drmFromSeqfile(filename, parMin); - } else { - return new H2ODrm(FrameUtils.parseFrame(null,new File(filename))); - } - } catch (IOException e) { - return null; - } - } - - /** - * Internal method called from <code>drmFromFile</code> if format verified. - */ - public static H2ODrm drmFromSeqfile(String filename, int parMin) { - long rows = 0; - int cols = 0; - Frame frame = null; - Vec labels = null; - - SequenceFile.Reader reader = null; - try { - Configuration conf = new Configuration(); - Path path = new Path(filename); - FileSystem fs = FileSystem.get(URI.create(filename), conf); - Vec.Writer writers[]; - Vec.Writer labelwriter = null; - boolean isIntKey = false, isLongKey = false, isStringKey = false; - - reader = new SequenceFile.Reader(fs, path, conf); - - if (reader.getValueClass() != VectorWritable.class) { - System.out.println("ValueClass in file " + filename + - "must be VectorWritable, but found " + - reader.getValueClassName()); - return null; - } - - Writable key = (Writable) - ReflectionUtils.newInstance(reader.getKeyClass(), conf); - VectorWritable value = (VectorWritable) - ReflectionUtils.newInstance(reader.getValueClass(), conf); - - long start = reader.getPosition(); - - if (reader.getKeyClass() == Text.class) { - isStringKey = true; - } else if (reader.getKeyClass() == LongWritable.class) { - isLongKey = true; - } else { - isIntKey = true; - } - - while (reader.next(key, value)) { - if (cols == 0) { - Vector v = value.get(); - cols = Math.max(v.size(), cols); - } - if (isLongKey) { - rows = Math.max(((LongWritable)(key)).get()+1, rows); - } - if (isIntKey) { - rows = Math.max(((IntWritable)(key)).get()+1, rows); - } - if (isStringKey) { - rows++; - } - } - reader.seek(start); - - frame = H2OHelper.emptyFrame(rows, cols, parMin, -1); - writers = new Vec.Writer[cols]; - for (int i = 0; i < writers.length; i++) { - writers[i] = frame.vecs()[i].open(); - } - - if (reader.getKeyClass() == Text.class) { - labels = H2OHelper.makeEmptyStrVec(frame.anyVec()); - labelwriter = labels.open(); - } - - long r = 0; - while (reader.next(key, value)) { - Vector v = value.get(); - if (isLongKey) { - r = ((LongWritable)(key)).get(); - } - if (isIntKey) { - r = ((IntWritable)(key)).get(); - } - for (int c = 0; c < v.size(); c++) { - writers[c].set(r, v.getQuick(c)); - } - if (labels != null) { - labelwriter.set(r, (key).toString()); - } - if (isStringKey) { - r++; - } - } - - Futures fus = new Futures(); - for (Vec.Writer w : writers) { - w.close(fus); - } - if (labelwriter != null) { - labelwriter.close(fus); - } - fus.blockForPending(); - } catch (java.io.IOException e) { - return null; - } finally { - IOUtils.closeStream(reader); - } - return new H2ODrm(frame, labels); - } - - /** - * Create SequenceFile on HDFS from DRM object. - * - * @param filename Filename to create and store DRM data in. - * @param drm DRM object storing Matrix data in memory. - */ - public static void drmToFile(String filename, H2ODrm drm) throws java.io.IOException { - Frame frame = drm.frame; - Vec labels = drm.keys; - Configuration conf = new Configuration(); - Path path = new Path(filename); - FileSystem fs = FileSystem.get(URI.create(filename), conf); - SequenceFile.Writer writer; - boolean isSparse = H2OHelper.isSparse(frame); - ValueString vstr = new ValueString(); - - if (labels != null) { - writer = SequenceFile.createWriter(fs, conf, path, Text.class, VectorWritable.class); - } else { - writer = SequenceFile.createWriter(fs, conf, path, IntWritable.class, VectorWritable.class); - } - - for (long r = 0; r < frame.anyVec().length(); r++) { - Vector v; - if (isSparse) { - v = new SequentialAccessSparseVector(frame.numCols()); - } else { - v = new DenseVector(frame.numCols()); - } - - for (int c = 0; c < frame.numCols(); c++) { - v.setQuick(c, frame.vecs()[c].at(r)); - } - - if (labels != null) { - writer.append(new Text(labels.atStr(vstr, r).toString()), new VectorWritable(v)); - } else { - writer.append(new IntWritable((int)r), new VectorWritable(v)); - } - } - - writer.close(); - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHelper.java ---------------------------------------------------------------------- diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHelper.java b/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHelper.java deleted file mode 100644 index c9d91f9..0000000 --- a/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHelper.java +++ /dev/null @@ -1,472 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings; - -import org.apache.mahout.math.Matrix; -import org.apache.mahout.math.Vector; -import org.apache.mahout.math.DenseMatrix; -import org.apache.mahout.math.SparseMatrix; -import org.apache.mahout.math.DenseVector; - -import water.MRTask; -import water.Futures; -import water.fvec.Frame; -import water.fvec.Vec; -import water.fvec.Chunk; -import water.parser.ValueString; -import water.util.ArrayUtils; - -import java.util.Map; -import java.util.HashMap; -import java.io.Serializable; - -import org.apache.mahout.h2obindings.drm.H2ODrm; -import org.apache.mahout.h2obindings.drm.H2OBCast; - -// for makeEmptyStrVec -import water.Key; -import water.DKV; -import water.fvec.CStrChunk; - -import scala.Function1; -import scala.Function2; - -/** - * Collection of helper methods for H2O backend. - */ -public class H2OHelper { - /** - * Predicate to check if data is sparse in Frame. - * - * If the number of missing elements is 32x times the number of present - * elements, consider it as sparse. - * - * @param frame Frame storing matrix data. - * @return True if data is sparse in Frame. - */ - public static boolean isSparse(Frame frame) { - long rows = frame.numRows(); - long cols = frame.numCols(); - - /** - * MRTask to aggregate precalculated per-chunk sparse lengths - */ - class MRTaskNZ extends MRTask<MRTaskNZ> { - long sparselen; - @Override - public void map(Chunk chks[]) { - for (Chunk chk : chks) { - sparselen += chk.sparseLen(); - } - } - @Override - public void reduce(MRTaskNZ other) { - sparselen += other.sparselen; - } - } - - long sparselen = new MRTaskNZ().doAll(frame).sparselen; - - return (((rows * cols) / (sparselen + 1)) > 32); - } - - /** - * Create a Mahout Matrix from a DRM. - * - * Create either Sparse or Dense Matrix depending on number of missing - * elements in DRM. - * - * @param drm DRM object to create Matrix from. - * @return created Matrix. - */ - public static Matrix matrixFromDrm(H2ODrm drm) { - Frame frame = drm.frame; - Vec labels = drm.keys; - Matrix m; - - if (isSparse(frame)) { - m = new SparseMatrix((int)frame.numRows(), frame.numCols()); - } else { - m = new DenseMatrix((int)frame.numRows(), frame.numCols()); - } - - int c = 0; - // Fill matrix, column at a time. - for (Vec v : frame.vecs()) { - for (int r = 0; r < frame.numRows(); r++) { - double d; - if (!v.isNA(r) && ((d = v.at(r)) != 0.0)) { - m.setQuick(r, c, d); - } - } - c++; - } - - // If string keyed, set the stings as rowlabels. - if (labels != null) { - Map<String,Integer> map = new HashMap<>(); - ValueString vstr = new ValueString(); - for (long i = 0; i < labels.length(); i++) { - map.put(labels.atStr(vstr, i).toString(), (int)i); - } - m.setRowLabelBindings(map); - } - return m; - } - - /** - * Calculate Means of elements in a column, and return as a Vector. - * - * H2O precalculates means in a Vec, and a Vec corresponds to a column. - * - * @param frame Frame backing the H2O DRM. - * @return Vector of pre-calculated means. - */ - public static Vector colMeans(Frame frame) { - double means[] = new double[frame.numCols()]; - for (int i = 0; i < frame.numCols(); i++) { - means[i] = frame.vecs()[i].mean(); - } - return new DenseVector(means); - } - - /** - * Calculate Sums of elements in a column, and return as a Vector. - * - * Run an MRTask Job to add up sums. - * WARNING: Vulnerable to overflow. No way around it. - * - * @param frame Frame backing the H2O DRM. - * @return Vector of calculated sums. - */ - public static Vector colSums(Frame frame) { - /** - * MRTask to calculate sums of elements in all columns. - */ - class MRTaskSum extends MRTask<MRTaskSum> { - public double sums[]; - @Override - public void map(Chunk chks[]) { - sums = new double[chks.length]; - - for (int c = 0; c < chks.length; c++) { - for (int r = 0; r < chks[c].len(); r++) { - sums[c] += chks[c].atd(r); - } - } - } - @Override - public void reduce(MRTaskSum other) { - ArrayUtils.add(sums, other.sums); - } - } - return new DenseVector(new MRTaskSum().doAll(frame).sums); - } - - /** - * Calculate Sum of squares of all elements in the DRM. - * - * Run an MRTask Job to add up sums of squares. - * WARNING: Vulnerable to overflow. No way around it. - * - * @param frame Frame backing the H2O DRM. - * @return Sum of squares of all elements in the DRM. - */ - public static double sumSqr(Frame frame) { - /** - * MRTask to calculate sums of squares of all elements. - */ - class MRTaskSumSqr extends MRTask<MRTaskSumSqr> { - public double sumSqr; - @Override - public void map(Chunk chks[]) { - for (Chunk chk : chks) { - for (int r = 0; r < chk.len(); r++) { - sumSqr += (chk.atd(r) * chk.atd(r)); - } - } - } - @Override - public void reduce(MRTaskSumSqr other) { - sumSqr += other.sumSqr; - } - } - return new MRTaskSumSqr().doAll(frame).sumSqr; - } - - /** - * Count non-zero elements in all columns, and return as a Vector. - * - * Run an MRTask Job to count non-zero elements per column. - * - * @param frame Frame backing the H2O DRM. - * @return Vector of counted non-zero elements. - */ - public static Vector nonZeroCnt(Frame frame) { - /** - * MRTask to count all non-zero elements. - */ - class MRTaskNonZero extends MRTask<MRTaskNonZero> { - public double sums[]; - @Override - public void map(Chunk chks[]) { - sums = new double[chks.length]; - - for (int c = 0; c < chks.length; c++) { - for (int r = 0; r < chks[c].len(); r++) { - if ((long)chks[c].atd(r) != 0) { - sums[c] ++; - } - } - } - } - @Override - public void reduce(MRTaskNonZero other) { - ArrayUtils.add(sums, other.sums); - } - } - return new DenseVector(new MRTaskNonZero().doAll(frame).sums); - } - - /** Convert String->Integer map to Integer->String map */ - private static Map<Integer,String> reverseMap(Map<String, Integer> map) { - if (map == null) { - return null; - } - - Map<Integer,String> rmap = new HashMap<>(); - - for(Map.Entry<String,Integer> entry : map.entrySet()) { - rmap.put(entry.getValue(),entry.getKey()); - } - - return rmap; - } - - /** - * Calculate optimum chunk size for given parameters. - * - * Chunk size is the number of elements stored per partition per column. - * - * @param nrow Number of rows in the DRM. - * @param minHint Minimum number of partitions to create, if passed value is not -1. - * @param exactHint Exact number of partitions to create, if passed value is not -1. - * @return Calculated optimum chunk size. - */ - private static int chunkSize(long nrow, int minHint, int exactHint) { - int chunkSz; - int partsHint = Math.max(minHint, exactHint); - - if (partsHint < 1) { - /* XXX: calculate based on cloud size and # of cpu */ - partsHint = 4; - } - - chunkSz = (int)(((nrow - 1) / partsHint) + 1); - if (exactHint > 0) { - return chunkSz; - } - - if (chunkSz > 1e6) { - chunkSz = (int)1e6; - } - - if (minHint > 0) { - return chunkSz; - } - - if (chunkSz < 1e3) { - chunkSz = (int)1e3; - } - - return chunkSz; - } - - /** - * Ingest a Mahout Matrix into an H2O DRM. - * - * Frame is the backing data structure behind CheckpointedDrm. - * - * @param m Mahout Matrix to ingest data from. - * @param minHint Hint for minimum number of partitions in created DRM. - * @param exactHint Hint for exact number of partitions in created DRM. - * @return Created H2O backed DRM. - */ - public static H2ODrm drmFromMatrix(Matrix m, int minHint, int exactHint) { - // First create an empty (0-filled) frame of the required dimensions - Frame frame = emptyFrame(m.rowSize(), m.columnSize(), minHint, exactHint); - Vec labels = null; - Vec.Writer writers[] = new Vec.Writer[m.columnSize()]; - Futures closer = new Futures(); - - // "open" vectors for writing efficiently in bulk - for (int i = 0; i < writers.length; i++) { - writers[i] = frame.vecs()[i].open(); - } - - for (int r = 0; r < m.rowSize(); r++) { - for (int c = 0; c < m.columnSize(); c++) { - writers[c].set(r, m.getQuick(r, c)); - } - } - - for (int c = 0; c < m.columnSize(); c++) { - writers[c].close(closer); - } - // If string labeled matrix, create aux Vec - Map<String,Integer> map = m.getRowLabelBindings(); - if (map != null) { - // label vector must be similarly partitioned like the Frame - labels = makeEmptyStrVec(frame.anyVec()); - Vec.Writer writer = labels.open(); - Map<Integer,String> rmap = reverseMap(map); - for (int r = 0; r < m.rowSize(); r++) { - writer.set(r, rmap.get(r)); - } - - writer.close(closer); - } - - closer.blockForPending(); - - return new H2ODrm(frame, labels); - } - - /** - * Create an empty (zero-filled) H2O Frame efficiently. - * - * Create a zero filled Frame with specified cardinality. - * Do not actually fill zeroes in each cell, create pre-compressed chunks. - * Time taken per column asymptotically at O(nChunks), not O(nrow). - * - * @param nrow Number of rows in the Frame. - * @param ncol Number of columns in the Frame. - * @param minHint Hint for minimum number of chunks per column in created Frame. - * @param exactHint Hint for exact number of chunks per column in created Frame. - * @return Created Frame. - */ - public static Frame emptyFrame(long nrow, int ncol, int minHint, int exactHint) { - Vec.VectorGroup vg = new Vec.VectorGroup(); - - return emptyFrame(nrow, ncol, minHint, exactHint, vg); - } - - /** - * Create an empty (zero-filled) H2O Frame efficiently. - * - * Create a zero filled Frame with specified cardinality. - * Do not actually fill zeroes in each cell, create pre-compressed chunks. - * Time taken per column asymptotically at O(nChunks), not O(nrow). - * - * @param nrow Number of rows in the Frame. - * @param ncol Number of columns in the Frame. - * @param minHint Hint for minimum number of chunks per column in created Frame. - * @param exactHint Hint for exact number of chunks per column in created Frame. - * @param vg Shared VectorGroup so that all columns are similarly partitioned. - * @return Created Frame. - */ - public static Frame emptyFrame(long nrow, int ncol, int minHint, int exactHint, Vec.VectorGroup vg) { - int chunkSz = chunkSize(nrow, minHint, exactHint); - int nchunks = (int)((nrow - 1) / chunkSz) + 1; // Final number of Chunks per Vec - long espc[] = new long[nchunks + 1]; - - for (int i = 0; i < nchunks; i++) { - espc[i] = i * chunkSz; - } - espc[nchunks] = nrow; - // Create a vector template for new vectors - Vec vtemplate = new Vec(vg.addVec(), espc); - // Make ncol-numeric vectors - Vec[] vecs = vtemplate.makeCons(ncol, 0, null, null); - - return new Frame(vecs); - } - - - /** - * The following two methods: vecChunkLen and makeEmptyStrVec - * are h2o-0.1.25 specific. - */ - public static Vec makeEmptyStrVec(final Vec template) { - final int nChunks = template.nChunks(); - Key<Vec> key = template.group().addVec(); - final Vec emptystr = new Vec(key, template._espc, null, Vec.T_NUM); - - new MRTask() { - @Override protected void setupLocal() { - for (int i = 0; i < nChunks; i++) { - Key k = emptystr.chunkKey(i); - int chklen = vecChunkLen(template, i); - int stridx[] = new int[chklen]; - byte b[] = new byte[1]; b[0] = 0; - for (int j = 0; j < chklen; j++) stridx[j] = -1; - if (k.home()) DKV.put(k, new CStrChunk(1, b, chklen, stridx), _fs); - } - if (emptystr._key.home()) DKV.put(emptystr._key, emptystr, _fs); - } - }.doAllNodes(); - return emptystr; - } - - public static int vecChunkLen(Vec template, int chunk) { - return (int) (template._espc[chunk + 1] - template._espc[chunk]); - } - - /** - * Create an empty (zero-filled) H2O DRM. - * - * Create a zero filled DRM with specified cardinality. - * Use the efficient emptyFrame() method internally. - * - * @param nrow Number of rows in the Frame. - * @param ncol Number of columns in the Frame. - * @param minHint Hint for minimum number of chunks per column in created Frame. - * @param exactHint Hint for exact number of chunks per column in created Frame. - * @return Created DRM. - */ - public static H2ODrm emptyDrm(long nrow, int ncol, int minHint, int exactHint) { - return new H2ODrm(emptyFrame(nrow, ncol, minHint, exactHint)); - } - - public static Matrix allreduceBlock(H2ODrm drmA, Object bmfn, Object rfn) { - class MRTaskMR extends MRTask<MRTaskMR> { - H2OBCast<Matrix> bmf_out; - Serializable bmf; - Serializable rf; - - public MRTaskMR(Object _bmf, Object _rf) { - bmf = (Serializable) _bmf; - rf = (Serializable) _rf; - } - - @Override - public void map(Chunk chks[]) { - Function1 f = (Function1) bmf; - bmf_out = new H2OBCast((Matrix)f.apply(new scala.Tuple2(null, new H2OBlockMatrix(chks)))); - } - - @Override - public void reduce(MRTaskMR that) { - Function2 f = (Function2) rf; - bmf_out = new H2OBCast((Matrix)f.apply(this.bmf_out.value(), that.bmf_out.value())); - } - } - - return new MRTaskMR(bmfn, rfn).doAll(drmA.frame).bmf_out.value(); - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/java/org/apache/mahout/h2obindings/drm/H2OBCast.java ---------------------------------------------------------------------- diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/drm/H2OBCast.java b/h2o/src/main/java/org/apache/mahout/h2obindings/drm/H2OBCast.java deleted file mode 100644 index ebcc626..0000000 --- a/h2o/src/main/java/org/apache/mahout/h2obindings/drm/H2OBCast.java +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.drm; - -import org.apache.hadoop.io.Writable; -import org.apache.mahout.math.Matrix; -import org.apache.mahout.math.MatrixWritable; -import org.apache.mahout.math.Vector; -import org.apache.mahout.math.VectorWritable; -import org.apache.mahout.math.drm.BCast; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.ObjectInputStream; -import java.io.ObjectOutputStream; -import java.io.Serializable; - -/** - * Broadcast class wrapper around Matrix and Vector. - * - * Use MatrixWritable and VectorWritable internally. - * Even though the class is generically typed, we do runtime - * enforcement to assert the type is either Matrix or Vector. - * - * H2OBCast object is created around a Matrix or Vector. Matrix or Vector - * objects cannot be freely referred in closures. Instead create and refer the - * corresponding H2OBCast object. The original Matrix or Vector can be - * obtained by calling the ->value() method on the H2OBCast object within a - * closure. - */ -public class H2OBCast<T> implements BCast<T>, Serializable { - private transient T obj; - private byte buf[]; - private boolean isMatrix; - - /** - * Class constructor. - */ - public H2OBCast(T o) { - obj = o; - if (o instanceof Matrix) { - buf = serialize(new MatrixWritable((Matrix)o)); - isMatrix = true; - } else if (o instanceof Vector) { - buf = serialize(new VectorWritable((Vector)o)); - isMatrix = false; - } else { - throw new IllegalArgumentException("Only Matrix or Vector supported for now"); - } - } - - /** - * Get the serialized object. - */ - public T value() { - if (obj == null) { - obj = deserialize(buf); - } - return obj; - } - - /** - * Internal method to serialize the object. - * - * @param w Either MatrixWritable or VectorWritable corresponding to - * either Matrix or Vector as the class is typed. - * @return serialized sequence of bytes. - */ - private byte[] serialize(Writable w) { - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - try { - ObjectOutputStream oos = new ObjectOutputStream(bos); - w.write(oos); - oos.close(); - } catch (IOException e) { - return null; - } - return bos.toByteArray(); - } - - /** - * Internal method to deserialize a sequence of bytes. - * - * @param buf Sequence of bytes previously serialized by serialize() method. - * @return The original Matrix or Vector object. - */ - private T deserialize(byte buf[]) { - T ret = null; - try (ByteArrayInputStream bis = new ByteArrayInputStream(buf)){ - ObjectInputStream ois = new ObjectInputStream(bis); - if (isMatrix) { - MatrixWritable w = new MatrixWritable(); - w.readFields(ois); - ret = (T) w.get(); - } else { - VectorWritable w = new VectorWritable(); - w.readFields(ois); - ret = (T) w.get(); - } - } catch (IOException e) { - e.printStackTrace(); - } - return ret; - } - - /** - * Stop broadcasting when called on driver side. Release any network resources. - * - */ - @Override - public void close() throws IOException { - - // TODO: review this. It looks like it is not really a broadcast mechanism but rather just a - // serialization wrapper. In which case it doesn't hold any network resources. - - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/java/org/apache/mahout/h2obindings/drm/H2ODrm.java ---------------------------------------------------------------------- diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/drm/H2ODrm.java b/h2o/src/main/java/org/apache/mahout/h2obindings/drm/H2ODrm.java deleted file mode 100644 index 7288dff..0000000 --- a/h2o/src/main/java/org/apache/mahout/h2obindings/drm/H2ODrm.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.drm; - -import water.fvec.Frame; -import water.fvec.Vec; - -/** - * Class which represents a Mahout DRM in H2O. - */ -public class H2ODrm { - /** frame stores all the numerical data of a DRM. */ - public Frame frame; - /** keys stores the row key bindings (String or Long) */ - public Vec keys; - - /** - * Class constructor. Null key represents Int keyed DRM. - */ - public H2ODrm(Frame m) { - frame = m; - keys = null; - } - - /** - * Class constructor. Both Numerical and row key bindings specified. - */ - public H2ODrm(Frame m, Vec k) { - frame = m; - keys = k; - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/java/org/apache/mahout/h2obindings/ops/ABt.java ---------------------------------------------------------------------- diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/ABt.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/ABt.java deleted file mode 100644 index 89085de..0000000 --- a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/ABt.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.ops; - -import org.apache.mahout.h2obindings.drm.H2ODrm; - -import water.MRTask; -import water.fvec.Frame; -import water.fvec.Vec; -import water.fvec.Chunk; -import water.fvec.NewChunk; - -/** - * Calculate AB' - */ -public class ABt { - /** - * Calculate AB' on two DRMs to create a new DRM holding the result. - * - * @param drmA DRM representing matrix A - * @param drmB DRM representing matrix B - * @return new DRM containing AB' - */ - public static H2ODrm exec(H2ODrm drmA, H2ODrm drmB) { - Frame A = drmA.frame; - Vec keys = drmA.keys; - final Frame B = drmB.frame; - int ABt_cols = (int)B.numRows(); - - // ABt is written into ncs[] with an MRTask on A, and therefore will - // be similarly partitioned as A. - // - // chks.length == A.numCols() (== B.numCols()) - // ncs.length == ABt_cols (B.numRows()) - Frame ABt = new MRTask() { - public void map(Chunk chks[], NewChunk ncs[]) { - int chunkSize = chks[0].len(); - Vec B_vecs[] = B.vecs(); - - for (int c = 0; c < ncs.length; c++) { - for (int r = 0; r < chunkSize; r++) { - double v = 0; - for (int i = 0; i < chks.length; i++) { - v += (chks[i].atd(r) * B_vecs[i].at(c)); - } - ncs[c].addNum(v); - } - } - } - }.doAll(ABt_cols, A).outputFrame(null, null); - - // Carry forward labels of A blindly into ABt - return new H2ODrm(ABt, keys); - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AewB.java ---------------------------------------------------------------------- diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AewB.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AewB.java deleted file mode 100644 index 3708af6..0000000 --- a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AewB.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.ops; - -import org.apache.mahout.h2obindings.drm.H2ODrm; - -import water.MRTask; -import water.fvec.Frame; -import water.fvec.Vec; -import water.fvec.Chunk; -import water.fvec.NewChunk; - -/** - * Element-wise DRM-DRM operations - */ -public class AewB { - /** - * Perform element-wise operation on two DRMs to create a new DRM. - * - * @param drmA DRM representing matrix A. - * @param drmB DRM representing matrix B. - * @param op Element-wise operator encoded as a String. - * @return new DRM containing A (element-wise) B. - */ - public static H2ODrm exec(H2ODrm drmA, H2ODrm drmB, final String op) { - final Frame A = drmA.frame; - final Frame B = drmB.frame; - Vec keys = drmA.keys; - int AewB_cols = A.numCols(); - - // AewB is written into ncs[] with an MRTask on A, and therefore will - // be similarly partitioned as A. - // - // B may or may not be similarly partitioned as A, but must have the - // same dimensions of A. - Frame AewB = new MRTask() { - private double opfn(String op, double a, double b) { - if (a == 0.0 && b == 0.0) { - return 0.0; - } - if (op.equals("+")) { - return a + b; - } else if (op.equals("-")) { - return a - b; - } else if (op.equals("*")) { - return a * b; - } else if (op.equals("/")) { - return a / b; - } - return 0.0; - } - @Override - public void map(Chunk chks[], NewChunk ncs[]) { - int chunkSize = chks[0].len(); - Vec B_vecs[] = B.vecs(); - long start = chks[0].start(); - - for (int c = 0; c < chks.length; c++) { - for (int r = 0; r < chunkSize; r++) { - ncs[c].addNum(opfn(op, chks[c].atd(r), B_vecs[c].at(start + r))); - } - } - } - }.doAll(AewB_cols, A).outputFrame(null, null); - - // Carry forward labels of A blindly into ABt - return new H2ODrm(AewB, keys); - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AewScalar.java ---------------------------------------------------------------------- diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AewScalar.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AewScalar.java deleted file mode 100644 index 323296a..0000000 --- a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AewScalar.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.ops; - -import org.apache.mahout.h2obindings.drm.H2ODrm; - -import water.MRTask; -import water.fvec.Frame; -import water.fvec.Vec; -import water.fvec.Chunk; -import water.fvec.NewChunk; - -/** - * Element-wise DRM-Scalar operations - */ -public class AewScalar { - /** - * Perform element-wise operation on a DRM with a Scalar to create a new DRM. - * - * @param drmA DRM representing matrix A. - * @param s Scalar value represented as a double. - * @param op Element-wise operator encoded as a String. - * @return new DRM containing A (element-wise) B. - */ - public static H2ODrm exec(H2ODrm drmA, final double s, final String op) { - Frame A = drmA.frame; - Vec keys = drmA.keys; - int AewScalar_cols = A.numCols(); - - // AewScalar is written into ncs[] with an MRTask on A, and therefore will - // be similarly partitioned as A. - Frame AewScalar = new MRTask() { - private double opfn(String op, double a, double b) { - if (a == 0.0 && b == 0.0) { - return 0.0; - } - if (op.equals("+")) { - return a + b; - } else if (op.equals("-")) { - return a - b; - } else if (op.equals("*")) { - return a * b; - } else if (op.equals("/")) { - return a / b; - } - return 0.0; - } - public void map(Chunk chks[], NewChunk ncs[]) { - int chunkSize = chks[0].len(); - long start = chks[0].start(); - - for (int c = 0; c < chks.length; c++) { - for (int r = 0; r < chunkSize; r++) { - ncs[c].addNum(opfn(op, chks[c].atd(r), s)); - } - } - } - }.doAll(AewScalar_cols, A).outputFrame(null, null); - - // Carry forward labels of A blindly into ABt - return new H2ODrm(AewScalar, keys); - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AewUnary.java ---------------------------------------------------------------------- diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AewUnary.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AewUnary.java deleted file mode 100644 index 6e9cd82..0000000 --- a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AewUnary.java +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.ops; - -import org.apache.mahout.h2obindings.drm.H2ODrm; -import scala.Function1; -import water.MRTask; -import water.fvec.Chunk; -import water.fvec.Frame; -import water.fvec.NewChunk; -import water.fvec.Vec; - -import java.io.Serializable; - -/** - * MapBlock operator. - */ -public class AewUnary { - /** - * Execute a UnaryFunc on each element of a DRM. Create a new DRM - * with the new values. - * - * @param drmA DRM representing matrix A. - * @param f UnaryFunc f, that accepts and Double and returns a Double. - * @param evalZeros Whether or not to execute function on zeroes (in case of sparse DRM). - * @return new DRM constructed from mapped values of drmA through f. - */ - public static H2ODrm exec(H2ODrm drmA, Object f, final boolean evalZeros) { - - Frame A = drmA.frame; - Vec keys = drmA.keys; - final int ncol = A.numCols(); - - /** - * MRTask to execute fn on all elements. - */ - class MRTaskAewUnary extends MRTask<MRTaskAewUnary> { - Serializable fn; - MRTaskAewUnary(Object _fn) { - fn = (Serializable)_fn; - } - public void map(Chunk chks[], NewChunk ncs[]) { - for (int c = 0; c < chks.length; c++) { - Chunk chk = chks[c]; - Function1 f = (Function1) fn; - int ChunkLen = chk.len(); - - if (!evalZeros && chk.isSparse()) { - /* sparse and skip zeros */ - int prev_offset = -1; - for (int r = chk.nextNZ(-1); r < ChunkLen; r = chk.nextNZ(prev_offset)) { - if (r - prev_offset > 1) - ncs[c].addZeros(r - prev_offset - 1); - ncs[c].addNum((double)f.apply(chk.atd(r))); - prev_offset = r; - } - if (ChunkLen - prev_offset > 1) - ncs[c].addZeros(chk._len - prev_offset - 1); - } else { - /* dense or non-skip zeros */ - for (int r = 0; r < ChunkLen; r++) { - ncs[c].addNum((double)f.apply(chk.atd(r))); - } - } - } - } - } - - Frame fmap = new MRTaskAewUnary(f).doAll(ncol, A).outputFrame(null, null); - - return new H2ODrm(fmap, keys); - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/java/org/apache/mahout/h2obindings/ops/At.java ---------------------------------------------------------------------- diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/At.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/At.java deleted file mode 100644 index 21882cc..0000000 --- a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/At.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.ops; - -import org.apache.mahout.h2obindings.H2OHelper; -import org.apache.mahout.h2obindings.drm.H2ODrm; - -import water.MRTask; -import water.fvec.Frame; -import water.fvec.Vec; -import water.fvec.Chunk; - -/** - * Calculate A' (transpose) - */ -public class At { - /** - * Perform transpose operation on a DRM to create a new DRM. - * - * @param drmA DRM representing matrix A. - * @return new DRM containing A'. - */ - public static H2ODrm exec(H2ODrm drmA) { - final Frame A = drmA.frame; - // First create a new frame of the required dimensions, A.numCols() rows - // and A.numRows() columns. - Frame At = H2OHelper.emptyFrame(A.numCols(), (int) A.numRows(), -1, -1); - - // Execute MRTask on the new frame, and fill each cell (initially 0) by - // pulling in the appropriate value from A. - new MRTask() { - public void map(Chunk chks[]) { - int chunkSize = chks[0].len(); - long start = chks[0].start(); - Vec A_vecs[] = A.vecs(); - - for (int c = 0; c < chks.length; c++) { - for (int r = 0; r < chunkSize; r++) { - chks[c].set(r, A_vecs[(int)(start + r)].at(c)); - } - } - } - }.doAll(At); - - // At is NOT similarly partitioned as A, drop labels - return new H2ODrm(At); - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AtA.java ---------------------------------------------------------------------- diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AtA.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AtA.java deleted file mode 100644 index 8493327..0000000 --- a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AtA.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.ops; - -import org.apache.mahout.h2obindings.H2OHelper; -import org.apache.mahout.h2obindings.drm.H2ODrm; - -import water.MRTask; -import water.fvec.Frame; -import water.fvec.Vec; -import water.fvec.Chunk; - -/** - * Calculate A'A - */ -public class AtA { - /** - * Perform A'A operation on a DRM to create a new DRM. - * - * @param drmA DRM representing matrix A. - * @return new DRM containing A'A. - */ - public static H2ODrm exec(H2ODrm drmA) { - final Frame A = drmA.frame; - // First create an empty Frame of the required dimensions - Frame AtA = H2OHelper.emptyFrame(A.numCols(), A.numCols(), -1, -1); - - // Execute MRTask on the new Frame, and fill each cell (initially 0) by - // computing appropriate values from A. - // - // chks.length == A.numCols() - new MRTask() { - public void map(Chunk chks[]) { - int chunkSize = chks[0].len(); - long start = chks[0].start(); - Vec A_vecs[] = A.vecs(); - long A_rows = A.numRows(); - - for (int c = 0; c < chks.length; c++) { - for (int r = 0; r < chunkSize; r++) { - double v = 0; - for (long i = 0; i < A_rows; i++) { - v += (A_vecs[(int)(start + r)].at(i) * A_vecs[c].at(i)); - } - chks[c].set(r, v); - } - } - } - }.doAll(AtA); - - // AtA is NOT similarly partitioned as A, drop labels - return new H2ODrm(AtA); - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AtB.java ---------------------------------------------------------------------- diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AtB.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AtB.java deleted file mode 100644 index 7276134..0000000 --- a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AtB.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.ops; - -import org.apache.mahout.h2obindings.H2OHelper; -import org.apache.mahout.h2obindings.drm.H2ODrm; - -import water.MRTask; -import water.fvec.Frame; -import water.fvec.Vec; -import water.fvec.Chunk; - -/** - * Calculate A'B - */ -public class AtB { - /** - * Perform A'B operation on two DRMs to create a new DRM. - * - * @param drmA DRM representing matrix A. - * @param drmB DRM representing matrix B. - * @return new DRM containing A'B. - */ - public static H2ODrm exec(H2ODrm drmA, H2ODrm drmB) { - final Frame A = drmA.frame; - final Frame B = drmB.frame; - - // First create an empty frame of the required dimensions - Frame AtB = H2OHelper.emptyFrame(A.numCols(), B.numCols(), -1, -1); - - // Execute MRTask on the new Frame, and fill each cell (initially 0) by - // computing appropriate values from A and B. - // - // chks.length == B.numCols() - new MRTask() { - public void map(Chunk chks[]) { - int chunkSize = chks[0].len(); - long start = chks[0].start(); - long A_rows = A.numRows(); - Vec A_vecs[] = A.vecs(); - Vec B_vecs[] = B.vecs(); - - for (int c = 0; c < chks.length; c++) { - for (int r = 0; r < chunkSize; r++) { - double v = 0; - for (long i = 0; i < A_rows; i++) { - v += (A_vecs[(int)(start + r)].at(i) * B_vecs[c].at(i)); - } - chks[c].set(r, v); - } - } - } - }.doAll(AtB); - - // AtB is NOT similarly partitioned as A, drop labels - return new H2ODrm(AtB); - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Atx.java ---------------------------------------------------------------------- diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Atx.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Atx.java deleted file mode 100644 index c78e871..0000000 --- a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Atx.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.ops; - -import org.apache.mahout.math.Vector; -import org.apache.mahout.math.DenseVector; -import org.apache.mahout.math.Matrix; -import org.apache.mahout.math.DenseMatrix; -import org.apache.mahout.h2obindings.H2OHelper; -import org.apache.mahout.h2obindings.drm.H2OBCast; -import org.apache.mahout.h2obindings.drm.H2ODrm; - -import water.MRTask; -import water.fvec.Frame; -import water.fvec.Chunk; -import water.util.ArrayUtils; - -/** - * Calculate A'x (where x is an in-core Vector) - */ -public class Atx { - /** - * Perform A'x operation with a DRM and an in-core Vector to create a new DRM. - * - * @param drmA DRM representing matrix A. - * @param x in-core Mahout Vector. - * @return new DRM containing A'x. - */ - public static H2ODrm exec(H2ODrm drmA, Vector x) { - Frame A = drmA.frame; - final H2OBCast<Vector> bx = new H2OBCast<>(x); - - // A'x is computed into atx[] with an MRTask on A (with - // x available as a Broadcast - // - // x.size() == A.numRows() - // atx.length == chks.length == A.numCols() - class MRTaskAtx extends MRTask<MRTaskAtx> { - double atx[]; - public void map(Chunk chks[]) { - int chunkSize = chks[0].len(); - Vector x = bx.value(); - long start = chks[0].start(); - - atx = new double[chks.length]; - for (int r = 0; r < chunkSize; r++) { - double d = x.getQuick((int)start + r); - for (int c = 0; c < chks.length; c++) { - atx[c] += (chks[c].atd(r) * d); - } - } - } - public void reduce(MRTaskAtx other) { - ArrayUtils.add(atx, other.atx); - } - } - - // Take the result in .atx[], and convert into a Frame - // using existing helper functions (creating a Matrix - // along the way for the Helper) - Vector v = new DenseVector(new MRTaskAtx().doAll(A).atx); - Matrix m = new DenseMatrix(A.numCols(), 1); - m.assignColumn(0, v); - return H2OHelper.drmFromMatrix(m, -1, -1); - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Ax.java ---------------------------------------------------------------------- diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Ax.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Ax.java deleted file mode 100644 index 5269709..0000000 --- a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Ax.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.ops; - -import org.apache.mahout.math.Vector; -import org.apache.mahout.h2obindings.drm.H2OBCast; -import org.apache.mahout.h2obindings.drm.H2ODrm; - -import water.MRTask; -import water.fvec.Frame; -import water.fvec.Vec; -import water.fvec.Chunk; -import water.fvec.NewChunk; - -/** - * Calculate Ax (where x is an in-core Vector) - */ -public class Ax { - /** - * Perform Ax operation with a DRM and an in-core Vector to create a new DRM. - * - * @param drmA DRM representing matrix A. - * @param x in-core Mahout Vector. - * @return new DRM containing Ax. - */ - public static H2ODrm exec(H2ODrm drmA, Vector x) { - Frame A = drmA.frame; - Vec keys = drmA.keys; - final H2OBCast<Vector> bx = new H2OBCast<>(x); - - // Ax is written into nc (single element, not array) with an MRTask on A, - // and therefore will be similarly partitioned as A. - // - // x.size() == A.numCols() == chks.length - Frame Ax = new MRTask() { - public void map(Chunk chks[], NewChunk nc) { - int chunkSize = chks[0].len(); - Vector x = bx.value(); - - for (int r = 0; r < chunkSize; r++) { - double v = 0; - for (int c = 0; c < chks.length; c++) { - v += (chks[c].atd(r) * x.getQuick(c)); - } - nc.addNum(v); - } - } - }.doAll(1, A).outputFrame(null, null); - - // Carry forward labels of A blindly into ABt - return new H2ODrm(Ax, keys); - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Cbind.java ---------------------------------------------------------------------- diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Cbind.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Cbind.java deleted file mode 100644 index 03e3793..0000000 --- a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Cbind.java +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.ops; - -import water.MRTask; -import water.fvec.Frame; -import water.fvec.Vec; -import water.fvec.Chunk; - -import org.apache.mahout.h2obindings.drm.H2ODrm; - -/** - * R-like cbind like operator, on two DRMs - */ -public class Cbind { - /** - * Combine the columns of two DRMs A and B to create a new DRM. - * - * @param drmA DRM representing matrix A. - * @param drmB DRM representing matrix B. - * @return new DRM containing columns of A and B adjacent. - */ - public static H2ODrm exec(H2ODrm drmA, H2ODrm drmB) { - Frame fra = drmA.frame; - Vec keysa = drmA.keys; - Frame frb = drmB.frame; - Vec keysb = drmB.keys; - - // If A and B are similarly partitioned, .. - if (fra.anyVec().group() == frb.anyVec().group()) { - // .. then, do a light weight zip() - return zip(fra, keysa, frb, keysb); - } else { - // .. else, do a heavy weight join() which involves moving data over the wire - return join(fra, keysa, frb, keysb); - } - } - - /** Light weight zip(), no data movement */ - private static H2ODrm zip(final Frame fra, final Vec keysa, final Frame frb, final Vec keysb) { - // Create a new Vec[] to hold the concatenated list of A and B's column vectors - Vec vecs[] = new Vec[fra.vecs().length + frb.vecs().length]; - int d = 0; - // fill A's column vectors - for (Vec vfra : fra.vecs()) { - vecs[d++] = vfra; - } - // and B's - for (Vec vfrb : frb.vecs()) { - vecs[d++] = vfrb; - } - // and create a new Frame with the combined list of column Vecs - Frame fr = new Frame(vecs); - /* Finally, inherit A's string labels into the result */ - return new H2ODrm(fr, keysa); - } - - /** Heavy weight join(), involves moving data */ - private static H2ODrm join(final Frame fra, final Vec keysa, final Frame frb, final Vec keysb) { - // The plan is to re-organize B to be "similarly partitioned as A", and then zip() - Vec bvecs[] = new Vec[frb.vecs().length]; - - for (int i = 0; i < bvecs.length; i++) { - // First create column Vecs which are similarly partitioned as A - bvecs[i] = fra.anyVec().makeZero(); - } - - // Next run an MRTask on the new vectors, and fill each cell (initially 0) - // by pulling in appropriate values from B (frb) - new MRTask() { - public void map(Chunk chks[]) { - int chunkSize = chks[0].len(); - long start = chks[0].start(); - Vec vecs[] = frb.vecs(); - - for (int r = 0; r < chunkSize; r++) { - for (int c = 0; c < chks.length; c++) { - // assert va.atStr(start+r) == vb.atStr(start+r) - chks[c].set(r, vecs[c].at(start + r)); - } - } - } - }.doAll(bvecs); - - // now that bvecs[] is compatible, just zip'em'up - return zip(fra, keysa, new Frame(bvecs), null); - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/java/org/apache/mahout/h2obindings/ops/CbindScalar.java ---------------------------------------------------------------------- diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/CbindScalar.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/CbindScalar.java deleted file mode 100644 index 4ff8244..0000000 --- a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/CbindScalar.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.ops; - -import org.apache.mahout.h2obindings.drm.H2ODrm; -import water.fvec.Frame; -import water.fvec.Vec; - -/** - * R-like cbind like operator, on a DRM and a new column containing - * the given scalar value. - */ -public class CbindScalar { - /** - * Combine the columns of DRM A with a new column storing - * the given scalar. - * - * @param drmA DRM representing matrix A. - * @param scalar value to be filled in new column. - * @param leftbind true if binding to the left - * @return new DRM containing columns of A and d. - */ - public static H2ODrm exec(H2ODrm drmA, double scalar, boolean leftbind) { - Frame fra = drmA.frame; - Vec newcol = fra.anyVec().makeCon(scalar); - Vec vecs[] = new Vec[fra.vecs().length + 1]; - int d = 0; - - if (leftbind) - vecs[d++] = newcol; - for (Vec vfra : fra.vecs()) - vecs[d++] = vfra; - if (!leftbind) - vecs[d++] = newcol; - - return new H2ODrm(new Frame(vecs), drmA.keys); - } -}
