Repository: mahout
Updated Branches:
  refs/heads/master 260753fdb -> 5197ac9e8


MAHOUT-1638: H2O bindings fail at drmParallelizeWithRowLabels(...) closes apache/mahout#99 and MAHOUT-1493-h2o closes apache/mahout#72


Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/5197ac9e
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/5197ac9e
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/5197ac9e

Branch: refs/heads/master
Commit: 5197ac9e8393763dd4ed58f5f1e9f167e75d3078
Parents: 260753f
Author: Andrew Palumbo <[email protected]>
Authored: Fri Apr 3 12:43:04 2015 -0400
Committer: Andrew Palumbo <[email protected]>
Committed: Fri Apr 3 12:43:04 2015 -0400

----------------------------------------------------------------------
 CHANGELOG                                       |  2 ++
 .../org/apache/mahout/h2obindings/H2OHdfs.java  |  2 +-
 .../apache/mahout/h2obindings/H2OHelper.java    | 37 +++++++++++++++++++-
 .../mahout/h2obindings/ops/MapBlockHelper.scala |  2 +-
 .../classifier/naivebayes/NBH2OTestSuite.scala  | 26 ++++++++++++++
 .../stats/ClassifierStatsH2OTestSuite.scala     | 26 ++++++++++++++
 6 files changed, 92 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mahout/blob/5197ac9e/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index fcf2bc3..ba96f26 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,8 @@ Mahout Change Log
 
 Release 0.10.0 - unreleased
 
+  MAHOUT-1638: H2O bindings fail at drmParallelizeWithRowLabels(...) (Anand Avati via apalumbo)
+
   MAHOUT-1667: Hadoop 1 and 2 profile in POM (sslavic)
 
   MAHOUT-1564: Naive Bayes Classifier for New Text Documents (apalumbo)

http://git-wip-us.apache.org/repos/asf/mahout/blob/5197ac9e/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHdfs.java
----------------------------------------------------------------------
diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHdfs.java b/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHdfs.java
index f21ebe0..56b3745 100644
--- a/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHdfs.java
+++ b/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHdfs.java
@@ -167,7 +167,7 @@ public class H2OHdfs {
       }
 
       if (reader.getKeyClass() == Text.class) {
-        labels = frame.anyVec().makeZero();
+        labels = H2OHelper.makeEmptyStrVec(frame.anyVec());
         labelwriter = labels.open();
       }
 

http://git-wip-us.apache.org/repos/asf/mahout/blob/5197ac9e/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHelper.java
----------------------------------------------------------------------
diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHelper.java b/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHelper.java
index 2ede8cf..859e5b4 100644
--- a/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHelper.java
+++ b/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHelper.java
@@ -36,6 +36,11 @@ import java.util.HashMap;
 
 import org.apache.mahout.h2obindings.drm.H2ODrm;
 
+// for makeEmptyStrVec
+import water.Key;
+import water.DKV;
+import water.fvec.CStrChunk;
+
 /**
  * Collection of helper methods for H2O backend.
  */
@@ -323,7 +328,7 @@ public class H2OHelper {
     Map<String,Integer> map = m.getRowLabelBindings();
     if (map != null) {
       // label vector must be similarly partitioned like the Frame
-      labels = frame.anyVec().makeZero();
+      labels = makeEmptyStrVec(frame.anyVec());
       Vec.Writer writer = labels.open();
       Map<Integer,String> rmap = reverseMap(map);
       for (int r = 0; r < m.rowSize(); r++) {
@@ -389,6 +394,36 @@ public class H2OHelper {
     return new Frame(vecs);
   }
 
+
+  /**
+   * The following two methods: vecChunkLen and makeEmptyStrVec
+   * are h2o-0.1.25 specific.
+   */
+  public static Vec makeEmptyStrVec(final Vec template) {
+    final int nChunks = template.nChunks();
+    Key<Vec> key = template.group().addVec();
+    final Vec emptystr = new Vec(key, template._espc, null, Vec.T_NUM);
+
+    new MRTask() {
+      @Override protected void setupLocal() {
+        for (int i = 0; i < nChunks; i++) {
+          Key k = emptystr.chunkKey(i);
+          int chklen = vecChunkLen(template, i);
+          int stridx[] = new int[chklen];
+          byte b[] = new byte[1]; b[0] = 0;
+          for (int j = 0; j < chklen; j++) stridx[j] = -1;
+          if (k.home()) DKV.put(k, new CStrChunk(1, b, chklen, stridx), _fs);
+        }
+        if (emptystr._key.home()) DKV.put(emptystr._key, emptystr, _fs);
+      }
+    }.doAllNodes();
+    return emptystr;
+  }
+
+  public static int vecChunkLen(Vec template, int chunk) {
+    return (int) (template._espc[chunk + 1] - template._espc[chunk]);
+  }
+
   /**
    * Create an empty (zero-filled) H2O DRM.
    *

http://git-wip-us.apache.org/repos/asf/mahout/blob/5197ac9e/h2o/src/main/scala/org/apache/mahout/h2obindings/ops/MapBlockHelper.scala
----------------------------------------------------------------------
diff --git a/h2o/src/main/scala/org/apache/mahout/h2obindings/ops/MapBlockHelper.scala b/h2o/src/main/scala/org/apache/mahout/h2obindings/ops/MapBlockHelper.scala
index 0384826..f69a844 100644
--- a/h2o/src/main/scala/org/apache/mahout/h2obindings/ops/MapBlockHelper.scala
+++ b/h2o/src/main/scala/org/apache/mahout/h2obindings/ops/MapBlockHelper.scala
@@ -37,7 +37,7 @@ object MapBlockHelper {
       case `s` => {
         val arr = new Array[String](in.rowSize)
         val vstr = new ValueString
-        for (i <- 0 to in.rowSize) {
+        for (i <- 0 to (in.rowSize - 1)) {
           arr(i) = labels.atStr(vstr, i + startlong).toString
         }
         arr

http://git-wip-us.apache.org/repos/asf/mahout/blob/5197ac9e/h2o/src/test/scala/org/apache/mahout/classifier/naivebayes/NBH2OTestSuite.scala
----------------------------------------------------------------------
diff --git a/h2o/src/test/scala/org/apache/mahout/classifier/naivebayes/NBH2OTestSuite.scala b/h2o/src/test/scala/org/apache/mahout/classifier/naivebayes/NBH2OTestSuite.scala
new file mode 100644
index 0000000..8759e3e
--- /dev/null
+++ b/h2o/src/test/scala/org/apache/mahout/classifier/naivebayes/NBH2OTestSuite.scala
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mahout.classifier.naivebayes
+
+import org.apache.mahout.math._
+import org.apache.mahout.math.scalabindings.RLikeOps._
+import org.apache.mahout.math.scalabindings._
+import org.apache.mahout.h2obindings.test.DistributedH2OSuite
+import org.apache.mahout.test.MahoutSuite
+import org.scalatest.FunSuite
+
+class NBH2OTestSuite extends FunSuite with MahoutSuite with DistributedH2OSuite with NBTestBase

http://git-wip-us.apache.org/repos/asf/mahout/blob/5197ac9e/h2o/src/test/scala/org/apache/mahout/classifier/stats/ClassifierStatsH2OTestSuite.scala
----------------------------------------------------------------------
diff --git a/h2o/src/test/scala/org/apache/mahout/classifier/stats/ClassifierStatsH2OTestSuite.scala b/h2o/src/test/scala/org/apache/mahout/classifier/stats/ClassifierStatsH2OTestSuite.scala
new file mode 100644
index 0000000..909a8fa
--- /dev/null
+++ b/h2o/src/test/scala/org/apache/mahout/classifier/stats/ClassifierStatsH2OTestSuite.scala
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.classifier.stats
+
+import org.apache.mahout.h2obindings.test.DistributedH2OSuite
+import org.apache.mahout.test.MahoutSuite
+import org.scalatest.FunSuite
+
+class ClassifierStatsH2OTestSuite extends FunSuite with MahoutSuite with DistributedH2OSuite with ClassifierStatsTestBase
+
+

Reply via email to