Repository: mahout
Updated Branches:
  refs/heads/master b044a0779 -> a8e09cd3a


MAHOUT-1578 Optimizations in matrix serialization (ssc) closes apache/mahout#16


Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/a8e09cd3
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/a8e09cd3
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/a8e09cd3

Branch: refs/heads/master
Commit: a8e09cd3aa1c9d6fdda2eaf84f86a06a33963658
Parents: b044a07
Author: ssc <[email protected]>
Authored: Thu Jun 12 19:03:19 2014 +0200
Committer: ssc <[email protected]>
Committed: Thu Jun 12 19:03:19 2014 +0200

----------------------------------------------------------------------
 CHANGELOG                                       |  2 +
 .../org/apache/mahout/math/MatrixWritable.java  | 30 +++++++------
 .../org/apache/mahout/math/VectorWritable.java  | 47 +++++++++++++++++---
 3 files changed, 59 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mahout/blob/a8e09cd3/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index 2f604e1..7111122 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,8 @@ Mahout Change Log
 
 Release 1.0 - unreleased
 
+  MAHOUT-1578: Optimizations in matrix serialization (ssc)
+
   MAHOUT-1572: blockify() to detect (naively) the data sparsity in the loaded data (dlyubimov)
 
   MAHOUT-1571: Functional Views are not serialized as dense/sparse correctly (dlyubimov)

http://git-wip-us.apache.org/repos/asf/mahout/blob/a8e09cd3/mrlegacy/src/main/java/org/apache/mahout/math/MatrixWritable.java
----------------------------------------------------------------------
diff --git a/mrlegacy/src/main/java/org/apache/mahout/math/MatrixWritable.java b/mrlegacy/src/main/java/org/apache/mahout/math/MatrixWritable.java
index 7f0c1bc..c521f3e 100644
--- a/mrlegacy/src/main/java/org/apache/mahout/math/MatrixWritable.java
+++ b/mrlegacy/src/main/java/org/apache/mahout/math/MatrixWritable.java
@@ -113,27 +113,28 @@ public class MatrixWritable implements Writable {
     int rows = in.readInt();
     int columns = in.readInt();
 
+    byte vectorFlags = in.readByte();
+
     Matrix matrix;
+
     if (dense) {
       matrix = new DenseMatrix(rows, columns);
-    } else {
-      if (isSparseRowMatrix) {
-        matrix = new SparseRowMatrix(rows, columns, sequential);
-      } else {
-        matrix = new SparseMatrix(rows, columns);
+      for (int row = 0; row < rows; row++) {
+        matrix.assignRow(row, VectorWritable.readVector(in, vectorFlags, columns));
       }
-    }
-
-    if (dense || isSparseRowMatrix) {
+    } else if (isSparseRowMatrix) {
+      Vector[] rowVectors = new Vector[rows];
       for (int row = 0; row < rows; row++) {
-        matrix.assignRow(row, VectorWritable.readVector(in));
+        rowVectors[row] = VectorWritable.readVector(in, vectorFlags, columns);
       }
+      matrix = new SparseRowMatrix(rows, columns, rowVectors, true, !sequential);
     } else {
+      matrix = new SparseMatrix(rows, columns);
       int numNonZeroRows = in.readInt();
       int rowsRead = 0;
       while (rowsRead++ < numNonZeroRows) {
         int rowIndex = in.readInt();
-        matrix.assignRow(rowIndex, VectorWritable.readVector(in));
+        matrix.assignRow(rowIndex, VectorWritable.readVector(in, vectorFlags, columns));
       }
     }
 
@@ -172,13 +173,16 @@ public class MatrixWritable implements Writable {
     }
 
     out.writeInt(flags);
-
     out.writeInt(matrix.rowSize());
     out.writeInt(matrix.columnSize());
 
+    // We only use vectors of the same type, so we write out the type information only once!
+    byte vectorFlags = VectorWritable.flags(matrix.viewRow(0), false);
+    out.writeByte(vectorFlags);
+
     if (isDense || isSparseRowMatrix) {
       for (int i = 0; i < matrix.rowSize(); i++) {
-        VectorWritable.writeVector(out, matrix.viewRow(i), false);
+        VectorWritable.writeVectorContents(out, matrix.viewRow(i), vectorFlags);
       }
     } else {
       IntArrayList rowIndices = ((SparseMatrix) matrix).nonZeroRowIndices();
@@ -187,7 +191,7 @@ public class MatrixWritable implements Writable {
       for (int i = 0; i < numNonZeroRows; i++) {
         int rowIndex = rowIndices.getQuick(i);
         out.writeInt(rowIndex);
-        VectorWritable.writeVector(out, matrix.viewRow(rowIndex), false);
+        VectorWritable.writeVectorContents(out, matrix.viewRow(rowIndex), vectorFlags);
       }
     }
 

http://git-wip-us.apache.org/repos/asf/mahout/blob/a8e09cd3/mrlegacy/src/main/java/org/apache/mahout/math/VectorWritable.java
----------------------------------------------------------------------
diff --git a/mrlegacy/src/main/java/org/apache/mahout/math/VectorWritable.java b/mrlegacy/src/main/java/org/apache/mahout/math/VectorWritable.java
index d1ffc73..c06cff6 100644
--- a/mrlegacy/src/main/java/org/apache/mahout/math/VectorWritable.java
+++ b/mrlegacy/src/main/java/org/apache/mahout/math/VectorWritable.java
@@ -85,13 +85,18 @@ public final class VectorWritable extends Configured implements Writable {
   @Override
   public void readFields(DataInput in) throws IOException {
     int flags = in.readByte();
+    int size = Varint.readUnsignedVarInt(in);
+    readFields(in, (byte) flags, size);
+  }
+
+  private void readFields(DataInput in, byte flags, int size) throws IOException {
+
     Preconditions.checkArgument(flags >> NUM_FLAGS == 0, "Unknown flags set: %d", Integer.toString(flags, 2));
     boolean dense = (flags & FLAG_DENSE) != 0;
     boolean sequential = (flags & FLAG_SEQUENTIAL) != 0;
     boolean named = (flags & FLAG_NAMED) != 0;
     boolean laxPrecision = (flags & FLAG_LAX_PRECISION) != 0;
 
-    int size = Varint.readUnsignedVarInt(in);
     Vector v;
     if (dense) {
       double[] values = new double[size];
@@ -133,17 +138,39 @@ public final class VectorWritable extends Configured implements Writable {
     writeVector(out, vector, false);
   }
 
-  public static void writeVector(DataOutput out, Vector vector, boolean laxPrecision) throws IOException {
+  public static byte flags(Vector vector, boolean laxPrecision) {
     boolean dense = vector.isDense();
     boolean sequential = vector.isSequentialAccess();
     boolean named = vector instanceof NamedVector;
 
-    out.writeByte((dense ? FLAG_DENSE : 0)
-        | (sequential ? FLAG_SEQUENTIAL : 0)
-        | (named ? FLAG_NAMED : 0)
-        | (laxPrecision ? FLAG_LAX_PRECISION : 0));
+    return (byte) ((dense ? FLAG_DENSE : 0)
+            | (sequential ? FLAG_SEQUENTIAL : 0)
+            | (named ? FLAG_NAMED : 0)
+            | (laxPrecision ? FLAG_LAX_PRECISION : 0));
+  }
+
+  /** Write out type information and size of the vector */
+  public static void writeVectorFlagsAndSize(DataOutput out, byte flags, int size) throws IOException {
+    out.writeByte(flags);
+    Varint.writeUnsignedVarInt(size, out);
+  }
+
+  public static void writeVector(DataOutput out, Vector vector, boolean laxPrecision) throws IOException {
+
+    byte flags = flags(vector, laxPrecision);
+
+    writeVectorFlagsAndSize(out, flags, vector.size());
+    writeVectorContents(out, vector, flags);
+  }
+
+  /** Write out contents of the vector */
+  public static void writeVectorContents(DataOutput out, Vector vector, byte flags) throws IOException {
+
+    boolean dense = (flags & FLAG_DENSE) != 0;
+    boolean sequential = (flags & FLAG_SEQUENTIAL) != 0;
+    boolean named = (flags & FLAG_NAMED) != 0;
+    boolean laxPrecision = (flags & FLAG_LAX_PRECISION) != 0;
 
-    Varint.writeUnsignedVarInt(vector.size(), out);
     if (dense) {
       for (Vector.Element element : vector.all()) {
         if (laxPrecision) {
@@ -200,6 +227,12 @@ public final class VectorWritable extends Configured implements Writable {
     return v.get();
   }
 
+  public static Vector readVector(DataInput in, byte vectorFlags, int size) throws IOException {
+    VectorWritable v = new VectorWritable();
+    v.readFields(in, vectorFlags, size);
+    return v.get();
+  }
+
   public static VectorWritable merge(Iterator<VectorWritable> vectors) {
     return new VectorWritable(mergeToVector(vectors));
   }

Reply via email to